From a46d7f5dc7241d06bf18ac79876ebd94c70eac7d Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 29 Oct 2020 14:29:31 -0700 Subject: [PATCH 1/4] Change how goodread import writes reviews - adds published date - broadcasts review imports - completes review and shelve actions as it goes - some small connector fixes fixes #247 --- bookwyrm/connectors/abstract_connector.py | 23 ++++--- bookwyrm/goodreads_import.py | 9 +-- bookwyrm/models/import_job.py | 11 +++- bookwyrm/outgoing.py | 78 +++++++++++------------ bookwyrm/views.py | 3 +- 5 files changed, 67 insertions(+), 57 deletions(-) diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 25db648cb..72dda4e9a 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -2,14 +2,16 @@ from abc import ABC, abstractmethod from dateutil import parser import pytz +from urllib3.exceptions import ProtocolError import requests +from requests import HTTPError from django.db import transaction from bookwyrm import models -class ConnectorException(Exception): +class ConnectorException(HTTPError): ''' when the connector can't do what was asked ''' @@ -155,9 +157,11 @@ class AbstractConnector(ABC): ''' for creating a new book or syncing with data ''' book = update_from_mappings(book, data, self.book_mappings) + author_text = [] for author in self.get_authors_from_data(data): book.authors.add(author) - book.author_text = ', '.join(a.display_name for a in book.authors.all()) + author_text += author.display_name + book.author_text = ', '.join(author_text) book.save() if not update_cover: @@ -287,12 +291,15 @@ def get_date(date_string): def get_data(url): ''' wrapper for request.get ''' - resp = requests.get( - url, - headers={ - 'Accept': 'application/json; charset=utf-8', - }, - ) + try: + resp = requests.get( + url, + headers={ + 'Accept': 'application/json; charset=utf-8', + }, + ) + except ProtocolError: + raise ConnectorException() if not resp.ok: resp.raise_for_status() data = resp.json() diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index 7b64baa33..73057e43b 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -42,13 +42,10 @@ def import_data(job_id): if item.book: item.save() results.append(item) + # shelves book and handles reviews + outgoing.handle_imported_book(job.user, item) else: - item.fail_reason = "Could not match book on OpenLibrary" + item.fail_reason = "Could not find a match for book" item.save() - - status = outgoing.handle_import_books(job.user, results) - if status: - job.import_status = status - job.save() finally: create_notification(job.user, 'IMPORT', related_import=job) diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 085cfea6a..bd63ea798 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -40,8 +40,7 @@ class ImportJob(models.Model): user = models.ForeignKey(User, on_delete=models.CASCADE) created_date = models.DateTimeField(default=timezone.now) task_id = models.CharField(max_length=100, null=True) - import_status = models.ForeignKey( - 'Status', null=True, on_delete=models.PROTECT) + class ImportItem(models.Model): ''' a single line of a csv being imported ''' @@ -71,6 +70,8 @@ class ImportItem(models.Model): return books_manager.get_or_create_book(search_result.key) except ConnectorException: pass + return None + def get_book_from_title_author(self): ''' search by title and author ''' @@ -84,6 +85,8 @@ class ImportItem(models.Model): return books_manager.get_or_create_book(search_result.key) except ConnectorException: pass + return None + @property def isbn(self): @@ -95,6 +98,7 @@ class ImportItem(models.Model): ''' the goodreads shelf field ''' if self.data['Exclusive Shelf']: return GOODREADS_SHELVES.get(self.data['Exclusive Shelf']) + return None @property def review(self): @@ -111,12 +115,14 @@ class ImportItem(models.Model): ''' when the book was added to this dataset ''' if self.data['Date Added']: return dateutil.parser.parse(self.data['Date Added']) + return None @property def date_read(self): ''' the date a book was completed ''' if self.data['Date Read']: return dateutil.parser.parse(self.data['Date Read']) + return None @property def reads(self): @@ -126,6 +132,7 @@ class ImportItem(models.Model): return [ReadThrough(start_date=self.date_added)] if self.date_read: return [ReadThrough( + start_date=self.date_added, finish_date=self.date_read, )] return [] diff --git a/bookwyrm/outgoing.py b/bookwyrm/outgoing.py index 4681a6705..eca3a2c8b 100644 --- a/bookwyrm/outgoing.py +++ b/bookwyrm/outgoing.py @@ -155,51 +155,49 @@ def handle_unshelve(user, book, shelf): broadcast(user, activity) -def handle_import_books(user, items): +def handle_imported_book(user, item): ''' process a goodreads csv and then post about it ''' - new_books = [] - for item in items: - if item.shelf: - desired_shelf = models.Shelf.objects.get( - identifier=item.shelf, - user=user - ) - if isinstance(item.book, models.Work): - item.book = item.book.default_edition - if not item.book: - continue - shelf_book, created = models.ShelfBook.objects.get_or_create( - book=item.book, shelf=desired_shelf, added_by=user) - if created: - new_books.append(item.book) - activity = shelf_book.to_add_activity(user) - broadcast(user, activity) + if isinstance(item.book, models.Work): + item.book = item.book.default_edition + if not item.book: + return - if item.rating or item.review: - review_title = 'Review of {!r} on Goodreads'.format( - item.book.title, - ) if item.review else '' + if item.shelf: + desired_shelf = models.Shelf.objects.get( + identifier=item.shelf, + user=user + ) + # shelve the book if it hasn't been shelved already + shelf_book, created = models.ShelfBook.objects.get_or_create( + book=item.book, shelf=desired_shelf, added_by=user) + if created: + broadcast(user, shelf_book.to_add_activity(user)) - models.Review.objects.create( - user=user, - book=item.book, - name=review_title, - content=item.review, - rating=item.rating, - ) - for read in item.reads: - read.book = item.book - read.user = user - read.save() + # only add new read-throughs if the item isn't already shelved + for read in item.reads: + read.book = item.book + read.user = user + read.save() - if new_books: - message = 'imported {} books'.format(len(new_books)) - status = create_generated_note(user, message, mention_books=new_books) - status.save() + if item.rating or item.review: + review_title = 'Review of {!r} on Goodreads'.format( + item.book.title, + ) if item.review else '' - broadcast(user, status.to_create_activity(user)) - return status - return None + # we don't know the publication date of the review, + # but "now" is a bad guess + published_date_guess = item.date_read or item.date_added + review = models.Review.objects.create( + user=user, + book=item.book, + name=review_title, + content=item.review, + rating=item.rating, + published_date=published_date_guess, + ) + # we don't need to send out pure activities because non-bookwyrm + # instances don't need this data + broadcast(user, review.to_create_activity(user)) def handle_delete_status(user, status): diff --git a/bookwyrm/views.py b/bookwyrm/views.py index 9bf029590..b1bb25f45 100644 --- a/bookwyrm/views.py +++ b/bookwyrm/views.py @@ -489,7 +489,8 @@ def book_page(request, book_id): ).values_list('identifier', flat=True) readthroughs = models.ReadThrough.objects.filter( - user=request.user + user=request.user, + book=book, ).order_by('start_date') From 7ce0890a41d98a15fc79ac16cbbef0aab0eed22e Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 29 Oct 2020 15:29:23 -0700 Subject: [PATCH 2/4] Stop assuming every book is Hamlet --- bookwyrm/books_manager.py | 12 ++++++------ bookwyrm/connectors/abstract_connector.py | 19 ++++++++++--------- bookwyrm/connectors/openlibrary.py | 8 ++++---- bookwyrm/connectors/self_connector.py | 13 +++++++------ bookwyrm/models/import_job.py | 8 ++++++-- bookwyrm/templates/search_results.html | 1 + bookwyrm/tests/models/test_import_model.py | 8 +++----- 7 files changed, 37 insertions(+), 32 deletions(-) diff --git a/bookwyrm/books_manager.py b/bookwyrm/books_manager.py index bfc543dec..37a31766c 100644 --- a/bookwyrm/books_manager.py +++ b/bookwyrm/books_manager.py @@ -64,14 +64,14 @@ def load_more_data(book_id): connector.expand_book_data(book) -def search(query): +def search(query, min_confidence=0.1): ''' find books based on arbitary keywords ''' results = [] dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year) result_index = set() for connector in get_connectors(): try: - result_set = connector.search(query) + result_set = connector.search(query, min_confidence=min_confidence) except HTTPError: continue @@ -87,16 +87,16 @@ def search(query): return results -def local_search(query): +def local_search(query, min_confidence=0.1): ''' only look at local search results ''' connector = load_connector(models.Connector.objects.get(local=True)) - return connector.search(query) + return connector.search(query, min_confidence=min_confidence) -def first_search_result(query): +def first_search_result(query, min_confidence=0.1): ''' search until you find a result that fits ''' for connector in get_connectors(): - result = connector.search(query) + result = connector.search(query, min_confidence=min_confidence) if result: return result[0] return None diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 72dda4e9a..a34eb3019 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,8 +1,8 @@ ''' functionality outline for a book data connector ''' from abc import ABC, abstractmethod +from dataclasses import dataclass from dateutil import parser import pytz -from urllib3.exceptions import ProtocolError import requests from requests import HTTPError @@ -52,7 +52,7 @@ class AbstractConnector(ABC): return True - def search(self, query): + def search(self, query, min_confidence=None): ''' free text search ''' resp = requests.get( '%s%s' % (self.search_url, query), @@ -160,7 +160,7 @@ class AbstractConnector(ABC): author_text = [] for author in self.get_authors_from_data(data): book.authors.add(author) - author_text += author.display_name + author_text.append(author.display_name) book.author_text = ', '.join(author_text) book.save() @@ -298,7 +298,7 @@ def get_data(url): 'Accept': 'application/json; charset=utf-8', }, ) - except ProtocolError: + except ConnectionError: raise ConnectorException() if not resp.ok: resp.raise_for_status() @@ -306,13 +306,14 @@ def get_data(url): return data +@dataclass class SearchResult: ''' standardized search result object ''' - def __init__(self, title, key, author, year): - self.title = title - self.key = key - self.author = author - self.year = year + title: str + key: str + author: str + year: str + confidence: int = 1 def __repr__(self): return "".format( diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index d70ab3e23..0ae3ce35b 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -129,10 +129,10 @@ class Connector(AbstractConnector): key = self.books_url + search_result['key'] author = search_result.get('author_name') or ['Unknown'] return SearchResult( - search_result.get('title'), - key, - ', '.join(author), - search_result.get('first_publish_year'), + title=search_result.get('title'), + key=key, + author=', '.join(author), + year=search_result.get('first_publish_year'), ) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 2711bb1a2..0e77ecf67 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -7,7 +7,7 @@ from .abstract_connector import AbstractConnector, SearchResult class Connector(AbstractConnector): ''' instantiate a connector ''' - def search(self, query): + def search(self, query, min_confidence=0.1): ''' right now you can't search bookwyrm sorry, but when that gets implemented it will totally rule ''' vector = SearchVector('title', weight='A') +\ @@ -28,7 +28,7 @@ class Connector(AbstractConnector): ).annotate( rank=SearchRank(vector, query) ).filter( - rank__gt=0 + rank__gt=min_confidence ).order_by('-rank') results = results.filter(default=True) or results @@ -42,11 +42,12 @@ class Connector(AbstractConnector): def format_search_result(self, search_result): return SearchResult( - search_result.title, - search_result.local_id, - search_result.author_text, - search_result.published_date.year if \ + title=search_result.title, + key=search_result.local_id, + author=search_result.author_text, + year=search_result.published_date.year if \ search_result.published_date else None, + confidence=search_result.rank, ) diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index bd63ea798..240e0694d 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -63,7 +63,9 @@ class ImportItem(models.Model): def get_book_from_isbn(self): ''' search by isbn ''' - search_result = books_manager.first_search_result(self.isbn) + search_result = books_manager.first_search_result( + self.isbn, min_confidence=0.5 + ) if search_result: try: # don't crash the import when the connector fails @@ -79,7 +81,9 @@ class ImportItem(models.Model): self.data['Title'], self.data['Author'] ) - search_result = books_manager.first_search_result(search_term) + search_result = books_manager.first_search_result( + search_term, min_confidence=0.5 + ) if search_result: try: return books_manager.get_or_create_book(search_result.key) diff --git a/bookwyrm/templates/search_results.html b/bookwyrm/templates/search_results.html index bd5096fec..489386cd5 100644 --- a/bookwyrm/templates/search_results.html +++ b/bookwyrm/templates/search_results.html @@ -14,6 +14,7 @@ {% for result in result_set.results %}
+ {{ result.confidence }}
{% csrf_token %} diff --git a/bookwyrm/tests/models/test_import_model.py b/bookwyrm/tests/models/test_import_model.py index 5e488199b..1d5aaa726 100644 --- a/bookwyrm/tests/models/test_import_model.py +++ b/bookwyrm/tests/models/test_import_model.py @@ -24,7 +24,7 @@ class ImportJob(TestCase): 'Number of Pages': 416, 'Year Published': 2019, 'Original Publication Year': 2019, - 'Date Read': '2019/04/09', + 'Date Read': '2019/04/12', 'Date Added': '2019/04/09', 'Bookshelves': '', 'Bookshelves with positions': '', @@ -97,11 +97,9 @@ class ImportJob(TestCase): self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date) def test_read_reads(self): - expected = [models.ReadThrough( - finish_date=datetime.datetime(2019, 4, 9, 0, 0))] actual = models.ImportItem.objects.get(index=2) - self.assertEqual(actual.reads[0].start_date, expected[0].start_date) - self.assertEqual(actual.reads[0].finish_date, expected[0].finish_date) + self.assertEqual(actual.reads[0].start_date, datetime.datetime(2019, 4, 9, 0, 0)) + self.assertEqual(actual.reads[0].finish_date, datetime.datetime(2019, 4, 12, 0, 0)) def test_unread_reads(self): expected = [] From cca98d90513ce342dc1a29e71c6bd01459969072 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 29 Oct 2020 16:08:23 -0700 Subject: [PATCH 3/4] removes confidence displaying in search results page --- bookwyrm/templates/search_results.html | 1 - 1 file changed, 1 deletion(-) diff --git a/bookwyrm/templates/search_results.html b/bookwyrm/templates/search_results.html index 489386cd5..bd5096fec 100644 --- a/bookwyrm/templates/search_results.html +++ b/bookwyrm/templates/search_results.html @@ -14,7 +14,6 @@ {% for result in result_set.results %}
- {{ result.confidence }} {% csrf_token %} From 7fb593af8cff15f6d06e92a9404dcd30f90f60b2 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 29 Oct 2020 16:48:28 -0700 Subject: [PATCH 4/4] Remove status associated with import --- .../0058_remove_importjob_import_status.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 bookwyrm/migrations/0058_remove_importjob_import_status.py diff --git a/bookwyrm/migrations/0058_remove_importjob_import_status.py b/bookwyrm/migrations/0058_remove_importjob_import_status.py new file mode 100644 index 000000000..61f41546e --- /dev/null +++ b/bookwyrm/migrations/0058_remove_importjob_import_status.py @@ -0,0 +1,17 @@ +# Generated by Django 3.0.7 on 2020-10-29 23:48 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0057_auto_20201026_2131'), + ] + + operations = [ + migrations.RemoveField( + model_name='importjob', + name='import_status', + ), + ]