From 86504989b45abaeb0caf6f4b5aa9b666f385d010 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Fri, 13 Nov 2020 09:47:35 -0800 Subject: [PATCH] fixes import matching with local books --- bookwyrm/connectors/abstract_connector.py | 4 +-- bookwyrm/connectors/bookwyrm_connector.py | 4 +-- bookwyrm/connectors/self_connector.py | 2 +- bookwyrm/goodreads_import.py | 5 ++- .../migrations/0011_auto_20201113_1727.py | 33 +++++++++++++++++ bookwyrm/models/base_model.py | 4 +-- bookwyrm/models/book.py | 35 ++++++------------- bookwyrm/models/import_job.py | 6 ++-- bookwyrm/models/status.py | 10 +++--- bookwyrm/templates/import_status.html | 1 - bookwyrm/tests/models/test_book_model.py | 9 ++--- 11 files changed, 63 insertions(+), 50 deletions(-) create mode 100644 bookwyrm/migrations/0011_auto_20201113_1727.py diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 8889932d..7fc4596b 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -75,7 +75,7 @@ class AbstractConnector(ABC): ''' pull up a book record by whatever means possible ''' # try to load the book book = models.Book.objects.select_subclasses().filter( - remote_id=remote_id + origin_id=remote_id ).first() if book: if isinstance(book, models.Work): @@ -148,7 +148,7 @@ class AbstractConnector(ABC): def create_book(self, remote_id, data, model): ''' create a work or edition from data ''' book = model.objects.create( - remote_id=remote_id, + origin_id=remote_id, title=data['title'], connector=self.connector, ) diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index ecceb457..6ed9dda1 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -99,14 +99,14 @@ class Connector(AbstractConnector): def get_or_create_author(self, remote_id): ''' load that author ''' try: - return models.Author.objects.get(remote_id=remote_id) + return models.Author.objects.get(origin_id=remote_id) except ObjectDoesNotExist: pass data = get_data(remote_id) # ingest a new author - author = models.Author(remote_id=remote_id) + author = models.Author(origin_id=remote_id) author = update_from_mappings(author, data, self.author_mappings) author.save() diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 2df07c19..2c58d9d7 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -47,7 +47,7 @@ class Connector(AbstractConnector): def format_search_result(self, search_result): return SearchResult( title=search_result.title, - key=search_result.local_id, + key=search_result.remote_id, author=search_result.author_text, year=search_result.published_date.year if \ search_result.published_date else None, diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index 1582c37a..3fd330ab 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -1,11 +1,13 @@ ''' handle reading a csv from goodreads ''' import csv +import logging from bookwyrm import outgoing from bookwyrm.tasks import app from bookwyrm.models import ImportJob, ImportItem from bookwyrm.status import create_notification +logger = logging.getLogger(__name__) # TODO: remove or increase once we're confident it's not causing problems. MAX_ENTRIES = 500 @@ -51,7 +53,8 @@ def import_data(job_id): for item in job.items.all(): try: item.resolve() - except: + except Exception as e: + logger.exception(e) item.fail_reason = 'Error loading book' item.save() continue diff --git a/bookwyrm/migrations/0011_auto_20201113_1727.py b/bookwyrm/migrations/0011_auto_20201113_1727.py new file mode 100644 index 00000000..15e853a3 --- /dev/null +++ b/bookwyrm/migrations/0011_auto_20201113_1727.py @@ -0,0 +1,33 @@ +# Generated by Django 3.0.7 on 2020-11-13 17:27 + +from django.db import migrations, models + +def set_origin_id(app_registry, schema_editor): + db_alias = schema_editor.connection.alias + books = app_registry.get_model('bookwyrm', 'Book').objects.using(db_alias) + for book in books: + book.origin_id = book.remote_id + # the remote_id will be set automatically + book.remote_id = None + book.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0010_importjob_retry'), + ] + + operations = [ + migrations.AddField( + model_name='author', + name='origin_id', + field=models.CharField(max_length=255, null=True), + ), + migrations.AddField( + model_name='book', + name='origin_id', + field=models.CharField(max_length=255, null=True), + ), + migrations.RunPython(set_origin_id), + ] diff --git a/bookwyrm/models/base_model.py b/bookwyrm/models/base_model.py index b48ceea9..8150d650 100644 --- a/bookwyrm/models/base_model.py +++ b/bookwyrm/models/base_model.py @@ -68,9 +68,7 @@ class ActivitypubMixin: if not hasattr(self, mapping.model_key) or not mapping.activity_key: continue value = getattr(self, mapping.model_key) - if hasattr(value, 'local_id'): - value = value.local_id - elif hasattr(value, 'remote_id'): + if hasattr(value, 'remote_id'): value = value.remote_id if isinstance(value, datetime): value = value.isoformat() diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 484f6824..d0702a3e 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -15,6 +15,7 @@ from .base_model import ActivitypubMixin, OrderedCollectionPageMixin class Book(ActivitypubMixin, BookWyrmModel): ''' a generic book, which can mean either an edition or a work ''' + origin_id = models.CharField(max_length=255, null=True) # these identifiers apply to both works and editions openlibrary_key = models.CharField(max_length=255, blank=True, null=True) librarything_key = models.CharField(max_length=255, blank=True, null=True) @@ -57,7 +58,7 @@ class Book(ActivitypubMixin, BookWyrmModel): @property def ap_authors(self): ''' the activitypub serialization should be a list of author ids ''' - return [a.local_id for a in self.authors.all()] + return [a.remote_id for a in self.authors.all()] @property def ap_cover(self): @@ -71,10 +72,10 @@ class Book(ActivitypubMixin, BookWyrmModel): @property def ap_parent_work(self): ''' reference the work via local id not remote ''' - return self.parent_work.local_id + return self.parent_work.remote_id activity_mappings = [ - ActivityMapping('id', 'local_id'), + ActivityMapping('id', 'remote_id'), ActivityMapping('authors', 'ap_authors'), ActivityMapping('first_published_date', 'first_published_date'), @@ -112,6 +113,8 @@ class Book(ActivitypubMixin, BookWyrmModel): ''' can't be abstract for query reasons, but you shouldn't USE it ''' if not isinstance(self, Edition) and not isinstance(self, Work): raise ValueError('Books should be added as Editions or Works') + if self.id and not self.remote_id: + self.remote_id = self.get_remote_id() super().save(*args, **kwargs) @@ -119,15 +122,6 @@ class Book(ActivitypubMixin, BookWyrmModel): ''' editions and works both use "book" instead of model_name ''' return 'https://%s/book/%d' % (DOMAIN, self.id) - - @property - def local_id(self): - ''' when a book is ingested from an outside source, it becomes local to - an instance, so it needs a local url for federation. but it still needs - the remote_id for easier deduplication and, if appropriate, to sync with - the remote canonical copy ''' - return 'https://%s/book/%d' % (DOMAIN, self.id) - def __repr__(self): return "<{} key={!r} title={!r}>".format( self.__class__, @@ -152,14 +146,14 @@ class Work(OrderedCollectionPageMixin, Book): ''' it'd be nice to serialize the edition instead but, recursion ''' default = self.default_edition ed_list = [ - e.local_id for e in self.edition_set.filter(~Q(id=default.id)).all() + e.remote_id for e in self.edition_set.filter(~Q(id=default.id)).all() ] - return [default.local_id] + ed_list + return [default.remote_id] + ed_list def to_edition_list(self, **kwargs): ''' activitypub serialization for this work's editions ''' - remote_id = self.local_id + '/editions' + remote_id = self.remote_id + '/editions' return self.to_ordered_collection( self.edition_set, remote_id=remote_id, @@ -246,6 +240,7 @@ def isbn_13_to_10(isbn_13): class Author(ActivitypubMixin, BookWyrmModel): + origin_id = models.CharField(max_length=255, null=True) ''' copy of an author from OL ''' openlibrary_key = models.CharField(max_length=255, blank=True, null=True) sync = models.BooleanField(default=True) @@ -262,14 +257,6 @@ class Author(ActivitypubMixin, BookWyrmModel): ) bio = models.TextField(null=True, blank=True) - @property - def local_id(self): - ''' when a book is ingested from an outside source, it becomes local to - an instance, so it needs a local url for federation. but it still needs - the remote_id for easier deduplication and, if appropriate, to sync with - the remote canonical copy (ditto here for author)''' - return 'https://%s/author/%d' % (DOMAIN, self.id) - @property def display_name(self): ''' Helper to return a displayable name''' @@ -281,7 +268,7 @@ class Author(ActivitypubMixin, BookWyrmModel): return self.last_name or self.first_name activity_mappings = [ - ActivityMapping('id', 'local_id'), + ActivityMapping('id', 'remote_id'), ActivityMapping('name', 'display_name'), ActivityMapping('born', 'born'), ActivityMapping('died', 'died'), diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 3c0cfd86..58bf4de6 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -94,10 +94,8 @@ class ImportItem(models.Model): search_term, min_confidence=0.995 ) if search_result: - try: - return books_manager.get_or_create_book(search_result.key) - except ConnectorException: - pass + # raises ConnectorException + return books_manager.get_or_create_book(search_result.key) return None diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index efe1a58f..09ceda85 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -64,7 +64,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel): tags = [] for book in self.mention_books.all(): tags.append(activitypub.Link( - href=book.local_id, + href=book.remote_id, name=book.title, type='Book' )) @@ -160,7 +160,7 @@ class GeneratedNote(Status): ''' indicate the book in question for mastodon (or w/e) users ''' message = self.content books = ', '.join( - '"%s"' % (self.book.local_id, self.book.title) \ + '"%s"' % (self.book.remote_id, self.book.title) \ for book in self.mention_books.all() ) return '%s %s' % (message, books) @@ -177,7 +177,7 @@ class Comment(Status): def ap_pure_content(self): ''' indicate the book in question for mastodon (or w/e) users ''' return self.content + '

(comment on "%s")' % \ - (self.book.local_id, self.book.title) + (self.book.remote_id, self.book.title) activity_serializer = activitypub.Comment pure_activity_serializer = activitypub.Note @@ -193,7 +193,7 @@ class Quotation(Status): ''' indicate the book in question for mastodon (or w/e) users ''' return '"%s"
-- "%s"

%s' % ( self.quote, - self.book.local_id, + self.book.remote_id, self.book.title, self.content, ) @@ -231,7 +231,7 @@ class Review(Status): def ap_pure_content(self): ''' indicate the book in question for mastodon (or w/e) users ''' return self.content + '

("%s")' % \ - (self.book.local_id, self.book.title) + (self.book.remote_id, self.book.title) activity_serializer = activitypub.Review pure_activity_serializer = activitypub.Article diff --git a/bookwyrm/templates/import_status.html b/bookwyrm/templates/import_status.html index 1c7da003..cda74a21 100644 --- a/bookwyrm/templates/import_status.html +++ b/bookwyrm/templates/import_status.html @@ -45,7 +45,6 @@

{{ item.fail_reason }}. - Manually add book

{% endfor %} diff --git a/bookwyrm/tests/models/test_book_model.py b/bookwyrm/tests/models/test_book_model.py index 7dfad61f..d01eea2e 100644 --- a/bookwyrm/tests/models/test_book_model.py +++ b/bookwyrm/tests/models/test_book_model.py @@ -22,15 +22,10 @@ class Book(TestCase): ) def test_remote_id(self): - local_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id) - self.assertEqual(self.work.get_remote_id(), local_id) + remote_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id) + self.assertEqual(self.work.get_remote_id(), remote_id) self.assertEqual(self.work.remote_id, 'https://example.com/book/1') - def test_local_id(self): - ''' the local_id property for books ''' - expected_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id) - self.assertEqual(self.work.local_id, expected_id) - def test_create_book(self): ''' you shouldn't be able to create Books (only editions and works) ''' self.assertRaises(