From 083b576bc49f01fbe85571b9d73e5f0f6a202ad5 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Fri, 13 Nov 2020 07:34:37 -0800 Subject: [PATCH 1/5] fixes broken activity serializer error import --- bookwyrm/activitypub/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bookwyrm/activitypub/__init__.py b/bookwyrm/activitypub/__init__.py index 105e889b6..931af0a0c 100644 --- a/bookwyrm/activitypub/__init__.py +++ b/bookwyrm/activitypub/__init__.py @@ -4,6 +4,7 @@ import sys from .base_activity import ActivityEncoder, Image, PublicKey, Signature from .base_activity import Link, Mention +from .base_activity import ActivitySerializerError from .note import Note, GeneratedNote, Article, Comment, Review, Quotation from .note import Tombstone from .interaction import Boost, Like From e3a803b907c501fcd871248588a16876bba38595 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Fri, 13 Nov 2020 09:02:41 -0800 Subject: [PATCH 2/5] Allow import retry --- bookwyrm/goodreads_import.py | 12 ++++- bookwyrm/migrations/0010_importjob_retry.py | 18 ++++++++ bookwyrm/models/import_job.py | 11 +++++ bookwyrm/templates/import_status.html | 51 +++++++++++++++++---- bookwyrm/urls.py | 7 +-- bookwyrm/view_actions.py | 19 +++++++- 6 files changed, 103 insertions(+), 15 deletions(-) create mode 100644 bookwyrm/migrations/0010_importjob_retry.py diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index d5c0ad420..1582c37a2 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -1,6 +1,5 @@ ''' handle reading a csv from goodreads ''' import csv -from requests import HTTPError from bookwyrm import outgoing from bookwyrm.tasks import app @@ -24,6 +23,17 @@ def create_job(user, csv_file, include_reviews, privacy): ImportItem(job=job, index=index, data=entry).save() return job +def create_retry_job(user, original_job, items): + ''' retry items that didn't import ''' + job = ImportJob.objects.create( + user=user, + include_reviews=original_job.include_reviews, + privacy=original_job.privacy, + retry=True + ) + for item in items: + ImportItem(job=job, index=item.index, data=item.data).save() + return job def start_import(job): ''' initalizes a csv import job ''' diff --git a/bookwyrm/migrations/0010_importjob_retry.py b/bookwyrm/migrations/0010_importjob_retry.py new file mode 100644 index 000000000..21296cc45 --- /dev/null +++ b/bookwyrm/migrations/0010_importjob_retry.py @@ -0,0 +1,18 @@ +# Generated by Django 3.0.7 on 2020-11-13 15:54 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0009_shelf_privacy'), + ] + + operations = [ + migrations.AddField( + model_name='importjob', + name='retry', + field=models.BooleanField(default=False), + ), + ] diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 891bfd1b7..3c0cfd86a 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -48,6 +48,7 @@ class ImportJob(models.Model): default='public', choices=PrivacyLevels.choices ) + retry = models.BooleanField(default=False) class ImportItem(models.Model): @@ -100,6 +101,16 @@ class ImportItem(models.Model): return None + @property + def title(self): + ''' get the book title ''' + return self.data['Title'] + + @property + def author(self): + ''' get the book title ''' + return self.data['Author'] + @property def isbn(self): ''' pulls out the isbn13 field from the csv line data ''' diff --git a/bookwyrm/templates/import_status.html b/bookwyrm/templates/import_status.html index c1dbb26e2..1c7da0036 100644 --- a/bookwyrm/templates/import_status.html +++ b/bookwyrm/templates/import_status.html @@ -29,16 +29,47 @@ {% if failed_items %}

Failed to load

-
    - {% for item in failed_items %} -
  • - Line {{ item.index }}: - {{ item.data|dict_key:'Title' }} by - {{ item.data|dict_key:'Author' }} - ({{ item.fail_reason }}) -
  • - {% endfor %} -
+ {% if not job.retry %} +
+ {% csrf_token %} + +
    +
    + {% for item in failed_items %} +
  • + + +

    + {{ item.fail_reason }}. + Manually add book +

    +
  • + {% endfor %} +
    +
+ + {% else %} +
    + {% for item in failed_items %} +
  • +

    + Line {{ item.index }}: + {{ item.data|dict_key:'Title' }} by + {{ item.data|dict_key:'Author' }} +

    +

    + {{ item.fail_reason }}. + Manually add book +

    +
  • + {% endfor %} +
+ {% endif %} +
{% endif %} diff --git a/bookwyrm/urls.py b/bookwyrm/urls.py index 8fa3bc1b8..1b81c601f 100644 --- a/bookwyrm/urls.py +++ b/bookwyrm/urls.py @@ -54,9 +54,9 @@ urlpatterns = [ path('', views.home), re_path(r'^(?Phome|local|federated)/?$', views.home_tab), re_path(r'^notifications/?', views.notifications_page), - re_path(r'import/?$', views.import_page), - re_path(r'import-status/(\d+)/?$', views.import_status), - re_path(r'user-edit/?$', views.edit_profile_page), + re_path(r'^import/?$', views.import_page), + re_path(r'^import-status/(\d+)/?$', views.import_status), + re_path(r'^user-edit/?$', views.edit_profile_page), # should return a ui view or activitypub json blob as requested # users @@ -98,6 +98,7 @@ urlpatterns = [ re_path(r'^edit-profile/?$', actions.edit_profile), re_path(r'^import-data/?', actions.import_data), + re_path(r'^retry-import/?', actions.retry_import), re_path(r'^resolve-book/?', actions.resolve_book), re_path(r'^edit-book/(?P\d+)/?', actions.edit_book), re_path(r'^upload-cover/(?P\d+)/?', actions.upload_cover), diff --git a/bookwyrm/view_actions.py b/bookwyrm/view_actions.py index cf9af2a33..b7e92c89f 100644 --- a/bookwyrm/view_actions.py +++ b/bookwyrm/view_actions.py @@ -662,10 +662,27 @@ def import_data(request): except (UnicodeDecodeError, ValueError): return HttpResponseBadRequest('Not a valid csv file') goodreads_import.start_import(job) - return redirect('/import-status/%d' % (job.id,)) + return redirect('/import-status/%d' % job.id) return HttpResponseBadRequest() +@login_required +def retry_import(request): + ''' ingest a goodreads csv ''' + job = get_object_or_404(models.ImportJob, id=request.POST.get('import_job')) + items = [] + for item in request.POST.getlist('import_item'): + items.append(get_object_or_404(models.ImportItem, id=item)) + + job = goodreads_import.create_retry_job( + request.user, + job, + items, + ) + goodreads_import.start_import(job) + return redirect('/import-status/%d' % job.id) + + @login_required @permission_required('bookwyrm.create_invites', raise_exception=True) def create_invite(request): From 86504989b45abaeb0caf6f4b5aa9b666f385d010 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Fri, 13 Nov 2020 09:47:35 -0800 Subject: [PATCH 3/5] fixes import matching with local books --- bookwyrm/connectors/abstract_connector.py | 4 +-- bookwyrm/connectors/bookwyrm_connector.py | 4 +-- bookwyrm/connectors/self_connector.py | 2 +- bookwyrm/goodreads_import.py | 5 ++- .../migrations/0011_auto_20201113_1727.py | 33 +++++++++++++++++ bookwyrm/models/base_model.py | 4 +-- bookwyrm/models/book.py | 35 ++++++------------- bookwyrm/models/import_job.py | 6 ++-- bookwyrm/models/status.py | 10 +++--- bookwyrm/templates/import_status.html | 1 - bookwyrm/tests/models/test_book_model.py | 9 ++--- 11 files changed, 63 insertions(+), 50 deletions(-) create mode 100644 bookwyrm/migrations/0011_auto_20201113_1727.py diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 8889932d1..7fc4596b3 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -75,7 +75,7 @@ class AbstractConnector(ABC): ''' pull up a book record by whatever means possible ''' # try to load the book book = models.Book.objects.select_subclasses().filter( - remote_id=remote_id + origin_id=remote_id ).first() if book: if isinstance(book, models.Work): @@ -148,7 +148,7 @@ class AbstractConnector(ABC): def create_book(self, remote_id, data, model): ''' create a work or edition from data ''' book = model.objects.create( - remote_id=remote_id, + origin_id=remote_id, title=data['title'], connector=self.connector, ) diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index ecceb4570..6ed9dda1d 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -99,14 +99,14 @@ class Connector(AbstractConnector): def get_or_create_author(self, remote_id): ''' load that author ''' try: - return models.Author.objects.get(remote_id=remote_id) + return models.Author.objects.get(origin_id=remote_id) except ObjectDoesNotExist: pass data = get_data(remote_id) # ingest a new author - author = models.Author(remote_id=remote_id) + author = models.Author(origin_id=remote_id) author = update_from_mappings(author, data, self.author_mappings) author.save() diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 2df07c199..2c58d9d7e 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -47,7 +47,7 @@ class Connector(AbstractConnector): def format_search_result(self, search_result): return SearchResult( title=search_result.title, - key=search_result.local_id, + key=search_result.remote_id, author=search_result.author_text, year=search_result.published_date.year if \ search_result.published_date else None, diff --git a/bookwyrm/goodreads_import.py b/bookwyrm/goodreads_import.py index 1582c37a2..3fd330ab4 100644 --- a/bookwyrm/goodreads_import.py +++ b/bookwyrm/goodreads_import.py @@ -1,11 +1,13 @@ ''' handle reading a csv from goodreads ''' import csv +import logging from bookwyrm import outgoing from bookwyrm.tasks import app from bookwyrm.models import ImportJob, ImportItem from bookwyrm.status import create_notification +logger = logging.getLogger(__name__) # TODO: remove or increase once we're confident it's not causing problems. MAX_ENTRIES = 500 @@ -51,7 +53,8 @@ def import_data(job_id): for item in job.items.all(): try: item.resolve() - except: + except Exception as e: + logger.exception(e) item.fail_reason = 'Error loading book' item.save() continue diff --git a/bookwyrm/migrations/0011_auto_20201113_1727.py b/bookwyrm/migrations/0011_auto_20201113_1727.py new file mode 100644 index 000000000..15e853a35 --- /dev/null +++ b/bookwyrm/migrations/0011_auto_20201113_1727.py @@ -0,0 +1,33 @@ +# Generated by Django 3.0.7 on 2020-11-13 17:27 + +from django.db import migrations, models + +def set_origin_id(app_registry, schema_editor): + db_alias = schema_editor.connection.alias + books = app_registry.get_model('bookwyrm', 'Book').objects.using(db_alias) + for book in books: + book.origin_id = book.remote_id + # the remote_id will be set automatically + book.remote_id = None + book.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0010_importjob_retry'), + ] + + operations = [ + migrations.AddField( + model_name='author', + name='origin_id', + field=models.CharField(max_length=255, null=True), + ), + migrations.AddField( + model_name='book', + name='origin_id', + field=models.CharField(max_length=255, null=True), + ), + migrations.RunPython(set_origin_id), + ] diff --git a/bookwyrm/models/base_model.py b/bookwyrm/models/base_model.py index b48ceea9b..8150d650c 100644 --- a/bookwyrm/models/base_model.py +++ b/bookwyrm/models/base_model.py @@ -68,9 +68,7 @@ class ActivitypubMixin: if not hasattr(self, mapping.model_key) or not mapping.activity_key: continue value = getattr(self, mapping.model_key) - if hasattr(value, 'local_id'): - value = value.local_id - elif hasattr(value, 'remote_id'): + if hasattr(value, 'remote_id'): value = value.remote_id if isinstance(value, datetime): value = value.isoformat() diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 484f68241..d0702a3e1 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -15,6 +15,7 @@ from .base_model import ActivitypubMixin, OrderedCollectionPageMixin class Book(ActivitypubMixin, BookWyrmModel): ''' a generic book, which can mean either an edition or a work ''' + origin_id = models.CharField(max_length=255, null=True) # these identifiers apply to both works and editions openlibrary_key = models.CharField(max_length=255, blank=True, null=True) librarything_key = models.CharField(max_length=255, blank=True, null=True) @@ -57,7 +58,7 @@ class Book(ActivitypubMixin, BookWyrmModel): @property def ap_authors(self): ''' the activitypub serialization should be a list of author ids ''' - return [a.local_id for a in self.authors.all()] + return [a.remote_id for a in self.authors.all()] @property def ap_cover(self): @@ -71,10 +72,10 @@ class Book(ActivitypubMixin, BookWyrmModel): @property def ap_parent_work(self): ''' reference the work via local id not remote ''' - return self.parent_work.local_id + return self.parent_work.remote_id activity_mappings = [ - ActivityMapping('id', 'local_id'), + ActivityMapping('id', 'remote_id'), ActivityMapping('authors', 'ap_authors'), ActivityMapping('first_published_date', 'first_published_date'), @@ -112,6 +113,8 @@ class Book(ActivitypubMixin, BookWyrmModel): ''' can't be abstract for query reasons, but you shouldn't USE it ''' if not isinstance(self, Edition) and not isinstance(self, Work): raise ValueError('Books should be added as Editions or Works') + if self.id and not self.remote_id: + self.remote_id = self.get_remote_id() super().save(*args, **kwargs) @@ -119,15 +122,6 @@ class Book(ActivitypubMixin, BookWyrmModel): ''' editions and works both use "book" instead of model_name ''' return 'https://%s/book/%d' % (DOMAIN, self.id) - - @property - def local_id(self): - ''' when a book is ingested from an outside source, it becomes local to - an instance, so it needs a local url for federation. but it still needs - the remote_id for easier deduplication and, if appropriate, to sync with - the remote canonical copy ''' - return 'https://%s/book/%d' % (DOMAIN, self.id) - def __repr__(self): return "<{} key={!r} title={!r}>".format( self.__class__, @@ -152,14 +146,14 @@ class Work(OrderedCollectionPageMixin, Book): ''' it'd be nice to serialize the edition instead but, recursion ''' default = self.default_edition ed_list = [ - e.local_id for e in self.edition_set.filter(~Q(id=default.id)).all() + e.remote_id for e in self.edition_set.filter(~Q(id=default.id)).all() ] - return [default.local_id] + ed_list + return [default.remote_id] + ed_list def to_edition_list(self, **kwargs): ''' activitypub serialization for this work's editions ''' - remote_id = self.local_id + '/editions' + remote_id = self.remote_id + '/editions' return self.to_ordered_collection( self.edition_set, remote_id=remote_id, @@ -246,6 +240,7 @@ def isbn_13_to_10(isbn_13): class Author(ActivitypubMixin, BookWyrmModel): + origin_id = models.CharField(max_length=255, null=True) ''' copy of an author from OL ''' openlibrary_key = models.CharField(max_length=255, blank=True, null=True) sync = models.BooleanField(default=True) @@ -262,14 +257,6 @@ class Author(ActivitypubMixin, BookWyrmModel): ) bio = models.TextField(null=True, blank=True) - @property - def local_id(self): - ''' when a book is ingested from an outside source, it becomes local to - an instance, so it needs a local url for federation. but it still needs - the remote_id for easier deduplication and, if appropriate, to sync with - the remote canonical copy (ditto here for author)''' - return 'https://%s/author/%d' % (DOMAIN, self.id) - @property def display_name(self): ''' Helper to return a displayable name''' @@ -281,7 +268,7 @@ class Author(ActivitypubMixin, BookWyrmModel): return self.last_name or self.first_name activity_mappings = [ - ActivityMapping('id', 'local_id'), + ActivityMapping('id', 'remote_id'), ActivityMapping('name', 'display_name'), ActivityMapping('born', 'born'), ActivityMapping('died', 'died'), diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 3c0cfd86a..58bf4de61 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -94,10 +94,8 @@ class ImportItem(models.Model): search_term, min_confidence=0.995 ) if search_result: - try: - return books_manager.get_or_create_book(search_result.key) - except ConnectorException: - pass + # raises ConnectorException + return books_manager.get_or_create_book(search_result.key) return None diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index efe1a58f3..09ceda856 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -64,7 +64,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel): tags = [] for book in self.mention_books.all(): tags.append(activitypub.Link( - href=book.local_id, + href=book.remote_id, name=book.title, type='Book' )) @@ -160,7 +160,7 @@ class GeneratedNote(Status): ''' indicate the book in question for mastodon (or w/e) users ''' message = self.content books = ', '.join( - '"%s"' % (self.book.local_id, self.book.title) \ + '"%s"' % (self.book.remote_id, self.book.title) \ for book in self.mention_books.all() ) return '%s %s' % (message, books) @@ -177,7 +177,7 @@ class Comment(Status): def ap_pure_content(self): ''' indicate the book in question for mastodon (or w/e) users ''' return self.content + '

(comment on "%s")' % \ - (self.book.local_id, self.book.title) + (self.book.remote_id, self.book.title) activity_serializer = activitypub.Comment pure_activity_serializer = activitypub.Note @@ -193,7 +193,7 @@ class Quotation(Status): ''' indicate the book in question for mastodon (or w/e) users ''' return '"%s"
-- "%s"

%s' % ( self.quote, - self.book.local_id, + self.book.remote_id, self.book.title, self.content, ) @@ -231,7 +231,7 @@ class Review(Status): def ap_pure_content(self): ''' indicate the book in question for mastodon (or w/e) users ''' return self.content + '

("%s")' % \ - (self.book.local_id, self.book.title) + (self.book.remote_id, self.book.title) activity_serializer = activitypub.Review pure_activity_serializer = activitypub.Article diff --git a/bookwyrm/templates/import_status.html b/bookwyrm/templates/import_status.html index 1c7da0036..cda74a217 100644 --- a/bookwyrm/templates/import_status.html +++ b/bookwyrm/templates/import_status.html @@ -45,7 +45,6 @@

{{ item.fail_reason }}. - Manually add book

{% endfor %} diff --git a/bookwyrm/tests/models/test_book_model.py b/bookwyrm/tests/models/test_book_model.py index 7dfad61f3..d01eea2e2 100644 --- a/bookwyrm/tests/models/test_book_model.py +++ b/bookwyrm/tests/models/test_book_model.py @@ -22,15 +22,10 @@ class Book(TestCase): ) def test_remote_id(self): - local_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id) - self.assertEqual(self.work.get_remote_id(), local_id) + remote_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id) + self.assertEqual(self.work.get_remote_id(), remote_id) self.assertEqual(self.work.remote_id, 'https://example.com/book/1') - def test_local_id(self): - ''' the local_id property for books ''' - expected_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id) - self.assertEqual(self.work.local_id, expected_id) - def test_create_book(self): ''' you shouldn't be able to create Books (only editions and works) ''' self.assertRaises( From 6300c37a4eb08065004756d8c2b98e80a04f0e0b Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Fri, 13 Nov 2020 10:14:24 -0800 Subject: [PATCH 4/5] Select all checkbox --- bookwyrm/static/js/shared.js | 4 ++++ bookwyrm/templates/import_status.html | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/bookwyrm/static/js/shared.js b/bookwyrm/static/js/shared.js index 7bac49124..b99459b23 100644 --- a/bookwyrm/static/js/shared.js +++ b/bookwyrm/static/js/shared.js @@ -20,6 +20,10 @@ function reply(e) { return true; } +function selectAll(el) { + el.parentElement.querySelectorAll('[type="checkbox"]') + .forEach(t => t.checked=true); +} function rate_stars(e) { e.preventDefault(); diff --git a/bookwyrm/templates/import_status.html b/bookwyrm/templates/import_status.html index cda74a217..886aea673 100644 --- a/bookwyrm/templates/import_status.html +++ b/bookwyrm/templates/import_status.html @@ -50,6 +50,12 @@ {% endfor %} +
+ +
{% else %}
    From 28e1c9525c5ae5653c3db129fa84e496288b3f95 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Fri, 13 Nov 2020 11:03:39 -0800 Subject: [PATCH 5/5] tweaks search rankings for better results --- bookwyrm/connectors/self_connector.py | 16 ++++++++-------- bookwyrm/models/import_job.py | 11 ++++------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index 2c58d9d7e..80d3a67d9 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -13,16 +13,16 @@ class Connector(AbstractConnector): that gets implemented it will totally rule ''' vector = SearchVector('title', weight='A') +\ SearchVector('subtitle', weight='B') +\ - SearchVector('author_text', weight='A') +\ + SearchVector('author_text', weight='C') +\ SearchVector('isbn_13', weight='A') +\ SearchVector('isbn_10', weight='A') +\ - SearchVector('openlibrary_key', weight='B') +\ - SearchVector('goodreads_key', weight='B') +\ - SearchVector('asin', weight='B') +\ - SearchVector('oclc_number', weight='B') +\ - SearchVector('remote_id', weight='B') +\ - SearchVector('description', weight='C') +\ - SearchVector('series', weight='C') + SearchVector('openlibrary_key', weight='C') +\ + SearchVector('goodreads_key', weight='C') +\ + SearchVector('asin', weight='C') +\ + SearchVector('oclc_number', weight='C') +\ + SearchVector('remote_id', weight='C') +\ + SearchVector('description', weight='D') +\ + SearchVector('series', weight='D') results = models.Edition.objects.annotate( search=vector diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 58bf4de61..b35f79921 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -73,14 +73,11 @@ class ImportItem(models.Model): def get_book_from_isbn(self): ''' search by isbn ''' search_result = books_manager.first_search_result( - self.isbn, min_confidence=0.995 + self.isbn, min_confidence=0.999 ) if search_result: - try: - # don't crash the import when the connector fails - return books_manager.get_or_create_book(search_result.key) - except ConnectorException: - pass + # raises ConnectorException + return books_manager.get_or_create_book(search_result.key) return None @@ -91,7 +88,7 @@ class ImportItem(models.Model): self.data['Author'] ) search_result = books_manager.first_search_result( - search_term, min_confidence=0.995 + search_term, min_confidence=0.999 ) if search_result: # raises ConnectorException