Merge branch 'main' into production

Mouse Reeve 2020-11-13 11:48:55 -08:00
commit 8f595b6a35
16 changed files with 188 additions and 79 deletions

View file

@@ -4,6 +4,7 @@ import sys
from .base_activity import ActivityEncoder, Image, PublicKey, Signature
from .base_activity import Link, Mention
from .base_activity import ActivitySerializerError
from .note import Note, GeneratedNote, Article, Comment, Review, Quotation
from .note import Tombstone
from .interaction import Boost, Like

View file

@@ -75,7 +75,7 @@ class AbstractConnector(ABC):
''' pull up a book record by whatever means possible '''
# try to load the book
book = models.Book.objects.select_subclasses().filter(
remote_id=remote_id
origin_id=remote_id
).first()
if book:
if isinstance(book, models.Work):
@@ -148,7 +148,7 @@ class AbstractConnector(ABC):
def create_book(self, remote_id, data, model):
''' create a work or edition from data '''
book = model.objects.create(
remote_id=remote_id,
origin_id=remote_id,
title=data['title'],
connector=self.connector,
)

View file

@@ -99,14 +99,14 @@ class Connector(AbstractConnector):
def get_or_create_author(self, remote_id):
''' load that author '''
try:
return models.Author.objects.get(remote_id=remote_id)
return models.Author.objects.get(origin_id=remote_id)
except ObjectDoesNotExist:
pass
data = get_data(remote_id)
# ingest a new author
author = models.Author(remote_id=remote_id)
author = models.Author(origin_id=remote_id)
author = update_from_mappings(author, data, self.author_mappings)
author.save()

View file

@@ -13,16 +13,16 @@ class Connector(AbstractConnector):
that gets implemented it will totally rule '''
vector = SearchVector('title', weight='A') +\
SearchVector('subtitle', weight='B') +\
SearchVector('author_text', weight='A') +\
SearchVector('author_text', weight='C') +\
SearchVector('isbn_13', weight='A') +\
SearchVector('isbn_10', weight='A') +\
SearchVector('openlibrary_key', weight='B') +\
SearchVector('goodreads_key', weight='B') +\
SearchVector('asin', weight='B') +\
SearchVector('oclc_number', weight='B') +\
SearchVector('remote_id', weight='B') +\
SearchVector('description', weight='C') +\
SearchVector('series', weight='C')
SearchVector('openlibrary_key', weight='C') +\
SearchVector('goodreads_key', weight='C') +\
SearchVector('asin', weight='C') +\
SearchVector('oclc_number', weight='C') +\
SearchVector('remote_id', weight='C') +\
SearchVector('description', weight='D') +\
SearchVector('series', weight='D')
results = models.Edition.objects.annotate(
search=vector
@@ -47,7 +47,7 @@ class Connector(AbstractConnector):
def format_search_result(self, search_result):
return SearchResult(
title=search_result.title,
key=search_result.local_id,
key=search_result.remote_id,
author=search_result.author_text,
year=search_result.published_date.year if \
search_result.published_date else None,
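
Note on the weight changes above: Postgres full-text search scores matches by weight class (A > B > C > D, scaled 1.0 / 0.4 / 0.2 / 0.1 by default), so keeping title and ISBN fields at A while author and catalog identifiers drop to C and description/series to D lets exact title and ISBN matches outrank looser hits. A minimal sketch of how such a weighted vector feeds ranking through Django's SearchQuery/SearchRank; the vector, query string, and cutoff below are illustrative, not this connector's actual code:

    from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector

    vector = (SearchVector('title', weight='A')
              + SearchVector('description', weight='D'))
    query = SearchQuery('dead souls')
    results = (
        models.Edition.objects
        .annotate(rank=SearchRank(vector, query))
        .filter(rank__gte=0.1)  # hypothetical cutoff; higher-weight matches score higher
        .order_by('-rank')
    )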

View file

@@ -1,12 +1,13 @@
''' handle reading a csv from goodreads '''
import csv
from requests import HTTPError
import logging
from bookwyrm import outgoing
from bookwyrm.tasks import app
from bookwyrm.models import ImportJob, ImportItem
from bookwyrm.status import create_notification
logger = logging.getLogger(__name__)
# TODO: remove or increase once we're confident it's not causing problems.
MAX_ENTRIES = 500
@@ -24,6 +25,17 @@ def create_job(user, csv_file, include_reviews, privacy):
ImportItem(job=job, index=index, data=entry).save()
return job
def create_retry_job(user, original_job, items):
''' retry items that didn't import '''
job = ImportJob.objects.create(
user=user,
include_reviews=original_job.include_reviews,
privacy=original_job.privacy,
retry=True
)
for item in items:
ImportItem(job=job, index=item.index, data=item.data).save()
return job
def start_import(job):
''' initializes a csv import job '''
@@ -41,7 +53,8 @@ def import_data(job_id):
for item in job.items.all():
try:
item.resolve()
except:
except Exception as e:
logger.exception(e)
item.fail_reason = 'Error loading book'
item.save()
continue
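
The new create_retry_job above mirrors create_job but copies its items from an existing job and flags the result with retry=True. A hedged usage sketch; the failed-item filter here is illustrative, and the retry_import view later in this diff selects items from the POSTed form instead:

    # re-run whichever items recorded a fail_reason on the original job
    failed = [item for item in job.items.all() if item.fail_reason]
    retry_job = create_retry_job(job.user, job, failed)
    start_import(retry_job)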

View file

@@ -0,0 +1,18 @@
# Generated by Django 3.0.7 on 2020-11-13 15:54
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('bookwyrm', '0009_shelf_privacy'),
]
operations = [
migrations.AddField(
model_name='importjob',
name='retry',
field=models.BooleanField(default=False),
),
]

View file

@@ -0,0 +1,33 @@
# Generated by Django 3.0.7 on 2020-11-13 17:27
from django.db import migrations, models
def set_origin_id(app_registry, schema_editor):
db_alias = schema_editor.connection.alias
books = app_registry.get_model('bookwyrm', 'Book').objects.using(db_alias)
for book in books:
book.origin_id = book.remote_id
# the remote_id will be set automatically
book.remote_id = None
book.save()
class Migration(migrations.Migration):
dependencies = [
('bookwyrm', '0010_importjob_retry'),
]
operations = [
migrations.AddField(
model_name='author',
name='origin_id',
field=models.CharField(max_length=255, null=True),
),
migrations.AddField(
model_name='book',
name='origin_id',
field=models.CharField(max_length=255, null=True),
),
migrations.RunPython(set_origin_id),
]
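
The "# the remote_id will be set automatically" comment above leans on the Book.save() override added later in this diff, which fills in an empty remote_id from get_remote_id(). The intended end state, with illustrative URLs only:

    # after the backfill, origin_id keeps the upstream record and remote_id the local one
    book.origin_id  # e.g. 'https://openlibrary.org/works/OL1234W'
    book.remote_id  # e.g. 'https://your-instance.example/book/1'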

View file

@@ -68,9 +68,7 @@ class ActivitypubMixin:
if not hasattr(self, mapping.model_key) or not mapping.activity_key:
continue
value = getattr(self, mapping.model_key)
if hasattr(value, 'local_id'):
value = value.local_id
elif hasattr(value, 'remote_id'):
if hasattr(value, 'remote_id'):
value = value.remote_id
if isinstance(value, datetime):
value = value.isoformat()

View file

@@ -15,6 +15,7 @@ from .base_model import ActivitypubMixin, OrderedCollectionPageMixin
class Book(ActivitypubMixin, BookWyrmModel):
''' a generic book, which can mean either an edition or a work '''
origin_id = models.CharField(max_length=255, null=True)
# these identifiers apply to both works and editions
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
librarything_key = models.CharField(max_length=255, blank=True, null=True)
@@ -57,7 +58,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
@property
def ap_authors(self):
''' the activitypub serialization should be a list of author ids '''
return [a.local_id for a in self.authors.all()]
return [a.remote_id for a in self.authors.all()]
@property
def ap_cover(self):
@@ -71,10 +72,10 @@ class Book(ActivitypubMixin, BookWyrmModel):
@property
def ap_parent_work(self):
''' reference the work via local id not remote '''
return self.parent_work.local_id
return self.parent_work.remote_id
activity_mappings = [
ActivityMapping('id', 'local_id'),
ActivityMapping('id', 'remote_id'),
ActivityMapping('authors', 'ap_authors'),
ActivityMapping('first_published_date', 'first_published_date'),
@@ -112,6 +113,8 @@ class Book(ActivitypubMixin, BookWyrmModel):
''' can't be abstract for query reasons, but you shouldn't USE it '''
if not isinstance(self, Edition) and not isinstance(self, Work):
raise ValueError('Books should be added as Editions or Works')
if self.id and not self.remote_id:
self.remote_id = self.get_remote_id()
super().save(*args, **kwargs)
@@ -119,15 +122,6 @@ class Book(ActivitypubMixin, BookWyrmModel):
''' editions and works both use "book" instead of model_name '''
return 'https://%s/book/%d' % (DOMAIN, self.id)
@property
def local_id(self):
''' when a book is ingested from an outside source, it becomes local to
an instance, so it needs a local url for federation. but it still needs
the remote_id for easier deduplication and, if appropriate, to sync with
the remote canonical copy '''
return 'https://%s/book/%d' % (DOMAIN, self.id)
def __repr__(self):
return "<{} key={!r} title={!r}>".format(
self.__class__,
@@ -152,14 +146,14 @@ class Work(OrderedCollectionPageMixin, Book):
''' it'd be nice to serialize the edition instead but, recursion '''
default = self.default_edition
ed_list = [
e.local_id for e in self.edition_set.filter(~Q(id=default.id)).all()
e.remote_id for e in self.edition_set.filter(~Q(id=default.id)).all()
]
return [default.local_id] + ed_list
return [default.remote_id] + ed_list
def to_edition_list(self, **kwargs):
''' activitypub serialization for this work's editions '''
remote_id = self.local_id + '/editions'
remote_id = self.remote_id + '/editions'
return self.to_ordered_collection(
self.edition_set,
remote_id=remote_id,
@@ -246,6 +240,7 @@ def isbn_13_to_10(isbn_13):
class Author(ActivitypubMixin, BookWyrmModel):
origin_id = models.CharField(max_length=255, null=True)
''' copy of an author from OL '''
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
sync = models.BooleanField(default=True)
@@ -262,14 +257,6 @@ class Author(ActivitypubMixin, BookWyrmModel):
)
bio = models.TextField(null=True, blank=True)
@property
def local_id(self):
''' when a book is ingested from an outside source, it becomes local to
an instance, so it needs a local url for federation. but it still needs
the remote_id for easier deduplication and, if appropriate, to sync with
the remote canonical copy (ditto here for author)'''
return 'https://%s/author/%d' % (DOMAIN, self.id)
@property
def display_name(self):
''' Helper to return a displayable name'''
@@ -281,7 +268,7 @@ class Author(ActivitypubMixin, BookWyrmModel):
return self.last_name or self.first_name
activity_mappings = [
ActivityMapping('id', 'local_id'),
ActivityMapping('id', 'remote_id'),
ActivityMapping('name', 'display_name'),
ActivityMapping('born', 'born'),
ActivityMapping('died', 'died'),

View file

@@ -48,6 +48,7 @@ class ImportJob(models.Model):
default='public',
choices=PrivacyLevels.choices
)
retry = models.BooleanField(default=False)
class ImportItem(models.Model):
@@ -72,14 +73,11 @@ class ImportItem(models.Model):
def get_book_from_isbn(self):
''' search by isbn '''
search_result = books_manager.first_search_result(
self.isbn, min_confidence=0.995
self.isbn, min_confidence=0.999
)
if search_result:
try:
# don't crash the import when the connector fails
# raises ConnectorException
return books_manager.get_or_create_book(search_result.key)
except ConnectorException:
pass
return None
@@ -90,16 +88,24 @@
self.data['Author']
)
search_result = books_manager.first_search_result(
search_term, min_confidence=0.995
search_term, min_confidence=0.999
)
if search_result:
try:
# raises ConnectorException
return books_manager.get_or_create_book(search_result.key)
except ConnectorException:
pass
return None
@property
def title(self):
''' get the book title '''
return self.data['Title']
@property
def author(self):
''' get the book author '''
return self.data['Author']
@property
def isbn(self):
''' pulls out the isbn13 field from the csv line data '''

View file

@@ -64,7 +64,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
tags = []
for book in self.mention_books.all():
tags.append(activitypub.Link(
href=book.local_id,
href=book.remote_id,
name=book.title,
type='Book'
))
@@ -160,7 +160,7 @@ class GeneratedNote(Status):
''' indicate the book in question for mastodon (or w/e) users '''
message = self.content
books = ', '.join(
'<a href="%s">"%s"</a>' % (self.book.local_id, self.book.title) \
'<a href="%s">"%s"</a>' % (self.book.remote_id, self.book.title) \
for book in self.mention_books.all()
)
return '%s %s' % (message, books)
@@ -177,7 +177,7 @@ class Comment(Status):
def ap_pure_content(self):
''' indicate the book in question for mastodon (or w/e) users '''
return self.content + '<br><br>(comment on <a href="%s">"%s"</a>)' % \
(self.book.local_id, self.book.title)
(self.book.remote_id, self.book.title)
activity_serializer = activitypub.Comment
pure_activity_serializer = activitypub.Note
@@ -193,7 +193,7 @@ class Quotation(Status):
''' indicate the book in question for mastodon (or w/e) users '''
return '"%s"<br>-- <a href="%s">"%s"</a><br><br>%s' % (
self.quote,
self.book.local_id,
self.book.remote_id,
self.book.title,
self.content,
)
@@ -231,7 +231,7 @@ class Review(Status):
def ap_pure_content(self):
''' indicate the book in question for mastodon (or w/e) users '''
return self.content + '<br><br>(<a href="%s">"%s"</a>)' % \
(self.book.local_id, self.book.title)
(self.book.remote_id, self.book.title)
activity_serializer = activitypub.Review
pure_activity_serializer = activitypub.Article

View file

@@ -20,6 +20,10 @@ function reply(e) {
return true;
}
function selectAll(el) {
el.parentElement.querySelectorAll('[type="checkbox"]')
.forEach(t => t.checked=true);
}
function rate_stars(e) {
e.preventDefault();

View file

@@ -29,16 +29,52 @@
{% if failed_items %}
<div class="block">
<h2 class="title is-4">Failed to load</h2>
{% if not job.retry %}
<form name="retry" action="/retry-import/" method="post">
{% csrf_token %}
<input type="hidden" name="import_job" value="{{ job.id }}">
<ul>
<fieldset>
{% for item in failed_items %}
<li>
<li class="pb-1">
<input class="checkbox" type="checkbox" name="import_item" value="{{ item.id }}" id="import-item-{{ item.id }}">
<label for="import-item-{{ item.id }}">
Line {{ item.index }}:
<strong>{{ item.data|dict_key:'Title' }}</strong> by
{{ item.data|dict_key:'Author' }}
({{ item.fail_reason }})
</label>
<p>
{{ item.fail_reason }}.
</p>
</li>
{% endfor %}
</fieldset>
</ul>
<div class="block pt-1" onclick="selectAll(this)">
<label class="label">
<input type="checkbox" class="checkbox">
Select all
</label>
</div>
<button class="button" type="submit">Retry items</button>
{% else %}
<ul>
{% for item in failed_items %}
<li class="pb-1">
<p>
Line {{ item.index }}:
<strong>{{ item.data|dict_key:'Title' }}</strong> by
{{ item.data|dict_key:'Author' }}
</p>
<p>
{{ item.fail_reason }}.
<a href="/create-book/{{ item.id }}">Manually add book</a>
</p>
</li>
{% endfor %}
</ul>
{% endif %}
</form>
</div>
{% endif %}

View file

@@ -22,15 +22,10 @@ class Book(TestCase):
)
def test_remote_id(self):
local_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.get_remote_id(), local_id)
remote_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.get_remote_id(), remote_id)
self.assertEqual(self.work.remote_id, 'https://example.com/book/1')
def test_local_id(self):
''' the local_id property for books '''
expected_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.local_id, expected_id)
def test_create_book(self):
''' you shouldn't be able to create Books (only editions and works) '''
self.assertRaises(

View file

@@ -54,9 +54,9 @@ urlpatterns = [
path('', views.home),
re_path(r'^(?P<tab>home|local|federated)/?$', views.home_tab),
re_path(r'^notifications/?', views.notifications_page),
re_path(r'import/?$', views.import_page),
re_path(r'import-status/(\d+)/?$', views.import_status),
re_path(r'user-edit/?$', views.edit_profile_page),
re_path(r'^import/?$', views.import_page),
re_path(r'^import-status/(\d+)/?$', views.import_status),
re_path(r'^user-edit/?$', views.edit_profile_page),
# should return a ui view or activitypub json blob as requested
# users
@@ -98,6 +98,7 @@ urlpatterns = [
re_path(r'^edit-profile/?$', actions.edit_profile),
re_path(r'^import-data/?', actions.import_data),
re_path(r'^retry-import/?', actions.retry_import),
re_path(r'^resolve-book/?', actions.resolve_book),
re_path(r'^edit-book/(?P<book_id>\d+)/?', actions.edit_book),
re_path(r'^upload-cover/(?P<book_id>\d+)/?', actions.upload_cover),
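
The ^ anchors added above matter because Django's re_path matches with re.search, so an unanchored pattern can also hit the tail of a longer path; presumably that is what would let the old import routes collide with the new retry-import route. A quick illustration:

    # illustrative: re.search semantics, as used by Django URL resolution
    import re
    re.search(r'import/?$', 'retry-import/')   # matches -- the tail 'import/' satisfies the pattern
    re.search(r'^import/?$', 'retry-import/')  # None -- the anchored pattern no longer collides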

View file

@@ -662,10 +662,27 @@ def import_data(request):
except (UnicodeDecodeError, ValueError):
return HttpResponseBadRequest('Not a valid csv file')
goodreads_import.start_import(job)
return redirect('/import-status/%d' % (job.id,))
return redirect('/import-status/%d' % job.id)
return HttpResponseBadRequest()
@login_required
def retry_import(request):
''' retry failed items from a goodreads csv import '''
job = get_object_or_404(models.ImportJob, id=request.POST.get('import_job'))
items = []
for item in request.POST.getlist('import_item'):
items.append(get_object_or_404(models.ImportItem, id=item))
job = goodreads_import.create_retry_job(
request.user,
job,
items,
)
goodreads_import.start_import(job)
return redirect('/import-status/%d' % job.id)
@login_required
@permission_required('bookwyrm.create_invites', raise_exception=True)
def create_invite(request):