fixes import matching with local books

This commit is contained in:
Mouse Reeve 2020-11-13 09:47:35 -08:00
parent e3a803b907
commit 86504989b4
11 changed files with 63 additions and 50 deletions

View file

@ -75,7 +75,7 @@ class AbstractConnector(ABC):
''' pull up a book record by whatever means possible '''
# try to load the book
book = models.Book.objects.select_subclasses().filter(
remote_id=remote_id
origin_id=remote_id
).first()
if book:
if isinstance(book, models.Work):
@ -148,7 +148,7 @@ class AbstractConnector(ABC):
def create_book(self, remote_id, data, model):
''' create a work or edition from data '''
book = model.objects.create(
remote_id=remote_id,
origin_id=remote_id,
title=data['title'],
connector=self.connector,
)

View file

@ -99,14 +99,14 @@ class Connector(AbstractConnector):
def get_or_create_author(self, remote_id):
''' load that author '''
try:
return models.Author.objects.get(remote_id=remote_id)
return models.Author.objects.get(origin_id=remote_id)
except ObjectDoesNotExist:
pass
data = get_data(remote_id)
# ingest a new author
author = models.Author(remote_id=remote_id)
author = models.Author(origin_id=remote_id)
author = update_from_mappings(author, data, self.author_mappings)
author.save()

View file

@ -47,7 +47,7 @@ class Connector(AbstractConnector):
def format_search_result(self, search_result):
return SearchResult(
title=search_result.title,
key=search_result.local_id,
key=search_result.remote_id,
author=search_result.author_text,
year=search_result.published_date.year if \
search_result.published_date else None,

View file

@ -1,11 +1,13 @@
''' handle reading a csv from goodreads '''
import csv
import logging
from bookwyrm import outgoing
from bookwyrm.tasks import app
from bookwyrm.models import ImportJob, ImportItem
from bookwyrm.status import create_notification
logger = logging.getLogger(__name__)
# TODO: remove or increase once we're confident it's not causing problems.
MAX_ENTRIES = 500
@ -51,7 +53,8 @@ def import_data(job_id):
for item in job.items.all():
try:
item.resolve()
except:
except Exception as e:
logger.exception(e)
item.fail_reason = 'Error loading book'
item.save()
continue

View file

@ -0,0 +1,33 @@
# Generated by Django 3.0.7 on 2020-11-13 17:27
from django.db import migrations, models
def set_origin_id(app_registry, schema_editor):
db_alias = schema_editor.connection.alias
books = app_registry.get_model('bookwyrm', 'Book').objects.using(db_alias)
for book in books:
book.origin_id = book.remote_id
# the remote_id will be set automatically
book.remote_id = None
book.save()
class Migration(migrations.Migration):
dependencies = [
('bookwyrm', '0010_importjob_retry'),
]
operations = [
migrations.AddField(
model_name='author',
name='origin_id',
field=models.CharField(max_length=255, null=True),
),
migrations.AddField(
model_name='book',
name='origin_id',
field=models.CharField(max_length=255, null=True),
),
migrations.RunPython(set_origin_id),
]

View file

@ -68,9 +68,7 @@ class ActivitypubMixin:
if not hasattr(self, mapping.model_key) or not mapping.activity_key:
continue
value = getattr(self, mapping.model_key)
if hasattr(value, 'local_id'):
value = value.local_id
elif hasattr(value, 'remote_id'):
if hasattr(value, 'remote_id'):
value = value.remote_id
if isinstance(value, datetime):
value = value.isoformat()

View file

@ -15,6 +15,7 @@ from .base_model import ActivitypubMixin, OrderedCollectionPageMixin
class Book(ActivitypubMixin, BookWyrmModel):
''' a generic book, which can mean either an edition or a work '''
origin_id = models.CharField(max_length=255, null=True)
# these identifiers apply to both works and editions
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
librarything_key = models.CharField(max_length=255, blank=True, null=True)
@ -57,7 +58,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
@property
def ap_authors(self):
''' the activitypub serialization should be a list of author ids '''
return [a.local_id for a in self.authors.all()]
return [a.remote_id for a in self.authors.all()]
@property
def ap_cover(self):
@ -71,10 +72,10 @@ class Book(ActivitypubMixin, BookWyrmModel):
@property
def ap_parent_work(self):
''' reference the work via local id not remote '''
return self.parent_work.local_id
return self.parent_work.remote_id
activity_mappings = [
ActivityMapping('id', 'local_id'),
ActivityMapping('id', 'remote_id'),
ActivityMapping('authors', 'ap_authors'),
ActivityMapping('first_published_date', 'first_published_date'),
@ -112,6 +113,8 @@ class Book(ActivitypubMixin, BookWyrmModel):
''' can't be abstract for query reasons, but you shouldn't USE it '''
if not isinstance(self, Edition) and not isinstance(self, Work):
raise ValueError('Books should be added as Editions or Works')
if self.id and not self.remote_id:
self.remote_id = self.get_remote_id()
super().save(*args, **kwargs)
@ -119,15 +122,6 @@ class Book(ActivitypubMixin, BookWyrmModel):
''' editions and works both use "book" instead of model_name '''
return 'https://%s/book/%d' % (DOMAIN, self.id)
@property
def local_id(self):
''' when a book is ingested from an outside source, it becomes local to
an instance, so it needs a local url for federation. but it still needs
the remote_id for easier deduplication and, if appropriate, to sync with
the remote canonical copy '''
return 'https://%s/book/%d' % (DOMAIN, self.id)
def __repr__(self):
return "<{} key={!r} title={!r}>".format(
self.__class__,
@ -152,14 +146,14 @@ class Work(OrderedCollectionPageMixin, Book):
''' it'd be nice to serialize the edition instead but, recursion '''
default = self.default_edition
ed_list = [
e.local_id for e in self.edition_set.filter(~Q(id=default.id)).all()
e.remote_id for e in self.edition_set.filter(~Q(id=default.id)).all()
]
return [default.local_id] + ed_list
return [default.remote_id] + ed_list
def to_edition_list(self, **kwargs):
''' activitypub serialization for this work's editions '''
remote_id = self.local_id + '/editions'
remote_id = self.remote_id + '/editions'
return self.to_ordered_collection(
self.edition_set,
remote_id=remote_id,
@ -246,6 +240,7 @@ def isbn_13_to_10(isbn_13):
class Author(ActivitypubMixin, BookWyrmModel):
origin_id = models.CharField(max_length=255, null=True)
''' copy of an author from OL '''
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
sync = models.BooleanField(default=True)
@ -262,14 +257,6 @@ class Author(ActivitypubMixin, BookWyrmModel):
)
bio = models.TextField(null=True, blank=True)
@property
def local_id(self):
''' when a book is ingested from an outside source, it becomes local to
an instance, so it needs a local url for federation. but it still needs
the remote_id for easier deduplication and, if appropriate, to sync with
the remote canonical copy (ditto here for author)'''
return 'https://%s/author/%d' % (DOMAIN, self.id)
@property
def display_name(self):
''' Helper to return a displayable name'''
@ -281,7 +268,7 @@ class Author(ActivitypubMixin, BookWyrmModel):
return self.last_name or self.first_name
activity_mappings = [
ActivityMapping('id', 'local_id'),
ActivityMapping('id', 'remote_id'),
ActivityMapping('name', 'display_name'),
ActivityMapping('born', 'born'),
ActivityMapping('died', 'died'),

View file

@ -94,10 +94,8 @@ class ImportItem(models.Model):
search_term, min_confidence=0.995
)
if search_result:
try:
# raises ConnectorException
return books_manager.get_or_create_book(search_result.key)
except ConnectorException:
pass
return None

View file

@ -64,7 +64,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
tags = []
for book in self.mention_books.all():
tags.append(activitypub.Link(
href=book.local_id,
href=book.remote_id,
name=book.title,
type='Book'
))
@ -160,7 +160,7 @@ class GeneratedNote(Status):
''' indicate the book in question for mastodon (or w/e) users '''
message = self.content
books = ', '.join(
'<a href="%s">"%s"</a>' % (self.book.local_id, self.book.title) \
'<a href="%s">"%s"</a>' % (self.book.remote_id, self.book.title) \
for book in self.mention_books.all()
)
return '%s %s' % (message, books)
@ -177,7 +177,7 @@ class Comment(Status):
def ap_pure_content(self):
''' indicate the book in question for mastodon (or w/e) users '''
return self.content + '<br><br>(comment on <a href="%s">"%s"</a>)' % \
(self.book.local_id, self.book.title)
(self.book.remote_id, self.book.title)
activity_serializer = activitypub.Comment
pure_activity_serializer = activitypub.Note
@ -193,7 +193,7 @@ class Quotation(Status):
''' indicate the book in question for mastodon (or w/e) users '''
return '"%s"<br>-- <a href="%s">"%s"</a><br><br>%s' % (
self.quote,
self.book.local_id,
self.book.remote_id,
self.book.title,
self.content,
)
@ -231,7 +231,7 @@ class Review(Status):
def ap_pure_content(self):
''' indicate the book in question for mastodon (or w/e) users '''
return self.content + '<br><br>(<a href="%s">"%s"</a>)' % \
(self.book.local_id, self.book.title)
(self.book.remote_id, self.book.title)
activity_serializer = activitypub.Review
pure_activity_serializer = activitypub.Article

View file

@ -45,7 +45,6 @@
</label>
<p>
{{ item.fail_reason }}.
<a href="/create-book/{{ item.id }}">Manually add book</a>
</p>
</li>
{% endfor %}

View file

@ -22,15 +22,10 @@ class Book(TestCase):
)
def test_remote_id(self):
local_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.get_remote_id(), local_id)
remote_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.get_remote_id(), remote_id)
self.assertEqual(self.work.remote_id, 'https://example.com/book/1')
def test_local_id(self):
''' the local_id property for books '''
expected_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.local_id, expected_id)
def test_create_book(self):
''' you shouldn't be able to create Books (only editions and works) '''
self.assertRaises(