fixes import matching with local books

This commit is contained in:
Mouse Reeve 2020-11-13 09:47:35 -08:00
parent e3a803b907
commit 86504989b4
11 changed files with 63 additions and 50 deletions

View file

@ -75,7 +75,7 @@ class AbstractConnector(ABC):
''' pull up a book record by whatever means possible ''' ''' pull up a book record by whatever means possible '''
# try to load the book # try to load the book
book = models.Book.objects.select_subclasses().filter( book = models.Book.objects.select_subclasses().filter(
remote_id=remote_id origin_id=remote_id
).first() ).first()
if book: if book:
if isinstance(book, models.Work): if isinstance(book, models.Work):
@ -148,7 +148,7 @@ class AbstractConnector(ABC):
def create_book(self, remote_id, data, model): def create_book(self, remote_id, data, model):
''' create a work or edition from data ''' ''' create a work or edition from data '''
book = model.objects.create( book = model.objects.create(
remote_id=remote_id, origin_id=remote_id,
title=data['title'], title=data['title'],
connector=self.connector, connector=self.connector,
) )

View file

@ -99,14 +99,14 @@ class Connector(AbstractConnector):
def get_or_create_author(self, remote_id): def get_or_create_author(self, remote_id):
''' load that author ''' ''' load that author '''
try: try:
return models.Author.objects.get(remote_id=remote_id) return models.Author.objects.get(origin_id=remote_id)
except ObjectDoesNotExist: except ObjectDoesNotExist:
pass pass
data = get_data(remote_id) data = get_data(remote_id)
# ingest a new author # ingest a new author
author = models.Author(remote_id=remote_id) author = models.Author(origin_id=remote_id)
author = update_from_mappings(author, data, self.author_mappings) author = update_from_mappings(author, data, self.author_mappings)
author.save() author.save()

View file

@ -47,7 +47,7 @@ class Connector(AbstractConnector):
def format_search_result(self, search_result): def format_search_result(self, search_result):
return SearchResult( return SearchResult(
title=search_result.title, title=search_result.title,
key=search_result.local_id, key=search_result.remote_id,
author=search_result.author_text, author=search_result.author_text,
year=search_result.published_date.year if \ year=search_result.published_date.year if \
search_result.published_date else None, search_result.published_date else None,

View file

@ -1,11 +1,13 @@
''' handle reading a csv from goodreads ''' ''' handle reading a csv from goodreads '''
import csv import csv
import logging
from bookwyrm import outgoing from bookwyrm import outgoing
from bookwyrm.tasks import app from bookwyrm.tasks import app
from bookwyrm.models import ImportJob, ImportItem from bookwyrm.models import ImportJob, ImportItem
from bookwyrm.status import create_notification from bookwyrm.status import create_notification
logger = logging.getLogger(__name__)
# TODO: remove or increase once we're confident it's not causing problems. # TODO: remove or increase once we're confident it's not causing problems.
MAX_ENTRIES = 500 MAX_ENTRIES = 500
@ -51,7 +53,8 @@ def import_data(job_id):
for item in job.items.all(): for item in job.items.all():
try: try:
item.resolve() item.resolve()
except: except Exception as e:
logger.exception(e)
item.fail_reason = 'Error loading book' item.fail_reason = 'Error loading book'
item.save() item.save()
continue continue

View file

@ -0,0 +1,33 @@
# Generated by Django 3.0.7 on 2020-11-13 17:27
from django.db import migrations, models
def set_origin_id(app_registry, schema_editor):
db_alias = schema_editor.connection.alias
books = app_registry.get_model('bookwyrm', 'Book').objects.using(db_alias)
for book in books:
book.origin_id = book.remote_id
# the remote_id will be set automatically
book.remote_id = None
book.save()
class Migration(migrations.Migration):
dependencies = [
('bookwyrm', '0010_importjob_retry'),
]
operations = [
migrations.AddField(
model_name='author',
name='origin_id',
field=models.CharField(max_length=255, null=True),
),
migrations.AddField(
model_name='book',
name='origin_id',
field=models.CharField(max_length=255, null=True),
),
migrations.RunPython(set_origin_id),
]

View file

@ -68,9 +68,7 @@ class ActivitypubMixin:
if not hasattr(self, mapping.model_key) or not mapping.activity_key: if not hasattr(self, mapping.model_key) or not mapping.activity_key:
continue continue
value = getattr(self, mapping.model_key) value = getattr(self, mapping.model_key)
if hasattr(value, 'local_id'): if hasattr(value, 'remote_id'):
value = value.local_id
elif hasattr(value, 'remote_id'):
value = value.remote_id value = value.remote_id
if isinstance(value, datetime): if isinstance(value, datetime):
value = value.isoformat() value = value.isoformat()

View file

@ -15,6 +15,7 @@ from .base_model import ActivitypubMixin, OrderedCollectionPageMixin
class Book(ActivitypubMixin, BookWyrmModel): class Book(ActivitypubMixin, BookWyrmModel):
''' a generic book, which can mean either an edition or a work ''' ''' a generic book, which can mean either an edition or a work '''
origin_id = models.CharField(max_length=255, null=True)
# these identifiers apply to both works and editions # these identifiers apply to both works and editions
openlibrary_key = models.CharField(max_length=255, blank=True, null=True) openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
librarything_key = models.CharField(max_length=255, blank=True, null=True) librarything_key = models.CharField(max_length=255, blank=True, null=True)
@ -57,7 +58,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
@property @property
def ap_authors(self): def ap_authors(self):
''' the activitypub serialization should be a list of author ids ''' ''' the activitypub serialization should be a list of author ids '''
return [a.local_id for a in self.authors.all()] return [a.remote_id for a in self.authors.all()]
@property @property
def ap_cover(self): def ap_cover(self):
@ -71,10 +72,10 @@ class Book(ActivitypubMixin, BookWyrmModel):
@property @property
def ap_parent_work(self): def ap_parent_work(self):
''' reference the work via local id not remote ''' ''' reference the work via local id not remote '''
return self.parent_work.local_id return self.parent_work.remote_id
activity_mappings = [ activity_mappings = [
ActivityMapping('id', 'local_id'), ActivityMapping('id', 'remote_id'),
ActivityMapping('authors', 'ap_authors'), ActivityMapping('authors', 'ap_authors'),
ActivityMapping('first_published_date', 'first_published_date'), ActivityMapping('first_published_date', 'first_published_date'),
@ -112,6 +113,8 @@ class Book(ActivitypubMixin, BookWyrmModel):
''' can't be abstract for query reasons, but you shouldn't USE it ''' ''' can't be abstract for query reasons, but you shouldn't USE it '''
if not isinstance(self, Edition) and not isinstance(self, Work): if not isinstance(self, Edition) and not isinstance(self, Work):
raise ValueError('Books should be added as Editions or Works') raise ValueError('Books should be added as Editions or Works')
if self.id and not self.remote_id:
self.remote_id = self.get_remote_id()
super().save(*args, **kwargs) super().save(*args, **kwargs)
@ -119,15 +122,6 @@ class Book(ActivitypubMixin, BookWyrmModel):
''' editions and works both use "book" instead of model_name ''' ''' editions and works both use "book" instead of model_name '''
return 'https://%s/book/%d' % (DOMAIN, self.id) return 'https://%s/book/%d' % (DOMAIN, self.id)
@property
def local_id(self):
''' when a book is ingested from an outside source, it becomes local to
an instance, so it needs a local url for federation. but it still needs
the remote_id for easier deduplication and, if appropriate, to sync with
the remote canonical copy '''
return 'https://%s/book/%d' % (DOMAIN, self.id)
def __repr__(self): def __repr__(self):
return "<{} key={!r} title={!r}>".format( return "<{} key={!r} title={!r}>".format(
self.__class__, self.__class__,
@ -152,14 +146,14 @@ class Work(OrderedCollectionPageMixin, Book):
''' it'd be nice to serialize the edition instead but, recursion ''' ''' it'd be nice to serialize the edition instead but, recursion '''
default = self.default_edition default = self.default_edition
ed_list = [ ed_list = [
e.local_id for e in self.edition_set.filter(~Q(id=default.id)).all() e.remote_id for e in self.edition_set.filter(~Q(id=default.id)).all()
] ]
return [default.local_id] + ed_list return [default.remote_id] + ed_list
def to_edition_list(self, **kwargs): def to_edition_list(self, **kwargs):
''' activitypub serialization for this work's editions ''' ''' activitypub serialization for this work's editions '''
remote_id = self.local_id + '/editions' remote_id = self.remote_id + '/editions'
return self.to_ordered_collection( return self.to_ordered_collection(
self.edition_set, self.edition_set,
remote_id=remote_id, remote_id=remote_id,
@ -246,6 +240,7 @@ def isbn_13_to_10(isbn_13):
class Author(ActivitypubMixin, BookWyrmModel): class Author(ActivitypubMixin, BookWyrmModel):
origin_id = models.CharField(max_length=255, null=True)
''' copy of an author from OL ''' ''' copy of an author from OL '''
openlibrary_key = models.CharField(max_length=255, blank=True, null=True) openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
sync = models.BooleanField(default=True) sync = models.BooleanField(default=True)
@ -262,14 +257,6 @@ class Author(ActivitypubMixin, BookWyrmModel):
) )
bio = models.TextField(null=True, blank=True) bio = models.TextField(null=True, blank=True)
@property
def local_id(self):
''' when a book is ingested from an outside source, it becomes local to
an instance, so it needs a local url for federation. but it still needs
the remote_id for easier deduplication and, if appropriate, to sync with
the remote canonical copy (ditto here for author)'''
return 'https://%s/author/%d' % (DOMAIN, self.id)
@property @property
def display_name(self): def display_name(self):
''' Helper to return a displayable name''' ''' Helper to return a displayable name'''
@ -281,7 +268,7 @@ class Author(ActivitypubMixin, BookWyrmModel):
return self.last_name or self.first_name return self.last_name or self.first_name
activity_mappings = [ activity_mappings = [
ActivityMapping('id', 'local_id'), ActivityMapping('id', 'remote_id'),
ActivityMapping('name', 'display_name'), ActivityMapping('name', 'display_name'),
ActivityMapping('born', 'born'), ActivityMapping('born', 'born'),
ActivityMapping('died', 'died'), ActivityMapping('died', 'died'),

View file

@ -94,10 +94,8 @@ class ImportItem(models.Model):
search_term, min_confidence=0.995 search_term, min_confidence=0.995
) )
if search_result: if search_result:
try: # raises ConnectorException
return books_manager.get_or_create_book(search_result.key) return books_manager.get_or_create_book(search_result.key)
except ConnectorException:
pass
return None return None

View file

@ -64,7 +64,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
tags = [] tags = []
for book in self.mention_books.all(): for book in self.mention_books.all():
tags.append(activitypub.Link( tags.append(activitypub.Link(
href=book.local_id, href=book.remote_id,
name=book.title, name=book.title,
type='Book' type='Book'
)) ))
@ -160,7 +160,7 @@ class GeneratedNote(Status):
''' indicate the book in question for mastodon (or w/e) users ''' ''' indicate the book in question for mastodon (or w/e) users '''
message = self.content message = self.content
books = ', '.join( books = ', '.join(
'<a href="%s">"%s"</a>' % (self.book.local_id, self.book.title) \ '<a href="%s">"%s"</a>' % (self.book.remote_id, self.book.title) \
for book in self.mention_books.all() for book in self.mention_books.all()
) )
return '%s %s' % (message, books) return '%s %s' % (message, books)
@ -177,7 +177,7 @@ class Comment(Status):
def ap_pure_content(self): def ap_pure_content(self):
''' indicate the book in question for mastodon (or w/e) users ''' ''' indicate the book in question for mastodon (or w/e) users '''
return self.content + '<br><br>(comment on <a href="%s">"%s"</a>)' % \ return self.content + '<br><br>(comment on <a href="%s">"%s"</a>)' % \
(self.book.local_id, self.book.title) (self.book.remote_id, self.book.title)
activity_serializer = activitypub.Comment activity_serializer = activitypub.Comment
pure_activity_serializer = activitypub.Note pure_activity_serializer = activitypub.Note
@ -193,7 +193,7 @@ class Quotation(Status):
''' indicate the book in question for mastodon (or w/e) users ''' ''' indicate the book in question for mastodon (or w/e) users '''
return '"%s"<br>-- <a href="%s">"%s"</a><br><br>%s' % ( return '"%s"<br>-- <a href="%s">"%s"</a><br><br>%s' % (
self.quote, self.quote,
self.book.local_id, self.book.remote_id,
self.book.title, self.book.title,
self.content, self.content,
) )
@ -231,7 +231,7 @@ class Review(Status):
def ap_pure_content(self): def ap_pure_content(self):
''' indicate the book in question for mastodon (or w/e) users ''' ''' indicate the book in question for mastodon (or w/e) users '''
return self.content + '<br><br>(<a href="%s">"%s"</a>)' % \ return self.content + '<br><br>(<a href="%s">"%s"</a>)' % \
(self.book.local_id, self.book.title) (self.book.remote_id, self.book.title)
activity_serializer = activitypub.Review activity_serializer = activitypub.Review
pure_activity_serializer = activitypub.Article pure_activity_serializer = activitypub.Article

View file

@ -45,7 +45,6 @@
</label> </label>
<p> <p>
{{ item.fail_reason }}. {{ item.fail_reason }}.
<a href="/create-book/{{ item.id }}">Manually add book</a>
</p> </p>
</li> </li>
{% endfor %} {% endfor %}

View file

@ -22,15 +22,10 @@ class Book(TestCase):
) )
def test_remote_id(self): def test_remote_id(self):
local_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id) remote_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.get_remote_id(), local_id) self.assertEqual(self.work.get_remote_id(), remote_id)
self.assertEqual(self.work.remote_id, 'https://example.com/book/1') self.assertEqual(self.work.remote_id, 'https://example.com/book/1')
def test_local_id(self):
''' the local_id property for books '''
expected_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.local_id, expected_id)
def test_create_book(self): def test_create_book(self):
''' you shouldn't be able to create Books (only editions and works) ''' ''' you shouldn't be able to create Books (only editions and works) '''
self.assertRaises( self.assertRaises(