Merge branch 'main' into production

This commit is contained in:
Mouse Reeve 2020-11-13 11:48:55 -08:00
commit 8f595b6a35
16 changed files with 188 additions and 79 deletions

View file

@ -4,6 +4,7 @@ import sys
from .base_activity import ActivityEncoder, Image, PublicKey, Signature from .base_activity import ActivityEncoder, Image, PublicKey, Signature
from .base_activity import Link, Mention from .base_activity import Link, Mention
from .base_activity import ActivitySerializerError
from .note import Note, GeneratedNote, Article, Comment, Review, Quotation from .note import Note, GeneratedNote, Article, Comment, Review, Quotation
from .note import Tombstone from .note import Tombstone
from .interaction import Boost, Like from .interaction import Boost, Like

View file

@ -75,7 +75,7 @@ class AbstractConnector(ABC):
''' pull up a book record by whatever means possible ''' ''' pull up a book record by whatever means possible '''
# try to load the book # try to load the book
book = models.Book.objects.select_subclasses().filter( book = models.Book.objects.select_subclasses().filter(
remote_id=remote_id origin_id=remote_id
).first() ).first()
if book: if book:
if isinstance(book, models.Work): if isinstance(book, models.Work):
@ -148,7 +148,7 @@ class AbstractConnector(ABC):
def create_book(self, remote_id, data, model): def create_book(self, remote_id, data, model):
''' create a work or edition from data ''' ''' create a work or edition from data '''
book = model.objects.create( book = model.objects.create(
remote_id=remote_id, origin_id=remote_id,
title=data['title'], title=data['title'],
connector=self.connector, connector=self.connector,
) )

View file

@ -99,14 +99,14 @@ class Connector(AbstractConnector):
def get_or_create_author(self, remote_id): def get_or_create_author(self, remote_id):
''' load that author ''' ''' load that author '''
try: try:
return models.Author.objects.get(remote_id=remote_id) return models.Author.objects.get(origin_id=remote_id)
except ObjectDoesNotExist: except ObjectDoesNotExist:
pass pass
data = get_data(remote_id) data = get_data(remote_id)
# ingest a new author # ingest a new author
author = models.Author(remote_id=remote_id) author = models.Author(origin_id=remote_id)
author = update_from_mappings(author, data, self.author_mappings) author = update_from_mappings(author, data, self.author_mappings)
author.save() author.save()

View file

@ -13,16 +13,16 @@ class Connector(AbstractConnector):
that gets implemented it will totally rule ''' that gets implemented it will totally rule '''
vector = SearchVector('title', weight='A') +\ vector = SearchVector('title', weight='A') +\
SearchVector('subtitle', weight='B') +\ SearchVector('subtitle', weight='B') +\
SearchVector('author_text', weight='A') +\ SearchVector('author_text', weight='C') +\
SearchVector('isbn_13', weight='A') +\ SearchVector('isbn_13', weight='A') +\
SearchVector('isbn_10', weight='A') +\ SearchVector('isbn_10', weight='A') +\
SearchVector('openlibrary_key', weight='B') +\ SearchVector('openlibrary_key', weight='C') +\
SearchVector('goodreads_key', weight='B') +\ SearchVector('goodreads_key', weight='C') +\
SearchVector('asin', weight='B') +\ SearchVector('asin', weight='C') +\
SearchVector('oclc_number', weight='B') +\ SearchVector('oclc_number', weight='C') +\
SearchVector('remote_id', weight='B') +\ SearchVector('remote_id', weight='C') +\
SearchVector('description', weight='C') +\ SearchVector('description', weight='D') +\
SearchVector('series', weight='C') SearchVector('series', weight='D')
results = models.Edition.objects.annotate( results = models.Edition.objects.annotate(
search=vector search=vector
@ -47,7 +47,7 @@ class Connector(AbstractConnector):
def format_search_result(self, search_result): def format_search_result(self, search_result):
return SearchResult( return SearchResult(
title=search_result.title, title=search_result.title,
key=search_result.local_id, key=search_result.remote_id,
author=search_result.author_text, author=search_result.author_text,
year=search_result.published_date.year if \ year=search_result.published_date.year if \
search_result.published_date else None, search_result.published_date else None,

View file

@ -1,12 +1,13 @@
''' handle reading a csv from goodreads ''' ''' handle reading a csv from goodreads '''
import csv import csv
from requests import HTTPError import logging
from bookwyrm import outgoing from bookwyrm import outgoing
from bookwyrm.tasks import app from bookwyrm.tasks import app
from bookwyrm.models import ImportJob, ImportItem from bookwyrm.models import ImportJob, ImportItem
from bookwyrm.status import create_notification from bookwyrm.status import create_notification
logger = logging.getLogger(__name__)
# TODO: remove or increase once we're confident it's not causing problems. # TODO: remove or increase once we're confident it's not causing problems.
MAX_ENTRIES = 500 MAX_ENTRIES = 500
@ -24,6 +25,17 @@ def create_job(user, csv_file, include_reviews, privacy):
ImportItem(job=job, index=index, data=entry).save() ImportItem(job=job, index=index, data=entry).save()
return job return job
def create_retry_job(user, original_job, items):
''' retry items that didn't import '''
job = ImportJob.objects.create(
user=user,
include_reviews=original_job.include_reviews,
privacy=original_job.privacy,
retry=True
)
for item in items:
ImportItem(job=job, index=item.index, data=item.data).save()
return job
def start_import(job): def start_import(job):
''' initalizes a csv import job ''' ''' initalizes a csv import job '''
@ -41,7 +53,8 @@ def import_data(job_id):
for item in job.items.all(): for item in job.items.all():
try: try:
item.resolve() item.resolve()
except: except Exception as e:
logger.exception(e)
item.fail_reason = 'Error loading book' item.fail_reason = 'Error loading book'
item.save() item.save()
continue continue

View file

@ -0,0 +1,18 @@
# Generated by Django 3.0.7 on 2020-11-13 15:54
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('bookwyrm', '0009_shelf_privacy'),
]
operations = [
migrations.AddField(
model_name='importjob',
name='retry',
field=models.BooleanField(default=False),
),
]

View file

@ -0,0 +1,33 @@
# Generated by Django 3.0.7 on 2020-11-13 17:27
from django.db import migrations, models
def set_origin_id(app_registry, schema_editor):
db_alias = schema_editor.connection.alias
books = app_registry.get_model('bookwyrm', 'Book').objects.using(db_alias)
for book in books:
book.origin_id = book.remote_id
# the remote_id will be set automatically
book.remote_id = None
book.save()
class Migration(migrations.Migration):
dependencies = [
('bookwyrm', '0010_importjob_retry'),
]
operations = [
migrations.AddField(
model_name='author',
name='origin_id',
field=models.CharField(max_length=255, null=True),
),
migrations.AddField(
model_name='book',
name='origin_id',
field=models.CharField(max_length=255, null=True),
),
migrations.RunPython(set_origin_id),
]

View file

@ -68,9 +68,7 @@ class ActivitypubMixin:
if not hasattr(self, mapping.model_key) or not mapping.activity_key: if not hasattr(self, mapping.model_key) or not mapping.activity_key:
continue continue
value = getattr(self, mapping.model_key) value = getattr(self, mapping.model_key)
if hasattr(value, 'local_id'): if hasattr(value, 'remote_id'):
value = value.local_id
elif hasattr(value, 'remote_id'):
value = value.remote_id value = value.remote_id
if isinstance(value, datetime): if isinstance(value, datetime):
value = value.isoformat() value = value.isoformat()

View file

@ -15,6 +15,7 @@ from .base_model import ActivitypubMixin, OrderedCollectionPageMixin
class Book(ActivitypubMixin, BookWyrmModel): class Book(ActivitypubMixin, BookWyrmModel):
''' a generic book, which can mean either an edition or a work ''' ''' a generic book, which can mean either an edition or a work '''
origin_id = models.CharField(max_length=255, null=True)
# these identifiers apply to both works and editions # these identifiers apply to both works and editions
openlibrary_key = models.CharField(max_length=255, blank=True, null=True) openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
librarything_key = models.CharField(max_length=255, blank=True, null=True) librarything_key = models.CharField(max_length=255, blank=True, null=True)
@ -57,7 +58,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
@property @property
def ap_authors(self): def ap_authors(self):
''' the activitypub serialization should be a list of author ids ''' ''' the activitypub serialization should be a list of author ids '''
return [a.local_id for a in self.authors.all()] return [a.remote_id for a in self.authors.all()]
@property @property
def ap_cover(self): def ap_cover(self):
@ -71,10 +72,10 @@ class Book(ActivitypubMixin, BookWyrmModel):
@property @property
def ap_parent_work(self): def ap_parent_work(self):
''' reference the work via local id not remote ''' ''' reference the work via local id not remote '''
return self.parent_work.local_id return self.parent_work.remote_id
activity_mappings = [ activity_mappings = [
ActivityMapping('id', 'local_id'), ActivityMapping('id', 'remote_id'),
ActivityMapping('authors', 'ap_authors'), ActivityMapping('authors', 'ap_authors'),
ActivityMapping('first_published_date', 'first_published_date'), ActivityMapping('first_published_date', 'first_published_date'),
@ -112,6 +113,8 @@ class Book(ActivitypubMixin, BookWyrmModel):
''' can't be abstract for query reasons, but you shouldn't USE it ''' ''' can't be abstract for query reasons, but you shouldn't USE it '''
if not isinstance(self, Edition) and not isinstance(self, Work): if not isinstance(self, Edition) and not isinstance(self, Work):
raise ValueError('Books should be added as Editions or Works') raise ValueError('Books should be added as Editions or Works')
if self.id and not self.remote_id:
self.remote_id = self.get_remote_id()
super().save(*args, **kwargs) super().save(*args, **kwargs)
@ -119,15 +122,6 @@ class Book(ActivitypubMixin, BookWyrmModel):
''' editions and works both use "book" instead of model_name ''' ''' editions and works both use "book" instead of model_name '''
return 'https://%s/book/%d' % (DOMAIN, self.id) return 'https://%s/book/%d' % (DOMAIN, self.id)
@property
def local_id(self):
''' when a book is ingested from an outside source, it becomes local to
an instance, so it needs a local url for federation. but it still needs
the remote_id for easier deduplication and, if appropriate, to sync with
the remote canonical copy '''
return 'https://%s/book/%d' % (DOMAIN, self.id)
def __repr__(self): def __repr__(self):
return "<{} key={!r} title={!r}>".format( return "<{} key={!r} title={!r}>".format(
self.__class__, self.__class__,
@ -152,14 +146,14 @@ class Work(OrderedCollectionPageMixin, Book):
''' it'd be nice to serialize the edition instead but, recursion ''' ''' it'd be nice to serialize the edition instead but, recursion '''
default = self.default_edition default = self.default_edition
ed_list = [ ed_list = [
e.local_id for e in self.edition_set.filter(~Q(id=default.id)).all() e.remote_id for e in self.edition_set.filter(~Q(id=default.id)).all()
] ]
return [default.local_id] + ed_list return [default.remote_id] + ed_list
def to_edition_list(self, **kwargs): def to_edition_list(self, **kwargs):
''' activitypub serialization for this work's editions ''' ''' activitypub serialization for this work's editions '''
remote_id = self.local_id + '/editions' remote_id = self.remote_id + '/editions'
return self.to_ordered_collection( return self.to_ordered_collection(
self.edition_set, self.edition_set,
remote_id=remote_id, remote_id=remote_id,
@ -246,6 +240,7 @@ def isbn_13_to_10(isbn_13):
class Author(ActivitypubMixin, BookWyrmModel): class Author(ActivitypubMixin, BookWyrmModel):
origin_id = models.CharField(max_length=255, null=True)
''' copy of an author from OL ''' ''' copy of an author from OL '''
openlibrary_key = models.CharField(max_length=255, blank=True, null=True) openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
sync = models.BooleanField(default=True) sync = models.BooleanField(default=True)
@ -262,14 +257,6 @@ class Author(ActivitypubMixin, BookWyrmModel):
) )
bio = models.TextField(null=True, blank=True) bio = models.TextField(null=True, blank=True)
@property
def local_id(self):
''' when a book is ingested from an outside source, it becomes local to
an instance, so it needs a local url for federation. but it still needs
the remote_id for easier deduplication and, if appropriate, to sync with
the remote canonical copy (ditto here for author)'''
return 'https://%s/author/%d' % (DOMAIN, self.id)
@property @property
def display_name(self): def display_name(self):
''' Helper to return a displayable name''' ''' Helper to return a displayable name'''
@ -281,7 +268,7 @@ class Author(ActivitypubMixin, BookWyrmModel):
return self.last_name or self.first_name return self.last_name or self.first_name
activity_mappings = [ activity_mappings = [
ActivityMapping('id', 'local_id'), ActivityMapping('id', 'remote_id'),
ActivityMapping('name', 'display_name'), ActivityMapping('name', 'display_name'),
ActivityMapping('born', 'born'), ActivityMapping('born', 'born'),
ActivityMapping('died', 'died'), ActivityMapping('died', 'died'),

View file

@ -48,6 +48,7 @@ class ImportJob(models.Model):
default='public', default='public',
choices=PrivacyLevels.choices choices=PrivacyLevels.choices
) )
retry = models.BooleanField(default=False)
class ImportItem(models.Model): class ImportItem(models.Model):
@ -72,14 +73,11 @@ class ImportItem(models.Model):
def get_book_from_isbn(self): def get_book_from_isbn(self):
''' search by isbn ''' ''' search by isbn '''
search_result = books_manager.first_search_result( search_result = books_manager.first_search_result(
self.isbn, min_confidence=0.995 self.isbn, min_confidence=0.999
) )
if search_result: if search_result:
try: # raises ConnectorException
# don't crash the import when the connector fails
return books_manager.get_or_create_book(search_result.key) return books_manager.get_or_create_book(search_result.key)
except ConnectorException:
pass
return None return None
@ -90,16 +88,24 @@ class ImportItem(models.Model):
self.data['Author'] self.data['Author']
) )
search_result = books_manager.first_search_result( search_result = books_manager.first_search_result(
search_term, min_confidence=0.995 search_term, min_confidence=0.999
) )
if search_result: if search_result:
try: # raises ConnectorException
return books_manager.get_or_create_book(search_result.key) return books_manager.get_or_create_book(search_result.key)
except ConnectorException:
pass
return None return None
@property
def title(self):
''' get the book title '''
return self.data['Title']
@property
def author(self):
''' get the book title '''
return self.data['Author']
@property @property
def isbn(self): def isbn(self):
''' pulls out the isbn13 field from the csv line data ''' ''' pulls out the isbn13 field from the csv line data '''

View file

@ -64,7 +64,7 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel):
tags = [] tags = []
for book in self.mention_books.all(): for book in self.mention_books.all():
tags.append(activitypub.Link( tags.append(activitypub.Link(
href=book.local_id, href=book.remote_id,
name=book.title, name=book.title,
type='Book' type='Book'
)) ))
@ -160,7 +160,7 @@ class GeneratedNote(Status):
''' indicate the book in question for mastodon (or w/e) users ''' ''' indicate the book in question for mastodon (or w/e) users '''
message = self.content message = self.content
books = ', '.join( books = ', '.join(
'<a href="%s">"%s"</a>' % (self.book.local_id, self.book.title) \ '<a href="%s">"%s"</a>' % (self.book.remote_id, self.book.title) \
for book in self.mention_books.all() for book in self.mention_books.all()
) )
return '%s %s' % (message, books) return '%s %s' % (message, books)
@ -177,7 +177,7 @@ class Comment(Status):
def ap_pure_content(self): def ap_pure_content(self):
''' indicate the book in question for mastodon (or w/e) users ''' ''' indicate the book in question for mastodon (or w/e) users '''
return self.content + '<br><br>(comment on <a href="%s">"%s"</a>)' % \ return self.content + '<br><br>(comment on <a href="%s">"%s"</a>)' % \
(self.book.local_id, self.book.title) (self.book.remote_id, self.book.title)
activity_serializer = activitypub.Comment activity_serializer = activitypub.Comment
pure_activity_serializer = activitypub.Note pure_activity_serializer = activitypub.Note
@ -193,7 +193,7 @@ class Quotation(Status):
''' indicate the book in question for mastodon (or w/e) users ''' ''' indicate the book in question for mastodon (or w/e) users '''
return '"%s"<br>-- <a href="%s">"%s"</a><br><br>%s' % ( return '"%s"<br>-- <a href="%s">"%s"</a><br><br>%s' % (
self.quote, self.quote,
self.book.local_id, self.book.remote_id,
self.book.title, self.book.title,
self.content, self.content,
) )
@ -231,7 +231,7 @@ class Review(Status):
def ap_pure_content(self): def ap_pure_content(self):
''' indicate the book in question for mastodon (or w/e) users ''' ''' indicate the book in question for mastodon (or w/e) users '''
return self.content + '<br><br>(<a href="%s">"%s"</a>)' % \ return self.content + '<br><br>(<a href="%s">"%s"</a>)' % \
(self.book.local_id, self.book.title) (self.book.remote_id, self.book.title)
activity_serializer = activitypub.Review activity_serializer = activitypub.Review
pure_activity_serializer = activitypub.Article pure_activity_serializer = activitypub.Article

View file

@ -20,6 +20,10 @@ function reply(e) {
return true; return true;
} }
function selectAll(el) {
el.parentElement.querySelectorAll('[type="checkbox"]')
.forEach(t => t.checked=true);
}
function rate_stars(e) { function rate_stars(e) {
e.preventDefault(); e.preventDefault();

View file

@ -29,16 +29,52 @@
{% if failed_items %} {% if failed_items %}
<div class="block"> <div class="block">
<h2 class="title is-4">Failed to load</h2> <h2 class="title is-4">Failed to load</h2>
{% if not job.retry %}
<form name="retry" action="/retry-import/" method="post">
{% csrf_token %}
<input type="hidden" name="import_job" value="{{ job.id }}">
<ul> <ul>
<fieldset>
{% for item in failed_items %} {% for item in failed_items %}
<li> <li class="pb-1">
<input class="checkbox" type="checkbox" name="import_item" value="{{ item.id }}" id="import-item-{{ item.id }}">
<label for="import-item-{{ item.id }}">
Line {{ item.index }}: Line {{ item.index }}:
<strong>{{ item.data|dict_key:'Title' }}</strong> by <strong>{{ item.data|dict_key:'Title' }}</strong> by
{{ item.data|dict_key:'Author' }} {{ item.data|dict_key:'Author' }}
({{ item.fail_reason }}) </label>
<p>
{{ item.fail_reason }}.
</p>
</li>
{% endfor %}
</fieldset>
</ul>
<div class="block pt-1" onclick="selectAll(this)">
<label class="label">
<input type="checkbox" class="checkbox">
Select all
</label>
</div>
<button class="button" type="submit">Retry items</button>
{% else %}
<ul>
{% for item in failed_items %}
<li class="pb-1">
<p>
Line {{ item.index }}:
<strong>{{ item.data|dict_key:'Title' }}</strong> by
{{ item.data|dict_key:'Author' }}
</p>
<p>
{{ item.fail_reason }}.
<a href="/create-book/{{ item.id }}">Manually add book</a>
</p>
</li> </li>
{% endfor %} {% endfor %}
</ul> </ul>
{% endif %}
</form>
</div> </div>
{% endif %} {% endif %}

View file

@ -22,15 +22,10 @@ class Book(TestCase):
) )
def test_remote_id(self): def test_remote_id(self):
local_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id) remote_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.get_remote_id(), local_id) self.assertEqual(self.work.get_remote_id(), remote_id)
self.assertEqual(self.work.remote_id, 'https://example.com/book/1') self.assertEqual(self.work.remote_id, 'https://example.com/book/1')
def test_local_id(self):
''' the local_id property for books '''
expected_id = 'https://%s/book/%d' % (settings.DOMAIN, self.work.id)
self.assertEqual(self.work.local_id, expected_id)
def test_create_book(self): def test_create_book(self):
''' you shouldn't be able to create Books (only editions and works) ''' ''' you shouldn't be able to create Books (only editions and works) '''
self.assertRaises( self.assertRaises(

View file

@ -54,9 +54,9 @@ urlpatterns = [
path('', views.home), path('', views.home),
re_path(r'^(?P<tab>home|local|federated)/?$', views.home_tab), re_path(r'^(?P<tab>home|local|federated)/?$', views.home_tab),
re_path(r'^notifications/?', views.notifications_page), re_path(r'^notifications/?', views.notifications_page),
re_path(r'import/?$', views.import_page), re_path(r'^import/?$', views.import_page),
re_path(r'import-status/(\d+)/?$', views.import_status), re_path(r'^import-status/(\d+)/?$', views.import_status),
re_path(r'user-edit/?$', views.edit_profile_page), re_path(r'^user-edit/?$', views.edit_profile_page),
# should return a ui view or activitypub json blob as requested # should return a ui view or activitypub json blob as requested
# users # users
@ -98,6 +98,7 @@ urlpatterns = [
re_path(r'^edit-profile/?$', actions.edit_profile), re_path(r'^edit-profile/?$', actions.edit_profile),
re_path(r'^import-data/?', actions.import_data), re_path(r'^import-data/?', actions.import_data),
re_path(r'^retry-import/?', actions.retry_import),
re_path(r'^resolve-book/?', actions.resolve_book), re_path(r'^resolve-book/?', actions.resolve_book),
re_path(r'^edit-book/(?P<book_id>\d+)/?', actions.edit_book), re_path(r'^edit-book/(?P<book_id>\d+)/?', actions.edit_book),
re_path(r'^upload-cover/(?P<book_id>\d+)/?', actions.upload_cover), re_path(r'^upload-cover/(?P<book_id>\d+)/?', actions.upload_cover),

View file

@ -662,10 +662,27 @@ def import_data(request):
except (UnicodeDecodeError, ValueError): except (UnicodeDecodeError, ValueError):
return HttpResponseBadRequest('Not a valid csv file') return HttpResponseBadRequest('Not a valid csv file')
goodreads_import.start_import(job) goodreads_import.start_import(job)
return redirect('/import-status/%d' % (job.id,)) return redirect('/import-status/%d' % job.id)
return HttpResponseBadRequest() return HttpResponseBadRequest()
@login_required
def retry_import(request):
''' ingest a goodreads csv '''
job = get_object_or_404(models.ImportJob, id=request.POST.get('import_job'))
items = []
for item in request.POST.getlist('import_item'):
items.append(get_object_or_404(models.ImportItem, id=item))
job = goodreads_import.create_retry_job(
request.user,
job,
items,
)
goodreads_import.start_import(job)
return redirect('/import-status/%d' % job.id)
@login_required @login_required
@permission_required('bookwyrm.create_invites', raise_exception=True) @permission_required('bookwyrm.create_invites', raise_exception=True)
def create_invite(request): def create_invite(request):