From a93b5cf5bc2505f382cd02d8f9821417e395e6b5 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 28 Nov 2020 10:18:24 -0800 Subject: [PATCH] Use remote_id resolver to load books, user --- bookwyrm/activitypub/__init__.py | 4 +- bookwyrm/activitypub/base_activity.py | 50 ++++++++++++++----- bookwyrm/activitypub/book.py | 26 +++++----- bookwyrm/activitypub/ordered_collection.py | 1 + bookwyrm/activitypub/verbs.py | 8 +++ bookwyrm/books_manager.py | 17 ------- bookwyrm/incoming.py | 14 +----- .../migrations/0016_auto_20201128_1804.py | 24 +++++++++ bookwyrm/models/base_model.py | 4 ++ bookwyrm/models/book.py | 7 +-- bookwyrm/models/shelf.py | 18 ++++++- bookwyrm/models/status.py | 2 - bookwyrm/models/user.py | 1 - bookwyrm/remote_user.py | 29 +---------- bookwyrm/views.py | 3 +- 15 files changed, 115 insertions(+), 93 deletions(-) create mode 100644 bookwyrm/migrations/0016_auto_20201128_1804.py diff --git a/bookwyrm/activitypub/__init__.py b/bookwyrm/activitypub/__init__.py index c97e8c49..eee9345d 100644 --- a/bookwyrm/activitypub/__init__.py +++ b/bookwyrm/activitypub/__init__.py @@ -4,7 +4,7 @@ import sys from .base_activity import ActivityEncoder, PublicKey, Signature from .base_activity import Link, Mention -from .base_activity import ActivitySerializerError +from .base_activity import ActivitySerializerError, resolve_remote_id from .image import Image from .note import Note, GeneratedNote, Article, Comment, Review, Quotation from .note import Tombstone @@ -14,7 +14,7 @@ from .person import Person from .book import Edition, Work, Author from .verbs import Create, Delete, Undo, Update from .verbs import Follow, Accept, Reject -from .verbs import Add, Remove +from .verbs import Add, AddBook, Remove # this creates a list of all the Activity types that we can serialize, # so when an Activity comes in from outside, we can check if it's known diff --git a/bookwyrm/activitypub/base_activity.py b/bookwyrm/activitypub/base_activity.py index e4d95b0e..dd46753f 100644 --- a/bookwyrm/activitypub/base_activity.py +++ b/bookwyrm/activitypub/base_activity.py @@ -3,15 +3,20 @@ from dataclasses import dataclass, fields, MISSING from json import JSONEncoder from uuid import uuid4 +import dateutil.parser +from dateutil.parser import ParserError from django.core.files.base import ContentFile from django.db import transaction from django.db.models.fields.related_descriptors \ import ForwardManyToOneDescriptor, ManyToManyDescriptor, \ ReverseManyToOneDescriptor +from django.db.models.fields import DateTimeField from django.db.models.fields.files import ImageFileDescriptor +from django.db.models.query_utils import DeferredAttribute +from django.utils import timezone import requests -from bookwyrm import books_manager, models +from bookwyrm import models class ActivitySerializerError(ValueError): @@ -106,11 +111,27 @@ class ActivityObject: model_field = getattr(model, mapping.model_key) formatted_value = mapping.model_formatter(value) - if isinstance(model_field, ForwardManyToOneDescriptor) and \ + if isinstance(model_field, DeferredAttribute) and \ + isinstance(model_field.field, DateTimeField): + print("DATE") + try: + formatted_value = timezone.make_aware( + dateutil.parser.parse(formatted_value) + ) + except ParserError: + formatted_value = None + elif isinstance(model_field, ForwardManyToOneDescriptor) and \ formatted_value: # foreign key remote id reolver (work on Edition, for example) fk_model = model_field.field.related_model - reference = resolve_foreign_key(fk_model, formatted_value) + if isinstance(formatted_value, dict) and \ + formatted_value.get('id'): + # if the AP field is a serialized object (as in Add) + remote_id = formatted_value['id'] + else: + # if the AP field is just a remote_id (as in every other case) + remote_id = formatted_value + reference = resolve_remote_id(fk_model, remote_id) mapped_fields[mapping.model_key] = reference elif isinstance(model_field, ManyToManyDescriptor): # status mentions book/users @@ -122,6 +143,8 @@ class ActivityObject: # image fields need custom handling image_fields[mapping.model_key] = formatted_value else: + if formatted_value == MISSING: + formatted_value = None mapped_fields[mapping.model_key] = formatted_value with transaction.atomic(): @@ -153,12 +176,15 @@ class ActivityObject: model = model_field.model items = [] for link in values: + # check that the Type matches the model (because Status + # tags contain both user mentions and book tags) + if not model.activity_serializer.type == link.get('type'): + continue items.append( - resolve_foreign_key(model, link.get('href')) + resolve_remote_id(model, link.get('href')) ) getattr(instance, model_key).set(items) - # add one to many fields for (model_key, values) in one_to_many_fields.items(): if values == MISSING: @@ -183,11 +209,8 @@ class ActivityObject: return data -def resolve_foreign_key(model, remote_id): - ''' look up the remote_id on an activity json field ''' - if model in [models.Edition, models.Work, models.Book]: - return books_manager.get_or_create_book(remote_id) - +def resolve_remote_id(model, remote_id, refresh=False): + ''' look up the remote_id in the database or load it remotely ''' result = model.objects if hasattr(model.objects, 'select_subclasses'): result = result.select_subclasses() @@ -196,10 +219,10 @@ def resolve_foreign_key(model, remote_id): result = result.filter( remote_id=remote_id ).first() - if result: + if result and not refresh: return result - # failing that, load the data and create the object + # load the data and create the object try: response = requests.get( remote_id, @@ -215,7 +238,8 @@ def resolve_foreign_key(model, remote_id): (model.__name__, remote_id)) item = model.activity_serializer(**response.json()) - return item.to_model(model) + # if we're refreshing, "result" will be set and we'll update it + return item.to_model(model, instance=result) def image_formatter(image_slug): diff --git a/bookwyrm/activitypub/book.py b/bookwyrm/activitypub/book.py index 02cab281..8a6b88d9 100644 --- a/bookwyrm/activitypub/book.py +++ b/bookwyrm/activitypub/book.py @@ -12,13 +12,13 @@ class Book(ActivityObject): sortTitle: str = '' subtitle: str = '' description: str = '' - languages: List[str] + languages: List[str] = field(default_factory=lambda: []) series: str = '' seriesNumber: str = '' - subjects: List[str] - subjectPlaces: List[str] + subjects: List[str] = field(default_factory=lambda: []) + subjectPlaces: List[str] = field(default_factory=lambda: []) - authors: List[str] + authors: List[str] = field(default_factory=lambda: []) firstPublishedDate: str = '' publishedDate: str = '' @@ -33,22 +33,22 @@ class Book(ActivityObject): @dataclass(init=False) class Edition(Book): ''' Edition instance of a book object ''' - isbn10: str - isbn13: str - oclcNumber: str - asin: str - pages: str - physicalFormat: str - publishers: List[str] - work: str + isbn10: str = '' + isbn13: str = '' + oclcNumber: str = '' + asin: str = '' + pages: str = '' + physicalFormat: str = '' + publishers: List[str] = field(default_factory=lambda: []) + type: str = 'Edition' @dataclass(init=False) class Work(Book): ''' work instance of a book object ''' - lccn: str + lccn: str = '' editions: List[str] type: str = 'Work' diff --git a/bookwyrm/activitypub/ordered_collection.py b/bookwyrm/activitypub/ordered_collection.py index efd23d5a..9aeaf664 100644 --- a/bookwyrm/activitypub/ordered_collection.py +++ b/bookwyrm/activitypub/ordered_collection.py @@ -12,6 +12,7 @@ class OrderedCollection(ActivityObject): first: str last: str = '' name: str = '' + owner: str = '' type: str = 'OrderedCollection' diff --git a/bookwyrm/activitypub/verbs.py b/bookwyrm/activitypub/verbs.py index eb166260..e890d81f 100644 --- a/bookwyrm/activitypub/verbs.py +++ b/bookwyrm/activitypub/verbs.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from typing import List from .base_activity import ActivityObject, Signature +from .book import Book @dataclass(init=False) class Verb(ActivityObject): @@ -69,6 +70,13 @@ class Add(Verb): type: str = 'Add' +@dataclass(init=False) +class AddBook(Verb): + '''Add activity that's aware of the book obj ''' + target: Book + type: str = 'Add' + + @dataclass(init=False) class Remove(Verb): '''Remove activity ''' diff --git a/bookwyrm/books_manager.py b/bookwyrm/books_manager.py index 461017a0..fb0a747e 100644 --- a/bookwyrm/books_manager.py +++ b/bookwyrm/books_manager.py @@ -16,23 +16,6 @@ def get_edition(book_id): return book -def get_or_create_book(remote_id): - ''' pull up a book record by whatever means possible ''' - book = models.Book.objects.select_subclasses().filter( - remote_id=remote_id - ).first() - if book: - return book - - connector = get_or_create_connector(remote_id) - - # raises ConnectorException - book = connector.get_or_create_book(remote_id) - if book: - load_more_data.delay(book.id) - return book - - def get_or_create_connector(remote_id): ''' get the connector related to the author's server ''' url = urlparse(remote_id) diff --git a/bookwyrm/incoming.py b/bookwyrm/incoming.py index 0e7c1856..9a348483 100644 --- a/bookwyrm/incoming.py +++ b/bookwyrm/incoming.py @@ -317,7 +317,7 @@ def handle_tag(activity): user = get_or_create_remote_user(activity['actor']) if not user.local: # ordered collection weirndess so we can't just to_model - book = books_manager.get_or_create_book(activity['object']['id']) + book = (activity['object']['id']) name = activity['object']['target'].split('/')[-1] name = unquote_plus(name) models.Tag.objects.get_or_create( @@ -330,17 +330,7 @@ def handle_tag(activity): @app.task def handle_shelve(activity): ''' putting a book on a shelf ''' - user = get_or_create_remote_user(activity['actor']) - book = books_manager.get_or_create_book(activity['object']) - try: - shelf = models.Shelf.objects.get(remote_id=activity['target']) - except models.Shelf.DoesNotExist: - return - if shelf.user != user: - # this doesn't add up. - return - shelf.books.add(book) - shelf.save() + activitypub.AddBook(**activity).to_model(models.ShelfBook) @app.task diff --git a/bookwyrm/migrations/0016_auto_20201128_1804.py b/bookwyrm/migrations/0016_auto_20201128_1804.py new file mode 100644 index 00000000..9becde82 --- /dev/null +++ b/bookwyrm/migrations/0016_auto_20201128_1804.py @@ -0,0 +1,24 @@ +# Generated by Django 3.0.7 on 2020-11-28 18:04 + +import bookwyrm.utils.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0015_auto_20201128_0349'), + ] + + operations = [ + migrations.AlterField( + model_name='book', + name='subject_places', + field=bookwyrm.utils.fields.ArrayField(base_field=models.CharField(max_length=255), blank=True, default=list, null=True, size=None), + ), + migrations.AlterField( + model_name='book', + name='subjects', + field=bookwyrm.utils.fields.ArrayField(base_field=models.CharField(max_length=255), blank=True, default=list, null=True, size=None), + ), + ] diff --git a/bookwyrm/models/base_model.py b/bookwyrm/models/base_model.py index 4109a49b..29301fb5 100644 --- a/bookwyrm/models/base_model.py +++ b/bookwyrm/models/base_model.py @@ -227,12 +227,16 @@ class OrderedCollectionPageMixin(ActivitypubMixin): name = '' if hasattr(self, 'name'): name = self.name + owner = '' + if hasattr(self, 'user'): + owner = self.user.remote_id size = queryset.count() return activitypub.OrderedCollection( id=remote_id, totalItems=size, name=name, + owner=owner, first='%s%s' % (remote_id, self.page()), last='%s%s' % (remote_id, self.page(min_id=0)) ).serialize() diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 132b4c07..8892119a 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -41,10 +41,10 @@ class Book(ActivitypubMixin, BookWyrmModel): series = models.CharField(max_length=255, blank=True, null=True) series_number = models.CharField(max_length=255, blank=True, null=True) subjects = ArrayField( - models.CharField(max_length=255), blank=True, default=list + models.CharField(max_length=255), blank=True, null=True, default=list ) subject_places = ArrayField( - models.CharField(max_length=255), blank=True, default=list + models.CharField(max_length=255), blank=True, null=True, default=list ) # TODO: include an annotation about the type of authorship (ie, translator) authors = models.ManyToManyField('Author') @@ -132,7 +132,8 @@ class Work(OrderedCollectionPageMixin, Book): ''' it'd be nice to serialize the edition instead but, recursion ''' default = self.default_edition ed_list = [ - e.remote_id for e in self.edition_set.filter(~Q(id=default.id)).all() + e.remote_id for e in \ + self.edition_set.filter(~Q(id=default.id)).all() ] return [default.remote_id] + ed_list diff --git a/bookwyrm/models/shelf.py b/bookwyrm/models/shelf.py index e85294ba..3f443f6d 100644 --- a/bookwyrm/models/shelf.py +++ b/bookwyrm/models/shelf.py @@ -3,7 +3,8 @@ import re from django.db import models from bookwyrm import activitypub -from .base_model import BookWyrmModel, OrderedCollectionMixin, PrivacyLevels +from .base_model import ActivityMapping, BookWyrmModel +from .base_model import OrderedCollectionMixin, PrivacyLevels class Shelf(OrderedCollectionMixin, BookWyrmModel): @@ -47,6 +48,12 @@ class Shelf(OrderedCollectionMixin, BookWyrmModel): ''' user/shelf unqiueness ''' unique_together = ('user', 'identifier') + activity_mappings = [ + ActivityMapping('id', 'remote_id'), + ActivityMapping('owner', 'user'), + ActivityMapping('name', 'name'), + ] + class ShelfBook(BookWyrmModel): ''' many to many join table for books and shelves ''' @@ -59,6 +66,15 @@ class ShelfBook(BookWyrmModel): on_delete=models.PROTECT ) + activity_mappings = [ + ActivityMapping('id', 'remote_id'), + ActivityMapping('actor', 'added_by'), + ActivityMapping('object', 'book'), + ActivityMapping('target', 'shelf') + ] + + activity_serializer = activitypub.AddBook + def to_add_activity(self, user): ''' AP for shelving a book''' return activitypub.Add( diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index b55d2da6..a520164a 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -80,12 +80,10 @@ class Status(OrderedCollectionPageMixin, BookWyrmModel): ActivityMapping( 'tag', 'mention_books', lambda x: tag_formatter(x, 'title', 'Book'), - lambda x: [i for i in x if x.get('type') == 'Book'] ), ActivityMapping( 'tag', 'mention_users', lambda x: tag_formatter(x, 'username', 'Mention'), - lambda x: [i for i in x if x.get('type') == 'Mention'] ), ActivityMapping( 'attachment', 'attachments', diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py index 4d511d56..a9812037 100644 --- a/bookwyrm/models/user.py +++ b/bookwyrm/models/user.py @@ -11,7 +11,6 @@ from bookwyrm.models.status import Status from bookwyrm.settings import DOMAIN from bookwyrm.signatures import create_key_pair from .base_model import ActivityMapping, OrderedCollectionPageMixin -from .base_model import image_formatter class User(OrderedCollectionPageMixin, AbstractUser): diff --git a/bookwyrm/remote_user.py b/bookwyrm/remote_user.py index 23a805b3..205b0893 100644 --- a/bookwyrm/remote_user.py +++ b/bookwyrm/remote_user.py @@ -16,11 +16,9 @@ def get_or_create_remote_user(actor): except models.User.DoesNotExist: pass - data = fetch_user_data(actor) - actor_parts = urlparse(actor) with transaction.atomic(): - user = activitypub.Person(**data).to_model(models.User) + user = activitypub.resolve_remote_id(models.User, actor) user.federated_server = get_or_create_remote_server(actor_parts.netloc) user.save() if user.bookwyrm_user: @@ -28,32 +26,9 @@ def get_or_create_remote_user(actor): return user -def fetch_user_data(actor): - ''' load the user's info from the actor url ''' - try: - response = requests.get( - actor, - headers={'Accept': 'application/activity+json'} - ) - except ConnectionError: - return None - - if not response.ok: - response.raise_for_status() - data = response.json() - - # make sure our actor is who they say they are - if actor != data['id']: - raise ValueError("Remote actor id must match url.") - return data - - def refresh_remote_user(user): ''' get updated user data from its home instance ''' - data = fetch_user_data(user.remote_id) - - activity = activitypub.Person(**data) - activity.to_model(models.User, instance=user) + activitypub.resolve_remote_id(user.remote_id, refresh=True) @app.task diff --git a/bookwyrm/views.py b/bookwyrm/views.py index e0feaee7..e2887255 100644 --- a/bookwyrm/views.py +++ b/bookwyrm/views.py @@ -5,8 +5,7 @@ from django.contrib.auth.decorators import login_required, permission_required from django.contrib.postgres.search import TrigramSimilarity from django.core.paginator import Paginator from django.db.models import Avg, Q -from django.http import HttpResponseBadRequest, HttpResponseNotFound,\ - JsonResponse +from django.http import HttpResponseNotFound, JsonResponse from django.core.exceptions import PermissionDenied from django.shortcuts import get_object_or_404, redirect from django.template.response import TemplateResponse