From 847014720e06ca3ee120cc1645553cf127952e69 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Tue, 24 Nov 2020 16:05:00 -0800 Subject: [PATCH] Refactors bookwyrm connector to use activitypub serializer --- bookwyrm/activitypub/base_activity.py | 10 ++- bookwyrm/connectors/abstract_connector.py | 32 +++---- bookwyrm/connectors/bookwyrm_connector.py | 100 +++++----------------- bookwyrm/connectors/openlibrary.py | 3 +- bookwyrm/models/book.py | 4 +- 5 files changed, 48 insertions(+), 101 deletions(-) diff --git a/bookwyrm/activitypub/base_activity.py b/bookwyrm/activitypub/base_activity.py index 62fce70b..d120e111 100644 --- a/bookwyrm/activitypub/base_activity.py +++ b/bookwyrm/activitypub/base_activity.py @@ -143,7 +143,8 @@ class ActivityObject: # add images for (model_key, value) in image_fields.items(): - getattr(instance, model_key).save(*value, save=True) + if value: + getattr(instance, model_key).save(*value, save=True) # add one to many fields for (model_key, values) in one_to_many_fields.items(): @@ -207,6 +208,13 @@ def tag_formatter(tags, tag_type): def image_formatter(image_json): ''' helper function to load images and format them for a model ''' + if isinstance(image_json, list): + try: + image_json = image_json[0] + except IndexError: + return None + if not image_json: + return None url = image_json.get('url') if not url: return None diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 7fc4596b..d709b075 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -157,7 +157,7 @@ class AbstractConnector(ABC): def update_book_from_data(self, book, data, update_cover=True): ''' for creating a new book or syncing with data ''' - book = update_from_mappings(book, data, self.book_mappings) + book = self.update_from_mappings(book, data, self.book_mappings) author_text = [] for author in self.get_authors_from_data(data): @@ -262,23 +262,23 @@ class AbstractConnector(ABC): ''' get more info on a book ''' -def update_from_mappings(obj, data, mappings): - ''' assign data to model with mappings ''' - for mapping in mappings: - # check if this field is present in the data - value = data.get(mapping.remote_field) - if not value: - continue + def update_from_mappings(self, obj, data, mappings): + ''' assign data to model with mappings ''' + for mapping in mappings: + # check if this field is present in the data + value = data.get(mapping.remote_field) + if not value: + continue - # extract the value in the right format - try: - value = mapping.formatter(value) - except: - continue + # extract the value in the right format + try: + value = mapping.formatter(value) + except: + continue - # assign the formatted value to the model - obj.__setattr__(mapping.local_field, value) - return obj + # assign the formatted value to the model + obj.__setattr__(mapping.local_field, value) + return obj def get_date(date_string): diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index 6ed9dda1..f1e53971 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -1,55 +1,22 @@ ''' using another bookwyrm instance as a source of book data ''' -from uuid import uuid4 - from django.core.exceptions import ObjectDoesNotExist -from django.core.files.base import ContentFile from django.db import transaction -import requests -from bookwyrm import models -from .abstract_connector import AbstractConnector, SearchResult, Mapping -from .abstract_connector import update_from_mappings, get_date, get_data +from bookwyrm import activitypub, models +from .abstract_connector import AbstractConnector, SearchResult +from .abstract_connector import get_data class Connector(AbstractConnector): ''' interact with other instances ''' - def __init__(self, identifier): - super().__init__(identifier) - self.key_mappings = [ - Mapping('isbn_13', model=models.Edition), - Mapping('isbn_10', model=models.Edition), - Mapping('lccn', model=models.Work), - Mapping('oclc_number', model=models.Edition), - Mapping('openlibrary_key'), - Mapping('goodreads_key'), - Mapping('asin'), - ] - self.book_mappings = self.key_mappings + [ - Mapping('sort_title'), - Mapping('subtitle'), - Mapping('description'), - Mapping('languages'), - Mapping('series'), - Mapping('series_number'), - Mapping('subjects'), - Mapping('subject_places'), - Mapping('first_published_date'), - Mapping('published_date'), - Mapping('pages'), - Mapping('physical_format'), - Mapping('publishers'), - ] - - self.author_mappings = [ - Mapping('name'), - Mapping('bio'), - Mapping('openlibrary_key'), - Mapping('wikipedia_link'), - Mapping('aliases'), - Mapping('born', formatter=get_date), - Mapping('died', formatter=get_date), - ] + def update_from_mappings(self, obj, data, mappings): + ''' serialize book data into a model ''' + if self.is_work_data(data): + work_data = activitypub.Work(**data) + return work_data.to_model(models.Work, instance=obj) + edition_data = activitypub.Edition(**data) + return edition_data.to_model(models.Edition, instance=obj) def get_remote_id_from_data(self, data): @@ -71,46 +38,19 @@ class Connector(AbstractConnector): def get_authors_from_data(self, data): - for author_url in data.get('authors', []): - yield self.get_or_create_author(author_url) + ''' load author data ''' + for author_id in data.get('authors', []): + try: + yield models.Author.objects.get(origin_id=author_id) + except models.Author.DoesNotExist: + continue + data = get_data(author_id) + author_data = activitypub.Author(**data) + yield author_data.to_model(models.Author) def get_cover_from_data(self, data): - cover_data = data.get('attachment') - if not cover_data: - return None - try: - cover_url = cover_data[0].get('url') - except IndexError: - return None - try: - response = requests.get(cover_url) - except ConnectionError: - return None - - if not response.ok: - return None - - image_name = str(uuid4()) + '.' + cover_url.split('.')[-1] - image_content = ContentFile(response.content) - return [image_name, image_content] - - - def get_or_create_author(self, remote_id): - ''' load that author ''' - try: - return models.Author.objects.get(origin_id=remote_id) - except ObjectDoesNotExist: - pass - - data = get_data(remote_id) - - # ingest a new author - author = models.Author(origin_id=remote_id) - author = update_from_mappings(author, data, self.author_mappings) - author.save() - - return author + pass def parse_search_data(self, data): diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 5c26ad45..5e18616d 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -7,7 +7,6 @@ from django.core.files.base import ContentFile from bookwyrm import models from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import ConnectorException -from .abstract_connector import update_from_mappings from .abstract_connector import get_date, get_data from .openlibrary_languages import languages @@ -185,7 +184,7 @@ class Connector(AbstractConnector): data = get_data(url) author = models.Author(openlibrary_key=olkey) - author = update_from_mappings(author, data, self.author_mappings) + author = self.update_from_mappings(author, data, self.author_mappings) name = data.get('name') # TODO this is making some BOLD assumption if name: diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index c8643f07..642b5bfe 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -102,7 +102,7 @@ class Book(ActivitypubMixin, BookWyrmModel): 'attachment', 'cover', # this expects an iterable and the field is just an image lambda x: image_attachments_formatter([x]), - lambda x: activitypub.image_attachments_formatter(x)[0] + activitypub.image_formatter ), ] @@ -190,7 +190,7 @@ class Edition(Book): if self.isbn_10 and not self.isbn_13: self.isbn_13 = isbn_10_to_13(self.isbn_10) - super().save(*args, **kwargs) + return super().save(*args, **kwargs) def isbn_10_to_13(isbn_10):