From 847014720e06ca3ee120cc1645553cf127952e69 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Tue, 24 Nov 2020 16:05:00 -0800 Subject: [PATCH 1/4] Refactors bookwyrm connector to use activitypub serializer --- bookwyrm/activitypub/base_activity.py | 10 ++- bookwyrm/connectors/abstract_connector.py | 32 +++---- bookwyrm/connectors/bookwyrm_connector.py | 100 +++++----------------- bookwyrm/connectors/openlibrary.py | 3 +- bookwyrm/models/book.py | 4 +- 5 files changed, 48 insertions(+), 101 deletions(-) diff --git a/bookwyrm/activitypub/base_activity.py b/bookwyrm/activitypub/base_activity.py index 62fce70b2..d120e1115 100644 --- a/bookwyrm/activitypub/base_activity.py +++ b/bookwyrm/activitypub/base_activity.py @@ -143,7 +143,8 @@ class ActivityObject: # add images for (model_key, value) in image_fields.items(): - getattr(instance, model_key).save(*value, save=True) + if value: + getattr(instance, model_key).save(*value, save=True) # add one to many fields for (model_key, values) in one_to_many_fields.items(): @@ -207,6 +208,13 @@ def tag_formatter(tags, tag_type): def image_formatter(image_json): ''' helper function to load images and format them for a model ''' + if isinstance(image_json, list): + try: + image_json = image_json[0] + except IndexError: + return None + if not image_json: + return None url = image_json.get('url') if not url: return None diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 7fc4596b3..d709b075a 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -157,7 +157,7 @@ class AbstractConnector(ABC): def update_book_from_data(self, book, data, update_cover=True): ''' for creating a new book or syncing with data ''' - book = update_from_mappings(book, data, self.book_mappings) + book = self.update_from_mappings(book, data, self.book_mappings) author_text = [] for author in self.get_authors_from_data(data): @@ -262,23 +262,23 @@ class AbstractConnector(ABC): ''' get more info on a book ''' -def update_from_mappings(obj, data, mappings): - ''' assign data to model with mappings ''' - for mapping in mappings: - # check if this field is present in the data - value = data.get(mapping.remote_field) - if not value: - continue + def update_from_mappings(self, obj, data, mappings): + ''' assign data to model with mappings ''' + for mapping in mappings: + # check if this field is present in the data + value = data.get(mapping.remote_field) + if not value: + continue - # extract the value in the right format - try: - value = mapping.formatter(value) - except: - continue + # extract the value in the right format + try: + value = mapping.formatter(value) + except: + continue - # assign the formatted value to the model - obj.__setattr__(mapping.local_field, value) - return obj + # assign the formatted value to the model + obj.__setattr__(mapping.local_field, value) + return obj def get_date(date_string): diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index 6ed9dda1d..f1e539719 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -1,55 +1,22 @@ ''' using another bookwyrm instance as a source of book data ''' -from uuid import uuid4 - from django.core.exceptions import ObjectDoesNotExist -from django.core.files.base import ContentFile from django.db import transaction -import requests -from bookwyrm import models -from .abstract_connector import AbstractConnector, SearchResult, Mapping -from .abstract_connector import update_from_mappings, get_date, get_data +from bookwyrm import activitypub, models +from .abstract_connector import AbstractConnector, SearchResult +from .abstract_connector import get_data class Connector(AbstractConnector): ''' interact with other instances ''' - def __init__(self, identifier): - super().__init__(identifier) - self.key_mappings = [ - Mapping('isbn_13', model=models.Edition), - Mapping('isbn_10', model=models.Edition), - Mapping('lccn', model=models.Work), - Mapping('oclc_number', model=models.Edition), - Mapping('openlibrary_key'), - Mapping('goodreads_key'), - Mapping('asin'), - ] - self.book_mappings = self.key_mappings + [ - Mapping('sort_title'), - Mapping('subtitle'), - Mapping('description'), - Mapping('languages'), - Mapping('series'), - Mapping('series_number'), - Mapping('subjects'), - Mapping('subject_places'), - Mapping('first_published_date'), - Mapping('published_date'), - Mapping('pages'), - Mapping('physical_format'), - Mapping('publishers'), - ] - - self.author_mappings = [ - Mapping('name'), - Mapping('bio'), - Mapping('openlibrary_key'), - Mapping('wikipedia_link'), - Mapping('aliases'), - Mapping('born', formatter=get_date), - Mapping('died', formatter=get_date), - ] + def update_from_mappings(self, obj, data, mappings): + ''' serialize book data into a model ''' + if self.is_work_data(data): + work_data = activitypub.Work(**data) + return work_data.to_model(models.Work, instance=obj) + edition_data = activitypub.Edition(**data) + return edition_data.to_model(models.Edition, instance=obj) def get_remote_id_from_data(self, data): @@ -71,46 +38,19 @@ class Connector(AbstractConnector): def get_authors_from_data(self, data): - for author_url in data.get('authors', []): - yield self.get_or_create_author(author_url) + ''' load author data ''' + for author_id in data.get('authors', []): + try: + yield models.Author.objects.get(origin_id=author_id) + except models.Author.DoesNotExist: + continue + data = get_data(author_id) + author_data = activitypub.Author(**data) + yield author_data.to_model(models.Author) def get_cover_from_data(self, data): - cover_data = data.get('attachment') - if not cover_data: - return None - try: - cover_url = cover_data[0].get('url') - except IndexError: - return None - try: - response = requests.get(cover_url) - except ConnectionError: - return None - - if not response.ok: - return None - - image_name = str(uuid4()) + '.' + cover_url.split('.')[-1] - image_content = ContentFile(response.content) - return [image_name, image_content] - - - def get_or_create_author(self, remote_id): - ''' load that author ''' - try: - return models.Author.objects.get(origin_id=remote_id) - except ObjectDoesNotExist: - pass - - data = get_data(remote_id) - - # ingest a new author - author = models.Author(origin_id=remote_id) - author = update_from_mappings(author, data, self.author_mappings) - author.save() - - return author + pass def parse_search_data(self, data): diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 5c26ad45b..5e18616d5 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -7,7 +7,6 @@ from django.core.files.base import ContentFile from bookwyrm import models from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import ConnectorException -from .abstract_connector import update_from_mappings from .abstract_connector import get_date, get_data from .openlibrary_languages import languages @@ -185,7 +184,7 @@ class Connector(AbstractConnector): data = get_data(url) author = models.Author(openlibrary_key=olkey) - author = update_from_mappings(author, data, self.author_mappings) + author = self.update_from_mappings(author, data, self.author_mappings) name = data.get('name') # TODO this is making some BOLD assumption if name: diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index c8643f07e..642b5bfe0 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -102,7 +102,7 @@ class Book(ActivitypubMixin, BookWyrmModel): 'attachment', 'cover', # this expects an iterable and the field is just an image lambda x: image_attachments_formatter([x]), - lambda x: activitypub.image_attachments_formatter(x)[0] + activitypub.image_formatter ), ] @@ -190,7 +190,7 @@ class Edition(Book): if self.isbn_10 and not self.isbn_13: self.isbn_13 = isbn_10_to_13(self.isbn_10) - super().save(*args, **kwargs) + return super().save(*args, **kwargs) def isbn_10_to_13(isbn_10): From 4f6ce0307d3171a30e1a9afde8d03b8ec89bc920 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 25 Nov 2020 11:24:36 -0800 Subject: [PATCH 2/4] fixes import in abstract connector unit test --- bookwyrm/tests/connectors/test_abstract_connector.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bookwyrm/tests/connectors/test_abstract_connector.py b/bookwyrm/tests/connectors/test_abstract_connector.py index f4af8a1ac..b207faf62 100644 --- a/bookwyrm/tests/connectors/test_abstract_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_connector.py @@ -2,8 +2,7 @@ from django.test import TestCase from bookwyrm import models -from bookwyrm.connectors.abstract_connector import Mapping,\ - update_from_mappings +from bookwyrm.connectors.abstract_connector import Mapping from bookwyrm.connectors.bookwyrm_connector import Connector @@ -79,7 +78,7 @@ class AbstractConnector(TestCase): Mapping('physical_format', remote_field='format'), Mapping('series', formatter=lambda x: x[0]), ] - book = update_from_mappings(self.book, data, mappings) + book = self.connector.update_from_mappings(self.book, data, mappings) self.assertEqual(book.title, 'Example Edition') self.assertEqual(book.isbn_10, '1234567890') self.assertEqual(book.isbn_13, None) From 9b79eb5fc3d12e0469dd1fed2419d8d889c21e92 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Wed, 25 Nov 2020 11:44:19 -0800 Subject: [PATCH 3/4] Removes outdated test of bookwyrm connector updater --- bookwyrm/connectors/bookwyrm_connector.py | 2 +- .../connectors/test_abstract_connector.py | 23 ------------------- bookwyrm/tests/data/fr_work.json | 4 +--- 3 files changed, 2 insertions(+), 27 deletions(-) diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index f1e539719..78ae84c32 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -24,7 +24,7 @@ class Connector(AbstractConnector): def is_work_data(self, data): - return data['type'] == 'Work' + return data.get('type') == 'Work' def get_edition_from_work_data(self, data): diff --git a/bookwyrm/tests/connectors/test_abstract_connector.py b/bookwyrm/tests/connectors/test_abstract_connector.py index b207faf62..c01b00dba 100644 --- a/bookwyrm/tests/connectors/test_abstract_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_connector.py @@ -63,29 +63,6 @@ class AbstractConnector(TestCase): self.assertEqual(mapping.formatter('bb'), 'aabb') - def test_update_from_mappings(self): - data = { - 'title': 'Unused title', - 'isbn_10': '1234567890', - 'isbn_13': 'blahhh', - 'blah': 'bip', - 'format': 'hardcover', - 'series': ['one', 'two'], - } - mappings = [ - Mapping('isbn_10'), - Mapping('blah'),# not present on self.book - Mapping('physical_format', remote_field='format'), - Mapping('series', formatter=lambda x: x[0]), - ] - book = self.connector.update_from_mappings(self.book, data, mappings) - self.assertEqual(book.title, 'Example Edition') - self.assertEqual(book.isbn_10, '1234567890') - self.assertEqual(book.isbn_13, None) - self.assertEqual(book.physical_format, 'hardcover') - self.assertEqual(book.series, 'one') - - def test_match_from_mappings(self): edition = models.Edition.objects.create( title='Blah', diff --git a/bookwyrm/tests/data/fr_work.json b/bookwyrm/tests/data/fr_work.json index 4e759a194..3a36fc640 100644 --- a/bookwyrm/tests/data/fr_work.json +++ b/bookwyrm/tests/data/fr_work.json @@ -28,9 +28,7 @@ ], "lccn": null, "editions": [ - "https://bookwyrm.social/book/5989", - "OL28439584M", - "OL28300471M" + "https://bookwyrm.social/book/5989" ], "@context": "https://www.w3.org/ns/activitystreams" } From 7b6035898bed76e0690b805d1be79b4f3073e225 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Fri, 27 Nov 2020 14:54:08 -0800 Subject: [PATCH 4/4] Serialize activitypub authors from data in connector --- bookwyrm/activitypub/book.py | 12 +++--- bookwyrm/connectors/bookwyrm_connector.py | 6 +-- bookwyrm/models/__init__.py | 3 +- bookwyrm/models/author.py | 50 +++++++++++++++++++++++ bookwyrm/models/book.py | 41 ------------------- 5 files changed, 61 insertions(+), 51 deletions(-) create mode 100644 bookwyrm/models/author.py diff --git a/bookwyrm/activitypub/book.py b/bookwyrm/activitypub/book.py index bdc30edd2..2bfafba70 100644 --- a/bookwyrm/activitypub/book.py +++ b/bookwyrm/activitypub/book.py @@ -56,10 +56,10 @@ class Work(Book): class Author(ActivityObject): ''' author of a book ''' name: str - born: str - died: str - aliases: str - bio: str - openlibrary_key: str - wikipedia_link: str + born: str = '' + died: str = '' + aliases: str = '' + bio: str = '' + openlibraryKey: str = '' + wikipediaLink: str = '' type: str = 'Person' diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index 78ae84c32..1bc81450d 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -1,5 +1,4 @@ ''' using another bookwyrm instance as a source of book data ''' -from django.core.exceptions import ObjectDoesNotExist from django.db import transaction from bookwyrm import activitypub, models @@ -43,10 +42,11 @@ class Connector(AbstractConnector): try: yield models.Author.objects.get(origin_id=author_id) except models.Author.DoesNotExist: - continue + pass data = get_data(author_id) author_data = activitypub.Author(**data) - yield author_data.to_model(models.Author) + author = author_data.to_model(models.Author) + yield author def get_cover_from_data(self, data): diff --git a/bookwyrm/models/__init__.py b/bookwyrm/models/__init__.py index 8a93b805a..81c64831b 100644 --- a/bookwyrm/models/__init__.py +++ b/bookwyrm/models/__init__.py @@ -2,7 +2,8 @@ import inspect import sys -from .book import Book, Work, Edition, Author +from .book import Book, Work, Edition +from .author import Author from .connector import Connector from .relationship import UserFollows, UserFollowRequest, UserBlocks from .shelf import Shelf, ShelfBook diff --git a/bookwyrm/models/author.py b/bookwyrm/models/author.py new file mode 100644 index 000000000..1d7017974 --- /dev/null +++ b/bookwyrm/models/author.py @@ -0,0 +1,50 @@ +''' database schema for info about authors ''' +from django.db import models +from django.utils import timezone + +from bookwyrm import activitypub +from bookwyrm.utils.fields import ArrayField + +from .base_model import ActivitypubMixin, ActivityMapping, BookWyrmModel + + +class Author(ActivitypubMixin, BookWyrmModel): + ''' basic biographic info ''' + origin_id = models.CharField(max_length=255, null=True) + ''' copy of an author from OL ''' + openlibrary_key = models.CharField(max_length=255, blank=True, null=True) + sync = models.BooleanField(default=True) + last_sync_date = models.DateTimeField(default=timezone.now) + wikipedia_link = models.CharField(max_length=255, blank=True, null=True) + # idk probably other keys would be useful here? + born = models.DateTimeField(blank=True, null=True) + died = models.DateTimeField(blank=True, null=True) + name = models.CharField(max_length=255) + last_name = models.CharField(max_length=255, blank=True, null=True) + first_name = models.CharField(max_length=255, blank=True, null=True) + aliases = ArrayField( + models.CharField(max_length=255), blank=True, default=list + ) + bio = models.TextField(null=True, blank=True) + + @property + def display_name(self): + ''' Helper to return a displayable name''' + if self.name: + return self.name + # don't want to return a spurious space if all of these are None + if self.first_name and self.last_name: + return self.first_name + ' ' + self.last_name + return self.last_name or self.first_name + + activity_mappings = [ + ActivityMapping('id', 'remote_id'), + ActivityMapping('name', 'name'), + ActivityMapping('born', 'born'), + ActivityMapping('died', 'died'), + ActivityMapping('aliases', 'aliases'), + ActivityMapping('bio', 'bio'), + ActivityMapping('openlibraryKey', 'openlibrary_key'), + ActivityMapping('wikipediaLink', 'wikipedia_link'), + ] + activity_serializer = activitypub.Author diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 642b5bfe0..7ec330da2 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -234,44 +234,3 @@ def isbn_13_to_10(isbn_13): if checkdigit == 10: checkdigit = 'X' return converted + str(checkdigit) - - -class Author(ActivitypubMixin, BookWyrmModel): - origin_id = models.CharField(max_length=255, null=True) - ''' copy of an author from OL ''' - openlibrary_key = models.CharField(max_length=255, blank=True, null=True) - sync = models.BooleanField(default=True) - last_sync_date = models.DateTimeField(default=timezone.now) - wikipedia_link = models.CharField(max_length=255, blank=True, null=True) - # idk probably other keys would be useful here? - born = models.DateTimeField(blank=True, null=True) - died = models.DateTimeField(blank=True, null=True) - name = models.CharField(max_length=255) - last_name = models.CharField(max_length=255, blank=True, null=True) - first_name = models.CharField(max_length=255, blank=True, null=True) - aliases = ArrayField( - models.CharField(max_length=255), blank=True, default=list - ) - bio = models.TextField(null=True, blank=True) - - @property - def display_name(self): - ''' Helper to return a displayable name''' - if self.name: - return self.name - # don't want to return a spurious space if all of these are None - if self.first_name and self.last_name: - return self.first_name + ' ' + self.last_name - return self.last_name or self.first_name - - activity_mappings = [ - ActivityMapping('id', 'remote_id'), - ActivityMapping('name', 'display_name'), - ActivityMapping('born', 'born'), - ActivityMapping('died', 'died'), - ActivityMapping('aliases', 'aliases'), - ActivityMapping('bio', 'bio'), - ActivityMapping('openlibrary_key', 'openlibrary_key'), - ActivityMapping('wikipedia_link', 'wikipedia_link'), - ] - activity_serializer = activitypub.Author