Refactors bookwyrm connector to use activitypub serializer

This commit is contained in:
Mouse Reeve 2020-11-24 16:05:00 -08:00
parent dd985094a5
commit 847014720e
5 changed files with 48 additions and 101 deletions

View file

@ -143,7 +143,8 @@ class ActivityObject:
# add images # add images
for (model_key, value) in image_fields.items(): for (model_key, value) in image_fields.items():
getattr(instance, model_key).save(*value, save=True) if value:
getattr(instance, model_key).save(*value, save=True)
# add one to many fields # add one to many fields
for (model_key, values) in one_to_many_fields.items(): for (model_key, values) in one_to_many_fields.items():
@ -207,6 +208,13 @@ def tag_formatter(tags, tag_type):
def image_formatter(image_json): def image_formatter(image_json):
''' helper function to load images and format them for a model ''' ''' helper function to load images and format them for a model '''
if isinstance(image_json, list):
try:
image_json = image_json[0]
except IndexError:
return None
if not image_json:
return None
url = image_json.get('url') url = image_json.get('url')
if not url: if not url:
return None return None

View file

@ -157,7 +157,7 @@ class AbstractConnector(ABC):
def update_book_from_data(self, book, data, update_cover=True): def update_book_from_data(self, book, data, update_cover=True):
''' for creating a new book or syncing with data ''' ''' for creating a new book or syncing with data '''
book = update_from_mappings(book, data, self.book_mappings) book = self.update_from_mappings(book, data, self.book_mappings)
author_text = [] author_text = []
for author in self.get_authors_from_data(data): for author in self.get_authors_from_data(data):
@ -262,23 +262,23 @@ class AbstractConnector(ABC):
''' get more info on a book ''' ''' get more info on a book '''
def update_from_mappings(obj, data, mappings): def update_from_mappings(self, obj, data, mappings):
''' assign data to model with mappings ''' ''' assign data to model with mappings '''
for mapping in mappings: for mapping in mappings:
# check if this field is present in the data # check if this field is present in the data
value = data.get(mapping.remote_field) value = data.get(mapping.remote_field)
if not value: if not value:
continue continue
# extract the value in the right format # extract the value in the right format
try: try:
value = mapping.formatter(value) value = mapping.formatter(value)
except: except:
continue continue
# assign the formatted value to the model # assign the formatted value to the model
obj.__setattr__(mapping.local_field, value) obj.__setattr__(mapping.local_field, value)
return obj return obj
def get_date(date_string): def get_date(date_string):

View file

@ -1,55 +1,22 @@
''' using another bookwyrm instance as a source of book data ''' ''' using another bookwyrm instance as a source of book data '''
from uuid import uuid4
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
from django.db import transaction from django.db import transaction
import requests
from bookwyrm import models from bookwyrm import activitypub, models
from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import AbstractConnector, SearchResult
from .abstract_connector import update_from_mappings, get_date, get_data from .abstract_connector import get_data
class Connector(AbstractConnector): class Connector(AbstractConnector):
''' interact with other instances ''' ''' interact with other instances '''
def __init__(self, identifier):
super().__init__(identifier)
self.key_mappings = [
Mapping('isbn_13', model=models.Edition),
Mapping('isbn_10', model=models.Edition),
Mapping('lccn', model=models.Work),
Mapping('oclc_number', model=models.Edition),
Mapping('openlibrary_key'),
Mapping('goodreads_key'),
Mapping('asin'),
]
self.book_mappings = self.key_mappings + [ def update_from_mappings(self, obj, data, mappings):
Mapping('sort_title'), ''' serialize book data into a model '''
Mapping('subtitle'), if self.is_work_data(data):
Mapping('description'), work_data = activitypub.Work(**data)
Mapping('languages'), return work_data.to_model(models.Work, instance=obj)
Mapping('series'), edition_data = activitypub.Edition(**data)
Mapping('series_number'), return edition_data.to_model(models.Edition, instance=obj)
Mapping('subjects'),
Mapping('subject_places'),
Mapping('first_published_date'),
Mapping('published_date'),
Mapping('pages'),
Mapping('physical_format'),
Mapping('publishers'),
]
self.author_mappings = [
Mapping('name'),
Mapping('bio'),
Mapping('openlibrary_key'),
Mapping('wikipedia_link'),
Mapping('aliases'),
Mapping('born', formatter=get_date),
Mapping('died', formatter=get_date),
]
def get_remote_id_from_data(self, data): def get_remote_id_from_data(self, data):
@ -71,46 +38,19 @@ class Connector(AbstractConnector):
def get_authors_from_data(self, data): def get_authors_from_data(self, data):
for author_url in data.get('authors', []): ''' load author data '''
yield self.get_or_create_author(author_url) for author_id in data.get('authors', []):
try:
yield models.Author.objects.get(origin_id=author_id)
except models.Author.DoesNotExist:
continue
data = get_data(author_id)
author_data = activitypub.Author(**data)
yield author_data.to_model(models.Author)
def get_cover_from_data(self, data): def get_cover_from_data(self, data):
cover_data = data.get('attachment') pass
if not cover_data:
return None
try:
cover_url = cover_data[0].get('url')
except IndexError:
return None
try:
response = requests.get(cover_url)
except ConnectionError:
return None
if not response.ok:
return None
image_name = str(uuid4()) + '.' + cover_url.split('.')[-1]
image_content = ContentFile(response.content)
return [image_name, image_content]
def get_or_create_author(self, remote_id):
''' load that author '''
try:
return models.Author.objects.get(origin_id=remote_id)
except ObjectDoesNotExist:
pass
data = get_data(remote_id)
# ingest a new author
author = models.Author(origin_id=remote_id)
author = update_from_mappings(author, data, self.author_mappings)
author.save()
return author
def parse_search_data(self, data): def parse_search_data(self, data):

View file

@ -7,7 +7,6 @@ from django.core.files.base import ContentFile
from bookwyrm import models from bookwyrm import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import ConnectorException from .abstract_connector import ConnectorException
from .abstract_connector import update_from_mappings
from .abstract_connector import get_date, get_data from .abstract_connector import get_date, get_data
from .openlibrary_languages import languages from .openlibrary_languages import languages
@ -185,7 +184,7 @@ class Connector(AbstractConnector):
data = get_data(url) data = get_data(url)
author = models.Author(openlibrary_key=olkey) author = models.Author(openlibrary_key=olkey)
author = update_from_mappings(author, data, self.author_mappings) author = self.update_from_mappings(author, data, self.author_mappings)
name = data.get('name') name = data.get('name')
# TODO this is making some BOLD assumption # TODO this is making some BOLD assumption
if name: if name:

View file

@ -102,7 +102,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
'attachment', 'cover', 'attachment', 'cover',
# this expects an iterable and the field is just an image # this expects an iterable and the field is just an image
lambda x: image_attachments_formatter([x]), lambda x: image_attachments_formatter([x]),
lambda x: activitypub.image_attachments_formatter(x)[0] activitypub.image_formatter
), ),
] ]
@ -190,7 +190,7 @@ class Edition(Book):
if self.isbn_10 and not self.isbn_13: if self.isbn_10 and not self.isbn_13:
self.isbn_13 = isbn_10_to_13(self.isbn_10) self.isbn_13 = isbn_10_to_13(self.isbn_10)
super().save(*args, **kwargs) return super().save(*args, **kwargs)
def isbn_10_to_13(isbn_10): def isbn_10_to_13(isbn_10):