Merge pull request #365 from mouse-reeve/refactor-bookwyrm-connector

Refactors bookwyrm connector to use activitypub serializer
This commit is contained in:
Mouse Reeve 2020-11-27 15:25:22 -08:00 committed by GitHub
commit 1df5b2d481
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 107 additions and 178 deletions

View file

@ -212,6 +212,12 @@ def tag_formatter(tags, tag_type):
def image_formatter(image_json):
''' helper function to load images and format them for a model '''
if isinstance(image_json, list):
try:
image_json = image_json[0]
except IndexError:
return None
if not image_json or not hasattr(image_json, 'url'):
return None
url = image_json.get('url')

View file

@ -56,10 +56,10 @@ class Work(Book):
class Author(ActivityObject):
''' author of a book '''
name: str
born: str
died: str
aliases: str
bio: str
openlibrary_key: str
wikipedia_link: str
born: str = ''
died: str = ''
aliases: str = ''
bio: str = ''
openlibraryKey: str = ''
wikipediaLink: str = ''
type: str = 'Person'

View file

@ -157,7 +157,7 @@ class AbstractConnector(ABC):
def update_book_from_data(self, book, data, update_cover=True):
''' for creating a new book or syncing with data '''
book = update_from_mappings(book, data, self.book_mappings)
book = self.update_from_mappings(book, data, self.book_mappings)
author_text = []
for author in self.get_authors_from_data(data):
@ -262,23 +262,23 @@ class AbstractConnector(ABC):
''' get more info on a book '''
def update_from_mappings(self, obj, data, mappings):
    ''' assign data to model with mappings

    For each mapping, ``mapping.remote_field`` is looked up in ``data``,
    the value is passed through ``mapping.formatter`` and the result is
    stored on ``obj`` as ``mapping.local_field``. Fields that are missing
    or falsy in ``data`` are skipped, as are values the formatter raises
    on. Returns the same ``obj`` that was passed in. '''
    for mapping in mappings:
        # check if this field is present in the data
        value = data.get(mapping.remote_field)
        if not value:
            continue

        # extract the value in the right format
        try:
            value = mapping.formatter(value)
        except Exception:
            # best effort: one unparseable value should not stop the
            # remaining fields from being applied. Only Exception is
            # caught so KeyboardInterrupt/SystemExit still propagate.
            continue

        # assign the formatted value to the model
        setattr(obj, mapping.local_field, value)
    return obj
def get_date(date_string):

View file

@ -1,55 +1,21 @@
''' using another bookwyrm instance as a source of book data '''
from uuid import uuid4
from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
from django.db import transaction
import requests
from bookwyrm import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import update_from_mappings, get_date, get_data
from bookwyrm import activitypub, models
from .abstract_connector import AbstractConnector, SearchResult
from .abstract_connector import get_data
class Connector(AbstractConnector):
''' interact with other instances '''
def __init__(self, identifier):
super().__init__(identifier)
self.key_mappings = [
Mapping('isbn_13', model=models.Edition),
Mapping('isbn_10', model=models.Edition),
Mapping('lccn', model=models.Work),
Mapping('oclc_number', model=models.Edition),
Mapping('openlibrary_key'),
Mapping('goodreads_key'),
Mapping('asin'),
]
self.book_mappings = self.key_mappings + [
Mapping('sort_title'),
Mapping('subtitle'),
Mapping('description'),
Mapping('languages'),
Mapping('series'),
Mapping('series_number'),
Mapping('subjects'),
Mapping('subject_places'),
Mapping('first_published_date'),
Mapping('published_date'),
Mapping('pages'),
Mapping('physical_format'),
Mapping('publishers'),
]
self.author_mappings = [
Mapping('name'),
Mapping('bio'),
Mapping('openlibrary_key'),
Mapping('wikipedia_link'),
Mapping('aliases'),
Mapping('born', formatter=get_date),
Mapping('died', formatter=get_date),
]
def update_from_mappings(self, obj, data, mappings):
    ''' turn remote book data into a Work or Edition model instance

    ``mappings`` is part of the signature but is not used here: the data
    is unpacked into the matching activitypub class, which then fills in
    ``obj`` through its ``to_model`` call. '''
    # the payload itself says which kind of book it describes, so choose
    # the serializer and the model class as a pair
    if self.is_work_data(data):
        serializer, model = activitypub.Work, models.Work
    else:
        serializer, model = activitypub.Edition, models.Edition
    return serializer(**data).to_model(model, instance=obj)
def get_remote_id_from_data(self, data):
@ -57,7 +23,7 @@ class Connector(AbstractConnector):
def is_work_data(self, data):
    ''' check whether serialized book data describes a Work

    The type is read with ``dict.get`` so that data without a ``type``
    key is reported as not-a-work rather than raising ``KeyError``. '''
    return data.get('type') == 'Work'
def get_edition_from_work_data(self, data):
@ -71,46 +37,20 @@ class Connector(AbstractConnector):
def get_authors_from_data(self, data):
    ''' load author data

    Yields one author per id listed under ``data['authors']`` (nothing
    when the key is absent). An author already stored with a matching
    ``origin_id`` is reused; otherwise the author is fetched with
    ``get_data`` and converted through ``activitypub.Author``. '''
    for author_id in data.get('authors', []):
        try:
            author = models.Author.objects.get(origin_id=author_id)
        except models.Author.DoesNotExist:
            # only go to the remote server when there is no local copy,
            # so a known author is not fetched and yielded a second time
            remote_data = get_data(author_id)
            author_data = activitypub.Author(**remote_data)
            author = author_data.to_model(models.Author)
        yield author
def get_cover_from_data(self, data):
cover_data = data.get('attachment')
if not cover_data:
return None
try:
cover_url = cover_data[0].get('url')
except IndexError:
return None
try:
response = requests.get(cover_url)
except ConnectionError:
return None
if not response.ok:
return None
image_name = str(uuid4()) + '.' + cover_url.split('.')[-1]
image_content = ContentFile(response.content)
return [image_name, image_content]
def get_or_create_author(self, remote_id):
''' load that author '''
try:
return models.Author.objects.get(origin_id=remote_id)
except ObjectDoesNotExist:
pass
data = get_data(remote_id)
# ingest a new author
author = models.Author(origin_id=remote_id)
author = update_from_mappings(author, data, self.author_mappings)
author.save()
return author
pass
def parse_search_data(self, data):

View file

@ -7,7 +7,6 @@ from django.core.files.base import ContentFile
from bookwyrm import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import ConnectorException
from .abstract_connector import update_from_mappings
from .abstract_connector import get_date, get_data
from .openlibrary_languages import languages
@ -185,7 +184,7 @@ class Connector(AbstractConnector):
data = get_data(url)
author = models.Author(openlibrary_key=olkey)
author = update_from_mappings(author, data, self.author_mappings)
author = self.update_from_mappings(author, data, self.author_mappings)
name = data.get('name')
# TODO this is making some BOLD assumption
if name:

View file

@ -2,7 +2,8 @@
import inspect
import sys
from .book import Book, Work, Edition, Author
from .book import Book, Work, Edition
from .author import Author
from .connector import Connector
from .relationship import UserFollows, UserFollowRequest, UserBlocks
from .shelf import Shelf, ShelfBook

50
bookwyrm/models/author.py Normal file
View file

@ -0,0 +1,50 @@
''' database schema for info about authors '''
from django.db import models
from django.utils import timezone
from bookwyrm import activitypub
from bookwyrm.utils.fields import ArrayField
from .base_model import ActivitypubMixin, ActivityMapping, BookWyrmModel
class Author(ActivitypubMixin, BookWyrmModel):
    ''' basic biographic info about an author, with the field mappings
    used to convert it to and from ``activitypub.Author`` '''
    # identifier of the author on the source it was loaded from; nullable
    origin_id = models.CharField(max_length=255, null=True)
    ''' copy of an author from OL '''
    # "OL" above presumably means OpenLibrary -- this is that source's key
    openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
    # sync flag (on by default) and the time of the last sync, which
    # defaults to the moment the row is created
    sync = models.BooleanField(default=True)
    last_sync_date = models.DateTimeField(default=timezone.now)
    wikipedia_link = models.CharField(max_length=255, blank=True, null=True)
    # other external identifiers may be worth storing here as well
    born = models.DateTimeField(blank=True, null=True)
    died = models.DateTimeField(blank=True, null=True)
    # `name` is the only required name field; first/last are optional
    name = models.CharField(max_length=255)
    last_name = models.CharField(max_length=255, blank=True, null=True)
    first_name = models.CharField(max_length=255, blank=True, null=True)
    # list of alternative names, empty by default
    aliases = ArrayField(
        models.CharField(max_length=255), blank=True, default=list
    )
    bio = models.TextField(null=True, blank=True)

    @property
    def display_name(self):
        ''' Helper to return a displayable name

        Prefers ``name``; otherwise joins first and last name when both
        are set; otherwise returns whichever of the two is set. The
        result may be None or empty when no name field has a value. '''
        if self.name:
            return self.name
        # don't want to return a spurious space if all of these are None
        if self.first_name and self.last_name:
            return self.first_name + ' ' + self.last_name
        return self.last_name or self.first_name

    # each entry pairs two field names -- presumably (activitypub key,
    # model field); confirm against ActivityMapping in base_model
    activity_mappings = [
        ActivityMapping('id', 'remote_id'),
        ActivityMapping('name', 'name'),
        ActivityMapping('born', 'born'),
        ActivityMapping('died', 'died'),
        ActivityMapping('aliases', 'aliases'),
        ActivityMapping('bio', 'bio'),
        ActivityMapping('openlibraryKey', 'openlibrary_key'),
        ActivityMapping('wikipediaLink', 'wikipedia_link'),
    ]
    # activitypub class this model is serialized with
    activity_serializer = activitypub.Author

View file

@ -102,7 +102,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
'attachment', 'cover',
# this expects an iterable and the field is just an image
lambda x: image_attachments_formatter([x]),
lambda x: activitypub.image_attachments_formatter(x)[0]
activitypub.image_formatter
),
]
@ -190,7 +190,7 @@ class Edition(Book):
if self.isbn_10 and not self.isbn_13:
self.isbn_13 = isbn_10_to_13(self.isbn_10)
super().save(*args, **kwargs)
return super().save(*args, **kwargs)
def isbn_10_to_13(isbn_10):
@ -234,44 +234,3 @@ def isbn_13_to_10(isbn_13):
if checkdigit == 10:
checkdigit = 'X'
return converted + str(checkdigit)
class Author(ActivitypubMixin, BookWyrmModel):
origin_id = models.CharField(max_length=255, null=True)
''' copy of an author from OL '''
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
sync = models.BooleanField(default=True)
last_sync_date = models.DateTimeField(default=timezone.now)
wikipedia_link = models.CharField(max_length=255, blank=True, null=True)
# idk probably other keys would be useful here?
born = models.DateTimeField(blank=True, null=True)
died = models.DateTimeField(blank=True, null=True)
name = models.CharField(max_length=255)
last_name = models.CharField(max_length=255, blank=True, null=True)
first_name = models.CharField(max_length=255, blank=True, null=True)
aliases = ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
bio = models.TextField(null=True, blank=True)
@property
def display_name(self):
''' Helper to return a displayable name'''
if self.name:
return self.name
# don't want to return a spurious space if all of these are None
if self.first_name and self.last_name:
return self.first_name + ' ' + self.last_name
return self.last_name or self.first_name
activity_mappings = [
ActivityMapping('id', 'remote_id'),
ActivityMapping('name', 'display_name'),
ActivityMapping('born', 'born'),
ActivityMapping('died', 'died'),
ActivityMapping('aliases', 'aliases'),
ActivityMapping('bio', 'bio'),
ActivityMapping('openlibrary_key', 'openlibrary_key'),
ActivityMapping('wikipedia_link', 'wikipedia_link'),
]
activity_serializer = activitypub.Author

View file

@ -2,8 +2,7 @@
from django.test import TestCase
from bookwyrm import models
from bookwyrm.connectors.abstract_connector import Mapping,\
update_from_mappings
from bookwyrm.connectors.abstract_connector import Mapping
from bookwyrm.connectors.bookwyrm_connector import Connector
@ -64,29 +63,6 @@ class AbstractConnector(TestCase):
self.assertEqual(mapping.formatter('bb'), 'aabb')
def test_update_from_mappings(self):
data = {
'title': 'Unused title',
'isbn_10': '1234567890',
'isbn_13': 'blahhh',
'blah': 'bip',
'format': 'hardcover',
'series': ['one', 'two'],
}
mappings = [
Mapping('isbn_10'),
Mapping('blah'),# not present on self.book
Mapping('physical_format', remote_field='format'),
Mapping('series', formatter=lambda x: x[0]),
]
book = update_from_mappings(self.book, data, mappings)
self.assertEqual(book.title, 'Example Edition')
self.assertEqual(book.isbn_10, '1234567890')
self.assertEqual(book.isbn_13, None)
self.assertEqual(book.physical_format, 'hardcover')
self.assertEqual(book.series, 'one')
def test_match_from_mappings(self):
edition = models.Edition.objects.create(
title='Blah',

View file

@ -28,9 +28,7 @@
],
"lccn": null,
"editions": [
"https://bookwyrm.social/book/5989",
"OL28439584M",
"OL28300471M"
"https://bookwyrm.social/book/5989"
],
"@context": "https://www.w3.org/ns/activitystreams"
}