forked from mirrors/bookwyrm
Merge pull request #365 from mouse-reeve/refactor-bookwyrm-connector
Refactors bookwyrm connector to use activitypub serializer
This commit is contained in:
commit
1df5b2d481
10 changed files with 107 additions and 178 deletions
|
@ -212,6 +212,12 @@ def tag_formatter(tags, tag_type):
|
||||||
|
|
||||||
def image_formatter(image_json):
|
def image_formatter(image_json):
|
||||||
''' helper function to load images and format them for a model '''
|
''' helper function to load images and format them for a model '''
|
||||||
|
if isinstance(image_json, list):
|
||||||
|
try:
|
||||||
|
image_json = image_json[0]
|
||||||
|
except IndexError:
|
||||||
|
return None
|
||||||
|
|
||||||
if not image_json or not hasattr(image_json, 'url'):
|
if not image_json or not hasattr(image_json, 'url'):
|
||||||
return None
|
return None
|
||||||
url = image_json.get('url')
|
url = image_json.get('url')
|
||||||
|
|
|
@ -56,10 +56,10 @@ class Work(Book):
|
||||||
class Author(ActivityObject):
|
class Author(ActivityObject):
|
||||||
''' author of a book '''
|
''' author of a book '''
|
||||||
name: str
|
name: str
|
||||||
born: str
|
born: str = ''
|
||||||
died: str
|
died: str = ''
|
||||||
aliases: str
|
aliases: str = ''
|
||||||
bio: str
|
bio: str = ''
|
||||||
openlibrary_key: str
|
openlibraryKey: str = ''
|
||||||
wikipedia_link: str
|
wikipediaLink: str = ''
|
||||||
type: str = 'Person'
|
type: str = 'Person'
|
||||||
|
|
|
@ -157,7 +157,7 @@ class AbstractConnector(ABC):
|
||||||
|
|
||||||
def update_book_from_data(self, book, data, update_cover=True):
|
def update_book_from_data(self, book, data, update_cover=True):
|
||||||
''' for creating a new book or syncing with data '''
|
''' for creating a new book or syncing with data '''
|
||||||
book = update_from_mappings(book, data, self.book_mappings)
|
book = self.update_from_mappings(book, data, self.book_mappings)
|
||||||
|
|
||||||
author_text = []
|
author_text = []
|
||||||
for author in self.get_authors_from_data(data):
|
for author in self.get_authors_from_data(data):
|
||||||
|
@ -262,23 +262,23 @@ class AbstractConnector(ABC):
|
||||||
''' get more info on a book '''
|
''' get more info on a book '''
|
||||||
|
|
||||||
|
|
||||||
def update_from_mappings(obj, data, mappings):
|
def update_from_mappings(self, obj, data, mappings):
|
||||||
''' assign data to model with mappings '''
|
''' assign data to model with mappings '''
|
||||||
for mapping in mappings:
|
for mapping in mappings:
|
||||||
# check if this field is present in the data
|
# check if this field is present in the data
|
||||||
value = data.get(mapping.remote_field)
|
value = data.get(mapping.remote_field)
|
||||||
if not value:
|
if not value:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# extract the value in the right format
|
# extract the value in the right format
|
||||||
try:
|
try:
|
||||||
value = mapping.formatter(value)
|
value = mapping.formatter(value)
|
||||||
except:
|
except:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# assign the formatted value to the model
|
# assign the formatted value to the model
|
||||||
obj.__setattr__(mapping.local_field, value)
|
obj.__setattr__(mapping.local_field, value)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
def get_date(date_string):
|
def get_date(date_string):
|
||||||
|
|
|
@ -1,55 +1,21 @@
|
||||||
''' using another bookwyrm instance as a source of book data '''
|
''' using another bookwyrm instance as a source of book data '''
|
||||||
from uuid import uuid4
|
|
||||||
|
|
||||||
from django.core.exceptions import ObjectDoesNotExist
|
|
||||||
from django.core.files.base import ContentFile
|
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
import requests
|
|
||||||
|
|
||||||
from bookwyrm import models
|
from bookwyrm import activitypub, models
|
||||||
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
from .abstract_connector import AbstractConnector, SearchResult
|
||||||
from .abstract_connector import update_from_mappings, get_date, get_data
|
from .abstract_connector import get_data
|
||||||
|
|
||||||
|
|
||||||
class Connector(AbstractConnector):
|
class Connector(AbstractConnector):
|
||||||
''' interact with other instances '''
|
''' interact with other instances '''
|
||||||
def __init__(self, identifier):
|
|
||||||
super().__init__(identifier)
|
|
||||||
self.key_mappings = [
|
|
||||||
Mapping('isbn_13', model=models.Edition),
|
|
||||||
Mapping('isbn_10', model=models.Edition),
|
|
||||||
Mapping('lccn', model=models.Work),
|
|
||||||
Mapping('oclc_number', model=models.Edition),
|
|
||||||
Mapping('openlibrary_key'),
|
|
||||||
Mapping('goodreads_key'),
|
|
||||||
Mapping('asin'),
|
|
||||||
]
|
|
||||||
|
|
||||||
self.book_mappings = self.key_mappings + [
|
def update_from_mappings(self, obj, data, mappings):
|
||||||
Mapping('sort_title'),
|
''' serialize book data into a model '''
|
||||||
Mapping('subtitle'),
|
if self.is_work_data(data):
|
||||||
Mapping('description'),
|
work_data = activitypub.Work(**data)
|
||||||
Mapping('languages'),
|
return work_data.to_model(models.Work, instance=obj)
|
||||||
Mapping('series'),
|
edition_data = activitypub.Edition(**data)
|
||||||
Mapping('series_number'),
|
return edition_data.to_model(models.Edition, instance=obj)
|
||||||
Mapping('subjects'),
|
|
||||||
Mapping('subject_places'),
|
|
||||||
Mapping('first_published_date'),
|
|
||||||
Mapping('published_date'),
|
|
||||||
Mapping('pages'),
|
|
||||||
Mapping('physical_format'),
|
|
||||||
Mapping('publishers'),
|
|
||||||
]
|
|
||||||
|
|
||||||
self.author_mappings = [
|
|
||||||
Mapping('name'),
|
|
||||||
Mapping('bio'),
|
|
||||||
Mapping('openlibrary_key'),
|
|
||||||
Mapping('wikipedia_link'),
|
|
||||||
Mapping('aliases'),
|
|
||||||
Mapping('born', formatter=get_date),
|
|
||||||
Mapping('died', formatter=get_date),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def get_remote_id_from_data(self, data):
|
def get_remote_id_from_data(self, data):
|
||||||
|
@ -57,7 +23,7 @@ class Connector(AbstractConnector):
|
||||||
|
|
||||||
|
|
||||||
def is_work_data(self, data):
|
def is_work_data(self, data):
|
||||||
return data['type'] == 'Work'
|
return data.get('type') == 'Work'
|
||||||
|
|
||||||
|
|
||||||
def get_edition_from_work_data(self, data):
|
def get_edition_from_work_data(self, data):
|
||||||
|
@ -71,46 +37,20 @@ class Connector(AbstractConnector):
|
||||||
|
|
||||||
|
|
||||||
def get_authors_from_data(self, data):
|
def get_authors_from_data(self, data):
|
||||||
for author_url in data.get('authors', []):
|
''' load author data '''
|
||||||
yield self.get_or_create_author(author_url)
|
for author_id in data.get('authors', []):
|
||||||
|
try:
|
||||||
|
yield models.Author.objects.get(origin_id=author_id)
|
||||||
|
except models.Author.DoesNotExist:
|
||||||
|
pass
|
||||||
|
data = get_data(author_id)
|
||||||
|
author_data = activitypub.Author(**data)
|
||||||
|
author = author_data.to_model(models.Author)
|
||||||
|
yield author
|
||||||
|
|
||||||
|
|
||||||
def get_cover_from_data(self, data):
|
def get_cover_from_data(self, data):
|
||||||
cover_data = data.get('attachment')
|
pass
|
||||||
if not cover_data:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
cover_url = cover_data[0].get('url')
|
|
||||||
except IndexError:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
response = requests.get(cover_url)
|
|
||||||
except ConnectionError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not response.ok:
|
|
||||||
return None
|
|
||||||
|
|
||||||
image_name = str(uuid4()) + '.' + cover_url.split('.')[-1]
|
|
||||||
image_content = ContentFile(response.content)
|
|
||||||
return [image_name, image_content]
|
|
||||||
|
|
||||||
|
|
||||||
def get_or_create_author(self, remote_id):
|
|
||||||
''' load that author '''
|
|
||||||
try:
|
|
||||||
return models.Author.objects.get(origin_id=remote_id)
|
|
||||||
except ObjectDoesNotExist:
|
|
||||||
pass
|
|
||||||
|
|
||||||
data = get_data(remote_id)
|
|
||||||
|
|
||||||
# ingest a new author
|
|
||||||
author = models.Author(origin_id=remote_id)
|
|
||||||
author = update_from_mappings(author, data, self.author_mappings)
|
|
||||||
author.save()
|
|
||||||
|
|
||||||
return author
|
|
||||||
|
|
||||||
|
|
||||||
def parse_search_data(self, data):
|
def parse_search_data(self, data):
|
||||||
|
|
|
@ -7,7 +7,6 @@ from django.core.files.base import ContentFile
|
||||||
from bookwyrm import models
|
from bookwyrm import models
|
||||||
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
||||||
from .abstract_connector import ConnectorException
|
from .abstract_connector import ConnectorException
|
||||||
from .abstract_connector import update_from_mappings
|
|
||||||
from .abstract_connector import get_date, get_data
|
from .abstract_connector import get_date, get_data
|
||||||
from .openlibrary_languages import languages
|
from .openlibrary_languages import languages
|
||||||
|
|
||||||
|
@ -185,7 +184,7 @@ class Connector(AbstractConnector):
|
||||||
data = get_data(url)
|
data = get_data(url)
|
||||||
|
|
||||||
author = models.Author(openlibrary_key=olkey)
|
author = models.Author(openlibrary_key=olkey)
|
||||||
author = update_from_mappings(author, data, self.author_mappings)
|
author = self.update_from_mappings(author, data, self.author_mappings)
|
||||||
name = data.get('name')
|
name = data.get('name')
|
||||||
# TODO this is making some BOLD assumption
|
# TODO this is making some BOLD assumption
|
||||||
if name:
|
if name:
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
import inspect
|
import inspect
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from .book import Book, Work, Edition, Author
|
from .book import Book, Work, Edition
|
||||||
|
from .author import Author
|
||||||
from .connector import Connector
|
from .connector import Connector
|
||||||
from .relationship import UserFollows, UserFollowRequest, UserBlocks
|
from .relationship import UserFollows, UserFollowRequest, UserBlocks
|
||||||
from .shelf import Shelf, ShelfBook
|
from .shelf import Shelf, ShelfBook
|
||||||
|
|
50
bookwyrm/models/author.py
Normal file
50
bookwyrm/models/author.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
''' database schema for info about authors '''
|
||||||
|
from django.db import models
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from bookwyrm import activitypub
|
||||||
|
from bookwyrm.utils.fields import ArrayField
|
||||||
|
|
||||||
|
from .base_model import ActivitypubMixin, ActivityMapping, BookWyrmModel
|
||||||
|
|
||||||
|
|
||||||
|
class Author(ActivitypubMixin, BookWyrmModel):
|
||||||
|
''' basic biographic info '''
|
||||||
|
origin_id = models.CharField(max_length=255, null=True)
|
||||||
|
''' copy of an author from OL '''
|
||||||
|
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
||||||
|
sync = models.BooleanField(default=True)
|
||||||
|
last_sync_date = models.DateTimeField(default=timezone.now)
|
||||||
|
wikipedia_link = models.CharField(max_length=255, blank=True, null=True)
|
||||||
|
# idk probably other keys would be useful here?
|
||||||
|
born = models.DateTimeField(blank=True, null=True)
|
||||||
|
died = models.DateTimeField(blank=True, null=True)
|
||||||
|
name = models.CharField(max_length=255)
|
||||||
|
last_name = models.CharField(max_length=255, blank=True, null=True)
|
||||||
|
first_name = models.CharField(max_length=255, blank=True, null=True)
|
||||||
|
aliases = ArrayField(
|
||||||
|
models.CharField(max_length=255), blank=True, default=list
|
||||||
|
)
|
||||||
|
bio = models.TextField(null=True, blank=True)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def display_name(self):
|
||||||
|
''' Helper to return a displayable name'''
|
||||||
|
if self.name:
|
||||||
|
return self.name
|
||||||
|
# don't want to return a spurious space if all of these are None
|
||||||
|
if self.first_name and self.last_name:
|
||||||
|
return self.first_name + ' ' + self.last_name
|
||||||
|
return self.last_name or self.first_name
|
||||||
|
|
||||||
|
activity_mappings = [
|
||||||
|
ActivityMapping('id', 'remote_id'),
|
||||||
|
ActivityMapping('name', 'name'),
|
||||||
|
ActivityMapping('born', 'born'),
|
||||||
|
ActivityMapping('died', 'died'),
|
||||||
|
ActivityMapping('aliases', 'aliases'),
|
||||||
|
ActivityMapping('bio', 'bio'),
|
||||||
|
ActivityMapping('openlibraryKey', 'openlibrary_key'),
|
||||||
|
ActivityMapping('wikipediaLink', 'wikipedia_link'),
|
||||||
|
]
|
||||||
|
activity_serializer = activitypub.Author
|
|
@ -102,7 +102,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
|
||||||
'attachment', 'cover',
|
'attachment', 'cover',
|
||||||
# this expects an iterable and the field is just an image
|
# this expects an iterable and the field is just an image
|
||||||
lambda x: image_attachments_formatter([x]),
|
lambda x: image_attachments_formatter([x]),
|
||||||
lambda x: activitypub.image_attachments_formatter(x)[0]
|
activitypub.image_formatter
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -190,7 +190,7 @@ class Edition(Book):
|
||||||
if self.isbn_10 and not self.isbn_13:
|
if self.isbn_10 and not self.isbn_13:
|
||||||
self.isbn_13 = isbn_10_to_13(self.isbn_10)
|
self.isbn_13 = isbn_10_to_13(self.isbn_10)
|
||||||
|
|
||||||
super().save(*args, **kwargs)
|
return super().save(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def isbn_10_to_13(isbn_10):
|
def isbn_10_to_13(isbn_10):
|
||||||
|
@ -234,44 +234,3 @@ def isbn_13_to_10(isbn_13):
|
||||||
if checkdigit == 10:
|
if checkdigit == 10:
|
||||||
checkdigit = 'X'
|
checkdigit = 'X'
|
||||||
return converted + str(checkdigit)
|
return converted + str(checkdigit)
|
||||||
|
|
||||||
|
|
||||||
class Author(ActivitypubMixin, BookWyrmModel):
|
|
||||||
origin_id = models.CharField(max_length=255, null=True)
|
|
||||||
''' copy of an author from OL '''
|
|
||||||
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
|
||||||
sync = models.BooleanField(default=True)
|
|
||||||
last_sync_date = models.DateTimeField(default=timezone.now)
|
|
||||||
wikipedia_link = models.CharField(max_length=255, blank=True, null=True)
|
|
||||||
# idk probably other keys would be useful here?
|
|
||||||
born = models.DateTimeField(blank=True, null=True)
|
|
||||||
died = models.DateTimeField(blank=True, null=True)
|
|
||||||
name = models.CharField(max_length=255)
|
|
||||||
last_name = models.CharField(max_length=255, blank=True, null=True)
|
|
||||||
first_name = models.CharField(max_length=255, blank=True, null=True)
|
|
||||||
aliases = ArrayField(
|
|
||||||
models.CharField(max_length=255), blank=True, default=list
|
|
||||||
)
|
|
||||||
bio = models.TextField(null=True, blank=True)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def display_name(self):
|
|
||||||
''' Helper to return a displayable name'''
|
|
||||||
if self.name:
|
|
||||||
return self.name
|
|
||||||
# don't want to return a spurious space if all of these are None
|
|
||||||
if self.first_name and self.last_name:
|
|
||||||
return self.first_name + ' ' + self.last_name
|
|
||||||
return self.last_name or self.first_name
|
|
||||||
|
|
||||||
activity_mappings = [
|
|
||||||
ActivityMapping('id', 'remote_id'),
|
|
||||||
ActivityMapping('name', 'display_name'),
|
|
||||||
ActivityMapping('born', 'born'),
|
|
||||||
ActivityMapping('died', 'died'),
|
|
||||||
ActivityMapping('aliases', 'aliases'),
|
|
||||||
ActivityMapping('bio', 'bio'),
|
|
||||||
ActivityMapping('openlibrary_key', 'openlibrary_key'),
|
|
||||||
ActivityMapping('wikipedia_link', 'wikipedia_link'),
|
|
||||||
]
|
|
||||||
activity_serializer = activitypub.Author
|
|
||||||
|
|
|
@ -2,8 +2,7 @@
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
from bookwyrm import models
|
from bookwyrm import models
|
||||||
from bookwyrm.connectors.abstract_connector import Mapping,\
|
from bookwyrm.connectors.abstract_connector import Mapping
|
||||||
update_from_mappings
|
|
||||||
from bookwyrm.connectors.bookwyrm_connector import Connector
|
from bookwyrm.connectors.bookwyrm_connector import Connector
|
||||||
|
|
||||||
|
|
||||||
|
@ -64,29 +63,6 @@ class AbstractConnector(TestCase):
|
||||||
self.assertEqual(mapping.formatter('bb'), 'aabb')
|
self.assertEqual(mapping.formatter('bb'), 'aabb')
|
||||||
|
|
||||||
|
|
||||||
def test_update_from_mappings(self):
|
|
||||||
data = {
|
|
||||||
'title': 'Unused title',
|
|
||||||
'isbn_10': '1234567890',
|
|
||||||
'isbn_13': 'blahhh',
|
|
||||||
'blah': 'bip',
|
|
||||||
'format': 'hardcover',
|
|
||||||
'series': ['one', 'two'],
|
|
||||||
}
|
|
||||||
mappings = [
|
|
||||||
Mapping('isbn_10'),
|
|
||||||
Mapping('blah'),# not present on self.book
|
|
||||||
Mapping('physical_format', remote_field='format'),
|
|
||||||
Mapping('series', formatter=lambda x: x[0]),
|
|
||||||
]
|
|
||||||
book = update_from_mappings(self.book, data, mappings)
|
|
||||||
self.assertEqual(book.title, 'Example Edition')
|
|
||||||
self.assertEqual(book.isbn_10, '1234567890')
|
|
||||||
self.assertEqual(book.isbn_13, None)
|
|
||||||
self.assertEqual(book.physical_format, 'hardcover')
|
|
||||||
self.assertEqual(book.series, 'one')
|
|
||||||
|
|
||||||
|
|
||||||
def test_match_from_mappings(self):
|
def test_match_from_mappings(self):
|
||||||
edition = models.Edition.objects.create(
|
edition = models.Edition.objects.create(
|
||||||
title='Blah',
|
title='Blah',
|
||||||
|
|
|
@ -28,9 +28,7 @@
|
||||||
],
|
],
|
||||||
"lccn": null,
|
"lccn": null,
|
||||||
"editions": [
|
"editions": [
|
||||||
"https://bookwyrm.social/book/5989",
|
"https://bookwyrm.social/book/5989"
|
||||||
"OL28439584M",
|
|
||||||
"OL28300471M"
|
|
||||||
],
|
],
|
||||||
"@context": "https://www.w3.org/ns/activitystreams"
|
"@context": "https://www.w3.org/ns/activitystreams"
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue