mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2025-01-25 16:38:09 +00:00
Merge pull request #365 from mouse-reeve/refactor-bookwyrm-connector
Refactors bookwyrm connector to use activitypub serializer
This commit is contained in:
commit
1df5b2d481
10 changed files with 107 additions and 178 deletions
|
@ -212,6 +212,12 @@ def tag_formatter(tags, tag_type):
|
|||
|
||||
def image_formatter(image_json):
|
||||
''' helper function to load images and format them for a model '''
|
||||
if isinstance(image_json, list):
|
||||
try:
|
||||
image_json = image_json[0]
|
||||
except IndexError:
|
||||
return None
|
||||
|
||||
if not image_json or not hasattr(image_json, 'url'):
|
||||
return None
|
||||
url = image_json.get('url')
|
||||
|
|
|
@ -56,10 +56,10 @@ class Work(Book):
|
|||
class Author(ActivityObject):
|
||||
''' author of a book '''
|
||||
name: str
|
||||
born: str
|
||||
died: str
|
||||
aliases: str
|
||||
bio: str
|
||||
openlibrary_key: str
|
||||
wikipedia_link: str
|
||||
born: str = ''
|
||||
died: str = ''
|
||||
aliases: str = ''
|
||||
bio: str = ''
|
||||
openlibraryKey: str = ''
|
||||
wikipediaLink: str = ''
|
||||
type: str = 'Person'
|
||||
|
|
|
@ -157,7 +157,7 @@ class AbstractConnector(ABC):
|
|||
|
||||
def update_book_from_data(self, book, data, update_cover=True):
|
||||
''' for creating a new book or syncing with data '''
|
||||
book = update_from_mappings(book, data, self.book_mappings)
|
||||
book = self.update_from_mappings(book, data, self.book_mappings)
|
||||
|
||||
author_text = []
|
||||
for author in self.get_authors_from_data(data):
|
||||
|
@ -262,7 +262,7 @@ class AbstractConnector(ABC):
|
|||
''' get more info on a book '''
|
||||
|
||||
|
||||
def update_from_mappings(obj, data, mappings):
|
||||
def update_from_mappings(self, obj, data, mappings):
|
||||
''' assign data to model with mappings '''
|
||||
for mapping in mappings:
|
||||
# check if this field is present in the data
|
||||
|
|
|
@ -1,55 +1,21 @@
|
|||
''' using another bookwyrm instance as a source of book data '''
|
||||
from uuid import uuid4
|
||||
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from django.core.files.base import ContentFile
|
||||
from django.db import transaction
|
||||
import requests
|
||||
|
||||
from bookwyrm import models
|
||||
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
||||
from .abstract_connector import update_from_mappings, get_date, get_data
|
||||
from bookwyrm import activitypub, models
|
||||
from .abstract_connector import AbstractConnector, SearchResult
|
||||
from .abstract_connector import get_data
|
||||
|
||||
|
||||
class Connector(AbstractConnector):
|
||||
''' interact with other instances '''
|
||||
def __init__(self, identifier):
|
||||
super().__init__(identifier)
|
||||
self.key_mappings = [
|
||||
Mapping('isbn_13', model=models.Edition),
|
||||
Mapping('isbn_10', model=models.Edition),
|
||||
Mapping('lccn', model=models.Work),
|
||||
Mapping('oclc_number', model=models.Edition),
|
||||
Mapping('openlibrary_key'),
|
||||
Mapping('goodreads_key'),
|
||||
Mapping('asin'),
|
||||
]
|
||||
|
||||
self.book_mappings = self.key_mappings + [
|
||||
Mapping('sort_title'),
|
||||
Mapping('subtitle'),
|
||||
Mapping('description'),
|
||||
Mapping('languages'),
|
||||
Mapping('series'),
|
||||
Mapping('series_number'),
|
||||
Mapping('subjects'),
|
||||
Mapping('subject_places'),
|
||||
Mapping('first_published_date'),
|
||||
Mapping('published_date'),
|
||||
Mapping('pages'),
|
||||
Mapping('physical_format'),
|
||||
Mapping('publishers'),
|
||||
]
|
||||
|
||||
self.author_mappings = [
|
||||
Mapping('name'),
|
||||
Mapping('bio'),
|
||||
Mapping('openlibrary_key'),
|
||||
Mapping('wikipedia_link'),
|
||||
Mapping('aliases'),
|
||||
Mapping('born', formatter=get_date),
|
||||
Mapping('died', formatter=get_date),
|
||||
]
|
||||
def update_from_mappings(self, obj, data, mappings):
|
||||
''' serialize book data into a model '''
|
||||
if self.is_work_data(data):
|
||||
work_data = activitypub.Work(**data)
|
||||
return work_data.to_model(models.Work, instance=obj)
|
||||
edition_data = activitypub.Edition(**data)
|
||||
return edition_data.to_model(models.Edition, instance=obj)
|
||||
|
||||
|
||||
def get_remote_id_from_data(self, data):
|
||||
|
@ -57,7 +23,7 @@ class Connector(AbstractConnector):
|
|||
|
||||
|
||||
def is_work_data(self, data):
|
||||
return data['type'] == 'Work'
|
||||
return data.get('type') == 'Work'
|
||||
|
||||
|
||||
def get_edition_from_work_data(self, data):
|
||||
|
@ -71,47 +37,21 @@ class Connector(AbstractConnector):
|
|||
|
||||
|
||||
def get_authors_from_data(self, data):
|
||||
for author_url in data.get('authors', []):
|
||||
yield self.get_or_create_author(author_url)
|
||||
''' load author data '''
|
||||
for author_id in data.get('authors', []):
|
||||
try:
|
||||
yield models.Author.objects.get(origin_id=author_id)
|
||||
except models.Author.DoesNotExist:
|
||||
pass
|
||||
data = get_data(author_id)
|
||||
author_data = activitypub.Author(**data)
|
||||
author = author_data.to_model(models.Author)
|
||||
yield author
|
||||
|
||||
|
||||
def get_cover_from_data(self, data):
|
||||
cover_data = data.get('attachment')
|
||||
if not cover_data:
|
||||
return None
|
||||
try:
|
||||
cover_url = cover_data[0].get('url')
|
||||
except IndexError:
|
||||
return None
|
||||
try:
|
||||
response = requests.get(cover_url)
|
||||
except ConnectionError:
|
||||
return None
|
||||
|
||||
if not response.ok:
|
||||
return None
|
||||
|
||||
image_name = str(uuid4()) + '.' + cover_url.split('.')[-1]
|
||||
image_content = ContentFile(response.content)
|
||||
return [image_name, image_content]
|
||||
|
||||
|
||||
def get_or_create_author(self, remote_id):
|
||||
''' load that author '''
|
||||
try:
|
||||
return models.Author.objects.get(origin_id=remote_id)
|
||||
except ObjectDoesNotExist:
|
||||
pass
|
||||
|
||||
data = get_data(remote_id)
|
||||
|
||||
# ingest a new author
|
||||
author = models.Author(origin_id=remote_id)
|
||||
author = update_from_mappings(author, data, self.author_mappings)
|
||||
author.save()
|
||||
|
||||
return author
|
||||
|
||||
|
||||
def parse_search_data(self, data):
|
||||
return data
|
||||
|
|
|
@ -7,7 +7,6 @@ from django.core.files.base import ContentFile
|
|||
from bookwyrm import models
|
||||
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
||||
from .abstract_connector import ConnectorException
|
||||
from .abstract_connector import update_from_mappings
|
||||
from .abstract_connector import get_date, get_data
|
||||
from .openlibrary_languages import languages
|
||||
|
||||
|
@ -185,7 +184,7 @@ class Connector(AbstractConnector):
|
|||
data = get_data(url)
|
||||
|
||||
author = models.Author(openlibrary_key=olkey)
|
||||
author = update_from_mappings(author, data, self.author_mappings)
|
||||
author = self.update_from_mappings(author, data, self.author_mappings)
|
||||
name = data.get('name')
|
||||
# TODO this is making some BOLD assumption
|
||||
if name:
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
import inspect
|
||||
import sys
|
||||
|
||||
from .book import Book, Work, Edition, Author
|
||||
from .book import Book, Work, Edition
|
||||
from .author import Author
|
||||
from .connector import Connector
|
||||
from .relationship import UserFollows, UserFollowRequest, UserBlocks
|
||||
from .shelf import Shelf, ShelfBook
|
||||
|
|
50
bookwyrm/models/author.py
Normal file
50
bookwyrm/models/author.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
''' database schema for info about authors '''
|
||||
from django.db import models
|
||||
from django.utils import timezone
|
||||
|
||||
from bookwyrm import activitypub
|
||||
from bookwyrm.utils.fields import ArrayField
|
||||
|
||||
from .base_model import ActivitypubMixin, ActivityMapping, BookWyrmModel
|
||||
|
||||
|
||||
class Author(ActivitypubMixin, BookWyrmModel):
|
||||
''' basic biographic info '''
|
||||
origin_id = models.CharField(max_length=255, null=True)
|
||||
''' copy of an author from OL '''
|
||||
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
||||
sync = models.BooleanField(default=True)
|
||||
last_sync_date = models.DateTimeField(default=timezone.now)
|
||||
wikipedia_link = models.CharField(max_length=255, blank=True, null=True)
|
||||
# idk probably other keys would be useful here?
|
||||
born = models.DateTimeField(blank=True, null=True)
|
||||
died = models.DateTimeField(blank=True, null=True)
|
||||
name = models.CharField(max_length=255)
|
||||
last_name = models.CharField(max_length=255, blank=True, null=True)
|
||||
first_name = models.CharField(max_length=255, blank=True, null=True)
|
||||
aliases = ArrayField(
|
||||
models.CharField(max_length=255), blank=True, default=list
|
||||
)
|
||||
bio = models.TextField(null=True, blank=True)
|
||||
|
||||
@property
|
||||
def display_name(self):
|
||||
''' Helper to return a displayable name'''
|
||||
if self.name:
|
||||
return self.name
|
||||
# don't want to return a spurious space if all of these are None
|
||||
if self.first_name and self.last_name:
|
||||
return self.first_name + ' ' + self.last_name
|
||||
return self.last_name or self.first_name
|
||||
|
||||
activity_mappings = [
|
||||
ActivityMapping('id', 'remote_id'),
|
||||
ActivityMapping('name', 'name'),
|
||||
ActivityMapping('born', 'born'),
|
||||
ActivityMapping('died', 'died'),
|
||||
ActivityMapping('aliases', 'aliases'),
|
||||
ActivityMapping('bio', 'bio'),
|
||||
ActivityMapping('openlibraryKey', 'openlibrary_key'),
|
||||
ActivityMapping('wikipediaLink', 'wikipedia_link'),
|
||||
]
|
||||
activity_serializer = activitypub.Author
|
|
@ -102,7 +102,7 @@ class Book(ActivitypubMixin, BookWyrmModel):
|
|||
'attachment', 'cover',
|
||||
# this expects an iterable and the field is just an image
|
||||
lambda x: image_attachments_formatter([x]),
|
||||
lambda x: activitypub.image_attachments_formatter(x)[0]
|
||||
activitypub.image_formatter
|
||||
),
|
||||
]
|
||||
|
||||
|
@ -190,7 +190,7 @@ class Edition(Book):
|
|||
if self.isbn_10 and not self.isbn_13:
|
||||
self.isbn_13 = isbn_10_to_13(self.isbn_10)
|
||||
|
||||
super().save(*args, **kwargs)
|
||||
return super().save(*args, **kwargs)
|
||||
|
||||
|
||||
def isbn_10_to_13(isbn_10):
|
||||
|
@ -234,44 +234,3 @@ def isbn_13_to_10(isbn_13):
|
|||
if checkdigit == 10:
|
||||
checkdigit = 'X'
|
||||
return converted + str(checkdigit)
|
||||
|
||||
|
||||
class Author(ActivitypubMixin, BookWyrmModel):
|
||||
origin_id = models.CharField(max_length=255, null=True)
|
||||
''' copy of an author from OL '''
|
||||
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
||||
sync = models.BooleanField(default=True)
|
||||
last_sync_date = models.DateTimeField(default=timezone.now)
|
||||
wikipedia_link = models.CharField(max_length=255, blank=True, null=True)
|
||||
# idk probably other keys would be useful here?
|
||||
born = models.DateTimeField(blank=True, null=True)
|
||||
died = models.DateTimeField(blank=True, null=True)
|
||||
name = models.CharField(max_length=255)
|
||||
last_name = models.CharField(max_length=255, blank=True, null=True)
|
||||
first_name = models.CharField(max_length=255, blank=True, null=True)
|
||||
aliases = ArrayField(
|
||||
models.CharField(max_length=255), blank=True, default=list
|
||||
)
|
||||
bio = models.TextField(null=True, blank=True)
|
||||
|
||||
@property
|
||||
def display_name(self):
|
||||
''' Helper to return a displayable name'''
|
||||
if self.name:
|
||||
return self.name
|
||||
# don't want to return a spurious space if all of these are None
|
||||
if self.first_name and self.last_name:
|
||||
return self.first_name + ' ' + self.last_name
|
||||
return self.last_name or self.first_name
|
||||
|
||||
activity_mappings = [
|
||||
ActivityMapping('id', 'remote_id'),
|
||||
ActivityMapping('name', 'display_name'),
|
||||
ActivityMapping('born', 'born'),
|
||||
ActivityMapping('died', 'died'),
|
||||
ActivityMapping('aliases', 'aliases'),
|
||||
ActivityMapping('bio', 'bio'),
|
||||
ActivityMapping('openlibrary_key', 'openlibrary_key'),
|
||||
ActivityMapping('wikipedia_link', 'wikipedia_link'),
|
||||
]
|
||||
activity_serializer = activitypub.Author
|
||||
|
|
|
@ -2,8 +2,7 @@
|
|||
from django.test import TestCase
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm.connectors.abstract_connector import Mapping,\
|
||||
update_from_mappings
|
||||
from bookwyrm.connectors.abstract_connector import Mapping
|
||||
from bookwyrm.connectors.bookwyrm_connector import Connector
|
||||
|
||||
|
||||
|
@ -64,29 +63,6 @@ class AbstractConnector(TestCase):
|
|||
self.assertEqual(mapping.formatter('bb'), 'aabb')
|
||||
|
||||
|
||||
def test_update_from_mappings(self):
|
||||
data = {
|
||||
'title': 'Unused title',
|
||||
'isbn_10': '1234567890',
|
||||
'isbn_13': 'blahhh',
|
||||
'blah': 'bip',
|
||||
'format': 'hardcover',
|
||||
'series': ['one', 'two'],
|
||||
}
|
||||
mappings = [
|
||||
Mapping('isbn_10'),
|
||||
Mapping('blah'),# not present on self.book
|
||||
Mapping('physical_format', remote_field='format'),
|
||||
Mapping('series', formatter=lambda x: x[0]),
|
||||
]
|
||||
book = update_from_mappings(self.book, data, mappings)
|
||||
self.assertEqual(book.title, 'Example Edition')
|
||||
self.assertEqual(book.isbn_10, '1234567890')
|
||||
self.assertEqual(book.isbn_13, None)
|
||||
self.assertEqual(book.physical_format, 'hardcover')
|
||||
self.assertEqual(book.series, 'one')
|
||||
|
||||
|
||||
def test_match_from_mappings(self):
|
||||
edition = models.Edition.objects.create(
|
||||
title='Blah',
|
||||
|
|
|
@ -28,9 +28,7 @@
|
|||
],
|
||||
"lccn": null,
|
||||
"editions": [
|
||||
"https://bookwyrm.social/book/5989",
|
||||
"OL28439584M",
|
||||
"OL28300471M"
|
||||
"https://bookwyrm.social/book/5989"
|
||||
],
|
||||
"@context": "https://www.w3.org/ns/activitystreams"
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue