Fixes loading more data

This commit is contained in:
Mouse Reeve 2020-12-19 16:14:05 -08:00
parent d3671d981f
commit 3e61f48da2
4 changed files with 54 additions and 64 deletions

View file

@ -50,7 +50,7 @@ class Work(Book):
''' work instance of a book object ''' ''' work instance of a book object '''
lccn: str = '' lccn: str = ''
defaultEdition: str = '' defaultEdition: str = ''
editions: List[str] editions: List[str] = field(default_factory=lambda: [])
type: str = 'Work' type: str = 'Work'
@ -58,9 +58,9 @@ class Work(Book):
class Author(ActivityObject): class Author(ActivityObject):
''' author of a book ''' ''' author of a book '''
name: str name: str
born: str = '' born: str = None
died: str = '' died: str = None
aliases: str = '' aliases: List[str] = field(default_factory=lambda: [])
bio: str = '' bio: str = ''
openlibraryKey: str = '' openlibraryKey: str = ''
wikipediaLink: str = '' wikipediaLink: str = ''

View file

@ -1,11 +1,9 @@
''' functionality outline for a book data connector ''' ''' functionality outline for a book data connector '''
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
import pytz
from urllib3.exceptions import RequestError from urllib3.exceptions import RequestError
from django.db import transaction from django.db import transaction
from dateutil import parser
import requests import requests
from requests import HTTPError from requests import HTTPError
from requests.exceptions import SSLError from requests.exceptions import SSLError
@ -102,12 +100,10 @@ class AbstractConnector(AbstractMinimalConnector):
if self.is_work_data(data): if self.is_work_data(data):
try: try:
edition_data = self.get_edition_from_work_data(data) edition_data = self.get_edition_from_work_data(data)
edition_data = dict_from_mappings(\
edition_data, self.book_mappings)
except KeyError: except KeyError:
# hack: re-use the work data as the edition data # hack: re-use the work data as the edition data
# this is why remote ids aren't necessarily unique # this is why remote ids aren't necessarily unique
edition_data = mapped_data edition_data = data
work_data = mapped_data work_data = mapped_data
else: else:
try: try:
@ -115,75 +111,76 @@ class AbstractConnector(AbstractMinimalConnector):
work_data = dict_from_mappings(work_data, self.book_mappings) work_data = dict_from_mappings(work_data, self.book_mappings)
except KeyError: except KeyError:
work_data = mapped_data work_data = mapped_data
edition_data = mapped_data edition_data = data
if not work_data or not edition_data: if not work_data or not edition_data:
raise ConnectorException('Unable to load book data: %s' % remote_id) raise ConnectorException('Unable to load book data: %s' % remote_id)
# create activitypub object # create activitypub object
work_activity = activitypub.Work(**work_data) work_activity = activitypub.Work(**work_data)
edition_activity = activitypub.Edition(**edition_data)
# this will dedupe automatically # this will dedupe automatically
work = work_activity.to_model(models.Work, save=False) work = work_activity.to_model(models.Work)
edition = edition_activity.to_model(models.Edition, save=False)
edition.parent_work = work
work.default_edition = edition
work.save()
edition.save()
for author in self.get_authors_from_data(data): for author in self.get_authors_from_data(data):
work.authors.add(author) work.authors.add(author)
return self.create_edition_from_data(work, edition_data)
def create_edition_from_data(self, work, edition_data):
''' if we already have the work, we're ready '''
mapped_data = dict_from_mappings(edition_data, self.book_mappings)
mapped_data['work'] = work.remote_id
edition_activity = activitypub.Edition(**mapped_data)
edition = edition_activity.to_model(models.Edition)
edition.connector = self.connector
edition.save()
work.default_edition = edition
work.save()
for author in self.get_authors_from_data(edition_data):
edition.authors.add(author) edition.authors.add(author)
if not edition.authors.exists() and work.authors.exists():
edition.authors.add(work.authors.all())
return edition return edition
def get_or_create_author(self, remote_id): def get_or_create_author(self, remote_id):
''' load that author ''' ''' load that author '''
existing = models.Author.find_exising_by_remote_id(remote_id) existing = models.Author.find_existing_by_remote_id(remote_id)
if existing: if existing:
return existing return existing
data = get_data(remote_id) data = get_data(remote_id)
author_activity = dict_from_mappings(data, self.author_mappings) mapped_data = dict_from_mappings(data, self.author_mappings)
activity = activitypub.Author(**mapped_data)
# this will dedupe # this will dedupe
return activitypub.Author(**author_activity).to_model() return activity.to_model(models.Author)
@abstractmethod @abstractmethod
def is_work_data(self, data): def is_work_data(self, data):
''' differentiate works and editions ''' ''' differentiate works and editions '''
@abstractmethod @abstractmethod
def get_edition_from_work_data(self, data): def get_edition_from_work_data(self, data):
''' every work needs at least one edition ''' ''' every work needs at least one edition '''
@abstractmethod @abstractmethod
def get_work_from_edition_date(self, data): def get_work_from_edition_date(self, data):
''' every edition needs a work ''' ''' every edition needs a work '''
@abstractmethod @abstractmethod
def get_authors_from_data(self, data): def get_authors_from_data(self, data):
''' load author data ''' ''' load author data '''
@abstractmethod
def get_cover_from_data(self, data):
''' load cover '''
@abstractmethod @abstractmethod
def expand_book_data(self, book): def expand_book_data(self, book):
''' get more info on a book ''' ''' get more info on a book '''
def dict_from_mappings(self, data, mappings): def dict_from_mappings(data, mappings):
''' create a dict in Activitypub format, using mappings supplied by ''' create a dict in Activitypub format, using mappings supplied by
the subclass ''' the subclass '''
result = {} result = {}
@ -250,4 +247,9 @@ class Mapping:
def get_value(self, data): def get_value(self, data):
''' pull a field from incoming json and return the formatted version ''' ''' pull a field from incoming json and return the formatted version '''
value = data.get(self.remote_field) value = data.get(self.remote_field)
return self.formatter(value) if not value:
return None
try:
return self.formatter(value)
except:# pylint: disable=bare-except
return None

View file

@ -1,13 +1,9 @@
''' openlibrary data connector ''' ''' openlibrary data connector '''
import re import re
import requests
from django.core.files.base import ContentFile
from bookwyrm import models from bookwyrm import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import ConnectorException, dict_from_mappings from .abstract_connector import ConnectorException, get_data
from .abstract_connector import get_data, update_from_mappings
from .openlibrary_languages import languages from .openlibrary_languages import languages
@ -17,8 +13,12 @@ class Connector(AbstractConnector):
super().__init__(identifier) super().__init__(identifier)
get_first = lambda a: a[0] get_first = lambda a: a[0]
get_remote_id = lambda a: self.base_url + a
self.book_mappings = [ self.book_mappings = [
Mapping('title'), Mapping('title'),
Mapping('id', remote_field='key', formatter=get_remote_id),
Mapping(
'cover', remote_field='covers', formatter=self.get_cover_url),
Mapping('sortTitle', remote_field='sort_title'), Mapping('sortTitle', remote_field='sort_title'),
Mapping('subtitle'), Mapping('subtitle'),
Mapping('description', formatter=get_description), Mapping('description', formatter=get_description),
@ -50,7 +50,12 @@ class Connector(AbstractConnector):
] ]
self.author_mappings = [ self.author_mappings = [
Mapping('id', remote_field='key', formatter=get_remote_id),
Mapping('name'), Mapping('name'),
Mapping(
'openlibraryKey', remote_field='key',
formatter=get_openlibrary_key
),
Mapping('born', remote_field='birth_date'), Mapping('born', remote_field='birth_date'),
Mapping('died', remote_field='death_date'), Mapping('died', remote_field='death_date'),
Mapping('bio', formatter=get_description), Mapping('bio', formatter=get_description),
@ -58,6 +63,7 @@ class Connector(AbstractConnector):
def get_remote_id_from_data(self, data): def get_remote_id_from_data(self, data):
''' format a url from an openlibrary id field '''
try: try:
key = data['key'] key = data['key']
except KeyError: except KeyError:
@ -93,24 +99,16 @@ class Connector(AbstractConnector):
for author_blob in data.get('authors', []): for author_blob in data.get('authors', []):
author_blob = author_blob.get('author', author_blob) author_blob = author_blob.get('author', author_blob)
# this id is "/authors/OL1234567A" # this id is "/authors/OL1234567A"
author_id = author_blob['key'].split('/')[-1] author_id = author_blob['key']
url = '%s/%s.json' % (self.base_url, author_id) url = '%s/%s.json' % (self.base_url, author_id)
yield self.get_or_create_author(url) yield self.get_or_create_author(url)
def get_cover_from_data(self, data): def get_cover_url(self, cover_blob):
''' ask openlibrary for the cover ''' ''' ask openlibrary for the cover '''
if not data.get('covers'): cover_id = cover_blob[0]
return None
cover_id = data.get('covers')[0]
image_name = '%s-M.jpg' % cover_id image_name = '%s-M.jpg' % cover_id
url = '%s/b/id/%s' % (self.covers_url, image_name) return '%s/b/id/%s' % (self.covers_url, image_name)
response = requests.get(url)
if not response.ok:
response.raise_for_status()
image_content = ContentFile(response.content)
return [image_name, image_content]
def parse_search_data(self, data): def parse_search_data(self, data):
@ -144,19 +142,7 @@ class Connector(AbstractConnector):
# we can mass download edition data from OL to avoid repeatedly querying # we can mass download edition data from OL to avoid repeatedly querying
edition_options = self.load_edition_data(work.openlibrary_key) edition_options = self.load_edition_data(work.openlibrary_key)
for edition_data in edition_options.get('entries'): for edition_data in edition_options.get('entries'):
olkey = edition_data.get('key').split('/')[-1] self.create_edition_from_data(work, edition_data)
# make sure the edition isn't already in the database
if models.Edition.objects.filter(openlibrary_key=olkey).count():
continue
# creates and populates the book from the data
edition = self.create_book(olkey, edition_data, models.Edition)
# ensures that the edition is associated with the work
edition.parent_work = work
edition.save()
# get author data from the work if it's missing from the edition
if not edition.authors and work.authors:
edition.authors.set(work.authors.all())
def get_description(description_blob): def get_description(description_blob):

View file

@ -223,6 +223,8 @@ def resolve_book(request):
remote_id = request.POST.get('remote_id') remote_id = request.POST.get('remote_id')
connector = books_manager.get_or_create_connector(remote_id) connector = books_manager.get_or_create_connector(remote_id)
book = connector.get_or_create_book(remote_id) book = connector.get_or_create_book(remote_id)
if book.connector:
books_manager.load_more_data.delay(book.id)
return redirect('/book/%d' % book.id) return redirect('/book/%d' % book.id)