Fixes loading more data

Mouse Reeve 2020-12-19 16:14:05 -08:00
parent d3671d981f
commit 3e61f48da2
4 changed files with 54 additions and 64 deletions

View file

@@ -50,7 +50,7 @@ class Work(Book):
''' work instance of a book object '''
lccn: str = ''
defaultEdition: str = ''
editions: List[str]
editions: List[str] = field(default_factory=lambda: [])
type: str = 'Work'
@@ -58,9 +58,9 @@ class Work(Book):
class Author(ActivityObject):
''' author of a book '''
name: str
born: str = ''
died: str = ''
aliases: str = ''
born: str = None
died: str = None
aliases: List[str] = field(default_factory=lambda: [])
bio: str = ''
openlibraryKey: str = ''
wikipediaLink: str = ''
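
A note on the dataclass changes in this hunk: a field with no default (the old editions: List[str]) cannot come after fields that already have defaults, and a bare mutable default like [] is rejected by @dataclass, so field(default_factory=lambda: []) is how each instance gets its own empty list. A minimal standalone sketch, using trimmed-down classes rather than the real activitypub models:

from dataclasses import dataclass, field
from typing import List

@dataclass
class Author:
    name: str
    born: str = None                                        # optional, mirroring the diff
    aliases: List[str] = field(default_factory=lambda: [])  # fresh list per instance

a = Author(name='Ursula K. Le Guin')
b = Author(name='N. K. Jemisin')
a.aliases.append('UKL')
assert b.aliases == []  # no shared state between instances
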

View file

@@ -1,11 +1,9 @@
''' functionality outline for a book data connector '''
from abc import ABC, abstractmethod
from dataclasses import dataclass
import pytz
from urllib3.exceptions import RequestError
from django.db import transaction
from dateutil import parser
import requests
from requests import HTTPError
from requests.exceptions import SSLError
@@ -102,12 +100,10 @@ class AbstractConnector(AbstractMinimalConnector):
if self.is_work_data(data):
try:
edition_data = self.get_edition_from_work_data(data)
edition_data = dict_from_mappings(\
edition_data, self.book_mappings)
except KeyError:
# hack: re-use the work data as the edition data
# this is why remote ids aren't necessarily unique
edition_data = mapped_data
edition_data = data
work_data = mapped_data
else:
try:
@@ -115,75 +111,76 @@ class AbstractConnector(AbstractMinimalConnector):
work_data = dict_from_mappings(work_data, self.book_mappings)
except KeyError:
work_data = mapped_data
edition_data = mapped_data
edition_data = data
if not work_data or not edition_data:
raise ConnectorException('Unable to load book data: %s' % remote_id)
# create activitypub object
work_activity = activitypub.Work(**work_data)
edition_activity = activitypub.Edition(**edition_data)
# this will dedupe automatically
work = work_activity.to_model(models.Work, save=False)
edition = edition_activity.to_model(models.Edition, save=False)
edition.parent_work = work
work.default_edition = edition
work.save()
edition.save()
work = work_activity.to_model(models.Work)
for author in self.get_authors_from_data(data):
work.authors.add(author)
return self.create_edition_from_data(work, edition_data)
def create_edition_from_data(self, work, edition_data):
''' if we already have the work, we're ready '''
mapped_data = dict_from_mappings(edition_data, self.book_mappings)
mapped_data['work'] = work.remote_id
edition_activity = activitypub.Edition(**mapped_data)
edition = edition_activity.to_model(models.Edition)
edition.connector = self.connector
edition.save()
work.default_edition = edition
work.save()
for author in self.get_authors_from_data(edition_data):
edition.authors.add(author)
if not edition.authors.exists() and work.authors.exists():
edition.authors.add(work.authors.all())
return edition
def get_or_create_author(self, remote_id):
''' load that author '''
existing = models.Author.find_exising_by_remote_id(remote_id)
existing = models.Author.find_existing_by_remote_id(remote_id)
if existing:
return existing
data = get_data(remote_id)
author_activity = dict_from_mappings(data, self.author_mappings)
mapped_data = dict_from_mappings(data, self.author_mappings)
activity = activitypub.Author(**mapped_data)
# this will dedupe
return activitypub.Author(**author_activity).to_model()
return activity.to_model(models.Author)
@abstractmethod
def is_work_data(self, data):
''' differentiate works and editions '''
@abstractmethod
def get_edition_from_work_data(self, data):
''' every work needs at least one edition '''
@abstractmethod
def get_work_from_edition_date(self, data):
''' every edition needs a work '''
@abstractmethod
def get_authors_from_data(self, data):
''' load author data '''
@abstractmethod
def get_cover_from_data(self, data):
''' load cover '''
@abstractmethod
def expand_book_data(self, book):
''' get more info on a book '''
def dict_from_mappings(self, data, mappings):
def dict_from_mappings(data, mappings):
''' create a dict in Activitypub format, using mappings supplies by
the subclass '''
result = {}
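
With the self parameter dropped, dict_from_mappings becomes a plain module-level function, which is why openlibrary.py imports it directly below. The hunk ends at result = {}, so the loop in this sketch is an assumption about the rest of the body (as is the local_field attribute name); it only illustrates the idea:

def dict_from_mappings(data, mappings):
    ''' build an ActivityPub-shaped dict from raw connector data '''
    result = {}
    for mapping in mappings:
        # assumed: each Mapping records the local key it should populate
        result[mapping.local_field] = mapping.get_value(data)
    return result
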
@@ -250,4 +247,9 @@ class Mapping:
def get_value(self, data):
''' pull a field from incoming json and return the formatted version '''
value = data.get(self.remote_field)
if not value:
return None
try:
return self.formatter(value)
except:# pylint: disable=bare-except
return None
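
A short usage sketch of the reworked get_value: missing or falsy values return None before the formatter runs, and any formatter failure is swallowed by the bare except. The isbn data here is made up; the constructor call follows the same pattern the openlibrary connector uses below.

from bookwyrm.connectors.abstract_connector import Mapping

get_first = lambda a: a[0]  # same style of formatter the connectors define
mapping = Mapping('isbn13', remote_field='isbn_13', formatter=get_first)

mapping.get_value({'isbn_13': ['9780765302984']})  # -> '9780765302984'
mapping.get_value({'isbn_13': []})                 # empty value is falsy -> None
mapping.get_value({'isbn_13': 42})                 # formatter raises TypeError -> None
mapping.get_value({})                              # key missing entirely -> None
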

View file

@@ -1,13 +1,9 @@
''' openlibrary data connector '''
import re
import requests
from django.core.files.base import ContentFile
from bookwyrm import models
from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import ConnectorException, dict_from_mappings
from .abstract_connector import get_data, update_from_mappings
from .abstract_connector import ConnectorException, get_data
from .openlibrary_languages import languages
@@ -17,8 +13,12 @@ class Connector(AbstractConnector):
super().__init__(identifier)
get_first = lambda a: a[0]
get_remote_id = lambda a: self.base_url + a
self.book_mappings = [
Mapping('title'),
Mapping('id', remote_field='key', formatter=get_remote_id),
Mapping(
'cover', remote_field='covers', formatter=self.get_cover_url),
Mapping('sortTitle', remote_field='sort_title'),
Mapping('subtitle'),
Mapping('description', formatter=get_description),
@@ -50,7 +50,12 @@ class Connector(AbstractConnector):
]
self.author_mappings = [
Mapping('id', remote_field='key', formatter=get_remote_id),
Mapping('name'),
Mapping(
'openlibraryKey', remote_field='key',
formatter=get_openlibrary_key
),
Mapping('born', remote_field='birth_date'),
Mapping('died', remote_field='death_date'),
Mapping('bio', formatter=get_description),
@@ -58,6 +63,7 @@ class Connector(AbstractConnector):
def get_remote_id_from_data(self, data):
''' format a url from an openlibrary id field '''
try:
key = data['key']
except KeyError:
@@ -93,24 +99,16 @@ class Connector(AbstractConnector):
for author_blob in data.get('authors', []):
author_blob = author_blob.get('author', author_blob)
# this id is "/authors/OL1234567A"
author_id = author_blob['key'].split('/')[-1]
author_id = author_blob['key']
url = '%s/%s.json' % (self.base_url, author_id)
yield self.get_or_create_author(url)
def get_cover_from_data(self, data):
def get_cover_url(self, cover_blob):
''' ask openlibrary for the cover '''
if not data.get('covers'):
return None
cover_id = data.get('covers')[0]
cover_id = cover_blob[0]
image_name = '%s-M.jpg' % cover_id
url = '%s/b/id/%s' % (self.covers_url, image_name)
response = requests.get(url)
if not response.ok:
response.raise_for_status()
image_content = ContentFile(response.content)
return [image_name, image_content]
return '%s/b/id/%s' % (self.covers_url, image_name)
def parse_search_data(self, data):
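
get_cover_from_data used to download the image itself; get_cover_url only builds a URL and is wired in as the formatter on the 'cover' Mapping above, leaving the actual download to later model handling. A standalone sketch with a stand-in class; the covers host and cover id are example values, not taken from this commit:

class FakeConnector:
    ''' stand-in for the openlibrary Connector, just for this example '''
    covers_url = 'https://covers.openlibrary.org'  # example value for the configured covers host

    def get_cover_url(self, cover_blob):
        cover_id = cover_blob[0]
        image_name = '%s-M.jpg' % cover_id
        return '%s/b/id/%s' % (self.covers_url, image_name)

print(FakeConnector().get_cover_url([8739161]))
# https://covers.openlibrary.org/b/id/8739161-M.jpg
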
@@ -144,19 +142,7 @@ class Connector(AbstractConnector):
# we can mass download edition data from OL to avoid repeatedly querying
edition_options = self.load_edition_data(work.openlibrary_key)
for edition_data in edition_options.get('entries'):
olkey = edition_data.get('key').split('/')[-1]
# make sure the edition isn't already in the database
if models.Edition.objects.filter(openlibrary_key=olkey).count():
continue
# creates and populates the book from the data
edition = self.create_book(olkey, edition_data, models.Edition)
# ensures that the edition is associated with the work
edition.parent_work = work
edition.save()
# get author data from the work if it's missing from the edition
if not edition.authors and work.authors:
edition.authors.set(work.authors.all())
self.create_edition_from_data(work, edition_data)
def get_description(description_blob):

View file

@@ -223,6 +223,8 @@ def resolve_book(request):
remote_id = request.POST.get('remote_id')
connector = books_manager.get_or_create_connector(remote_id)
book = connector.get_or_create_book(remote_id)
if book.connector:
books_manager.load_more_data.delay(book.id)
return redirect('/book/%d' % book.id)
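
The two added lines queue the expensive edition crawl instead of running it in the request: assuming load_more_data is a celery task on books_manager, .delay(book.id) returns immediately and a worker later calls the connector's expand_book_data. A rough sketch of what such a task could look like; apart from expand_book_data and get_or_create_connector, the details are assumptions:

from celery import shared_task
from bookwyrm import models

@shared_task
def load_more_data(book_id):
    ''' background task: fetch the remaining editions for a freshly resolved book '''
    book = models.Book.objects.get(id=book_id)           # sketch: ignores Work/Edition subclass lookup
    connector = get_or_create_connector(book.remote_id)  # helper used by resolve_book above
    connector.expand_book_data(book)
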