From 3e61f48da212c77586eb505f0f632c4b8b2bcdab Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 19 Dec 2020 16:14:05 -0800 Subject: [PATCH] Fixes loading more data --- bookwyrm/activitypub/book.py | 8 +-- bookwyrm/connectors/abstract_connector.py | 62 ++++++++++++----------- bookwyrm/connectors/openlibrary.py | 46 ++++++----------- bookwyrm/view_actions.py | 2 + 4 files changed, 54 insertions(+), 64 deletions(-) diff --git a/bookwyrm/activitypub/book.py b/bookwyrm/activitypub/book.py index 93cd384fe..ee4b88515 100644 --- a/bookwyrm/activitypub/book.py +++ b/bookwyrm/activitypub/book.py @@ -50,7 +50,7 @@ class Work(Book): ''' work instance of a book object ''' lccn: str = '' defaultEdition: str = '' - editions: List[str] + editions: List[str] = field(default_factory=lambda: []) type: str = 'Work' @@ -58,9 +58,9 @@ class Work(Book): class Author(ActivityObject): ''' author of a book ''' name: str - born: str = '' - died: str = '' - aliases: str = '' + born: str = None + died: str = None + aliases: List[str] = field(default_factory=lambda: []) bio: str = '' openlibraryKey: str = '' wikipediaLink: str = '' diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 921b2e297..5afd10897 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -1,11 +1,9 @@ ''' functionality outline for a book data connector ''' from abc import ABC, abstractmethod from dataclasses import dataclass -import pytz from urllib3.exceptions import RequestError from django.db import transaction -from dateutil import parser import requests from requests import HTTPError from requests.exceptions import SSLError @@ -102,12 +100,10 @@ class AbstractConnector(AbstractMinimalConnector): if self.is_work_data(data): try: edition_data = self.get_edition_from_work_data(data) - edition_data = dict_from_mappings(\ - edition_data, self.book_mappings) except KeyError: # hack: re-use the work data as the edition data # this is why remote ids aren't necessarily unique - edition_data = mapped_data + edition_data = data work_data = mapped_data else: try: @@ -115,75 +111,76 @@ class AbstractConnector(AbstractMinimalConnector): work_data = dict_from_mappings(work_data, self.book_mappings) except KeyError: work_data = mapped_data - edition_data = mapped_data + edition_data = data if not work_data or not edition_data: raise ConnectorException('Unable to load book data: %s' % remote_id) # create activitypub object work_activity = activitypub.Work(**work_data) - edition_activity = activitypub.Edition(**edition_data) - # this will dedupe automatically - work = work_activity.to_model(models.Work, save=False) - edition = edition_activity.to_model(models.Edition, save=False) - - edition.parent_work = work - work.default_edition = edition - - work.save() - edition.save() - + work = work_activity.to_model(models.Work) for author in self.get_authors_from_data(data): work.authors.add(author) + return self.create_edition_from_data(work, edition_data) + + + def create_edition_from_data(self, work, edition_data): + ''' if we already have the work, we're ready ''' + mapped_data = dict_from_mappings(edition_data, self.book_mappings) + mapped_data['work'] = work.remote_id + edition_activity = activitypub.Edition(**mapped_data) + edition = edition_activity.to_model(models.Edition) + edition.connector = self.connector + edition.save() + + work.default_edition = edition + work.save() + + for author in self.get_authors_from_data(edition_data): edition.authors.add(author) + if not edition.authors.exists() and work.authors.exists(): + edition.authors.add(work.authors.all()) return edition def get_or_create_author(self, remote_id): ''' load that author ''' - existing = models.Author.find_exising_by_remote_id(remote_id) + existing = models.Author.find_existing_by_remote_id(remote_id) if existing: return existing data = get_data(remote_id) - author_activity = dict_from_mappings(data, self.author_mappings) + mapped_data = dict_from_mappings(data, self.author_mappings) + activity = activitypub.Author(**mapped_data) # this will dedupe - return activitypub.Author(**author_activity).to_model() + return activity.to_model(models.Author) @abstractmethod def is_work_data(self, data): ''' differentiate works and editions ''' - @abstractmethod def get_edition_from_work_data(self, data): ''' every work needs at least one edition ''' - @abstractmethod def get_work_from_edition_date(self, data): ''' every edition needs a work ''' - @abstractmethod def get_authors_from_data(self, data): ''' load author data ''' - - @abstractmethod - def get_cover_from_data(self, data): - ''' load cover ''' - @abstractmethod def expand_book_data(self, book): ''' get more info on a book ''' -def dict_from_mappings(self, data, mappings): +def dict_from_mappings(data, mappings): ''' create a dict in Activitypub format, using mappings supplies by the subclass ''' result = {} @@ -250,4 +247,9 @@ class Mapping: def get_value(self, data): ''' pull a field from incoming json and return the formatted version ''' value = data.get(self.remote_field) - return self.formatter(value) + if not value: + return None + try: + return self.formatter(value) + except:# pylint: disable=bare-except + return None diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 2d1690454..74f76668c 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -1,13 +1,9 @@ ''' openlibrary data connector ''' import re -import requests - -from django.core.files.base import ContentFile from bookwyrm import models from .abstract_connector import AbstractConnector, SearchResult, Mapping -from .abstract_connector import ConnectorException, dict_from_mappings -from .abstract_connector import get_data, update_from_mappings +from .abstract_connector import ConnectorException, get_data from .openlibrary_languages import languages @@ -17,8 +13,12 @@ class Connector(AbstractConnector): super().__init__(identifier) get_first = lambda a: a[0] + get_remote_id = lambda a: self.base_url + a self.book_mappings = [ Mapping('title'), + Mapping('id', remote_field='key', formatter=get_remote_id), + Mapping( + 'cover', remote_field='covers', formatter=self.get_cover_url), Mapping('sortTitle', remote_field='sort_title'), Mapping('subtitle'), Mapping('description', formatter=get_description), @@ -50,7 +50,12 @@ class Connector(AbstractConnector): ] self.author_mappings = [ + Mapping('id', remote_field='key', formatter=get_remote_id), Mapping('name'), + Mapping( + 'openlibraryKey', remote_field='key', + formatter=get_openlibrary_key + ), Mapping('born', remote_field='birth_date'), Mapping('died', remote_field='death_date'), Mapping('bio', formatter=get_description), @@ -58,6 +63,7 @@ class Connector(AbstractConnector): def get_remote_id_from_data(self, data): + ''' format a url from an openlibrary id field ''' try: key = data['key'] except KeyError: @@ -93,24 +99,16 @@ class Connector(AbstractConnector): for author_blob in data.get('authors', []): author_blob = author_blob.get('author', author_blob) # this id is "/authors/OL1234567A" - author_id = author_blob['key'].split('/')[-1] + author_id = author_blob['key'] url = '%s/%s.json' % (self.base_url, author_id) yield self.get_or_create_author(url) - def get_cover_from_data(self, data): + def get_cover_url(self, cover_blob): ''' ask openlibrary for the cover ''' - if not data.get('covers'): - return None - - cover_id = data.get('covers')[0] + cover_id = cover_blob[0] image_name = '%s-M.jpg' % cover_id - url = '%s/b/id/%s' % (self.covers_url, image_name) - response = requests.get(url) - if not response.ok: - response.raise_for_status() - image_content = ContentFile(response.content) - return [image_name, image_content] + return '%s/b/id/%s' % (self.covers_url, image_name) def parse_search_data(self, data): @@ -144,19 +142,7 @@ class Connector(AbstractConnector): # we can mass download edition data from OL to avoid repeatedly querying edition_options = self.load_edition_data(work.openlibrary_key) for edition_data in edition_options.get('entries'): - olkey = edition_data.get('key').split('/')[-1] - # make sure the edition isn't already in the database - if models.Edition.objects.filter(openlibrary_key=olkey).count(): - continue - - # creates and populates the book from the data - edition = self.create_book(olkey, edition_data, models.Edition) - # ensures that the edition is associated with the work - edition.parent_work = work - edition.save() - # get author data from the work if it's missing from the edition - if not edition.authors and work.authors: - edition.authors.set(work.authors.all()) + self.create_edition_from_data(work, edition_data) def get_description(description_blob): diff --git a/bookwyrm/view_actions.py b/bookwyrm/view_actions.py index fcb684764..1df1dcbaf 100644 --- a/bookwyrm/view_actions.py +++ b/bookwyrm/view_actions.py @@ -223,6 +223,8 @@ def resolve_book(request): remote_id = request.POST.get('remote_id') connector = books_manager.get_or_create_connector(remote_id) book = connector.get_or_create_book(remote_id) + if book.connector: + books_manager.load_more_data.delay(book.id) return redirect('/book/%d' % book.id)