''' functionality outline for a book data connector '''
from abc import ABC, abstractmethod
from dataclasses import dataclass

import pytz
from urllib3.exceptions import RequestError
from django.db import transaction
from dateutil import parser
import requests
from requests import HTTPError

from bookwyrm import models


class ConnectorException(HTTPError):
    ''' when the connector can't do what was asked '''


class AbstractMinimalConnector(ABC):
    ''' just the bare bones, for other bookwyrm instances '''
    def __init__(self, identifier):
        # load connector settings
        info = models.Connector.objects.get(identifier=identifier)
        self.connector = info

        # the things in the connector model to copy over
        self_fields = [
            'base_url',
            'books_url',
            'covers_url',
            'search_url',
            'max_query_count',
            'name',
            'identifier',
            'local',
        ]
        for field in self_fields:
            setattr(self, field, getattr(info, field))

    def search(self, query, min_confidence=None):
        ''' free text search '''
        resp = requests.get(
            '%s%s' % (self.search_url, query),
            headers={
                'Accept': 'application/json; charset=utf-8',
            },
        )
        if not resp.ok:
            resp.raise_for_status()
        data = resp.json()
        results = []

        for doc in self.parse_search_data(data)[:10]:
            results.append(self.format_search_result(doc))
        return results

    @abstractmethod
    def get_or_create_book(self, remote_id):
        ''' pull up a book record by whatever means possible '''

    @abstractmethod
    def parse_search_data(self, data):
        ''' turn the result json from a search into a list '''

    @abstractmethod
    def format_search_result(self, search_result):
        ''' create a SearchResult obj from json '''


class AbstractConnector(AbstractMinimalConnector):
    ''' generic book data connector '''
    def __init__(self, identifier):
        super().__init__(identifier)

        self.key_mappings = []

        # fields we want to look for in book data to copy over
        # title we handle separately.
        self.book_mappings = []

    def is_available(self):
        ''' check if you're allowed to use this connector '''
        if self.max_query_count is not None:
            if self.connector.query_count >= self.max_query_count:
                return False
        return True

    def get_or_create_book(self, remote_id):
        # try to load the book
        book = models.Book.objects.select_subclasses().filter(
            origin_id=remote_id
        ).first()
        if book:
            if isinstance(book, models.Work):
                return book.default_edition
            return book

        # no book was found, so we start creating a new one
        data = get_data(remote_id)

        work = None
        edition = None

        if self.is_work_data(data):
            work_data = data
            # if we requested a work and there's already an edition, we're set
            work = self.match_from_mappings(work_data, models.Work)
            if work and work.default_edition:
                return work.default_edition

            # no such luck, we need more information.
            try:
                edition_data = self.get_edition_from_work_data(work_data)
            except KeyError:
                # hack: re-use the work data as the edition data
                # this is why remote ids aren't necessarily unique
                edition_data = data
        else:
            edition_data = data
            edition = self.match_from_mappings(edition_data, models.Edition)
            # no need to figure out about the work if we already know about it
            if edition and edition.parent_work:
                return edition

            # no such luck, we need more information.
            try:
                work_data = self.get_work_from_edition_data(edition_data)
            except KeyError:
                # remember this hack: re-use the edition data as the work data
                work_data = data

        if not work_data or not edition_data:
            raise ConnectorException('Unable to load book data: %s' % remote_id)

        # at this point, we need to figure out the work, edition, or both
        # atomic so that we don't save a work with no edition or vice versa
        with transaction.atomic():
            if not work:
                work_key = self.get_remote_id_from_data(work_data)
                work = self.create_book(work_key, work_data, models.Work)

            if not edition:
                ed_key = self.get_remote_id_from_data(edition_data)
                edition = self.create_book(ed_key, edition_data, models.Edition)
                edition.parent_work = work
                edition.save()
            work.default_edition = edition
            work.save()

            # now's our chance to fill in author gaps
            if not edition.authors.exists() and work.authors.exists():
                edition.authors.set(work.authors.all())
                edition.author_text = work.author_text
                edition.save()

        if not edition:
            raise ConnectorException('Unable to create book: %s' % remote_id)

        return edition

    def create_book(self, remote_id, data, model):
        ''' create a work or edition from data '''
        book = model.objects.create(
            origin_id=remote_id,
            title=data['title'],
            connector=self.connector,
        )
        return self.update_book_from_data(book, data)

    def update_book_from_data(self, book, data, update_cover=True):
        ''' for creating a new book or syncing with data '''
        book = update_from_mappings(book, data, self.book_mappings)

        author_text = []
        for author in self.get_authors_from_data(data):
            book.authors.add(author)
            if author.display_name:
                author_text.append(author.display_name)
        book.author_text = ', '.join(author_text)
        book.save()

        if not update_cover:
            return book

        cover = self.get_cover_from_data(data)
        if cover:
            book.cover.save(*cover, save=True)
        return book

    def update_book(self, book, data=None):
        ''' load new data '''
        if not book.sync and not book.sync_cover:
            return book

        if not data:
            key = getattr(book, self.key_name)
            data = self.load_book_data(key)

        if book.sync:
            book = self.update_book_from_data(
                book, data, update_cover=book.sync_cover)
        else:
            cover = self.get_cover_from_data(data)
            if cover:
                book.cover.save(*cover, save=True)

        return book

    def match_from_mappings(self, data, model):
        ''' try to find existing copies of this book using various keys '''
        relevant_mappings = [
            m for m in self.key_mappings
            if not m.model or model == m.model
        ]
        for mapping in relevant_mappings:
            # check if this field is present in the data
            value = data.get(mapping.remote_field)
            if not value:
                continue

            # extract the value in the right format
            value = mapping.formatter(value)

            # search our database for a matching book
            kwargs = {mapping.local_field: value}
            match = model.objects.filter(**kwargs).first()
            if match:
                return match
        return None

    @abstractmethod
    def get_remote_id_from_data(self, data):
        ''' otherwise we won't properly set the remote_id in the db '''

    @abstractmethod
    def is_work_data(self, data):
        ''' differentiate works and editions '''

    @abstractmethod
    def get_edition_from_work_data(self, data):
        ''' every work needs at least one edition '''

    @abstractmethod
    def get_work_from_edition_data(self, data):
        ''' every edition needs a work '''

    @abstractmethod
    def get_authors_from_data(self, data):
        ''' load author data '''

    @abstractmethod
    def get_cover_from_data(self, data):
        ''' load cover '''

    @abstractmethod
    def expand_book_data(self, book):
        ''' get more info on a book '''


def update_from_mappings(obj, data, mappings):
    ''' assign data to model with mappings '''
    for mapping in mappings:
        # check if this field is present in the data
        value = data.get(mapping.remote_field)
        if not value:
            continue

        # extract the value in the right format
        try:
            value = mapping.formatter(value)
        except Exception:
            continue

        # assign the formatted value to the model
        obj.__setattr__(mapping.local_field, value)
    return obj


def get_date(date_string):
    ''' helper function to try to interpret dates '''
    if not date_string:
        return None

    try:
        return pytz.utc.localize(parser.parse(date_string))
    except ValueError:
        pass

    try:
        return parser.parse(date_string)
    except ValueError:
        return None


def get_data(url):
    ''' wrapper for requests.get '''
    try:
        resp = requests.get(
            url,
            headers={
                'Accept': 'application/json; charset=utf-8',
            },
        )
    except RequestError:
        raise ConnectorException()
    if not resp.ok:
        resp.raise_for_status()

    data = resp.json()
    return data


@dataclass
class SearchResult:
    ''' standardized search result object '''
    title: str
    key: str
    author: str
    year: str
    confidence: int = 1

    def __repr__(self):
        return "<SearchResult key={!r} title={!r} author={!r}>".format(
            self.key, self.title, self.author)


class Mapping:
    ''' associate a local database field with a field in an external dataset '''
    def __init__(
            self, local_field, remote_field=None, formatter=None, model=None):
        noop = lambda x: x

        self.local_field = local_field
        self.remote_field = remote_field or local_field
        self.formatter = formatter or noop
        self.model = model
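
# --------------------------------------------------------------------------
# Illustrative sketch (not part of the connector API above): one way a
# concrete connector might subclass AbstractConnector. The class name, the
# remote JSON keys ('id', 'type', 'work', 'editions', 'published',
# 'results', 'isbn_13'), and the assumption that the local book models
# expose 'isbn_13' and 'published_date' fields are all invented here for
# illustration, not taken from any real data source.

class ExampleConnector(AbstractConnector):
    ''' minimal example against a hypothetical JSON book API '''
    def __init__(self, identifier):
        super().__init__(identifier)

        # keys used by match_from_mappings to find existing local copies
        self.key_mappings = [
            Mapping('isbn_13', model=models.Edition),
            Mapping(
                'origin_id',
                remote_field='id',
                formatter=lambda i: '%s%s' % (self.books_url, i),
            ),
        ]

        # fields copied onto the book by update_from_mappings
        self.book_mappings = self.key_mappings + [
            Mapping('published_date', remote_field='published',
                    formatter=get_date),
        ]

    def get_remote_id_from_data(self, data):
        return '%s%s' % (self.books_url, data['id'])

    def is_work_data(self, data):
        return data.get('type') == 'work'

    def get_edition_from_work_data(self, data):
        # raises KeyError when the remote work record has no 'editions' key,
        # which get_or_create_book treats as "re-use the work data"
        return get_data(data['editions'][0])

    def get_work_from_edition_data(self, data):
        return get_data(data['work'])

    def get_authors_from_data(self, data):
        return []

    def get_cover_from_data(self, data):
        return None

    def expand_book_data(self, book):
        pass

    def parse_search_data(self, data):
        return data.get('results', [])

    def format_search_result(self, search_result):
        return SearchResult(
            title=search_result.get('title'),
            key=self.get_remote_id_from_data(search_result),
            author=search_result.get('author'),
            year=search_result.get('published'),
        )

# Usage would follow the base classes: once a models.Connector row with this
# identifier exists, ExampleConnector('example') could be constructed, and
# get_or_create_book / search would drive the hooks sketched above.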