''' openlibrary data connector ''' import re from bookwyrm import models from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import get_data from .connector_manager import ConnectorException from .openlibrary_languages import languages class Connector(AbstractConnector): ''' instantiate a connector for OL ''' def __init__(self, identifier): super().__init__(identifier) get_first = lambda a: a[0] get_remote_id = lambda a: self.base_url + a self.book_mappings = [ Mapping('title'), Mapping('id', remote_field='key', formatter=get_remote_id), Mapping( 'cover', remote_field='covers', formatter=self.get_cover_url), Mapping('sortTitle', remote_field='sort_title'), Mapping('subtitle'), Mapping('description', formatter=get_description), Mapping('languages', formatter=get_languages), Mapping('series', formatter=get_first), Mapping('seriesNumber', remote_field='series_number'), Mapping('subjects'), Mapping('subjectPlaces', remote_field='subject_places'), Mapping('isbn13', remote_field='isbn_13', formatter=get_first), Mapping('isbn10', remote_field='isbn_10', formatter=get_first), Mapping('lccn', formatter=get_first), Mapping( 'oclcNumber', remote_field='oclc_numbers', formatter=get_first ), Mapping( 'openlibraryKey', remote_field='key', formatter=get_openlibrary_key ), Mapping('goodreadsKey', remote_field='goodreads_key'), Mapping('asin'), Mapping( 'firstPublishedDate', remote_field='first_publish_date', ), Mapping('publishedDate', remote_field='publish_date'), Mapping('pages', remote_field='number_of_pages'), Mapping('physicalFormat', remote_field='physical_format'), Mapping('publishers'), ] self.author_mappings = [ Mapping('id', remote_field='key', formatter=get_remote_id), Mapping('name'), Mapping( 'openlibraryKey', remote_field='key', formatter=get_openlibrary_key ), Mapping('born', remote_field='birth_date'), Mapping('died', remote_field='death_date'), Mapping('bio', formatter=get_description), ] def get_remote_id_from_data(self, data): ''' format a url from an openlibrary id field ''' try: key = data['key'] except KeyError: raise ConnectorException('Invalid book data') return '%s%s' % (self.books_url, key) def is_work_data(self, data): return bool(re.match(r'^[\/\w]+OL\d+W$', data['key'])) def get_edition_from_work_data(self, data): try: key = data['key'] except KeyError: raise ConnectorException('Invalid book data') url = '%s%s/editions' % (self.books_url, key) data = get_data(url) return pick_default_edition(data['entries']) def get_work_from_edition_data(self, data): try: key = data['works'][0]['key'] except (IndexError, KeyError): raise ConnectorException('No work found for edition') url = '%s%s' % (self.books_url, key) return get_data(url) def get_authors_from_data(self, data): ''' parse author json and load or create authors ''' for author_blob in data.get('authors', []): author_blob = author_blob.get('author', author_blob) # this id is "/authors/OL1234567A" author_id = author_blob['key'] url = '%s%s' % (self.base_url, author_id) yield self.get_or_create_author(url) def get_cover_url(self, cover_blob): ''' ask openlibrary for the cover ''' cover_id = cover_blob[0] image_name = '%s-L.jpg' % cover_id return '%s/b/id/%s' % (self.covers_url, image_name) def parse_search_data(self, data): return data.get('docs') def format_search_result(self, search_result): # build the remote id from the openlibrary key key = self.books_url + search_result['key'] author = search_result.get('author_name') or ['Unknown'] return SearchResult( title=search_result.get('title'), key=key, author=', '.join(author), connector=self, year=search_result.get('first_publish_year'), ) def load_edition_data(self, olkey): ''' query openlibrary for editions of a work ''' url = '%s/works/%s/editions' % (self.books_url, olkey) return get_data(url) def expand_book_data(self, book): work = book # go from the edition to the work, if necessary if isinstance(book, models.Edition): work = book.parent_work # we can mass download edition data from OL to avoid repeatedly querying try: edition_options = self.load_edition_data(work.openlibrary_key) except ConnectorException: # who knows, man return for edition_data in edition_options.get('entries'): # does this edition have ANY interesting data? if ignore_edition(edition_data): continue self.create_edition_from_data(work, edition_data) def ignore_edition(edition_data): ''' don't load a million editions that have no metadata ''' # an isbn, we love to see it if edition_data.get('isbn_13') or edition_data.get('isbn_10'): print(edition_data.get('isbn_10')) return False # grudgingly, oclc can stay if edition_data.get('oclc_numbers'): print(edition_data.get('oclc_numbers')) return False # if it has a cover it can stay if edition_data.get('covers'): print(edition_data.get('covers')) return False # keep non-english editions if edition_data.get('languages') and \ 'languages/eng' not in str(edition_data.get('languages')): print(edition_data.get('languages')) return False return True def get_description(description_blob): ''' descriptions can be a string or a dict ''' if isinstance(description_blob, dict): return description_blob.get('value') return description_blob def get_openlibrary_key(key): ''' convert /books/OL27320736M into OL27320736M ''' return key.split('/')[-1] def get_languages(language_blob): ''' /language/eng -> English ''' langs = [] for lang in language_blob: langs.append( languages.get(lang.get('key', ''), None) ) return langs def pick_default_edition(options): ''' favor physical copies with covers in english ''' if not options: return None if len(options) == 1: return options[0] options = [e for e in options if e.get('covers')] or options options = [e for e in options if \ '/languages/eng' in str(e.get('languages'))] or options formats = ['paperback', 'hardcover', 'mass market paperback'] options = [e for e in options if \ str(e.get('physical_format')).lower() in formats] or options options = [e for e in options if e.get('isbn_13')] or options options = [e for e in options if e.get('ocaid')] or options return options[0]