diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index c9f1ad2e..e2bb1043 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -10,7 +10,7 @@ import requests from requests import HTTPError from requests.exceptions import SSLError -from bookwyrm import models +from bookwyrm import activitypub, models class ConnectorException(HTTPError): @@ -38,7 +38,7 @@ class AbstractMinimalConnector(ABC): for field in self_fields: setattr(self, field, getattr(info, field)) - def search(self, query, min_confidence=None): + def search(self, query, min_confidence=None):# pylint: disable=unused-argument ''' free text search ''' resp = requests.get( '%s%s' % (self.search_url, query), @@ -72,9 +72,6 @@ class AbstractConnector(AbstractMinimalConnector): ''' generic book data connector ''' def __init__(self, identifier): super().__init__(identifier) - - self.key_mappings = [] - # fields we want to look for in book data to copy over # title we handle separately. 
self.book_mappings = [] @@ -88,153 +85,68 @@ class AbstractConnector(AbstractMinimalConnector): return True + @transaction.atomic def get_or_create_book(self, remote_id): - # try to load the book - book = models.Book.objects.select_subclasses().filter( - origin_id=remote_id - ).first() - if book: - if isinstance(book, models.Work): - return book.default_edition - return book + ''' translate arbitrary json into an Activitypub dataclass ''' + # first, check if we have the origin_id saved + existing = models.Edition.find_existing_by_remote_id(remote_id) or \ + models.Work.find_existing_by_remote_id(remote_id) + if existing: + if hasattr(existing, 'get_default_editon'): + return existing.get_default_editon() + return existing - # no book was found, so we start creating a new one + # load the json data = get_data(remote_id) - - work = None - edition = None + mapped_data = self.dict_from_mappings(data) if self.is_work_data(data): - work_data = data - # if we requested a work and there's already an edition, we're set - work = self.match_from_mappings(work_data, models.Work) - if work and work.default_edition: - return work.default_edition - - # no such luck, we need more information. try: - edition_data = self.get_edition_from_work_data(work_data) + edition_data = self.get_edition_from_work_data(data) + edition_data = self.dict_from_mappings(edition_data) except KeyError: # hack: re-use the work data as the edition data # this is why remote ids aren't necessarily unique - edition_data = data + edition_data = mapped_data + work_data = mapped_data else: - edition_data = data - edition = self.match_from_mappings(edition_data, models.Edition) - # no need to figure out about the work if we already know about it - if edition and edition.parent_work: - return edition - - # no such luck, we need more information. 
try: - work_data = self.get_work_from_edition_date(edition_data) + work_data = self.get_work_from_edition_data(data) + work_data = self.dict_from_mappings(work_data) except KeyError: - # remember this hack: re-use the work data as the edition data - work_data = data + work_data = mapped_data + edition_data = mapped_data if not work_data or not edition_data: raise ConnectorException('Unable to load book data: %s' % remote_id) - # at this point, we need to figure out the work, edition, or both - # atomic so that we don't save a work with no edition for vice versa - with transaction.atomic(): - if not work: - work_key = self.get_remote_id_from_data(work_data) - work = self.create_book(work_key, work_data, models.Work) + # create activitypub object + work_activity = activitypub.Work(**work_data) + edition_activity = activitypub.Edition(**edition_data) - if not edition: - ed_key = self.get_remote_id_from_data(edition_data) - edition = self.create_book(ed_key, edition_data, models.Edition) - edition.parent_work = work - edition.save() - work.default_edition = edition - work.save() + # this will dedupe automatically + work = work_activity.to_model(models.Work, save=False) + edition = edition_activity.to_model(models.Edition, save=False) - # now's our change to fill in author gaps - if not edition.authors.exists() and work.authors.exists(): - edition.authors.set(work.authors.all()) - edition.author_text = work.author_text - edition.save() + edition.parent_work = work + work.default_edition = edition - if not edition: - raise ConnectorException('Unable to create book: %s' % remote_id) + work.save() + edition.save() + + for author in self.get_authors_from_data(data): + work.authors.add(author) + edition.authors.add(author) return edition - def create_book(self, remote_id, data, model): - ''' create a work or edition from data ''' - book = model.objects.create( - origin_id=remote_id, - title=data['title'], - connector=self.connector, - ) - return 
self.update_book_from_data(book, data) - - - def update_book_from_data(self, book, data, update_cover=True): - ''' for creating a new book or syncing with data ''' - book = update_from_mappings(book, data, self.book_mappings) - - author_text = [] - for author in self.get_authors_from_data(data): - book.authors.add(author) - author_text.append(author.name) - book.author_text = ', '.join(author_text) - book.save() - - if not update_cover: - return book - - cover = self.get_cover_from_data(data) - if cover: - book.cover.save(*cover, save=True) - return book - - - def update_book(self, book, data=None): - ''' load new data ''' - if not book.sync and not book.sync_cover: - return book - - if not data: - key = getattr(book, self.key_name) - data = self.load_book_data(key) - - if book.sync: - book = self.update_book_from_data( - book, data, update_cover=book.sync_cover) - else: - cover = self.get_cover_from_data(data) - if cover: - book.cover.save(*cover, save=True) - - return book - - - def match_from_mappings(self, data, model): - ''' try to find existing copies of this book using various keys ''' - relevent_mappings = [m for m in self.key_mappings if \ - not m.model or model == m.model] - for mapping in relevent_mappings: - # check if this field is present in the data - value = data.get(mapping.remote_field) - if not value: - continue - - # extract the value in the right format - value = mapping.formatter(value) - - # search our database for a matching book - kwargs = {mapping.local_field: value} - match = model.objects.filter(**kwargs).first() - if match: - return match - return None - - - @abstractmethod - def get_remote_id_from_data(self, data): - ''' otherwise we won't properly set the remote_id in the db ''' + def dict_from_mappings(self, data): + ''' create a dict in Activitypub format, using mappings supplied by + the subclass ''' + result = {} + for mapping in self.book_mappings: + result[mapping.local_field] = mapping.get_value(data) + return result 
@abstractmethod @@ -266,25 +178,6 @@ class AbstractConnector(AbstractMinimalConnector): ''' get more info on a book ''' -def update_from_mappings(obj, data, mappings): - ''' assign data to model with mappings ''' - for mapping in mappings: - # check if this field is present in the data - value = data.get(mapping.remote_field) - if not value: - continue - - # extract the value in the right format - try: - value = mapping.formatter(value) - except: - continue - - # assign the formatted value to the model - obj.__setattr__(mapping.local_field, value) - return obj - - def get_date(date_string): ''' helper function to try to interpret dates ''' if not date_string: @@ -349,11 +242,16 @@ class SearchResult: class Mapping: ''' associate a local database field with a field in an external dataset ''' - def __init__( - self, local_field, remote_field=None, formatter=None, model=None): + def __init__(self, local_field, remote_field=None, formatter=None): noop = lambda x: x self.local_field = local_field self.remote_field = remote_field or local_field self.formatter = formatter or noop - self.model = model + + def get_value(self, data): + ''' pull a field from incoming json and format it; None if absent ''' + value = data.get(self.remote_field) + if not value: + return None + return self.formatter(value) diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 28eb1ea0..9c4d5d15 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -17,50 +17,35 @@ class Connector(AbstractConnector): super().__init__(identifier) get_first = lambda a: a[0] - self.key_mappings = [ - Mapping('isbn_13', model=models.Edition, formatter=get_first), - Mapping('isbn_10', model=models.Edition, formatter=get_first), - Mapping('lccn', model=models.Work, formatter=get_first), - Mapping( - 'oclc_number', - remote_field='oclc_numbers', - model=models.Edition, - formatter=get_first - ), - Mapping( - 'openlibrary_key', - remote_field='key', - 
formatter=get_openlibrary_key - ), - Mapping('goodreads_key'), - Mapping('asin'), - ] - - self.book_mappings = self.key_mappings + [ - Mapping('sort_title'), + self.book_mappings = [ + Mapping('title'), + Mapping('sortTitle', remote_field='sort_title'), Mapping('subtitle'), Mapping('description', formatter=get_description), Mapping('languages', formatter=get_languages), Mapping('series', formatter=get_first), - Mapping('series_number'), + Mapping('seriesNumber', remote_field='series_number'), Mapping('subjects'), - Mapping('subject_places'), + Mapping('subjectPlaces', remote_field='subject_places'), + Mapping('isbn13', remote_field='isbn_13', formatter=get_first), + Mapping('isbn10', remote_field='isbn_10', formatter=get_first), + Mapping('lccn', formatter=get_first), Mapping( - 'first_published_date', - remote_field='first_publish_date', - formatter=get_date + 'oclcNumber', remote_field='oclc_numbers', + formatter=get_first ), Mapping( - 'published_date', - remote_field='publish_date', - formatter=get_date + 'openlibraryKey', remote_field='key', + formatter=get_openlibrary_key ), + Mapping('goodreadsKey', remote_field='goodreads_key'), + Mapping('asin'), Mapping( - 'pages', - model=models.Edition, - remote_field='number_of_pages' + 'firstPublishedDate', remote_field='first_publish_date', ), - Mapping('physical_format', model=models.Edition), + Mapping('publishedDate', remote_field='publish_date'), + Mapping('pages', remote_field='number_of_pages'), + Mapping('physicalFormat', remote_field='physical_format'), Mapping('publishers'), ]