From 0f579e7d8d0d708d08fc7d64572c64f096af8567 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 10 May 2020 16:41:24 -0700 Subject: [PATCH] Re-thinks connector mappings --- fedireads/connectors/abstract_connector.py | 94 +++++++++---------- fedireads/connectors/fedireads_connector.py | 44 ++++++--- fedireads/connectors/openlibrary.py | 84 ++++++++++++----- .../migrations/0039_auto_20200510_2342.py | 21 +++++ fedireads/models/book.py | 3 - fedireads/tests/test_connector_fedireads.py | 1 - fedireads/tests/test_connector_openlibrary.py | 9 +- 7 files changed, 166 insertions(+), 90 deletions(-) create mode 100644 fedireads/migrations/0039_auto_20200510_2342.py diff --git a/fedireads/connectors/abstract_connector.py b/fedireads/connectors/abstract_connector.py index 644271177..029d90f82 100644 --- a/fedireads/connectors/abstract_connector.py +++ b/fedireads/connectors/abstract_connector.py @@ -17,26 +17,24 @@ class AbstractConnector(ABC): info = models.Connector.objects.get(identifier=identifier) self.connector = info - self.book_mappings = {} - self.key_mappings = { - 'isbn_13': ('isbn_13', None), - 'isbn_10': ('isbn_10', None), - 'oclc_numbers': ('oclc_number', None), - 'lccn': ('lccn', None), - } + self.key_mappings = [] - fields = [ + # fields we want to look for in book data to copy over + # title we handle separately. + self.book_mappings = [] + + # the things in the connector model to copy over + self_fields = [ 'base_url', 'books_url', 'covers_url', 'search_url', - 'key_name', 'max_query_count', 'name', 'identifier', 'local' ] - for field in fields: + for field in self_fields: setattr(self, field, getattr(info, field)) @@ -85,7 +83,7 @@ class AbstractConnector(ABC): if self.is_work_data(data): work_data = data # if we requested a work and there's already an edition, we're set - work = self.match_from_mappings(work_data) + work = self.match_from_mappings(work_data, models.Work) if work and work.default_edition: return work.default_edition @@ -98,7 +96,7 @@ class AbstractConnector(ABC): edition_data = data else: edition_data = data - edition = self.match_from_mappings(edition_data) + edition = self.match_from_mappings(edition_data, models.Edition) # no need to figure out about the work if we already know about it if edition and edition.parent_work: return edition @@ -181,35 +179,25 @@ class AbstractConnector(ABC): return book - def match_from_mappings(self, data): + def match_from_mappings(self, data, model): ''' try to find existing copies of this book using various keys ''' - keys = [ - ('openlibrary_key', models.Book), - ('librarything_key', models.Book), - ('goodreads_key', models.Book), - ('lccn', models.Work), - ('isbn_10', models.Edition), - ('isbn_13', models.Edition), - ('oclc_number', models.Edition), - ('asin', models.Edition), - ] - noop = lambda x: x - for key, model in keys: - formatter = None - if key in self.key_mappings: - key, formatter = self.key_mappings[key] - if not formatter: - formatter = noop - - value = data.get(key) + relevent_mappings = [m for m in self.key_mappings if \ + m.model and model == m.model] + for mapping in relevent_mappings: + # check if this field is present in the data + value = data.get(mapping.remote_field) if not value: continue - value = formatter(value) - match = model.objects.select_subclasses().filter( - **{key: value}).first() + # extract the value in the right format + value = mapping.formatter(value) + + # search our database for a matching book + kwargs = {mapping.local_field: value} + match = model.objects.filter(**kwargs).first() if match: return match + return None @abstractmethod @@ -254,23 +242,17 @@ class AbstractConnector(ABC): def update_from_mappings(obj, data, mappings): ''' assign data to model with mappings ''' - noop = lambda x: x - mappings['authors'] = ('', noop) - mappings['parent_work'] = ('', noop) - for (key, value) in data.items(): - formatter = None - if key in mappings: - key, formatter = mappings[key] - if not formatter: - formatter = noop - - if key == 'id': + for mapping in mappings: + # check if this field is present in the data + value = data.get(mapping.remote_field) + if not value: continue - try: - hasattr(obj, key) - except ValueError: - obj.__setattr__(key, formatter(value)) + # extract the value in the right format + value = mapping.formatter(value) + + # assign the formatted value to the model + obj.__setattr__(mapping.local_field, value) return obj @@ -315,3 +297,15 @@ class SearchResult(object): def __repr__(self): return "".format( self.key, self.title, self.author) + + +class Mapping(object): + ''' associate a local database field with a field in an external dataset ''' + def __init__( + self, local_field, remote_field=None, formatter=None, model=None): + noop = lambda x: x + + self.local_field = local_field + self.remote_field = remote_field or local_field + self.formatter = formatter or noop + self.model = model diff --git a/fedireads/connectors/fedireads_connector.py b/fedireads/connectors/fedireads_connector.py index f1a5d68fd..201e08a53 100644 --- a/fedireads/connectors/fedireads_connector.py +++ b/fedireads/connectors/fedireads_connector.py @@ -6,7 +6,7 @@ from django.core.files.base import ContentFile import requests from fedireads import models -from .abstract_connector import AbstractConnector, SearchResult +from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import update_from_mappings, get_date, get_data @@ -14,11 +14,37 @@ class Connector(AbstractConnector): ''' interact with other instances ''' def __init__(self, identifier): super().__init__(identifier) - self.book_mappings = self.key_mappings.copy() - self.book_mappings.update({ - 'published_date': ('published_date', get_date), - 'first_published_date': ('first_published_date', get_date), - }) + self.key_mappings = [ + Mapping('isbn_13', model=models.Edition), + Mapping('isbn_10', model=models.Edition), + Mapping('lccn', model=models.Work), + Mapping('oclc_number', model=models.Edition), + Mapping('openlibrary_key'), + Mapping('goodreads_key'), + Mapping('asin'), + ] + + self.book_mappings = self.key_mappings + [ + Mapping('sort_title'), + Mapping('subtitle'), + Mapping('description'), + Mapping('languages'), + Mapping('series'), + Mapping('series_number'), + Mapping('subjects'), + Mapping('subject_places'), + Mapping('first_published_date'), + Mapping('published_date'), + Mapping('pages'), + Mapping('physical_format'), + Mapping('publishers'), + ] + + self.author_mappings = [ + Mapping('born', remote_field='birth_date', formatter=get_date), + Mapping('died', remote_field='death_date', formatter=get_date), + Mapping('bio'), + ] def is_work_data(self, data): @@ -63,11 +89,7 @@ class Connector(AbstractConnector): # ingest a new author author = models.Author(remote_id=remote_id) - mappings = { - 'born': ('born', get_date), - 'died': ('died', get_date), - } - author = update_from_mappings(author, data, mappings) + author = update_from_mappings(author, data, self.author_mappings) author.save() return author diff --git a/fedireads/connectors/openlibrary.py b/fedireads/connectors/openlibrary.py index e4ee74b43..ca0cf33f4 100644 --- a/fedireads/connectors/openlibrary.py +++ b/fedireads/connectors/openlibrary.py @@ -5,7 +5,7 @@ import requests from django.core.files.base import ContentFile from fedireads import models -from .abstract_connector import AbstractConnector, SearchResult +from .abstract_connector import AbstractConnector, SearchResult, Mapping from .abstract_connector import update_from_mappings from .abstract_connector import get_date, get_data from .openlibrary_languages import languages @@ -15,23 +15,61 @@ class Connector(AbstractConnector): ''' instantiate a connector for OL ''' def __init__(self, identifier): super().__init__(identifier) - get_first = lambda a: a[0] - self.key_mappings = { - 'isbn_13': ('isbn_13', get_first), - 'isbn_10': ('isbn_10', get_first), - 'oclc_numbers': ('oclc_number', get_first), - 'lccn': ('lccn', get_first), - } - self.book_mappings = self.key_mappings.copy() - self.book_mappings.update({ - 'publish_date': ('published_date', get_date), - 'first_publish_date': ('first_published_date', get_date), - 'description': ('description', get_description), - 'languages': ('languages', get_languages), - 'number_of_pages': ('pages', None), - 'series': ('series', get_first), - }) + get_first = lambda a: a[0] + self.key_mappings = [ + Mapping('isbn_13', model=models.Edition, formatter=get_first), + Mapping('isbn_10', model=models.Edition, formatter=get_first), + Mapping('lccn', model=models.Work, formatter=get_first), + Mapping( + 'oclc_number', + remote_field='oclc_numbers', + model=models.Edition, + formatter=get_first + ), + Mapping( + 'openlibrary_key', + remote_field='key', + formatter=get_openlibrary_key + ), + Mapping('goodreads_key'), + Mapping('asin'), + ] + + self.book_mappings = self.key_mappings + [ + Mapping('sort_title'), + Mapping('subtitle'), + Mapping('description', formatter=get_description), + Mapping('languages', formatter=get_languages), + Mapping('series', formatter=get_first), + Mapping('series_number'), + Mapping('subjects'), + Mapping('subject_places'), + Mapping( + 'first_published_date', + remote_field='first_publish_date', + formatter=get_date + ), + Mapping( + 'published_date', + remote_field='publish_date', + formatter=get_date + ), + Mapping( + 'pages', + model=models.Edition, + remote_field='number_of_pages' + ), + Mapping('physical_format', model=models.Edition), + Mapping('publishers'), + ] + + self.author_mappings = [ + Mapping('born', remote_field='birth_date', formatter=get_date), + Mapping('died', remote_field='death_date', formatter=get_date), + Mapping('bio', formatter=get_description), + ] + def is_work_data(self, data): @@ -133,12 +171,7 @@ class Connector(AbstractConnector): data = get_data(url) author = models.Author(openlibrary_key=olkey) - mappings = { - 'birth_date': ('born', get_date), - 'death_date': ('died', get_date), - 'bio': ('bio', get_description), - } - author = update_from_mappings(author, data, mappings) + author = update_from_mappings(author, data, self.author_mappings) name = data.get('name') # TODO this is making some BOLD assumption if name: @@ -156,6 +189,11 @@ def get_description(description_blob): return description_blob +def get_openlibrary_key(key): + ''' convert /books/OL27320736M into OL27320736M ''' + return key.split('/')[-1] + + def get_languages(language_blob): ''' /language/eng -> English ''' langs = [] diff --git a/fedireads/migrations/0039_auto_20200510_2342.py b/fedireads/migrations/0039_auto_20200510_2342.py new file mode 100644 index 000000000..723ed3622 --- /dev/null +++ b/fedireads/migrations/0039_auto_20200510_2342.py @@ -0,0 +1,21 @@ +# Generated by Django 3.0.3 on 2020-05-10 23:42 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('fedireads', '0038_author_remote_id'), + ] + + operations = [ + migrations.RemoveField( + model_name='book', + name='misc_identifiers', + ), + migrations.RemoveField( + model_name='connector', + name='key_name', + ), + ] diff --git a/fedireads/models/book.py b/fedireads/models/book.py index 325734147..2b561a1d2 100644 --- a/fedireads/models/book.py +++ b/fedireads/models/book.py @@ -29,8 +29,6 @@ class Connector(FedireadsModel): covers_url = models.CharField(max_length=255) search_url = models.CharField(max_length=255, null=True) - key_name = models.CharField(max_length=255) - politeness_delay = models.IntegerField(null=True) #seconds max_query_count = models.IntegerField(null=True) # how many queries executed in a unit of time, like a day @@ -54,7 +52,6 @@ class Book(FedireadsModel): openlibrary_key = models.CharField(max_length=255, blank=True, null=True) librarything_key = models.CharField(max_length=255, blank=True, null=True) goodreads_key = models.CharField(max_length=255, blank=True, null=True) - misc_identifiers = JSONField(null=True) # info about where the data comes from and where/if to sync sync = models.BooleanField(default=True) diff --git a/fedireads/tests/test_connector_fedireads.py b/fedireads/tests/test_connector_fedireads.py index 2e0bfaf0c..ea5dd3e1c 100644 --- a/fedireads/tests/test_connector_fedireads.py +++ b/fedireads/tests/test_connector_fedireads.py @@ -18,7 +18,6 @@ class FedireadsConnector(TestCase): books_url='https://example.com', covers_url='https://example.com/images/covers', search_url='https://example.com/search?q=', - key_name='remote_id', ) self.connector = Connector('example.com') diff --git a/fedireads/tests/test_connector_openlibrary.py b/fedireads/tests/test_connector_openlibrary.py index 68bace67a..a17f46981 100644 --- a/fedireads/tests/test_connector_openlibrary.py +++ b/fedireads/tests/test_connector_openlibrary.py @@ -8,7 +8,7 @@ import pytz from fedireads import models from fedireads.connectors.openlibrary import Connector from fedireads.connectors.openlibrary import get_languages, get_description -from fedireads.connectors.openlibrary import pick_default_edition +from fedireads.connectors.openlibrary import pick_default_edition, get_openlibrary_key from fedireads.connectors.abstract_connector import SearchResult, get_date @@ -22,7 +22,6 @@ class Openlibrary(TestCase): books_url='https://openlibrary.org', covers_url='https://covers.openlibrary.org', search_url='https://openlibrary.org/search?q=', - key_name='openlibrary_key', ) self.connector = Connector('openlibrary.org') @@ -77,3 +76,9 @@ class Openlibrary(TestCase): def test_get_languages(self): languages = get_languages(self.edition_data['languages']) self.assertEqual(languages, ['English']) + + + def test_get_ol_key(self): + key = get_openlibrary_key('/books/OL27320736M') + self.assertEqual(key, 'OL27320736M') +