Re-thinks connector mappings

This commit is contained in:
Mouse Reeve 2020-05-10 16:41:24 -07:00
parent d54c8c4dc4
commit 0f579e7d8d
7 changed files with 166 additions and 90 deletions

View file

@ -17,26 +17,24 @@ class AbstractConnector(ABC):
info = models.Connector.objects.get(identifier=identifier)
self.connector = info
self.book_mappings = {}
self.key_mappings = {
'isbn_13': ('isbn_13', None),
'isbn_10': ('isbn_10', None),
'oclc_numbers': ('oclc_number', None),
'lccn': ('lccn', None),
}
self.key_mappings = []
fields = [
# fields we want to look for in book data to copy over
# title we handle separately.
self.book_mappings = []
# the things in the connector model to copy over
self_fields = [
'base_url',
'books_url',
'covers_url',
'search_url',
'key_name',
'max_query_count',
'name',
'identifier',
'local'
]
for field in fields:
for field in self_fields:
setattr(self, field, getattr(info, field))
@ -85,7 +83,7 @@ class AbstractConnector(ABC):
if self.is_work_data(data):
work_data = data
# if we requested a work and there's already an edition, we're set
work = self.match_from_mappings(work_data)
work = self.match_from_mappings(work_data, models.Work)
if work and work.default_edition:
return work.default_edition
@ -98,7 +96,7 @@ class AbstractConnector(ABC):
edition_data = data
else:
edition_data = data
edition = self.match_from_mappings(edition_data)
edition = self.match_from_mappings(edition_data, models.Edition)
# no need to figure out about the work if we already know about it
if edition and edition.parent_work:
return edition
@ -181,35 +179,25 @@ class AbstractConnector(ABC):
return book
def match_from_mappings(self, data):
def match_from_mappings(self, data, model):
''' try to find existing copies of this book using various keys '''
keys = [
('openlibrary_key', models.Book),
('librarything_key', models.Book),
('goodreads_key', models.Book),
('lccn', models.Work),
('isbn_10', models.Edition),
('isbn_13', models.Edition),
('oclc_number', models.Edition),
('asin', models.Edition),
]
noop = lambda x: x
for key, model in keys:
formatter = None
if key in self.key_mappings:
key, formatter = self.key_mappings[key]
if not formatter:
formatter = noop
value = data.get(key)
relevent_mappings = [m for m in self.key_mappings if \
m.model and model == m.model]
for mapping in relevent_mappings:
# check if this field is present in the data
value = data.get(mapping.remote_field)
if not value:
continue
value = formatter(value)
match = model.objects.select_subclasses().filter(
**{key: value}).first()
# extract the value in the right format
value = mapping.formatter(value)
# search our database for a matching book
kwargs = {mapping.local_field: value}
match = model.objects.filter(**kwargs).first()
if match:
return match
return None
@abstractmethod
@ -254,23 +242,17 @@ class AbstractConnector(ABC):
def update_from_mappings(obj, data, mappings):
''' assign data to model with mappings '''
noop = lambda x: x
mappings['authors'] = ('', noop)
mappings['parent_work'] = ('', noop)
for (key, value) in data.items():
formatter = None
if key in mappings:
key, formatter = mappings[key]
if not formatter:
formatter = noop
if key == 'id':
for mapping in mappings:
# check if this field is present in the data
value = data.get(mapping.remote_field)
if not value:
continue
try:
hasattr(obj, key)
except ValueError:
obj.__setattr__(key, formatter(value))
# extract the value in the right format
value = mapping.formatter(value)
# assign the formatted value to the model
obj.__setattr__(mapping.local_field, value)
return obj
@ -315,3 +297,15 @@ class SearchResult(object):
def __repr__(self):
    ''' debugging representation showing the key, title, and author '''
    template = "<SearchResult key={!r} title={!r} author={!r}>"
    return template.format(self.key, self.title, self.author)
class Mapping(object):
    ''' associate a local database field with a field in an external dataset

    local_field: name of the attribute on our side
    remote_field: key to look up in the external data; falls back to
        local_field when not given (or falsy)
    formatter: callable applied to the remote value before it is used;
        falls back to returning the value unchanged
    model: optional model class this mapping is associated with
    '''
    def __init__(
            self, local_field, remote_field=None, formatter=None, model=None):
        self.local_field = local_field
        self.remote_field = remote_field or local_field
        # a shared identity function replaces the per-instance lambda
        self.formatter = formatter or self._noop
        self.model = model

    @staticmethod
    def _noop(value):
        ''' default formatter: hand the value back untouched '''
        return value

    def __repr__(self):
        # mirrors the style of SearchResult.__repr__ for easier debugging
        template = "<Mapping local_field={!r} remote_field={!r} model={!r}>"
        return template.format(
            self.local_field, self.remote_field, self.model)

View file

@ -6,7 +6,7 @@ from django.core.files.base import ContentFile
import requests
from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult
from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import update_from_mappings, get_date, get_data
@ -14,11 +14,37 @@ class Connector(AbstractConnector):
''' interact with other instances '''
def __init__(self, identifier):
super().__init__(identifier)
self.book_mappings = self.key_mappings.copy()
self.book_mappings.update({
'published_date': ('published_date', get_date),
'first_published_date': ('first_published_date', get_date),
})
self.key_mappings = [
Mapping('isbn_13', model=models.Edition),
Mapping('isbn_10', model=models.Edition),
Mapping('lccn', model=models.Work),
Mapping('oclc_number', model=models.Edition),
Mapping('openlibrary_key'),
Mapping('goodreads_key'),
Mapping('asin'),
]
self.book_mappings = self.key_mappings + [
Mapping('sort_title'),
Mapping('subtitle'),
Mapping('description'),
Mapping('languages'),
Mapping('series'),
Mapping('series_number'),
Mapping('subjects'),
Mapping('subject_places'),
Mapping('first_published_date'),
Mapping('published_date'),
Mapping('pages'),
Mapping('physical_format'),
Mapping('publishers'),
]
self.author_mappings = [
Mapping('born', remote_field='birth_date', formatter=get_date),
Mapping('died', remote_field='death_date', formatter=get_date),
Mapping('bio'),
]
def is_work_data(self, data):
@ -63,11 +89,7 @@ class Connector(AbstractConnector):
# ingest a new author
author = models.Author(remote_id=remote_id)
mappings = {
'born': ('born', get_date),
'died': ('died', get_date),
}
author = update_from_mappings(author, data, mappings)
author = update_from_mappings(author, data, self.author_mappings)
author.save()
return author

View file

@ -5,7 +5,7 @@ import requests
from django.core.files.base import ContentFile
from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult
from .abstract_connector import AbstractConnector, SearchResult, Mapping
from .abstract_connector import update_from_mappings
from .abstract_connector import get_date, get_data
from .openlibrary_languages import languages
@ -15,23 +15,61 @@ class Connector(AbstractConnector):
''' instantiate a connector for OL '''
def __init__(self, identifier):
super().__init__(identifier)
get_first = lambda a: a[0]
self.key_mappings = {
'isbn_13': ('isbn_13', get_first),
'isbn_10': ('isbn_10', get_first),
'oclc_numbers': ('oclc_number', get_first),
'lccn': ('lccn', get_first),
}
self.book_mappings = self.key_mappings.copy()
self.book_mappings.update({
'publish_date': ('published_date', get_date),
'first_publish_date': ('first_published_date', get_date),
'description': ('description', get_description),
'languages': ('languages', get_languages),
'number_of_pages': ('pages', None),
'series': ('series', get_first),
})
get_first = lambda a: a[0]
self.key_mappings = [
Mapping('isbn_13', model=models.Edition, formatter=get_first),
Mapping('isbn_10', model=models.Edition, formatter=get_first),
Mapping('lccn', model=models.Work, formatter=get_first),
Mapping(
'oclc_number',
remote_field='oclc_numbers',
model=models.Edition,
formatter=get_first
),
Mapping(
'openlibrary_key',
remote_field='key',
formatter=get_openlibrary_key
),
Mapping('goodreads_key'),
Mapping('asin'),
]
self.book_mappings = self.key_mappings + [
Mapping('sort_title'),
Mapping('subtitle'),
Mapping('description', formatter=get_description),
Mapping('languages', formatter=get_languages),
Mapping('series', formatter=get_first),
Mapping('series_number'),
Mapping('subjects'),
Mapping('subject_places'),
Mapping(
'first_published_date',
remote_field='first_publish_date',
formatter=get_date
),
Mapping(
'published_date',
remote_field='publish_date',
formatter=get_date
),
Mapping(
'pages',
model=models.Edition,
remote_field='number_of_pages'
),
Mapping('physical_format', model=models.Edition),
Mapping('publishers'),
]
self.author_mappings = [
Mapping('born', remote_field='birth_date', formatter=get_date),
Mapping('died', remote_field='death_date', formatter=get_date),
Mapping('bio', formatter=get_description),
]
def is_work_data(self, data):
@ -133,12 +171,7 @@ class Connector(AbstractConnector):
data = get_data(url)
author = models.Author(openlibrary_key=olkey)
mappings = {
'birth_date': ('born', get_date),
'death_date': ('died', get_date),
'bio': ('bio', get_description),
}
author = update_from_mappings(author, data, mappings)
author = update_from_mappings(author, data, self.author_mappings)
name = data.get('name')
# TODO this is making some BOLD assumptions
if name:
@ -156,6 +189,11 @@ def get_description(description_blob):
return description_blob
def get_openlibrary_key(key):
    ''' convert /books/OL27320736M into OL27320736M

    Returns the last path segment of `key`. Trailing slashes are ignored,
    and a key with no slash at all is returned unchanged.
    '''
    # strip trailing slashes first, otherwise "/books/OL27320736M/" would
    # produce an empty string as its last segment
    return key.rstrip('/').split('/')[-1]
def get_languages(language_blob):
''' /language/eng -> English '''
langs = []

View file

@ -0,0 +1,21 @@
# Generated by Django 3.0.3 on 2020-05-10 23:42
from django.db import migrations
# Schema migration that drops two model fields: `misc_identifiers` from the
# book model and `key_name` from the connector model.
class Migration(migrations.Migration):
# must be applied after migration 0038_author_remote_id of the fedireads app
dependencies = [
('fedireads', '0038_author_remote_id'),
]
operations = [
# remove the misc_identifiers field from the book model
migrations.RemoveField(
model_name='book',
name='misc_identifiers',
),
# remove the key_name field from the connector model
migrations.RemoveField(
model_name='connector',
name='key_name',
),
]

View file

@ -29,8 +29,6 @@ class Connector(FedireadsModel):
covers_url = models.CharField(max_length=255)
search_url = models.CharField(max_length=255, null=True)
key_name = models.CharField(max_length=255)
politeness_delay = models.IntegerField(null=True) #seconds
max_query_count = models.IntegerField(null=True)
# how many queries executed in a unit of time, like a day
@ -54,7 +52,6 @@ class Book(FedireadsModel):
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
librarything_key = models.CharField(max_length=255, blank=True, null=True)
goodreads_key = models.CharField(max_length=255, blank=True, null=True)
misc_identifiers = JSONField(null=True)
# info about where the data comes from and where/if to sync
sync = models.BooleanField(default=True)

View file

@ -18,7 +18,6 @@ class FedireadsConnector(TestCase):
books_url='https://example.com',
covers_url='https://example.com/images/covers',
search_url='https://example.com/search?q=',
key_name='remote_id',
)
self.connector = Connector('example.com')

View file

@ -8,7 +8,7 @@ import pytz
from fedireads import models
from fedireads.connectors.openlibrary import Connector
from fedireads.connectors.openlibrary import get_languages, get_description
from fedireads.connectors.openlibrary import pick_default_edition
from fedireads.connectors.openlibrary import pick_default_edition, get_openlibrary_key
from fedireads.connectors.abstract_connector import SearchResult, get_date
@ -22,7 +22,6 @@ class Openlibrary(TestCase):
books_url='https://openlibrary.org',
covers_url='https://covers.openlibrary.org',
search_url='https://openlibrary.org/search?q=',
key_name='openlibrary_key',
)
self.connector = Connector('openlibrary.org')
@ -77,3 +76,9 @@ class Openlibrary(TestCase):
def test_get_languages(self):
languages = get_languages(self.edition_data['languages'])
self.assertEqual(languages, ['English'])
def test_get_ol_key(self):
    ''' the bare identifier is extracted from an OpenLibrary path '''
    self.assertEqual(
        get_openlibrary_key('/books/OL27320736M'), 'OL27320736M')