forked from mirrors/bookwyrm
Re-thinks connector mappings
This commit is contained in:
parent
d54c8c4dc4
commit
0f579e7d8d
7 changed files with 166 additions and 90 deletions
|
@ -17,26 +17,24 @@ class AbstractConnector(ABC):
|
|||
info = models.Connector.objects.get(identifier=identifier)
|
||||
self.connector = info
|
||||
|
||||
self.book_mappings = {}
|
||||
self.key_mappings = {
|
||||
'isbn_13': ('isbn_13', None),
|
||||
'isbn_10': ('isbn_10', None),
|
||||
'oclc_numbers': ('oclc_number', None),
|
||||
'lccn': ('lccn', None),
|
||||
}
|
||||
self.key_mappings = []
|
||||
|
||||
fields = [
|
||||
# fields we want to look for in book data to copy over
|
||||
# title we handle separately.
|
||||
self.book_mappings = []
|
||||
|
||||
# the things in the connector model to copy over
|
||||
self_fields = [
|
||||
'base_url',
|
||||
'books_url',
|
||||
'covers_url',
|
||||
'search_url',
|
||||
'key_name',
|
||||
'max_query_count',
|
||||
'name',
|
||||
'identifier',
|
||||
'local'
|
||||
]
|
||||
for field in fields:
|
||||
for field in self_fields:
|
||||
setattr(self, field, getattr(info, field))
|
||||
|
||||
|
||||
|
@ -85,7 +83,7 @@ class AbstractConnector(ABC):
|
|||
if self.is_work_data(data):
|
||||
work_data = data
|
||||
# if we requested a work and there's already an edition, we're set
|
||||
work = self.match_from_mappings(work_data)
|
||||
work = self.match_from_mappings(work_data, models.Work)
|
||||
if work and work.default_edition:
|
||||
return work.default_edition
|
||||
|
||||
|
@ -98,7 +96,7 @@ class AbstractConnector(ABC):
|
|||
edition_data = data
|
||||
else:
|
||||
edition_data = data
|
||||
edition = self.match_from_mappings(edition_data)
|
||||
edition = self.match_from_mappings(edition_data, models.Edition)
|
||||
# no need to figure out about the work if we already know about it
|
||||
if edition and edition.parent_work:
|
||||
return edition
|
||||
|
@ -181,35 +179,25 @@ class AbstractConnector(ABC):
|
|||
return book
|
||||
|
||||
|
||||
def match_from_mappings(self, data, model):
    ''' try to find existing copies of this book using various keys

    data: dict-like remote record; read with ``data.get(remote_field)``
    model: the model class to search; only entries of
        ``self.key_mappings`` whose ``model`` equals it are consulted

    returns the first object found, or None when no key produced a match
    '''
    # mappings with no model (or a different one) are never used for lookup
    relevant_mappings = [
        m for m in self.key_mappings if m.model and model == m.model]
    for mapping in relevant_mappings:
        # check if this field is present in the data
        value = data.get(mapping.remote_field)
        if not value:
            continue

        # extract the value in the right format
        value = mapping.formatter(value)

        # search our database for a matching book
        kwargs = {mapping.local_field: value}
        match = model.objects.filter(**kwargs).first()
        if match:
            return match
    return None
|
||||
|
||||
|
||||
@abstractmethod
|
||||
|
@ -254,23 +242,17 @@ class AbstractConnector(ABC):
|
|||
|
||||
def update_from_mappings(obj, data, mappings):
    ''' assign data to model with mappings

    obj: the object to populate; it is modified in place
    data: dict-like remote record; read with ``data.get(remote_field)``
    mappings: iterable of objects exposing ``remote_field``,
        ``local_field`` and ``formatter``

    returns the same ``obj`` that was passed in
    '''
    for mapping in mappings:
        # check if this field is present in the data
        value = data.get(mapping.remote_field)
        if not value:
            # missing and falsy values are skipped, leaving obj untouched
            continue

        # extract the value in the right format
        value = mapping.formatter(value)

        # assign the formatted value to the model
        setattr(obj, mapping.local_field, value)
    return obj
|
||||
|
||||
|
||||
|
@ -315,3 +297,15 @@ class SearchResult(object):
|
|||
def __repr__(self):
|
||||
return "<SearchResult key={!r} title={!r} author={!r}>".format(
|
||||
self.key, self.title, self.author)
|
||||
|
||||
|
||||
class Mapping(object):
    ''' associate a local database field with a field in an external dataset '''
    def __init__(
            self, local_field, remote_field=None, formatter=None, model=None):
        ''' describe how one remote field is copied onto a local field

        local_field: attribute name on the local object
        remote_field: key in the remote data; defaults to ``local_field``
        formatter: callable applied to the remote value; defaults to the
            identity function
        model: optional model class this mapping is associated with
        '''
        self.local_field = local_field
        self.remote_field = remote_field or local_field
        # fall back to passing the value through unchanged
        self.formatter = formatter or (lambda x: x)
        self.model = model

    def __repr__(self):
        return "<Mapping local_field={!r} remote_field={!r} model={!r}>".format(
            self.local_field, self.remote_field, self.model)
|
||||
|
|
|
@ -6,7 +6,7 @@ from django.core.files.base import ContentFile
|
|||
import requests
|
||||
|
||||
from fedireads import models
|
||||
from .abstract_connector import AbstractConnector, SearchResult
|
||||
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
||||
from .abstract_connector import update_from_mappings, get_date, get_data
|
||||
|
||||
|
||||
|
@ -14,11 +14,37 @@ class Connector(AbstractConnector):
|
|||
''' interact with other instances '''
|
||||
def __init__(self, identifier):
    ''' set up the field mappings used to read data from another instance '''
    super().__init__(identifier)

    # identifier fields; match_from_mappings only consults the entries
    # that name a model, the rest are just copied onto the book
    self.key_mappings = [
        Mapping('isbn_13', model=models.Edition),
        Mapping('isbn_10', model=models.Edition),
        Mapping('lccn', model=models.Work),
        Mapping('oclc_number', model=models.Edition),
        Mapping('openlibrary_key'),
        Mapping('goodreads_key'),
        Mapping('asin'),
    ]

    # everything that gets copied onto a book, identifiers included
    self.book_mappings = self.key_mappings + [
        Mapping('sort_title'),
        Mapping('subtitle'),
        Mapping('description'),
        Mapping('languages'),
        Mapping('series'),
        Mapping('series_number'),
        Mapping('subjects'),
        Mapping('subject_places'),
        Mapping('first_published_date'),
        Mapping('published_date'),
        Mapping('pages'),
        Mapping('physical_format'),
        Mapping('publishers'),
    ]

    # NOTE(review): the dict-based mapping this replaces read the remote
    # keys 'born' and 'died'; 'birth_date'/'death_date' look like they were
    # copied from the OpenLibrary connector. Both spellings are accepted
    # here because absent keys are skipped -- confirm which one remote
    # instances actually send and drop the other.
    self.author_mappings = [
        Mapping('born', formatter=get_date),
        Mapping('died', formatter=get_date),
        Mapping('born', remote_field='birth_date', formatter=get_date),
        Mapping('died', remote_field='death_date', formatter=get_date),
        Mapping('bio'),
    ]
|
||||
|
||||
|
||||
def is_work_data(self, data):
|
||||
|
@ -63,11 +89,7 @@ class Connector(AbstractConnector):
|
|||
|
||||
# ingest a new author
|
||||
author = models.Author(remote_id=remote_id)
|
||||
mappings = {
|
||||
'born': ('born', get_date),
|
||||
'died': ('died', get_date),
|
||||
}
|
||||
author = update_from_mappings(author, data, mappings)
|
||||
author = update_from_mappings(author, data, self.author_mappings)
|
||||
author.save()
|
||||
|
||||
return author
|
||||
|
|
|
@ -5,7 +5,7 @@ import requests
|
|||
from django.core.files.base import ContentFile
|
||||
|
||||
from fedireads import models
|
||||
from .abstract_connector import AbstractConnector, SearchResult
|
||||
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
||||
from .abstract_connector import update_from_mappings
|
||||
from .abstract_connector import get_date, get_data
|
||||
from .openlibrary_languages import languages
|
||||
|
@ -15,23 +15,61 @@ class Connector(AbstractConnector):
|
|||
''' instantiate a connector for OL '''
|
||||
def __init__(self, identifier):
    ''' set up the field mappings used to read OpenLibrary data '''
    super().__init__(identifier)

    def get_first(values):
        ''' these fields arrive as lists and only the first entry is kept '''
        return values[0]

    # identifier fields; match_from_mappings only consults the entries
    # that name a model, the rest are just copied onto the book
    self.key_mappings = [
        Mapping('isbn_13', model=models.Edition, formatter=get_first),
        Mapping('isbn_10', model=models.Edition, formatter=get_first),
        Mapping('lccn', model=models.Work, formatter=get_first),
        Mapping(
            'oclc_number',
            remote_field='oclc_numbers',
            model=models.Edition,
            formatter=get_first
        ),
        Mapping(
            'openlibrary_key',
            remote_field='key',
            formatter=get_openlibrary_key
        ),
        Mapping('goodreads_key'),
        Mapping('asin'),
    ]

    # everything that gets copied onto a book, identifiers included
    self.book_mappings = self.key_mappings + [
        Mapping('sort_title'),
        Mapping('subtitle'),
        Mapping('description', formatter=get_description),
        Mapping('languages', formatter=get_languages),
        Mapping('series', formatter=get_first),
        Mapping('series_number'),
        Mapping('subjects'),
        Mapping('subject_places'),
        Mapping(
            'first_published_date',
            remote_field='first_publish_date',
            formatter=get_date
        ),
        Mapping(
            'published_date',
            remote_field='publish_date',
            formatter=get_date
        ),
        Mapping(
            'pages',
            model=models.Edition,
            remote_field='number_of_pages'
        ),
        Mapping('physical_format', model=models.Edition),
        Mapping('publishers'),
    ]

    # fields copied onto an author record
    self.author_mappings = [
        Mapping('born', remote_field='birth_date', formatter=get_date),
        Mapping('died', remote_field='death_date', formatter=get_date),
        Mapping('bio', formatter=get_description),
    ]
|
||||
|
||||
|
||||
|
||||
def is_work_data(self, data):
|
||||
|
@ -133,12 +171,7 @@ class Connector(AbstractConnector):
|
|||
data = get_data(url)
|
||||
|
||||
author = models.Author(openlibrary_key=olkey)
|
||||
mappings = {
|
||||
'birth_date': ('born', get_date),
|
||||
'death_date': ('died', get_date),
|
||||
'bio': ('bio', get_description),
|
||||
}
|
||||
author = update_from_mappings(author, data, mappings)
|
||||
author = update_from_mappings(author, data, self.author_mappings)
|
||||
name = data.get('name')
|
||||
# TODO this is making some BOLD assumption
|
||||
if name:
|
||||
|
@ -156,6 +189,11 @@ def get_description(description_blob):
|
|||
return description_blob
|
||||
|
||||
|
||||
def get_openlibrary_key(key):
    ''' convert /books/OL27320736M into OL27320736M '''
    # drop trailing slashes first so '/books/OL27320736M/' does not give ''
    return key.rstrip('/').split('/')[-1]
|
||||
|
||||
|
||||
def get_languages(language_blob):
|
||||
''' /language/eng -> English '''
|
||||
langs = []
|
||||
|
|
21
fedireads/migrations/0039_auto_20200510_2342.py
Normal file
21
fedireads/migrations/0039_auto_20200510_2342.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
# Generated by Django 3.0.3 on 2020-05-10 23:42
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    ''' remove book.misc_identifiers and connector.key_name '''

    # must run after the migration that precedes it in the fedireads app
    dependencies = [
        ('fedireads', '0038_author_remote_id'),
    ]

    operations = [
        # both fields are dropped from their models
        migrations.RemoveField(
            model_name='book',
            name='misc_identifiers',
        ),
        migrations.RemoveField(
            model_name='connector',
            name='key_name',
        ),
    ]
|
|
@ -29,8 +29,6 @@ class Connector(FedireadsModel):
|
|||
covers_url = models.CharField(max_length=255)
|
||||
search_url = models.CharField(max_length=255, null=True)
|
||||
|
||||
key_name = models.CharField(max_length=255)
|
||||
|
||||
politeness_delay = models.IntegerField(null=True) #seconds
|
||||
max_query_count = models.IntegerField(null=True)
|
||||
# how many queries executed in a unit of time, like a day
|
||||
|
@ -54,7 +52,6 @@ class Book(FedireadsModel):
|
|||
openlibrary_key = models.CharField(max_length=255, blank=True, null=True)
|
||||
librarything_key = models.CharField(max_length=255, blank=True, null=True)
|
||||
goodreads_key = models.CharField(max_length=255, blank=True, null=True)
|
||||
misc_identifiers = JSONField(null=True)
|
||||
|
||||
# info about where the data comes from and where/if to sync
|
||||
sync = models.BooleanField(default=True)
|
||||
|
|
|
@ -18,7 +18,6 @@ class FedireadsConnector(TestCase):
|
|||
books_url='https://example.com',
|
||||
covers_url='https://example.com/images/covers',
|
||||
search_url='https://example.com/search?q=',
|
||||
key_name='remote_id',
|
||||
)
|
||||
self.connector = Connector('example.com')
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ import pytz
|
|||
from fedireads import models
|
||||
from fedireads.connectors.openlibrary import Connector
|
||||
from fedireads.connectors.openlibrary import get_languages, get_description
|
||||
from fedireads.connectors.openlibrary import pick_default_edition
|
||||
from fedireads.connectors.openlibrary import pick_default_edition, get_openlibrary_key
|
||||
from fedireads.connectors.abstract_connector import SearchResult, get_date
|
||||
|
||||
|
||||
|
@ -22,7 +22,6 @@ class Openlibrary(TestCase):
|
|||
books_url='https://openlibrary.org',
|
||||
covers_url='https://covers.openlibrary.org',
|
||||
search_url='https://openlibrary.org/search?q=',
|
||||
key_name='openlibrary_key',
|
||||
)
|
||||
self.connector = Connector('openlibrary.org')
|
||||
|
||||
|
@ -77,3 +76,9 @@ class Openlibrary(TestCase):
|
|||
def test_get_languages(self):
|
||||
languages = get_languages(self.edition_data['languages'])
|
||||
self.assertEqual(languages, ['English'])
|
||||
|
||||
|
||||
def test_get_ol_key(self):
    ''' the key is the last segment of an openlibrary path '''
    expected = 'OL27320736M'
    self.assertEqual(get_openlibrary_key('/books/OL27320736M'), expected)
|
||||
|
||||
|
|
Loading…
Reference in a new issue