Expand matching books on keys like isbn

This commit is contained in:
Mouse Reeve 2020-05-03 21:00:25 -07:00
parent 3c3afed6b3
commit 07aab3806b
4 changed files with 109 additions and 68 deletions

View file

@ -2,6 +2,7 @@
from abc import ABC, abstractmethod
from dateutil import parser
import pytz
import requests
from fedireads import models
@ -33,10 +34,27 @@ class AbstractConnector(ABC):
return True
@abstractmethod
def search(self, query):
    ''' free text search: ask the remote source for `query` and return a
    list with at most ten formatted results '''
    response = requests.get(
        '%s%s' % (self.search_url, query),
        headers={'Accept': 'application/json; charset=utf-8'},
    )
    # surface http errors to the caller rather than parsing an error body
    if not response.ok:
        response.raise_for_status()

    # only the first ten docs are kept; the formatting of each one is
    # delegated to format_search_result
    docs = response.json()['docs'][:10]
    return [self.format_search_result(doc) for doc in docs]
@abstractmethod
def format_search_result(self, search_result):
    ''' turn one raw (json) search result into a SearchResult obj '''
@abstractmethod
@ -82,6 +100,37 @@ def update_from_mappings(obj, data, mappings):
return obj
def match_from_mappings(data, mappings):
    ''' try to find existing copies of this book using various keys

    data: dict-like book data loaded from a source
    mappings: dict of {field name in data: (local field name, formatter)},
        where formatter is a callable or None
    returns the first model instance that matches on any key, or None
    '''
    # local identifier field paired with the model class that is queried
    keys = [
        ('openlibrary_key', models.Book),
        ('librarything_key', models.Book),
        ('goodreads_key', models.Book),
        ('lccn', models.Work),
        ('isbn_10', models.Edition),
        ('isbn_13', models.Edition),
        ('oclc_number', models.Edition),
        ('asin', models.Edition),
    ]

    # mappings are indexed by the field name used in `data`, but the loop
    # below walks local field names, so build the reverse lookup up front
    remote_fields = {
        local_key: (remote_key, formatter)
        for remote_key, (local_key, formatter) in mappings.items()
    }

    for key, model in keys:
        # unmapped keys are read from data under their local name, unformatted
        remote_key, formatter = remote_fields.get(key, (key, None))

        value = data.get(remote_key)
        if not value:
            continue
        if formatter:
            value = formatter(value)
        # a formatter can hand back an empty value; filtering on that would
        # match unrelated books instead of this one
        if not value:
            continue

        match = model.objects.select_subclasses().filter(
            **{key: value}).first()
        if match:
            return match
    return None
def has_attr(obj, key):
''' helper function to check if a model object has a key '''
try:
@ -100,12 +149,11 @@ def get_date(date_string):
class SearchResult:
''' standardized search result object '''
def __init__(self, title, key, author, year, raw_data):
def __init__(self, title, key, author, year):
self.title = title
self.key = key
self.author = author
self.year = year
self.raw_data = raw_data
def __repr__(self):
return "<SearchResult key={!r} title={!r} author={!r}>".format(

View file

@ -4,48 +4,37 @@ from django.core.files.base import ContentFile
import requests
from fedireads import models
from .abstract_connector import AbstractConnector
from .abstract_connector import update_from_mappings, get_date
from .abstract_connector import AbstractConnector, SearchResult, get_date
from .abstract_connector import match_from_mappings, update_from_mappings
class Connector(AbstractConnector):
''' interact with other instances '''
def search(self, query):
    ''' fetch search results for `query` and return the decoded json
    unchanged (right now you can't search fedireads, but...) '''
    request_headers = {
        'Accept': 'application/activity+json; charset=utf-8',
    }
    target = '%s%s' % (self.search_url, query)
    response = requests.get(target, headers=request_headers)
    # raise on http errors instead of returning an error payload
    if not response.ok:
        response.raise_for_status()
    return response.json()
def format_search_result(self, search_result):
    ''' build a SearchResult, passing the result's items as keyword args '''
    formatted = SearchResult(**search_result)
    return formatted
def get_or_create_book(self, remote_id):
''' pull up a book record by whatever means possible '''
try:
book = models.Book.objects.select_subclasses().get(
remote_id=remote_id
)
book = models.Book.objects.select_subclasses().filter(
remote_id=remote_id
).first()
if book:
if isinstance(book, models.Work):
return book.default_edition
return book
except ObjectDoesNotExist:
if self.model.is_self:
# we can't load a book from a remote server, this is it
return None
# no book was found, so we start creating a new one
book = models.Book(remote_id=remote_id)
# no book was found, so we start creating a new one
book = models.Book(remote_id=remote_id)
self.update_book(book)
def update_book(self, book, data=None):
''' add remote data to a local book '''
remote_id = book.remote_id
if not data:
response = requests.get(
'%s/%s' % (self.base_url, remote_id),
book.remote_id,
headers={
'Accept': 'application/activity+json; charset=utf-8',
},
@ -55,6 +44,10 @@ class Connector(AbstractConnector):
data = response.json()
match = match_from_mappings(data, {})
if match:
return match
# great, we can update our book.
mappings = {
'published_date': ('published_date', get_date),

View file

@ -7,7 +7,8 @@ from django.db import transaction
from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult
from .abstract_connector import update_from_mappings, get_date
from .abstract_connector import match_from_mappings, update_from_mappings
from .abstract_connector import get_date
from .openlibrary_languages import languages
@ -15,45 +16,34 @@ class Connector(AbstractConnector):
''' instantiate a connector for OL '''
def __init__(self, identifier):
get_first = lambda a: a[0]
self.book_mappings = {
'publish_date': ('published_date', get_date),
'first_publish_date': ('first_published_date', get_date),
'description': ('description', get_description),
self.key_mappings = {
'isbn_13': ('isbn_13', get_first),
'oclc_numbers': ('oclc_number', get_first),
'lccn': ('lccn', get_first),
}
self.book_mappings = self.key_mappings.copy()
self.book_mappings.update({
'publish_date': ('published_date', get_date),
'first_publish_date': ('first_published_date', get_date),
'description': ('description', get_description),
'languages': ('languages', get_languages),
'number_of_pages': ('pages', None),
'series': ('series', get_first),
}
})
super().__init__(identifier)
def search(self, query):
''' query openlibrary search '''
resp = requests.get(
'%s%s' % (self.search_url, query),
headers={
'Accept': 'application/json; charset=utf-8',
},
def format_search_result(self, doc):
    ''' create a SearchResult obj from one openlibrary search doc '''
    # keep only the part of the key after the last slash
    olkey = doc['key'].rsplit('/', 1)[-1]
    # a missing or empty author list falls back to a placeholder name
    authors = doc.get('author_name') or ['Unknown']
    first_author = authors[0]
    return SearchResult(
        doc.get('title'),
        olkey,
        first_author,
        doc.get('first_publish_year'),
    )
if not resp.ok:
resp.raise_for_status()
data = resp.json()
results = []
for doc in data['docs'][:5]:
key = doc['key']
key = key.split('/')[-1]
author = doc.get('author_name') or ['Unknown']
results.append(SearchResult(
doc.get('title'),
key,
author[0],
doc.get('first_publish_year'),
doc
))
return results
def get_or_create_book(self, olkey):
@ -115,6 +105,11 @@ class Connector(AbstractConnector):
def create_book(self, key, data, model):
''' create a work or edition from data '''
# we really would rather use an existing book than make a new one
match = match_from_mappings(data, self.key_mappings)
if match:
return match
book = model.objects.create(
openlibrary_key=key,
title=data['title'],
@ -145,7 +140,9 @@ class Connector(AbstractConnector):
if not book.sync and not book.sync_cover:
return
data = self.load_book_data(book.openlibrary_key)
if not data:
data = self.load_book_data(book.openlibrary_key)
if book.sync_cover and data.get('covers'):
book.cover.save(*self.get_cover(data['covers'][0]), save=True)
if book.sync:

View file

@ -34,17 +34,20 @@ class Connector(AbstractConnector):
search_results = []
for book in results[:10]:
search_results.append(
SearchResult(
book.title,
book.id,
book.author_text,
book.published_date.year if book.published_date else None,
None
)
self.format_search_result(book)
)
return search_results
def format_search_result(self, book):
    ''' create a SearchResult obj from a book object '''
    title, key, author = book.title, book.id, book.author_text
    # books without a published date get no year
    if book.published_date:
        year = book.published_date.year
    else:
        year = None
    return SearchResult(title, key, author, year)
def get_or_create_book(self, book_id):
''' since this is querying its own data source, it can only
get a book, not load one from an external source '''