2020-03-07 20:22:28 +00:00
|
|
|
''' openlibrary data connector '''
|
|
|
|
from django.core.exceptions import ObjectDoesNotExist
|
|
|
|
from django.core.files.base import ContentFile
|
|
|
|
import re
|
|
|
|
import requests
|
|
|
|
|
|
|
|
from fedireads import models
|
2020-03-30 00:40:51 +00:00
|
|
|
from .abstract_connector import AbstractConnector, SearchResult
|
|
|
|
from .abstract_connector import update_from_mappings, get_date
|
2020-03-30 20:15:49 +00:00
|
|
|
from .openlibrary_languages import languages
|
2020-03-07 20:22:28 +00:00
|
|
|
|
|
|
|
|
2020-03-27 22:25:08 +00:00
|
|
|
class Connector(AbstractConnector):
    ''' instantiate a connector for OL

    The methods below read ``self.search_url``, ``self.url``,
    ``self.covers_url`` and ``self.connector``; these are presumably set up
    by ``AbstractConnector.__init__`` (not visible in this file).
    '''
    def __init__(self, identifier):
        super().__init__(identifier)


    def search(self, query):
        ''' query openlibrary search

        Returns a list of at most five ``SearchResult`` objects built from
        the ``docs`` entries of the response. Raises
        ``requests.HTTPError`` when openlibrary answers with an error status.
        '''
        # NOTE(review): the query is appended to the url verbatim; confirm
        # that callers pass it already url-encoded
        resp = requests.get(
            '%s%s' % (self.search_url, query),
            headers={
                'Accept': 'application/json; charset=utf-8',
            },
        )
        # raise_for_status is a no-op on success, no need to test .ok first
        resp.raise_for_status()
        data = resp.json()
        results = []

        for doc in data.get('docs', [])[:5]:
            # the key looks like "/works/OL1234W", we only want the last bit
            key = doc['key'].split('/')[-1]
            author = doc.get('author_name') or ['Unknown']
            results.append(SearchResult(
                doc.get('title'),
                key,
                author[0],
                doc.get('first_publish_year'),
                doc
            ))
        return results


    def get_or_create_book(self, olkey):
        ''' pull up a book record by whatever means possible.
        if you give a work key, it should give you the default edition,
        annotated with work data. '''

        try:
            return models.Book.objects.select_subclasses().get(
                openlibrary_key=olkey
            )
        except ObjectDoesNotExist:
            pass

        # no book was found, so we start creating a new one
        model = models.Edition
        if re.match(r'^OL\d+W$', olkey):
            model = models.Work
        book = model(openlibrary_key=olkey)
        return self.update_book(book)


    def update_book(self, book):
        ''' query openlibrary for data on a book

        Raises ``requests.HTTPError`` when the record can't be loaded.
        '''
        olkey = book.openlibrary_key
        # load the book json from openlibrary.org
        response = requests.get('%s/works/%s.json' % (self.url, olkey))
        response.raise_for_status()
        data = response.json()
        if not book.source_url:
            book.source_url = response.url
        return self.update_from_data(book, data)


    def update_from_data(self, book, data):
        ''' update a book from a json blob

        Saves the book, links it to its parent work and authors, loads the
        default edition when the book is a work, and downloads the cover if
        the book wants one. Returns the updated book.
        '''
        mappings = {
            'publish_date': ('published_date', get_date),
            'first_publish_date': ('first_published_date', get_date),
            'description': ('description', get_description),
            'isbn_13': ('isbn', lambda a: a[0]),
            'oclc_numbers': ('oclc_number', lambda a: a[0]),
            'lccn': ('lccn', lambda a: a[0]),
            'languages': ('languages', get_languages),
            'number_of_pages': ('pages', None),
            'series': ('series', lambda a: a[0]),
        }
        book = update_from_mappings(book, data, mappings)

        goodreads_ids = (data.get('identifiers') or {}).get('goodreads')
        if goodreads_ids:
            book.goodreads_key = goodreads_ids[0]

        series = data.get('series') or []
        if len(series) > 1:
            book.series_number = series[1]

        if not book.connector:
            book.connector = self.connector
        # the book needs a primary key before relations can be attached
        book.save()

        # this book sure as heck better be an edition
        if data.get('works'):
            key = data['works'][0]['key'].split('/')[-1]
            book.parent_work = self.get_or_create_book(key)
            # the assignment happens after the first save, so it has to be
            # saved again or the link is lost whenever no cover gets stored
            book.save()

        # we also need to know the author to get the cover
        author_blobs = data.get('authors') or []
        for author_blob in author_blobs:
            # this id is "/authors/OL1234567A" and we want just "OL1234567A"
            author_blob = author_blob.get('author', author_blob)
            author_id = author_blob['key'].split('/')[-1]
            book.authors.add(self.get_or_create_author(author_id))
        if not author_blobs:
            # fall back on the work's authors; works (and editions that
            # never got a work assigned) have nothing to copy from
            parent_work = getattr(book, 'parent_work', None)
            if parent_work is not None:
                book.authors.set(parent_work.authors.all())

        if isinstance(book, models.Work):
            # load editions of a work. this happens after the work's own
            # authors are attached so that editions without author data can
            # copy them from the work
            self.get_editions_of_work(book)

        covers = data.get('covers')
        if book.sync_cover and covers:
            book.cover.save(*self.get_cover(covers[0]), save=True)

        return book


    def expand_book_data(self, book):
        ''' load every edition of the work that a book belongs to '''
        work = book
        if isinstance(book, models.Edition):
            work = book.parent_work
        if work is None:
            # an edition that isn't linked to a work has nothing to expand
            return
        self.get_editions_of_work(work, default_only=False)


    def get_editions_of_work(self, work, default_only=True):
        ''' get all editions of a work

        With ``default_only`` set, only one preferred edition is loaded;
        otherwise every entry in the response is. Editions that already
        exist locally are left untouched. Raises ``requests.HTTPError`` when
        openlibrary answers with an error status.
        '''
        response = requests.get(
            '%s/works/%s/editions.json' % (self.url, work.openlibrary_key))
        response.raise_for_status()
        edition_data = response.json()

        options = edition_data.get('entries', [])
        if default_only and len(options) > 1:
            # narrow down the candidates one preference at a time; each
            # filter is ignored if it would leave nothing to pick from.
            # cover ids live under "covers", the same key that
            # update_from_data reads them from
            options = [e for e in options if e.get('covers')] or options
            options = [e for e in options if \
                '/languages/eng' in str(e.get('languages'))] or options
            formats = ['paperback', 'hardcover', 'mass market paperback']
            options = [e for e in options if \
                str(e.get('physical_format')).lower() in formats] or options
            options = [e for e in options if e.get('isbn_13')] or options
            options = [e for e in options if e.get('ocaid')] or options
            options = options[:1]

        for data in options:
            try:
                olkey = data['key'].split('/')[-1]
            except KeyError:
                # bad data I guess? skip it, the other entries may be fine
                continue

            if models.Edition.objects.filter(openlibrary_key=olkey).exists():
                continue
            book = models.Edition.objects.create(openlibrary_key=olkey)
            self.update_from_data(book, data)


    def get_or_create_author(self, olkey):
        ''' load that author

        Raises ``ValueError`` for keys that don't look like "OL1234567A" and
        ``requests.HTTPError`` when the author can't be loaded.
        '''
        if not re.match(r'^OL\d+A$', olkey):
            raise ValueError('Invalid OpenLibrary author ID')
        try:
            return models.Author.objects.get(openlibrary_key=olkey)
        except ObjectDoesNotExist:
            pass

        response = requests.get('%s/authors/%s.json' % (self.url, olkey))
        response.raise_for_status()

        data = response.json()
        author = models.Author(openlibrary_key=olkey)
        mappings = {
            'birth_date': ('born', get_date),
            'death_date': ('died', get_date),
            'bio': ('bio', get_description),
        }
        author = update_from_mappings(author, data, mappings)
        # TODO this is making some BOLD assumption
        name = data.get('name')
        if name:
            # everything before the last space is treated as the first name
            first_name, _, last_name = name.rpartition(' ')
            author.last_name = last_name
            author.first_name = first_name
        author.save()

        return author


    def get_cover(self, cover_id):
        ''' ask openlibrary for the cover

        Returns ``[image_name, image_content]``, the positional arguments
        that ``update_from_data`` passes on to ``book.cover.save``.
        '''
        # TODO: get medium and small versions
        image_name = '%s-M.jpg' % cover_id
        url = '%s/b/id/%s' % (self.covers_url, image_name)
        response = requests.get(url)
        response.raise_for_status()
        image_content = ContentFile(response.content)
        return [image_name, image_content]
|
|
|
|
|
|
|
|
|
2020-03-28 04:28:52 +00:00
|
|
|
def get_description(description_blob):
    ''' descriptions can be a string or a dict '''
    # anything that isn't a dict is handed back untouched
    if not isinstance(description_blob, dict):
        return description_blob
    # dict descriptions keep their text under "value"
    return description_blob.get('value')
|
|
|
|
|
2020-03-30 20:15:49 +00:00
|
|
|
|
|
|
|
def get_languages(language_blob):
    ''' /language/eng -> English '''
    # keys missing from the lookup table come back as None
    return [
        languages.get(entry.get('key', ''), None)
        for entry in language_blob
    ]
|
|
|
|
|
|
|
|
|