Adds generalized book data connectors

This commit is contained in:
Mouse Reeve 2020-03-07 12:22:28 -08:00
parent d501e707ee
commit 6f765bd6f4
11 changed files with 249 additions and 144 deletions

View file

@ -0,0 +1,11 @@
''' select and call a connector for whatever book task needs doing '''
from fedireads.connectors import OpenLibraryConnector
# module-level connector instance, created at import time; the functions in
# this module delegate to it
openlibrary = OpenLibraryConnector()
def get_or_create_book(key):
    ''' pull up a book record by whatever means possible

    hands the key to the openlibrary connector's get_or_create_book and
    returns its result unchanged '''
    return openlibrary.get_or_create_book(key)
def search(query):
    ''' free text book search

    hands the query to the openlibrary connector's search and returns its
    result unchanged '''
    return openlibrary.search(query)

View file

@ -0,0 +1,3 @@
''' bring connectors into the namespace '''
from .settings import CONNECTORS
from .openlibrary import OpenLibraryConnector

View file

@ -0,0 +1,60 @@
''' functionality outline for a book data connector '''
from abc import ABC, abstractmethod
from fedireads.connectors import CONNECTORS
class AbstractConnector(ABC):
    ''' generic book data connector

    a subclass passes its connector name to __init__, which loads the matching
    entry from CONNECTORS, and implements the abstract methods below '''

    def __init__(self, connector_name):
        ''' load the settings for the named connector

        raises ValueError when CONNECTORS has no (or an empty) entry for
        connector_name, and KeyError when the entry lacks a required setting
        '''
        # load connector settings
        settings = CONNECTORS.get(connector_name)
        if not settings:
            raise ValueError('No connector with name "%s"' % connector_name)

        try:
            self.url = settings['BASE_URL']
            self.covers_url = settings['COVERS_URL']
            self.db_field = settings['DB_KEY_FIELD']
            self.key_name = settings['KEY_NAME']
        except KeyError as err:
            # chain the original error so the traceback names the missing key
            raise KeyError('Invalid connector settings') from err
        # TODO: politeness settings

    @abstractmethod
    def search(self, query):
        ''' free text search '''
        # return list of search result objs

    @abstractmethod
    def get_or_create_book(self, book_id):
        ''' request and format a book given an identifier '''
        # return book model obj

    @abstractmethod
    def get_or_create_author(self, book_id):
        ''' request and format an author given an identifier

        despite the parameter name, the identifier is the author's '''
        # return author model obj

    @abstractmethod
    def update_book(self, book_obj):
        ''' sync a book with the canonical remote copy '''
        # return book model obj
class SearchResult:
    ''' standardized search result object

    a plain record holding the title, key, author and year of one hit '''

    def __init__(self, title, key, author, year):
        self.title = title
        self.key = key
        self.author = author
        self.year = year

    def __repr__(self):
        ''' readable form for logs and debugging '''
        return '<SearchResult key=%r title=%r author=%r year=%r>' % (
            self.key, self.title, self.author, self.year)

View file

@ -0,0 +1,138 @@
''' openlibrary data connector '''
from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
import re
import requests
from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult
class OpenLibraryConnector(AbstractConnector):
    ''' instantiate a connector for OL '''

    def __init__(self):
        ''' load the "openlibrary" connector settings '''
        super().__init__('openlibrary')

    def search(self, query):
        ''' query openlibrary search

        returns a list of SearchResult objects for the first five docs in the
        response; raises requests' HTTPError on an error status '''
        resp = requests.get('%s/search.json' % self.url, params={'q': query})
        # raise_for_status does nothing on success, so no ok check is needed
        resp.raise_for_status()
        data = resp.json()

        results = []
        for doc in data['docs'][:5]:
            # keep only the last path segment of the key
            key = doc['key'].split('/')[-1]
            author = doc.get('author_name') or ['Unknown']
            results.append(SearchResult(
                doc.get('title'),
                key,
                author[0],
                doc.get('first_publish_year'),
            ))
        return results

    def get_or_create_book(self, olkey):
        ''' pull up a book record by whatever means possible

        returns the stored book with this openlibrary key if there is one,
        otherwise loads it from openlibrary, saves it (along with its parent
        work, authors and first cover) and returns it

        raises ValueError when olkey is neither a work ("OL...W") nor an
        edition ("OL...M") key, and requests' HTTPError on an error status '''
        if re.match(r'^OL\d+W$', olkey):
            model = models.Work
        elif re.match(r'^OL\d+M$', olkey):
            model = models.Edition
        else:
            raise ValueError('Invalid OpenLibrary ID')

        # NOTE(review): the lookup goes through models.Book rather than the
        # Work/Edition model picked above -- confirm callers get the subclass
        # instance they expect
        try:
            return models.Book.objects.get(openlibrary_key=olkey)
        except ObjectDoesNotExist:
            # no book was found, so we start creating a new one
            book = model(openlibrary_key=olkey)

        # load the book json from openlibrary.org
        # NOTE(review): editions are requested under /works/ as well --
        # confirm openlibrary serves them from that path
        response = requests.get('%s/works/%s.json' % (self.url, olkey))
        response.raise_for_status()
        data = response.json()

        # great, we can update our book.
        book.title = data['title']
        description = data.get('description')
        if description:
            # the description is either a plain string or a {"value": ...} dict
            if isinstance(description, dict):
                description = description.get('value')
            book.description = description
        book.pages = data.get('pages')
        #book.published_date = data.get('publish_date')

        # this book sure as heck better be an edition
        if data.get('works'):
            key = data.get('works')[0]['key']
            key = key.split('/')[-1]
            work = self.get_or_create_book(key)
            book.parent_work = work
        # the book has to be saved before authors and a cover are attached
        book.save()

        # we also need to know the authors and get the cover
        # a record without an "authors" entry simply gets no authors
        for author_blob in data.get('authors') or []:
            # this id is "/authors/OL1234567A" and we want just "OL1234567A"
            author_blob = author_blob.get('author', author_blob)
            author_id = author_blob['key']
            author_id = author_id.split('/')[-1]
            book.authors.add(self.get_or_create_author(author_id))

        if data.get('covers'):
            book.cover.save(*self.get_cover(data['covers'][0]), save=True)

        return book

    def get_or_create_author(self, olkey):
        ''' load that author

        returns the stored author with this openlibrary key if there is one,
        otherwise loads the author from openlibrary, saves and returns it

        raises ValueError when olkey is not an author ("OL...A") key, and
        requests' HTTPError on an error status '''
        if not re.match(r'^OL\d+A$', olkey):
            raise ValueError('Invalid OpenLibrary author ID')

        try:
            # an author we already know about is returned as is, rather than
            # being fetched again and saved as a duplicate
            return models.Author.objects.get(openlibrary_key=olkey)
        except ObjectDoesNotExist:
            pass

        response = requests.get('%s/authors/%s.json' % (self.url, olkey))
        response.raise_for_status()
        data = response.json()

        author = models.Author(openlibrary_key=olkey)
        bio = data.get('bio')
        if bio:
            # the bio is either a plain string or a {"value": ...} dict
            if isinstance(bio, dict):
                bio = bio.get('value')
            author.bio = bio
        name = data['name']
        author.name = name
        # TODO this is making some BOLD assumption
        name_parts = name.split(' ')
        author.last_name = name_parts[-1]
        author.first_name = ' '.join(name_parts[:-1])
        #author.born = data.get('birth_date')
        #author.died = data.get('death_date')
        author.save()

        return author

    def get_cover(self, cover_id):
        ''' ask openlibrary for the cover

        returns [image_name, image_content], the positional arguments that
        get_or_create_book passes on to book.cover.save; raises requests'
        HTTPError on an error status '''
        # TODO: get medium and small versions
        image_name = '%s-M.jpg' % cover_id
        url = '%s/b/id/%s' % (self.covers_url, image_name)
        response = requests.get(url)
        response.raise_for_status()
        # reuse the response we already have instead of downloading it again
        image_content = ContentFile(response.content)
        return [image_name, image_content]

    def update_book(self, book_obj):
        ''' not implemented yet: does nothing and returns None '''

View file

@ -0,0 +1,28 @@
''' settings for book data connectors '''
# maps a connector name to that connector's settings dict
CONNECTORS = {
    'openlibrary': {
        # KEY_NAME, DB_KEY_FIELD, BASE_URL and COVERS_URL are the entries that
        # AbstractConnector.__init__ reads; it raises KeyError without them
        'KEY_NAME': 'olkey',
        'DB_KEY_FIELD': 'openlibrary_key',
        # NOTE(review): no visible code reads the next two settings yet
        # (politeness handling is still a TODO) -- presumably a delay between
        # requests and a daily cap where -1 means unlimited; confirm
        'POLITENESS_DELAY': 0,
        'MAX_DAILY_QUERIES': -1,
        'BASE_URL': 'https://openlibrary.org',
        'COVERS_URL': 'https://covers.openlibrary.org',
    },
}

# the string below only sketches future connectors; it is a bare string
# expression and is not part of CONNECTORS
# NOTE(review): neither sketched entry has a COVERS_URL, which
# AbstractConnector.__init__ requires -- add one before enabling them
''' not implemented yet:
'librarything': {
'KEY_NAME': 'ltkey',
'DB_KEY_FIELD': 'librarything_key',
'POLITENESS_DELAY': 1,
'MAX_DAILY_QUERIES': 1000,
'BASE_URL': 'https://librarything.com',
},
'worldcat': {
'KEY_NAME': 'ocn',
'DB_KEY_FIELD': 'oclc_number',
'POLITENESS_DELAY': 0,
'MAX_DAILY_QUERIES': -1,
'BASE_URL': 'https://worldcat.org',
},
'''

View file

@ -1,135 +0,0 @@
''' activitystream api and books '''
from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
import re
import requests
from fedireads import models
from fedireads.settings import OL_URL
def book_search(query):
    ''' search openlibrary and summarize the first five hits

    returns a list of dicts with the keys title, olkey, year and author;
    raises requests' HTTPError on an error status '''
    response = requests.get('%s/search.json' % OL_URL, params={'q': query})
    response.raise_for_status()
    docs = response.json()['docs'][:5]
    return [
        {
            'title': doc.get('title'),
            # keep only the last path segment of the key
            'olkey': doc['key'].split('/')[-1],
            'year': doc.get('first_publish_year'),
            # first listed author, or a placeholder when none is given
            'author': (doc.get('author_name') or ['Unknown'])[0],
        }
        for doc in docs
    ]
def get_or_create_book(olkey, update=False):
    ''' create a book or work

    olkey: an openlibrary work ("OL...W") or edition ("OL...M") key
    update: when True, a book that is already stored is refreshed from
        openlibrary instead of being returned as is

    returns the saved Work or Edition; raises ValueError when olkey matches
    neither format, and requests' HTTPError on an error status '''
    # check if this is in the format of an OL book identifier
    if re.match(r'^OL\d+W$', olkey):
        model = models.Work
    elif re.match(r'^OL\d+M$', olkey):
        model = models.Edition
    else:
        raise ValueError('Invalid OpenLibrary ID')

    # get the existing entry from our db, if it exists
    try:
        book = model.objects.get(openlibrary_key=olkey)
        if not update:
            return book
        # we have the book, but still want to update it from OL
    except ObjectDoesNotExist:
        # no book was found, so we start creating a new one
        book = model(openlibrary_key=olkey)

    # load the book json from openlibrary.org
    response = requests.get('%s/works/%s.json' % (OL_URL, olkey))
    # raise_for_status does nothing on success, so no ok check is needed
    response.raise_for_status()
    data = response.json()

    # great, we can update our book.
    book.title = data['title']
    description = data.get('description')
    if description:
        # the description is either a plain string or a {"value": ...} dict
        if isinstance(description, dict):
            description = description.get('value')
        book.description = description
    book.pages = data.get('pages')
    #book.published_date = data.get('publish_date')

    # this book sure as heck better be an edition
    if data.get('works'):
        key = data.get('works')[0]['key']
        key = key.split('/')[-1]
        work = get_or_create_book(key)
        book.parent_work = work
    # the book has to be saved before authors and a cover are attached
    book.save()

    # we also need to know the authors and get the cover
    # a record without an "authors" entry simply gets no authors
    for author_blob in data.get('authors') or []:
        # this id starts as "/authors/OL1234567A" and we want just "OL1234567A"
        author_blob = author_blob.get('author', author_blob)
        author_id = author_blob['key']
        author_id = author_id.split('/')[-1]
        book.authors.add(get_or_create_author(author_id))

    if data.get('covers'):
        book.cover.save(*get_cover(data['covers'][0]), save=True)

    return book
def get_cover(cover_id):
    ''' ask openlibrary for the cover

    returns [image_name, image_content], the positional arguments that
    get_or_create_book passes on to book.cover.save; raises requests'
    HTTPError on an error status '''
    # TODO: get medium and small versions
    image_name = '%s-M.jpg' % cover_id
    url = 'https://covers.openlibrary.org/b/id/%s' % image_name
    response = requests.get(url)
    response.raise_for_status()
    # reuse the response we already have instead of downloading it again
    image_content = ContentFile(response.content)
    return [image_name, image_content]
def get_or_create_author(olkey, update=False):
    ''' load that author

    olkey: an openlibrary author ("OL...A") key
    update: when True, an author that is already stored is refreshed from
        openlibrary instead of being returned as is

    returns the saved Author; raises ValueError when olkey is not an author
    key, and requests' HTTPError on an error status '''
    if not re.match(r'^OL\d+A$', olkey):
        raise ValueError('Invalid OpenLibrary author ID')

    try:
        author = models.Author.objects.get(openlibrary_key=olkey)
        if not update:
            return author
        # we have the author, but still want to update it from OL; keep
        # working on the stored row so no duplicate is saved
    except ObjectDoesNotExist:
        # no author was found, so we start creating a new one
        author = models.Author(openlibrary_key=olkey)

    response = requests.get('%s/authors/%s.json' % (OL_URL, olkey))
    # raise_for_status does nothing on success, so no ok check is needed
    response.raise_for_status()
    data = response.json()

    bio = data.get('bio')
    if bio:
        # the bio is either a plain string or a {"value": ...} dict
        if isinstance(bio, dict):
            bio = bio.get('value')
        author.bio = bio
    name = data['name']
    author.name = name
    # TODO this is making some BOLD assumption
    name_parts = name.split(' ')
    author.last_name = name_parts[-1]
    author.first_name = ' '.join(name_parts[:-1])
    #author.born = data.get('birth_date')
    #author.died = data.get('death_date')
    author.save()

    return author

View file

@ -1,6 +1,6 @@
''' Handle user activity ''' ''' Handle user activity '''
from fedireads import models from fedireads import models
from fedireads.openlibrary import get_or_create_book from fedireads.books_manager import get_or_create_book
from fedireads.sanitize_html import InputHtmlParser from fedireads.sanitize_html import InputHtmlParser
from django.db import IntegrityError from django.db import IntegrityError

View file

@ -5,7 +5,7 @@
<h1>Search results</h1> <h1>Search results</h1>
{% for result in results %} {% for result in results %}
<div> <div>
<a href="/book/{{ result.olkey }}">{{ result.title }}</a> by {{ result.author }} ({{ result.year }}) <a href="/book/{{ result.key }}">{{ result.title }}</a> by {{ result.author }} ({{ result.year }})
</div> </div>
{% endfor %} {% endfor %}
</div> </div>

View file

@ -5,7 +5,7 @@ from django.shortcuts import redirect
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
import re import re
from fedireads import forms, models, openlibrary, outgoing from fedireads import forms, models, books_manager, outgoing
from fedireads.views import get_user_from_username from fedireads.views import get_user_from_username
@ -150,8 +150,8 @@ def search(request):
results = [outgoing.handle_account_search(query)] results = [outgoing.handle_account_search(query)]
template = 'user_results.html' template = 'user_results.html'
else: else:
# just send the question over to openlibrary for book search # just send the question over to book search
results = openlibrary.book_search(query) results = books_manager.search(query)
template = 'book_results.html' template = 'book_results.html'
return TemplateResponse(request, template, {'results': results}) return TemplateResponse(request, template, {'results': results})

View file

@ -6,7 +6,7 @@ from django.http import HttpResponseNotFound
from django.shortcuts import redirect from django.shortcuts import redirect
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
from fedireads import forms, models, openlibrary, incoming from fedireads import forms, models, books_manager, incoming
from fedireads.settings import DOMAIN from fedireads.settings import DOMAIN
@ -188,7 +188,7 @@ def edit_profile_page(request, username):
@login_required @login_required
def book_page(request, book_identifier, tab='friends'): def book_page(request, book_identifier, tab='friends'):
''' info about a book ''' ''' info about a book '''
book = openlibrary.get_or_create_book(book_identifier) book = books_manager.get_or_create_book(book_identifier)
if isinstance(book, models.Work): if isinstance(book, models.Work):
book_reviews = models.Review.objects.filter( book_reviews = models.Review.objects.filter(
@ -258,7 +258,7 @@ def book_page(request, book_identifier, tab='friends'):
def author_page(request, author_identifier): def author_page(request, author_identifier):
''' landing page for an author ''' ''' landing page for an author '''
try: try:
author = models.Author.objects.get(openlibrary_key=author_identifier) author = models.Author.objects.get(books_manager_key=author_identifier)
except ValueError: except ValueError:
return HttpResponseNotFound() return HttpResponseNotFound()

View file

@ -1,5 +1,5 @@
from fedireads.models import User from fedireads.models import User
from fedireads.openlibrary import get_or_create_book from fedireads.books_manager import get_or_create_book
User.objects.create_user('mouse', 'mouse.reeve@gmail.com', 'password123') User.objects.create_user('mouse', 'mouse.reeve@gmail.com', 'password123')
User.objects.create_user('rat', 'rat@rat.com', 'ratword') User.objects.create_user('rat', 'rat@rat.com', 'ratword')