forked from mirrors/bookwyrm
Adds generalized book data connectors
This commit is contained in:
parent
d501e707ee
commit
6f765bd6f4
11 changed files with 249 additions and 144 deletions
11
fedireads/books_manager.py
Normal file
11
fedireads/books_manager.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
''' select and call a connector for whatever book task needs doing '''
|
||||||
|
from fedireads.connectors import OpenLibraryConnector
|
||||||
|
|
||||||
|
# connector instance created at import time; the functions in this module
# delegate to it
openlibrary = OpenLibraryConnector()
|
||||||
|
def get_or_create_book(key):
    ''' hand the key to the openlibrary connector and return its book '''
    book = openlibrary.get_or_create_book(key)
    return book
|
||||||
|
|
||||||
|
def search(query):
    ''' run a book search for the query through the openlibrary connector '''
    results = openlibrary.search(query)
    return results
|
3
fedireads/connectors/__init__.py
Normal file
3
fedireads/connectors/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
''' bring connectors into the namespace '''
|
||||||
|
from .settings import CONNECTORS
|
||||||
|
from .openlibrary import OpenLibraryConnector
|
60
fedireads/connectors/abstract_connector.py
Normal file
60
fedireads/connectors/abstract_connector.py
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
''' functionality outline for a book data connector '''
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from fedireads.connectors import CONNECTORS
|
||||||
|
|
||||||
|
|
||||||
|
class AbstractConnector(ABC):
    ''' generic book data connector

    A subclass passes the name of its entry in CONNECTORS to __init__ and
    implements every abstract method below.
    '''

    def __init__(self, connector_name):
        ''' copy the settings stored under connector_name onto the instance

        Raises ValueError when CONNECTORS has no (non-empty) entry for the
        name, and KeyError when that entry lacks a required setting.
        '''
        # load connector settings
        settings = CONNECTORS.get(connector_name)
        if not settings:
            raise ValueError('No connector with name "%s"' % connector_name)

        try:
            self.url = settings['BASE_URL']
            self.covers_url = settings['COVERS_URL']
            self.db_field = settings['DB_KEY_FIELD']
            self.key_name = settings['KEY_NAME']
        except KeyError as err:
            # chain the original error so the name of the missing setting
            # stays visible in the traceback
            raise KeyError('Invalid connector settings') from err
        # TODO: politeness settings

    @abstractmethod
    def search(self, query):
        ''' free text search '''
        # return list of search result objs

    @abstractmethod
    def get_or_create_book(self, book_id):
        ''' request and format a book given an identifier '''
        # return book model obj

    @abstractmethod
    def get_or_create_author(self, book_id):
        ''' request and format an author given an identifier

        The parameter keeps its historical name "book_id"; the value passed
        in is an author identifier.
        '''
        # return author model obj

    @abstractmethod
    def update_book(self, book_obj):
        ''' sync a book with the canonical remote copy '''
        # return book model obj
|
||||||
|
|
||||||
|
|
||||||
|
class SearchResult:
    ''' one search hit in the shape shared by every connector '''

    def __init__(self, title, key, author, year):
        ''' store the four display fields unchanged '''
        self.title, self.key = title, key
        self.author, self.year = author, year
|
138
fedireads/connectors/openlibrary.py
Normal file
138
fedireads/connectors/openlibrary.py
Normal file
|
@ -0,0 +1,138 @@
|
||||||
|
''' openlibrary data connector '''
|
||||||
|
from django.core.exceptions import ObjectDoesNotExist
|
||||||
|
from django.core.files.base import ContentFile
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from fedireads import models
|
||||||
|
from .abstract_connector import AbstractConnector, SearchResult
|
||||||
|
|
||||||
|
|
||||||
|
class OpenLibraryConnector(AbstractConnector):
    ''' instantiate a connector for OL '''

    def __init__(self):
        ''' load the settings stored under the "openlibrary" name '''
        super().__init__('openlibrary')

    def search(self, query):
        ''' query openlibrary search

        Returns a list of at most five SearchResult objects built from the
        "docs" list of the search response. Raises the requests HTTP error
        when the response status is not ok.
        '''
        resp = requests.get('%s/search.json' % self.url, params={'q': query})
        # raise_for_status() only raises for error statuses
        resp.raise_for_status()
        data = resp.json()

        results = []
        for doc in data['docs'][:5]:
            # the key is a slash-separated path; keep its last segment
            key = doc['key'].split('/')[-1]
            author = doc.get('author_name') or ['Unknown']
            results.append(SearchResult(
                doc.get('title'),
                key,
                author[0],
                doc.get('first_publish_year'),
            ))
        return results

    def get_or_create_book(self, olkey):
        ''' pull up a book record by whatever means possible

        Returns the stored Book with this openlibrary_key when there is one;
        otherwise fetches the record, saves a new Work or Edition (chosen by
        the key's suffix) with its authors and first cover, and returns it.
        Raises ValueError for a key that is neither OL<digits>W nor
        OL<digits>M.
        '''
        if re.match(r'^OL\d+W$', olkey):
            model = models.Work
        elif re.match(r'^OL\d+M$', olkey):
            model = models.Edition
        else:
            raise ValueError('Invalid OpenLibrary ID')

        try:
            return models.Book.objects.get(openlibrary_key=olkey)
        except ObjectDoesNotExist:
            # no book was found, so we start creating a new one
            book = model(openlibrary_key=olkey)

        # load the book json from openlibrary.org
        # NOTE(review): edition keys (OL...M) are requested under /works/ as
        # well -- confirm the remote resolves them
        response = requests.get('%s/works/%s.json' % (self.url, olkey))
        response.raise_for_status()
        data = response.json()

        # great, we can update our book.
        book.title = data['title']
        description = data.get('description')
        if description:
            # the description is either plain text or a dict with a "value"
            if isinstance(description, dict):
                description = description.get('value')
            book.description = description
        book.pages = data.get('pages')
        # TODO: book.published_date = data.get('publish_date')

        # this book sure as heck better be an edition
        if data.get('works'):
            key = data['works'][0]['key'].split('/')[-1]
            book.parent_work = self.get_or_create_book(key)
        # saved before the authors are attached below
        book.save()

        # we also need to know the author and get the cover; a record
        # without an "authors" entry simply gets no authors
        for author_blob in data.get('authors') or []:
            # this id is "/authors/OL1234567A" and we want just "OL1234567A"
            author_blob = author_blob.get('author', author_blob)
            author_id = author_blob['key'].split('/')[-1]
            book.authors.add(self.get_or_create_author(author_id))

        if data.get('covers'):
            book.cover.save(*self.get_cover(data['covers'][0]), save=True)

        return book

    def get_or_create_author(self, olkey):
        ''' load that author

        Returns the stored Author with this openlibrary_key when there is
        one; otherwise fetches the author record, saves a new Author and
        returns it. Raises ValueError for a key that is not OL<digits>A.
        '''
        if not re.match(r'^OL\d+A$', olkey):
            raise ValueError('Invalid OpenLibrary author ID')
        try:
            # an author we already hold is returned as-is instead of being
            # fetched and saved a second time
            return models.Author.objects.get(openlibrary_key=olkey)
        except ObjectDoesNotExist:
            pass

        response = requests.get('%s/authors/%s.json' % (self.url, olkey))
        response.raise_for_status()
        data = response.json()

        author = models.Author(openlibrary_key=olkey)
        bio = data.get('bio')
        if bio:
            # the bio is either plain text or a dict with a "value"
            if isinstance(bio, dict):
                bio = bio.get('value')
            author.bio = bio
        name = data['name']
        author.name = name
        # TODO this is making some BOLD assumption
        name_parts = name.split(' ')
        author.last_name = name_parts[-1]
        author.first_name = ' '.join(name_parts[:-1])
        # TODO: author.born = data.get('birth_date')
        # TODO: author.died = data.get('death_date')
        author.save()

        return author

    def get_cover(self, cover_id):
        ''' ask openlibrary for the cover

        Returns [image name, ContentFile holding the downloaded bytes], the
        pair that get_or_create_book unpacks into book.cover.save.
        '''
        # TODO: get medium and small versions
        image_name = '%s-M.jpg' % cover_id
        url = '%s/b/id/%s' % (self.covers_url, image_name)
        response = requests.get(url)
        response.raise_for_status()
        # reuse the body already downloaded rather than requesting it again
        image_content = ContentFile(response.content)
        return [image_name, image_content]

    def update_book(self, book_obj):
        ''' sync a book with the canonical remote copy (not implemented) '''
|
28
fedireads/connectors/settings.py
Normal file
28
fedireads/connectors/settings.py
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
''' settings book data connectors '''
|
||||||
|
# settings for each book data connector, keyed by connector name.
# AbstractConnector.__init__ reads BASE_URL, COVERS_URL, DB_KEY_FIELD and
# KEY_NAME and raises KeyError when one of them is missing.
# NOTE(review): POLITENESS_DELAY and MAX_DAILY_QUERIES are not read by any
# code visible here; their units and the meaning of -1 need confirming.
CONNECTORS = {
    'openlibrary': {
        'KEY_NAME': 'olkey',
        'DB_KEY_FIELD': 'openlibrary_key',
        'POLITENESS_DELAY': 0,
        'MAX_DAILY_QUERIES': -1,
        'BASE_URL': 'https://openlibrary.org',
        'COVERS_URL': 'https://covers.openlibrary.org',
    },
}
|
||||||
|
|
||||||
|
''' not implemented yet:
|
||||||
|
'librarything': {
|
||||||
|
'KEY_NAME': 'ltkey',
|
||||||
|
'DB_KEY_FIELD': 'librarything_key',
|
||||||
|
'POLITENESS_DELAY': 1,
|
||||||
|
'MAX_DAILY_QUERIES': 1000,
|
||||||
|
'BASE_URL': 'https://librarything.com',
|
||||||
|
},
|
||||||
|
'worldcat': {
|
||||||
|
'KEY_NAME': 'ocn',
|
||||||
|
'DB_KEY_FIELD': 'oclc_number',
|
||||||
|
'POLITENESS_DELAY': 0,
|
||||||
|
'MAX_DAILY_QUERIES': -1,
|
||||||
|
'BASE_URL': 'https://worldcat.org',
|
||||||
|
},
|
||||||
|
'''
|
|
@ -1,135 +0,0 @@
|
||||||
''' activitystream api and books '''
|
|
||||||
from django.core.exceptions import ObjectDoesNotExist
|
|
||||||
from django.core.files.base import ContentFile
|
|
||||||
import re
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from fedireads import models
|
|
||||||
from fedireads.settings import OL_URL
|
|
||||||
|
|
||||||
|
|
||||||
def book_search(query):
    ''' search openlibrary and return up to five result dicts

    Each dict carries the keys 'title', 'olkey', 'year' and 'author'.
    '''
    response = requests.get('%s/search.json' % OL_URL, params={'q': query})
    # only raises for error statuses
    response.raise_for_status()
    hits = response.json()['docs'][:5]
    return [
        {
            'title': hit.get('title'),
            # the key is a slash-separated path; keep its last segment
            'olkey': hit['key'].split('/')[-1],
            'year': hit.get('first_publish_year'),
            'author': (hit.get('author_name') or ['Unknown'])[0],
        }
        for hit in hits
    ]
|
|
||||||
|
|
||||||
|
|
||||||
def get_or_create_book(olkey, update=False):
    ''' create a book or work

    Returns the stored Work/Edition for olkey when there is one and update
    is false; otherwise fetches the record from openlibrary, fills in and
    saves the book (with authors and first cover) and returns it. Raises
    ValueError for a key that is neither OL<digits>W nor OL<digits>M.
    '''
    # check if this is in the format of an OL book identifier
    if re.match(r'^OL\d+W$', olkey):
        model = models.Work
    elif re.match(r'^OL\d+M$', olkey):
        model = models.Edition
    else:
        raise ValueError('Invalid OpenLibrary ID')

    # get the existing entry from our db, if it exists
    try:
        book = model.objects.get(openlibrary_key=olkey)
        if not update:
            return book
        # we have the book, but still want to update it from OL
    except ObjectDoesNotExist:
        # no book was found, so we start creating a new one
        book = model(openlibrary_key=olkey)

    # load the book json from openlibrary.org
    response = requests.get('%s/works/%s.json' % (OL_URL, olkey))
    response.raise_for_status()
    data = response.json()

    # great, we can update our book.
    book.title = data['title']
    description = data.get('description')
    if description:
        # the description is either plain text or a dict with a "value"
        if isinstance(description, dict):
            description = description.get('value')
        book.description = description
    book.pages = data.get('pages')
    # TODO: book.published_date = data.get('publish_date')

    # this book sure as heck better be an edition
    if data.get('works'):
        key = data['works'][0]['key'].split('/')[-1]
        book.parent_work = get_or_create_book(key)
    # saved before the authors are attached below
    book.save()

    # we also need to know the author and get the cover; a record without
    # an "authors" entry simply gets no authors
    for author_blob in data.get('authors') or []:
        # this id starts as "/authors/OL1234567A" and we want just "OL1234567A"
        author_blob = author_blob.get('author', author_blob)
        author_id = author_blob['key'].split('/')[-1]
        book.authors.add(get_or_create_author(author_id))

    if data.get('covers'):
        book.cover.save(*get_cover(data['covers'][0]), save=True)

    return book
|
|
||||||
|
|
||||||
|
|
||||||
def get_cover(cover_id):
    ''' ask openlibrary for the cover

    Returns [image name, ContentFile holding the downloaded bytes].
    '''
    # TODO: get medium and small versions
    image_name = '%s-M.jpg' % cover_id
    url = 'https://covers.openlibrary.org/b/id/%s' % image_name
    response = requests.get(url)
    response.raise_for_status()
    # reuse the body already downloaded rather than requesting it again
    image_content = ContentFile(response.content)
    return [image_name, image_content]
|
|
||||||
|
|
||||||
|
|
||||||
def get_or_create_author(olkey, update=False):
    ''' load that author

    Returns the stored Author for olkey when there is one and update is
    false. Otherwise fetches the author record from openlibrary, writes it
    onto the stored Author (or a new one when none exists), saves and
    returns it. Raises ValueError for a key that is not OL<digits>A.
    '''
    if not re.match(r'^OL\d+A$', olkey):
        raise ValueError('Invalid OpenLibrary author ID')
    try:
        author = models.Author.objects.get(openlibrary_key=olkey)
    except ObjectDoesNotExist:
        author = models.Author(openlibrary_key=olkey)
    else:
        if not update:
            return author
        # updating: keep working on the stored row so that save() below
        # does not add a second Author with the same key

    response = requests.get('%s/authors/%s.json' % (OL_URL, olkey))
    response.raise_for_status()
    data = response.json()

    bio = data.get('bio')
    if bio:
        # the bio is either plain text or a dict with a "value"
        if isinstance(bio, dict):
            bio = bio.get('value')
        author.bio = bio
    name = data['name']
    author.name = name
    # TODO this is making some BOLD assumption
    name_parts = name.split(' ')
    author.last_name = name_parts[-1]
    author.first_name = ' '.join(name_parts[:-1])
    # TODO: author.born = data.get('birth_date')
    # TODO: author.died = data.get('death_date')
    author.save()

    return author
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
''' Handle user activity '''
|
''' Handle user activity '''
|
||||||
from fedireads import models
|
from fedireads import models
|
||||||
from fedireads.openlibrary import get_or_create_book
|
from fedireads.books_manager import get_or_create_book
|
||||||
from fedireads.sanitize_html import InputHtmlParser
|
from fedireads.sanitize_html import InputHtmlParser
|
||||||
from django.db import IntegrityError
|
from django.db import IntegrityError
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<h1>Search results</h1>
|
<h1>Search results</h1>
|
||||||
{% for result in results %}
|
{% for result in results %}
|
||||||
<div>
|
<div>
|
||||||
<a href="/book/{{ result.olkey }}">{{ result.title }}</a> by {{ result.author }} ({{ result.year }})
|
<a href="/book/{{ result.key }}">{{ result.title }}</a> by {{ result.author }} ({{ result.year }})
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -5,7 +5,7 @@ from django.shortcuts import redirect
|
||||||
from django.template.response import TemplateResponse
|
from django.template.response import TemplateResponse
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from fedireads import forms, models, openlibrary, outgoing
|
from fedireads import forms, models, books_manager, outgoing
|
||||||
from fedireads.views import get_user_from_username
|
from fedireads.views import get_user_from_username
|
||||||
|
|
||||||
|
|
||||||
|
@ -150,8 +150,8 @@ def search(request):
|
||||||
results = [outgoing.handle_account_search(query)]
|
results = [outgoing.handle_account_search(query)]
|
||||||
template = 'user_results.html'
|
template = 'user_results.html'
|
||||||
else:
|
else:
|
||||||
# just send the question over to openlibrary for book search
|
# just send the question over to book search
|
||||||
results = openlibrary.book_search(query)
|
results = books_manager.search(query)
|
||||||
template = 'book_results.html'
|
template = 'book_results.html'
|
||||||
|
|
||||||
return TemplateResponse(request, template, {'results': results})
|
return TemplateResponse(request, template, {'results': results})
|
||||||
|
|
|
@ -6,7 +6,7 @@ from django.http import HttpResponseNotFound
|
||||||
from django.shortcuts import redirect
|
from django.shortcuts import redirect
|
||||||
from django.template.response import TemplateResponse
|
from django.template.response import TemplateResponse
|
||||||
|
|
||||||
from fedireads import forms, models, openlibrary, incoming
|
from fedireads import forms, models, books_manager, incoming
|
||||||
from fedireads.settings import DOMAIN
|
from fedireads.settings import DOMAIN
|
||||||
|
|
||||||
|
|
||||||
|
@ -188,7 +188,7 @@ def edit_profile_page(request, username):
|
||||||
@login_required
|
@login_required
|
||||||
def book_page(request, book_identifier, tab='friends'):
|
def book_page(request, book_identifier, tab='friends'):
|
||||||
''' info about a book '''
|
''' info about a book '''
|
||||||
book = openlibrary.get_or_create_book(book_identifier)
|
book = books_manager.get_or_create_book(book_identifier)
|
||||||
|
|
||||||
if isinstance(book, models.Work):
|
if isinstance(book, models.Work):
|
||||||
book_reviews = models.Review.objects.filter(
|
book_reviews = models.Review.objects.filter(
|
||||||
|
@ -258,7 +258,7 @@ def book_page(request, book_identifier, tab='friends'):
|
||||||
def author_page(request, author_identifier):
|
def author_page(request, author_identifier):
|
||||||
''' landing page for an author '''
|
''' landing page for an author '''
|
||||||
try:
|
try:
|
||||||
author = models.Author.objects.get(openlibrary_key=author_identifier)
|
author = models.Author.objects.get(books_manager_key=author_identifier)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return HttpResponseNotFound()
|
return HttpResponseNotFound()
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from fedireads.models import User
|
from fedireads.models import User
|
||||||
from fedireads.openlibrary import get_or_create_book
|
from fedireads.books_manager import get_or_create_book
|
||||||
|
|
||||||
User.objects.create_user('mouse', 'mouse.reeve@gmail.com', 'password123')
|
User.objects.create_user('mouse', 'mouse.reeve@gmail.com', 'password123')
|
||||||
User.objects.create_user('rat', 'rat@rat.com', 'ratword')
|
User.objects.create_user('rat', 'rat@rat.com', 'ratword')
|
||||||
|
|
Loading…
Reference in a new issue