Merge branch 'fedireads_connector' into code-cleanup

This commit is contained in:
Mouse Reeve 2020-05-10 13:37:16 -07:00
commit 3edfc0be74
9 changed files with 244 additions and 269 deletions

View file

@ -4,7 +4,7 @@ from requests import HTTPError
import importlib import importlib
from urllib.parse import urlparse from urllib.parse import urlparse
from fedireads import models from fedireads import models, settings
from fedireads.tasks import app from fedireads.tasks import app
@ -69,6 +69,10 @@ def get_by_absolute_id(absolute_id, model):
except model.DoesNotExist: except model.DoesNotExist:
pass pass
url = urlparse(absolute_id)
if url.netloc != settings.DOMAIN:
return None
# try finding a local status with that id # try finding a local status with that id
local_id = absolute_id.split('/')[-1] local_id = absolute_id.split('/')[-1]
try: try:

View file

@ -4,6 +4,8 @@ from dateutil import parser
import pytz import pytz
import requests import requests
from django.db import transaction
from fedireads import models from fedireads import models
@ -16,6 +18,12 @@ class AbstractConnector(ABC):
self.connector = info self.connector = info
self.book_mappings = {} self.book_mappings = {}
self.key_mappings = {
'isbn_13': ('isbn_13', None),
'isbn_10': ('isbn_10', None),
'oclc_numbers': ('oclc_number', None),
'lccn': ('lccn', None),
}
fields = [ fields = [
'base_url', 'base_url',
@ -58,38 +66,94 @@ class AbstractConnector(ABC):
return results return results
def create_book(self, key, data, model): def get_or_create_book(self, remote_id):
''' create a work or edition from data ''' ''' pull up a book record by whatever means possible '''
# we really would rather use an existing book than make a new one # try to load the book
match = match_from_mappings(data, self.key_mappings) book = models.Book.objects.select_subclasses().filter(
if match: remote_id=remote_id
if not isinstance(match, model): ).first()
if type(match).__name__ == 'Edition': if book:
return match.parent_work if isinstance(book, models.Work):
else: return book.default_edition
return match.default_edition return book
return match
kwargs = { # no book was found, so we start creating a new one
self.key_name: key, data = get_data(remote_id)
'title': data['title'],
'connector': self.connector work = None
} edition = None
book = model.objects.create(**kwargs) if self.is_work_data(data):
work_data = data
# if we requested a work and there's already an edition, we're set
work = self.match_from_mappings(work_data)
if work and work.default_edition:
return work.default_edition
# no such luck, we need more information.
try:
edition_data = self.get_edition_from_work_data(work_data)
except KeyError:
# hack: re-use the work data as the edition data
# this is why remote ids aren't necessarily unique
edition_data = data
else:
edition_data = data
edition = self.match_from_mappings(edition_data)
# no need to figure out about the work if we already know about it
if edition and edition.parent_work:
return edition
# no such luck, we need more information.
try:
work_data = self.get_work_from_edition_date(edition_data)
except KeyError:
# remember this hack: re-use the edition data as the work data
work_data = data
# at this point, we need to figure out the work, edition, or both
# atomic so that we don't save a work with no edition or vice versa
with transaction.atomic():
if not work:
work_key = work_data.get('url')
work = self.create_book(work_key, work_data, models.Work)
if not edition:
ed_key = edition_data.get('url')
edition = self.create_book(ed_key, edition_data, models.Edition)
edition.default = True
edition.parent_work = work
edition.save()
# now's our chance to fill in author gaps
if not edition.authors and work.authors:
edition.authors.set(work.authors.all())
edition.author_text = work.author_text
edition.save()
return edition
def create_book(self, remote_id, data, model):
''' create a work or edition from data '''
book = model.objects.create(
remote_id=remote_id,
title=data['title'],
connector=self.connector,
)
return self.update_book_from_data(book, data) return self.update_book_from_data(book, data)
def update_book_from_data(self, book, data): def update_book_from_data(self, book, data, update_cover=True):
''' simple function to save data to a book ''' ''' for creating a new book or syncing with data '''
update_from_mappings(book, data, self.book_mappings) book = update_from_mappings(book, data, self.book_mappings)
for author in self.get_authors_from_data(data):
book.authors.add(author)
book.author_text = ', '.join(a.name for a in book.authors.all())
book.save() book.save()
authors = self.get_authors_from_data(data) if not update_cover:
for author in authors: return book
book.authors.add(author)
if authors:
book.author_text = ', '.join(a.name for a in authors)
book.save()
cover = self.get_cover_from_data(data) cover = self.get_cover_from_data(data)
if cover: if cover:
@ -106,16 +170,61 @@ class AbstractConnector(ABC):
key = getattr(book, self.key_name) key = getattr(book, self.key_name)
data = self.load_book_data(key) data = self.load_book_data(key)
if book.sync_cover:
book.cover.save(*self.get_cover_from_data(data), save=True)
if book.sync: if book.sync:
book = self.update_book_from_data(book, data) book = self.update_book_from_data(
book, data, update_cover=book.sync_cover)
else:
cover = self.get_cover_from_data(data)
if cover:
book.cover.save(*cover, save=True)
return book return book
def load_book_data(self, remote_id): def match_from_mappings(self, data):
''' default method for loading book data ''' ''' try to find existing copies of this book using various keys '''
return get_data(remote_id) keys = [
('openlibrary_key', models.Book),
('librarything_key', models.Book),
('goodreads_key', models.Book),
('lccn', models.Work),
('isbn_10', models.Edition),
('isbn_13', models.Edition),
('oclc_number', models.Edition),
('asin', models.Edition),
]
noop = lambda x: x
for key, model in keys:
formatter = None
if key in self.key_mappings:
key, formatter = self.key_mappings[key]
if not formatter:
formatter = noop
value = data.get(key)
if not value:
continue
value = formatter(value)
match = model.objects.select_subclasses().filter(
**{key: value}).first()
if match:
return match
@abstractmethod
def is_work_data(self, data):
    ''' differentiate works and editions

    get_or_create_book calls this on the data it fetched for a remote id:
    a truthy result means the data is handled as a work, anything else
    means it is handled as an edition. '''
@abstractmethod
def get_edition_from_work_data(self, data):
    ''' every work needs at least one edition

    Given work data, return the data for an edition of that work.
    get_or_create_book wraps this call in `except KeyError` and falls back
    to re-using the work data as the edition data, so implementations
    should raise KeyError when no edition data can be found. '''
@abstractmethod
def get_work_from_edition_date(self, data):
    ''' every edition needs a work

    Given edition data, return the data for its parent work.
    get_or_create_book wraps this call in `except KeyError` and falls back
    to re-using the edition data as the work data, so implementations
    should raise KeyError when no work data can be found. '''
    # NOTE(review): "date" in the name looks like a typo for "data";
    # renaming would need every connector and caller updated together.
@abstractmethod @abstractmethod
@ -138,23 +247,11 @@ class AbstractConnector(ABC):
''' create a SearchResult obj from json ''' ''' create a SearchResult obj from json '''
@abstractmethod
def get_or_create_book(self, book_id):
''' request and format a book given an identifier '''
# return book model obj
@abstractmethod @abstractmethod
def expand_book_data(self, book): def expand_book_data(self, book):
''' get more info on a book ''' ''' get more info on a book '''
@abstractmethod
def get_or_create_author(self, book_id):
''' request and format a book given an identifier '''
# return book model obj
def update_from_mappings(obj, data, mappings): def update_from_mappings(obj, data, mappings):
''' assign data to model with mappings ''' ''' assign data to model with mappings '''
noop = lambda x: x noop = lambda x: x
@ -175,37 +272,6 @@ def update_from_mappings(obj, data, mappings):
return obj return obj
def match_from_mappings(data, mappings):
''' try to find existing copies of this book using various keys '''
keys = [
('openlibrary_key', models.Book),
('librarything_key', models.Book),
('goodreads_key', models.Book),
('lccn', models.Work),
('isbn_10', models.Edition),
('isbn_13', models.Edition),
('oclc_number', models.Edition),
('asin', models.Edition),
]
noop = lambda x: x
for key, model in keys:
formatter = None
if key in mappings:
key, formatter = mappings[key]
if not formatter:
formatter = noop
value = data.get(key)
if not value:
continue
value = formatter(value)
match = model.objects.select_subclasses().filter(
**{key: value}).first()
if match:
return match
def has_attr(obj, key): def has_attr(obj, key):
''' helper function to check if a model object has a key ''' ''' helper function to check if a model object has a key '''
try: try:
@ -229,7 +295,7 @@ def get_data(url):
resp = requests.get( resp = requests.get(
url, url,
headers={ headers={
'Accept': 'application/activity+json; charset=utf-8', 'Accept': 'application/json; charset=utf-8',
}, },
) )
if not resp.ok: if not resp.ok:

View file

@ -1,10 +1,8 @@
''' using another fedireads instance as a source of book data ''' ''' using another fedireads instance as a source of book data '''
import re
from uuid import uuid4 from uuid import uuid4
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.db import transaction
import requests import requests
from fedireads import models from fedireads import models
@ -15,71 +13,29 @@ from .abstract_connector import update_from_mappings, get_date, get_data
class Connector(AbstractConnector): class Connector(AbstractConnector):
''' interact with other instances ''' ''' interact with other instances '''
def __init__(self, identifier): def __init__(self, identifier):
self.key_mappings = { super().__init__(identifier)
'isbn_13': ('isbn_13', None),
'isbn_10': ('isbn_10', None),
'oclc_numbers': ('oclc_number', None),
'lccn': ('lccn', None),
}
self.book_mappings = self.key_mappings.copy() self.book_mappings = self.key_mappings.copy()
self.book_mappings.update({ self.book_mappings.update({
'published_date': ('published_date', get_date), 'published_date': ('published_date', get_date),
'first_published_date': ('first_published_date', get_date), 'first_published_date': ('first_published_date', get_date),
}) })
super().__init__(identifier)
def format_search_result(self, search_result): def is_work_data(self, data):
return SearchResult(**search_result) return data['book_type'] == 'Work'
def parse_search_data(self, data): def get_edition_from_work_data(self, data):
return data return data['editions'][0]
def get_or_create_book(self, remote_id): def get_work_from_edition_date(self, data):
''' pull up a book record by whatever means possible ''' return data['work']
# re-construct a remote id from the int and books_url
if re.match(r'^\d+$', remote_id):
remote_id = self.books_url + '/' + remote_id
book = models.Book.objects.select_subclasses().filter(
remote_id=remote_id
).first()
if book:
if isinstance(book, models.Work):
return book.default_edition
return book
# no book was found, so we start creating a new one
data = get_data(remote_id)
if data['book_type'] == 'work': def get_authors_from_data(self, data):
work_data = data for author_url in data.get('authors', []):
try: yield self.get_or_create_author(author_url)
edition_data = data['editions'][0]
except KeyError:
# hack: re-use the work data as the edition data
edition_data = data
else:
edition_data = data
try:
work_data = data['work']
except KeyError:
# hack: re-use the work data as the edition data
work_data = data
with transaction.atomic():
# create both work and a default edition
work_key = work_data.get('url')
work = self.create_book(work_key, work_data, models.Work)
ed_key = edition_data.get('url')
edition = self.create_book(ed_key, edition_data, models.Edition)
edition.default = True
edition.parent_work = work
edition.save()
return edition
def get_cover_from_data(self, data): def get_cover_from_data(self, data):
@ -96,14 +52,6 @@ class Connector(AbstractConnector):
return [image_name, image_content] return [image_name, image_content]
def get_authors_from_data(self, data):
authors = []
for author_url in data.get('authors', []):
authors.append(self.get_or_create_author(author_url))
return authors
def get_or_create_author(self, remote_id): def get_or_create_author(self, remote_id):
''' load that author ''' ''' load that author '''
try: try:
@ -125,16 +73,14 @@ class Connector(AbstractConnector):
return author return author
def parse_search_data(self, data):
return data
def format_search_result(self, search_result):
return SearchResult(**search_result)
def expand_book_data(self, book): def expand_book_data(self, book):
# TODO # TODO
pass pass
def get_cover(cover_url):
''' download the cover '''
image_name = cover_url.split('/')[-1]
response = requests.get(cover_url)
if not response.ok:
response.raise_for_status()
image_content = ContentFile(response.content)
return [image_name, image_content]

View file

@ -3,7 +3,6 @@ import re
import requests import requests
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.db import transaction
from fedireads import models from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult from .abstract_connector import AbstractConnector, SearchResult
@ -15,6 +14,7 @@ from .openlibrary_languages import languages
class Connector(AbstractConnector): class Connector(AbstractConnector):
''' instantiate a connector for OL ''' ''' instantiate a connector for OL '''
def __init__(self, identifier): def __init__(self, identifier):
super().__init__(identifier)
get_first = lambda a: a[0] get_first = lambda a: a[0]
self.key_mappings = { self.key_mappings = {
'isbn_13': ('isbn_13', get_first), 'isbn_13': ('isbn_13', get_first),
@ -32,12 +32,62 @@ class Connector(AbstractConnector):
'number_of_pages': ('pages', None), 'number_of_pages': ('pages', None),
'series': ('series', get_first), 'series': ('series', get_first),
}) })
super().__init__(identifier)
def is_work_data(self, data):
    ''' differentiate works and editions by their openlibrary key

    A key whose final segment looks like "OL<digits>M" is an edition;
    anything else is treated as a work. Keys may be bare ("OL123M") or
    paths ("/books/OL123M"), so only the last path segment is checked.
    Raises KeyError if the data has no "key". '''
    olkey = data['key'].split('/')[-1]
    return not re.match(r'^OL\d+M$', olkey)
def get_edition_from_work_data(self, data):
    ''' load the editions of a work and pick one to use as the default

    Raises KeyError when the work data has no key, the editions response
    has no entries, or no edition could be picked. KeyError is what the
    caller in AbstractConnector.get_or_create_book catches in order to
    fall back to re-using the work data as the edition data. '''
    # a missing key raises KeyError here, before any request is made
    key = data['key']
    # keys can be paths like "/works/OL123W"; avoid a doubled slash
    url = '%s/%s/editions' % (self.books_url, key.lstrip('/'))
    editions = get_data(url)
    edition = pick_default_edition(editions['entries'])
    if not edition:
        raise KeyError('no edition found for %s' % key)
    return edition
def get_work_from_edition_date(self, data):
    ''' load the parent work of an edition

    Raises KeyError when the edition data does not reference a work.
    KeyError is what the caller in AbstractConnector.get_or_create_book
    catches in order to fall back to re-using the edition data as the
    work data. '''
    try:
        key = data['works'][0]['key']
    except IndexError as err:
        # an empty works list means the same thing as a missing one
        raise KeyError('works') from err
    # keys can be paths like "/works/OL123W"; avoid a doubled slash
    url = '%s/%s' % (self.books_url, key.lstrip('/'))
    return get_data(url)
def get_authors_from_data(self, data):
    ''' lazily yield an author for each entry in the data's "authors" '''
    for entry in data.get('authors', []):
        # the author may be nested under an "author" field
        blob = entry.get('author', entry)
        # keep only what follows the last slash:
        # "/authors/OL1234567A" becomes "OL1234567A"
        _, _, olkey = blob['key'].rpartition('/')
        yield self.get_or_create_author(olkey)
def get_cover_from_data(self, data):
    ''' download the first cover listed in the data, if there is one

    Returns None when the data lists no covers, otherwise a
    [file name, ContentFile] pair. A failed download raises via
    raise_for_status. '''
    covers = data.get('covers')
    if not covers:
        return None

    # medium ("-M") size of the first listed cover
    image_name = '%s-M.jpg' % covers[0]
    response = requests.get('%s/b/id/%s' % (self.covers_url, image_name))
    if not response.ok:
        response.raise_for_status()
    return [image_name, ContentFile(response.content)]
def parse_search_data(self, data):
    ''' pull the "docs" entry out of a search response (None if absent) '''
    docs = data.get('docs')
    return docs
def format_search_result(self, doc): def format_search_result(self, doc):
key = doc['key'] # build the absolute id from the openlibrary key
key = key.split('/')[-1] key = self.books_url + doc['key']
author = doc.get('author_name') or ['Unknown'] author = doc.get('author_name') or ['Unknown']
return SearchResult( return SearchResult(
doc.get('title'), doc.get('title'),
@ -47,84 +97,6 @@ class Connector(AbstractConnector):
) )
def parse_search_data(self, data):
return data.get('docs')
def get_or_create_book(self, olkey):
''' pull up a book record by whatever means possible.
if you give a work key, it should give you the default edition,
annotated with work data. '''
book = models.Book.objects.select_subclasses().filter(
openlibrary_key=olkey
).first()
if book:
if isinstance(book, models.Work):
return book.default_edition
return book
# no book was found, so we start creating a new one
if re.match(r'^OL\d+W$', olkey):
with transaction.atomic():
# create both work and a default edition
work_data = self.load_book_data(olkey)
work = self.create_book(olkey, work_data, models.Work)
edition_options = self.load_edition_data(olkey).get('entries')
edition_data = pick_default_edition(edition_options)
if not edition_data:
# hack: re-use the work data as the edition data
edition_data = work_data
key = edition_data.get('key').split('/')[-1]
edition = self.create_book(key, edition_data, models.Edition)
edition.default = True
edition.parent_work = work
edition.save()
else:
with transaction.atomic():
edition_data = self.load_book_data(olkey)
edition = self.create_book(olkey, edition_data, models.Edition)
work_data = edition_data.get('works')
if not work_data:
# hack: we're re-using the edition data as the work data
work_key = olkey
else:
work_key = work_data[0]['key'].split('/')[-1]
work = models.Work.objects.filter(
openlibrary_key=work_key
).first()
if not work:
work_data = self.load_book_data(work_key)
work = self.create_book(work_key, work_data, models.Work)
edition.parent_work = work
edition.save()
if not edition.authors and work.authors:
edition.authors.set(work.authors.all())
edition.author_text = ', '.join(a.name for a in edition.authors)
return edition
def get_authors_from_data(self, data):
''' parse author json and load or create authors '''
authors = []
for author_blob in data.get('authors', []):
# this id is "/authors/OL1234567A" and we want just "OL1234567A"
author_blob = author_blob.get('author', author_blob)
author_id = author_blob['key'].split('/')[-1]
authors.append(self.get_or_create_author(author_id))
return authors
def load_book_data(self, olkey):
''' query openlibrary for data on a book '''
url = '%s/works/%s.json' % (self.books_url, olkey)
return get_data(url)
def load_edition_data(self, olkey): def load_edition_data(self, olkey):
''' query openlibrary for editions of a work ''' ''' query openlibrary for editions of a work '''
url = '%s/works/%s/editions.json' % (self.books_url, olkey) url = '%s/works/%s/editions.json' % (self.books_url, olkey)
@ -167,8 +139,8 @@ class Connector(AbstractConnector):
'bio': ('bio', get_description), 'bio': ('bio', get_description),
} }
author = update_from_mappings(author, data, mappings) author = update_from_mappings(author, data, mappings)
# TODO this is making some BOLD assumption
name = data.get('name') name = data.get('name')
# TODO this is making some BOLD assumption
if name: if name:
author.last_name = name.split(' ')[-1] author.last_name = name.split(' ')[-1]
author.first_name = ' '.join(name.split(' ')[:-1]) author.first_name = ' '.join(name.split(' ')[:-1])
@ -177,21 +149,6 @@ class Connector(AbstractConnector):
return author return author
def get_cover_from_data(self, data):
''' ask openlibrary for the cover '''
if not data.get('covers'):
return None
cover_id = data.get('covers')[0]
image_name = '%s-M.jpg' % cover_id
url = '%s/b/id/%s' % (self.covers_url, image_name)
response = requests.get(url)
if not response.ok:
response.raise_for_status()
image_content = ContentFile(response.content)
return [image_name, image_content]
def get_description(description_blob): def get_description(description_blob):
''' descriptions can be a string or a dict ''' ''' descriptions can be a string or a dict '''
if isinstance(description_blob, dict): if isinstance(description_blob, dict):

View file

@ -42,7 +42,7 @@ class Connector(AbstractConnector):
def format_search_result(self, book): def format_search_result(self, book):
return SearchResult( return SearchResult(
book.title, book.title,
book.id, book.absolute_id,
book.author_text, book.author_text,
book.published_date.year if book.published_date else None, book.published_date.year if book.published_date else None,
) )
@ -59,17 +59,14 @@ class Connector(AbstractConnector):
return None return None
def get_or_create_author(self, author_id): def is_work_data(self, data):
''' load that author ''' pass
try:
return models.Author.objects.get(id=author_id)
except ObjectDoesNotExist:
pass
def get_edition_from_work_data(self, data):
pass
def parse_search_data(self, data): def get_work_from_edition_date(self, data):
''' it's already in the right format, don't even worry about it ''' pass
return data
def get_authors_from_data(self, data): def get_authors_from_data(self, data):
return None return None
@ -77,8 +74,9 @@ class Connector(AbstractConnector):
def get_cover_from_data(self, data): def get_cover_from_data(self, data):
return None return None
def update_book(self, book_obj, data=None): def parse_search_data(self, data):
pass ''' it's already in the right format, don't even worry about it '''
return data
def expand_book_data(self, book): def expand_book_data(self, book):
pass pass

View file

@ -13,7 +13,11 @@
{% for result in result_set.results %} {% for result in result_set.results %}
<div> <div>
<a href="/book/{% if not result_set.connector.local %}{{ result_set.connector.id }}:{{ result_set.connector.key_name}}:{% endif %}{{ result.key }}">{{ result.title }}</a> by {{ result.author }} ({{ result.year }}) <form action="/resolve_book" method="POST">
{% csrf_token %}
<input type="hidden" name="remote_id" value="{{ result.key }}">
<button type="submit">{{ result.title }} by {{ result.author }} ({{ result.year }})</button>
</form>
</div> </div>
{% endfor %} {% endfor %}
</section> </section>

View file

@ -58,7 +58,7 @@ urlpatterns = [
re_path(r'%s/replies(.json)?/?$' % status_path, views.replies_page), re_path(r'%s/replies(.json)?/?$' % status_path, views.replies_page),
# books # books
re_path(r'book/(?P<book_id>[\w_:\d]+)(.json)?/?$', views.book_page), re_path(r'%s(.json)?/?$' % book_path, views.book_page),
re_path(r'%s/(?P<tab>friends|local|federated)?$' % book_path, views.book_page), re_path(r'%s/(?P<tab>friends|local|federated)?$' % book_path, views.book_page),
re_path(r'%s/edit/?$' % book_path, views.edit_book_page), re_path(r'%s/edit/?$' % book_path, views.edit_book_page),
re_path(r'^editions/(?P<work_id>\d+)/?$', views.editions_page), re_path(r'^editions/(?P<work_id>\d+)/?$', views.editions_page),
@ -77,6 +77,7 @@ urlpatterns = [
re_path(r'^edit_profile/?$', actions.edit_profile), re_path(r'^edit_profile/?$', actions.edit_profile),
re_path(r'^import_data/?', actions.import_data), re_path(r'^import_data/?', actions.import_data),
re_path(r'^resolve_book/?', actions.resolve_book),
re_path(r'^edit_book/(?P<book_id>\d+)/?', actions.edit_book), re_path(r'^edit_book/(?P<book_id>\d+)/?', actions.edit_book),
re_path(r'^upload_cover/(?P<book_id>\d+)/?', actions.upload_cover), re_path(r'^upload_cover/(?P<book_id>\d+)/?', actions.upload_cover),

View file

@ -114,6 +114,13 @@ def edit_profile(request):
return redirect('/user/%s' % request.user.localname) return redirect('/user/%s' % request.user.localname)
def resolve_book(request):
    ''' figure out the local path to a book from a remote_id

    Reads "remote_id" from the POST data, loads or creates the matching
    book, and redirects to its local page. Responds with 400 when no
    remote_id was submitted. '''
    # imported locally so this block does not depend on the module imports
    from django.http import HttpResponseBadRequest

    remote_id = request.POST.get('remote_id')
    if not remote_id:
        # e.g. a GET request or an empty form field: nothing to look up
        return HttpResponseBadRequest()

    book = get_or_create_book(remote_id, key='remote_id')
    return redirect('/book/%d' % book.id)
@login_required @login_required
def edit_book(request, book_id): def edit_book(request, book_id):
''' edit a book cool ''' ''' edit a book cool '''

View file

@ -390,14 +390,6 @@ def edit_profile_page(request):
def book_page(request, book_id, tab='friends'): def book_page(request, book_id, tab='friends'):
''' info about a book ''' ''' info about a book '''
if ':' in book_id:
try:
connector_id, key, book_id = book_id.split(':')
except ValueError:
return HttpResponseNotFound()
book = get_or_create_book(book_id, key=key, connector_id=connector_id)
return redirect('/book/%d' % book.id)
book = get_or_create_book(book_id) book = get_or_create_book(book_id)
if is_api_request(request): if is_api_request(request):
return JsonResponse(activitypub.get_book(book)) return JsonResponse(activitypub.get_book(book))