moviewyrm/fedireads/openlibrary.py

110 lines
3.4 KiB
Python
Raw Normal View History

2020-01-25 21:46:30 +00:00
''' activitystream api and books '''
from django.core.exceptions import ObjectDoesNotExist
2020-01-29 08:05:58 +00:00
from django.core.files.base import ContentFile
2020-02-11 05:09:04 +00:00
import re
2020-01-28 20:13:28 +00:00
import requests
2020-01-29 09:05:27 +00:00
from fedireads.models import Author, Book
2020-01-27 01:55:02 +00:00
from fedireads.settings import OL_URL
2020-01-28 20:13:28 +00:00
2020-01-25 21:46:30 +00:00
2020-01-29 09:05:27 +00:00
def book_search(query):
    ''' search open library for works matching a free-text query

    ``query`` is sent as the ``q`` parameter to the ``/search.json`` endpoint
    under OL_URL. returns a list with one dict per doc for at most the first
    five docs of the response; each dict has the keys ``title``, ``olkey``
    (the last path segment of the doc's ``key``), ``year`` and ``author``
    (the first entry of ``author_name``, or 'Unknown' when it is missing or
    empty). an error response raises via ``raise_for_status``. '''
    search_url = '%s/search.json' % OL_URL
    response = requests.get(search_url, params={'q': query})
    # does nothing for an ok response, raises for an error status
    response.raise_for_status()

    top_docs = response.json()['docs'][:5]
    return [
        {
            'title': doc.get('title'),
            # the key looks like a path; only its final segment is kept
            'olkey': doc['key'].rsplit('/', 1)[-1],
            'year': doc.get('first_publish_year'),
            'author': (doc.get('author_name') or ['Unknown'])[0],
        }
        for doc in top_docs
    ]
2020-02-07 23:11:53 +00:00
2020-01-29 08:05:58 +00:00
def get_or_create_book(olkey, user=None, update=False):
    ''' add a book by looking up its open library "work" key. I'm conflating
    "book" and "work" here a bit; the table is called "book" in fedireads, but
    in open library parlance, it's a "work," which is the canonical umbrella
    item that contains all the editions ("book"s)

    olkey: the bare work id, e.g. "OL1234567W"
    user: optional; when given and authenticated it is stored as ``added_by``
    update: when True, an already-stored book is re-fetched from open library
        instead of being returned as-is

    returns the Book. raises ValueError when ``olkey`` is not a work id, and
    whatever ``raise_for_status`` raises when open library answers with an
    error status. '''
    # check if this is in the format of an OL book identifier. fullmatch is
    # used because `$` with re.match would also accept a trailing newline,
    # and the key is interpolated into the request url below
    if not re.fullmatch(r'OL\d+W', olkey):
        raise ValueError('Invalid OpenLibrary work ID')

    # get the existing entry from our db, if it exists
    try:
        book = Book.objects.get(openlibrary_key=olkey)
    except ObjectDoesNotExist:
        # no book was found, so we start creating a new one
        book = Book(openlibrary_key=olkey)
    else:
        if not update:
            return book
        # we have the book, but still want to update it from OL

    # load the book json from openlibrary.org
    response = requests.get('%s/works/%s.json' % (OL_URL, olkey))
    response.raise_for_status()
    data = response.json()
    book.data = data

    if user and user.is_authenticated:
        book.added_by = user

    # save first; the authors and the cover are attached to the book below
    book.save()

    # we also need to know the authors. a work without an "authors" entry
    # simply gets none, instead of failing after the book was already saved
    for author_blob in data.get('authors') or []:
        # this id starts as "/authors/OL1234567A" and we want just "OL1234567A"
        author_id = author_blob['author']['key'].split('/')[-1]
        book.authors.add(get_or_create_author(author_id))

    # and get the cover: only the first listed cover is downloaded
    covers = data.get('covers')
    if covers:
        book.cover.save(*get_cover(covers[0]), save=True)
    return book
2020-01-25 21:46:30 +00:00
2020-01-28 20:13:28 +00:00
2020-01-29 08:05:58 +00:00
def get_cover(cover_id):
    ''' ask openlibrary for the cover

    downloads the "-M" sized jpg for ``cover_id`` from covers.openlibrary.org
    and returns ``[image_name, image_content]``, where ``image_name`` is the
    file name used in the url and ``image_content`` is a ContentFile wrapping
    the downloaded bytes. an error response raises via ``raise_for_status``.
    '''
    # TODO: get medium and small versions
    image_name = '%s-M.jpg' % cover_id
    url = 'https://covers.openlibrary.org/b/id/%s' % image_name
    response = requests.get(url)
    response.raise_for_status()
    # use the body of the response that was just checked, rather than
    # downloading the image a second time with an unchecked request
    image_content = ContentFile(response.content)
    return [image_name, image_content]
2020-02-11 05:09:04 +00:00
def get_or_create_author(olkey, update=False):
    ''' load that author

    olkey: the bare open library author id, e.g. "OL1234567A"
    update: when True, an already-stored author is re-fetched from open
        library instead of being returned as-is

    returns the Author. raises ValueError when ``olkey`` is not an author id,
    and whatever ``raise_for_status`` raises when open library answers with
    an error status. '''
    # fullmatch is used because `$` with re.match would also accept a
    # trailing newline, and the key is interpolated into the request url
    if not re.fullmatch(r'OL\d+A', olkey):
        raise ValueError('Invalid OpenLibrary author ID')

    try:
        author = Author.objects.get(openlibrary_key=olkey)
    except ObjectDoesNotExist:
        # not in our db yet, so start a new entry
        author = Author(openlibrary_key=olkey)
    else:
        if not update:
            return author
        # we have the author, but still want to refresh it from OL; the
        # existing row is updated rather than saving a second Author with
        # the same key

    response = requests.get('%s/authors/%s.json' % (OL_URL, olkey))
    response.raise_for_status()
    author.data = response.json()
    author.save()
    return author