moviewyrm/fedireads/connectors/fedireads_connector.py

141 lines
4.2 KiB
Python
Raw Normal View History

2020-03-28 19:55:53 +00:00
''' using another fedireads instance as a source of book data '''
2020-05-09 19:29:23 +00:00
import re
2020-05-09 19:53:55 +00:00
from uuid import uuid4
2020-03-28 19:55:53 +00:00
from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
from django.db import transaction
2020-05-09 19:53:55 +00:00
import requests
2020-03-28 19:55:53 +00:00
from fedireads import models
2020-05-09 19:53:55 +00:00
from .abstract_connector import AbstractConnector, SearchResult
2020-05-09 20:36:10 +00:00
from .abstract_connector import update_from_mappings, get_date, get_data
2020-03-28 19:55:53 +00:00
class Connector(AbstractConnector):
    ''' interact with other instances '''

    def __init__(self, identifier):
        ''' define the field mappings, then run the abstract connector setup

        Each mapping entry pairs a field name with a
        ``(field name, formatter)`` tuple; a formatter of ``None`` means no
        conversion function is supplied. Presumably the dict key is the
        remote field and the tuple names the local model field -- confirm
        against ``update_from_mappings`` in abstract_connector.

        identifier -- passed unchanged to ``AbstractConnector.__init__``
        '''
        # identifiers shared by every kind of book record
        self.key_mappings = {
            'isbn_13': ('isbn_13', None),
            'isbn_10': ('isbn_10', None),
            'oclc_numbers': ('oclc_number', None),
            'lccn': ('lccn', None),
        }
        # book records get the shared identifiers plus the date fields,
        # which are run through get_date
        self.book_mappings = self.key_mappings.copy()
        self.book_mappings.update({
            'published_date': ('published_date', get_date),
            'first_published_date': ('first_published_date', get_date),
        })
        # NOTE(review): the mappings are assigned before the parent
        # initializer runs, as in the original -- the parent may read them
        super().__init__(identifier)

    def format_search_result(self, search_result):
        ''' build a SearchResult from a mapping of its keyword arguments '''
        return SearchResult(**search_result)

    def parse_search_data(self, data):
        ''' search data is used as-is, so hand it straight back '''
        return data

    def get_or_create_book(self, remote_id):
        ''' pull up a book record by whatever means possible

        remote_id -- either a full remote id, or a bare numeric id (int or
            string of digits) which is appended to ``self.books_url``

        Returns the locally stored book when one matches ``remote_id`` (for
        a Work, its ``default_edition``); otherwise loads the record with
        ``get_data`` and creates a Work plus a default Edition, returning
        the Edition.
        '''
        # the numeric form may arrive as an actual int; normalise it so the
        # regex match and the string concatenation below both work
        if isinstance(remote_id, int):
            remote_id = str(remote_id)

        # re-construct a remote id from the int and books_url
        if re.match(r'^\d+$', remote_id):
            remote_id = self.books_url + '/' + remote_id

        book = models.Book.objects.select_subclasses().filter(
            remote_id=remote_id
        ).first()
        if book:
            if isinstance(book, models.Work):
                return book.default_edition
            return book

        # no book was found, so we start creating a new one
        data = get_data(remote_id)

        if data['book_type'] == 'work':
            work_data = data
            # hack: re-use the work data as the edition data when the work
            # lists no editions (key missing, null, or an empty list)
            editions = data.get('editions') or []
            edition_data = editions[0] if editions else data
        else:
            edition_data = data
            # hack: re-use the edition data as the work data when the
            # edition does not carry its parent work
            work_data = data.get('work') or data

        with transaction.atomic():
            # create both work and a default edition
            work_key = work_data.get('url')
            work = self.create_book(work_key, work_data, models.Work)

            ed_key = edition_data.get('url')
            edition = self.create_book(ed_key, edition_data, models.Edition)
            edition.default = True
            edition.parent_work = work
            edition.save()

        return edition

    def get_cover_from_data(self, data):
        ''' download the cover image referenced by a book record

        data -- mapping for the book; the cover is read from the ``url`` of
            the first entry under ``attachment``

        Returns ``[image_name, image_content]`` where ``image_content`` is a
        ContentFile of the downloaded bytes, or ``None`` when the record has
        no attachment or the attachment has no url. Raises the requests
        HTTP error when the download responds with an error status.
        '''
        cover_data = data.get('attachment')
        if not cover_data:
            return None

        cover_url = cover_data[0].get('url')
        if not cover_url:
            # an attachment without a url gives us nothing to download
            return None

        response = requests.get(cover_url)
        # raises only for 4xx/5xx responses, a no-op otherwise
        response.raise_for_status()

        # random file name that keeps the extension of the remote file; the
        # '.' separator has to be re-added because split() drops it
        extension = cover_url.split('.')[-1]
        image_name = str(uuid4()) + '.' + extension
        image_content = ContentFile(response.content)
        return [image_name, image_content]

    def get_authors_from_data(self, data):
        ''' load or create an author for each entry under ``authors`` '''
        return [
            self.get_or_create_author(author_url)
            for author_url in data.get('authors', [])
        ]

    def get_or_create_author(self, remote_id):
        ''' load that author

        Returns the stored Author with this ``remote_id`` when there is one;
        otherwise loads the record with ``get_data``, applies the ``born``
        and ``died`` date mappings, saves the new Author and returns it.
        '''
        try:
            return models.Author.objects.get(remote_id=remote_id)
        except ObjectDoesNotExist:
            # not stored locally yet, so fall through and ingest it
            pass

        data = get_data(remote_id)

        # ingest a new author
        author = models.Author(remote_id=remote_id)
        mappings = {
            'born': ('born', get_date),
            'died': ('died', get_date),
        }
        author = update_from_mappings(author, data, mappings)
        author.save()
        return author

    def expand_book_data(self, book):
        ''' not implemented for this connector yet; does nothing '''
        # TODO
        pass
2020-03-30 00:40:51 +00:00
def get_cover(cover_url):
    ''' fetch a cover image and pair it with a file name

    The file name is the last ``/``-separated segment of ``cover_url``.
    Returns ``[file_name, ContentFile]``; an error status on the download
    raises the requests HTTP error.
    '''
    # derive the name first, so a bad url value fails before any request
    file_name = cover_url.rsplit('/', 1)[-1]

    resp = requests.get(cover_url)
    # raises for an error status, does nothing for a successful one
    resp.raise_for_status()

    return [file_name, ContentFile(resp.content)]