2020-09-21 15:10:37 +00:00
|
|
|
''' using another bookwyrm instance as a source of book data '''
|
2020-05-09 19:53:55 +00:00
|
|
|
from uuid import uuid4
|
2020-05-08 23:56:49 +00:00
|
|
|
|
2020-03-28 19:55:53 +00:00
|
|
|
from django.core.exceptions import ObjectDoesNotExist
|
|
|
|
from django.core.files.base import ContentFile
|
2020-11-04 21:09:11 +00:00
|
|
|
from django.db import transaction
|
2020-05-09 19:53:55 +00:00
|
|
|
import requests
|
2020-03-28 19:55:53 +00:00
|
|
|
|
2020-09-21 15:10:37 +00:00
|
|
|
from bookwyrm import models
|
2020-05-10 23:41:24 +00:00
|
|
|
from .abstract_connector import AbstractConnector, SearchResult, Mapping
|
2020-05-09 20:36:10 +00:00
|
|
|
from .abstract_connector import update_from_mappings, get_date, get_data
|
2020-03-28 19:55:53 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Connector(AbstractConnector):
    ''' interact with other instances '''

    def __init__(self, identifier):
        ''' set up the field mappings used to copy remote data onto models

        identifier is passed straight through to AbstractConnector. '''
        super().__init__(identifier)

        # identifier fields; some are tied to a specific model
        self.key_mappings = [
            Mapping('isbn_13', model=models.Edition),
            Mapping('isbn_10', model=models.Edition),
            Mapping('lccn', model=models.Work),
            Mapping('oclc_number', model=models.Edition),
            Mapping('openlibrary_key'),
            Mapping('goodreads_key'),
            Mapping('asin'),
        ]

        # books carry every identifier field plus the descriptive fields
        self.book_mappings = self.key_mappings + [
            Mapping('sort_title'),
            Mapping('subtitle'),
            Mapping('description'),
            Mapping('languages'),
            Mapping('series'),
            Mapping('series_number'),
            Mapping('subjects'),
            Mapping('subject_places'),
            Mapping('first_published_date'),
            Mapping('published_date'),
            Mapping('pages'),
            Mapping('physical_format'),
            Mapping('publishers'),
        ]

        # the two date fields are run through get_date
        self.author_mappings = [
            Mapping('name'),
            Mapping('bio'),
            Mapping('openlibrary_key'),
            Mapping('wikipedia_link'),
            Mapping('aliases'),
            Mapping('born', formatter=get_date),
            Mapping('died', formatter=get_date),
        ]

    def get_remote_id_from_data(self, data):
        ''' the remote id is the "id" field, or None when it is absent '''
        return data.get('id')

    def is_work_data(self, data):
        ''' true when the "type" field is "Work"

        raises KeyError if the data has no "type" field. '''
        return data['type'] == 'Work'

    def get_edition_from_work_data(self, data):
        ''' we're served a list of edition urls

        only the first entry of "editions" is loaded; a missing or empty
        list raises KeyError or IndexError. '''
        path = data['editions'][0]
        return get_data(path)

    def get_work_from_edition_date(self, data):
        ''' load whatever the edition's "work" field points to '''
        return get_data(data['work'])

    def get_authors_from_data(self, data):
        ''' lazily yield an author for each entry in "authors"

        this is a generator, so nothing is looked up or created until
        the result is iterated. a missing "authors" field yields nothing. '''
        for author_url in data.get('authors', []):
            yield self.get_or_create_author(author_url)

    def get_cover_from_data(self, data):
        ''' download the first attached image to use as a cover

        returns None when there is no attachment or the attachment has no
        url, otherwise a two item list: [image_name, image_content].
        an unsuccessful http response raises via raise_for_status. '''
        cover_data = data.get('attachment')
        if not cover_data:
            return None

        cover_url = cover_data[0].get('url')
        if not cover_url:
            # an attachment with no url is treated like no attachment
            return None

        response = requests.get(cover_url)
        # does nothing for a successful response
        response.raise_for_status()

        # random base name, extension is whatever follows the url's last dot
        extension = cover_url.split('.')[-1]
        image_name = '%s.%s' % (uuid4(), extension)
        image_content = ContentFile(response.content)
        return [image_name, image_content]

    def get_or_create_author(self, remote_id):
        ''' load that author

        returns the stored author with this remote_id if there is one,
        otherwise builds one from get_data(remote_id) and saves it. '''
        try:
            return models.Author.objects.get(remote_id=remote_id)
        except ObjectDoesNotExist:
            # not known locally yet, so it is ingested below
            pass

        data = get_data(remote_id)

        # ingest a new author
        author = models.Author(remote_id=remote_id)
        author = update_from_mappings(author, data, self.author_mappings)
        author.save()

        return author

    def parse_search_data(self, data):
        ''' search data is returned unchanged '''
        return data

    def format_search_result(self, search_result):
        ''' the result's keys are passed as SearchResult keyword arguments '''
        return SearchResult(**search_result)

    def expand_book_data(self, book):
        ''' create an edition for every entry the work's editions url lists

        book may be a work or an edition; an edition is swapped for its
        parent work first. '''
        work = book
        # go from the edition to the work, if necessary
        if isinstance(book, models.Edition):
            work = book.parent_work

        # it may be that we actually want to request this url
        editions_url = '%s/editions?page=true' % work.remote_id
        edition_options = get_data(editions_url)
        for edition_data in edition_options['orderedItems']:
            # creating the edition and attaching it to the work happen
            # in a single transaction
            with transaction.atomic():
                edition = self.create_book(
                    edition_data['id'],
                    edition_data,
                    models.Edition
                )
                edition.parent_work = work
                edition.save()
            # an edition with no authors of its own takes the work's authors
            if not edition.authors.exists() and work.authors.exists():
                edition.authors.set(work.authors.all())