Get more data out of openlibrary

This commit is contained in:
Mouse Reeve 2020-03-27 21:28:52 -07:00
parent 5c475e448a
commit 51e7a50b86
5 changed files with 143 additions and 28 deletions

View file

@ -4,6 +4,35 @@ from fedireads.settings import DOMAIN
def get_book(book):
''' activitypub serialize a book '''
fields = [
'sort_title',
'subtitle',
'isbn',
'oclc_number',
'openlibrary_key',
'librarything_key',
'fedireads_key',
'lccn',
'isbn',
'oclc_number',
'pages',
'physical_format',
'misc_identifiers',
'source_url',
'sync',
'last_sync_date',
'description',
'language',
'series',
'series_number',
'subjects',
'subject_places',
'pages',
'physical_format',
]
activity = {
'@context': 'https://www.w3.org/ns/activitystreams',
'type': 'Document',
@ -11,30 +40,17 @@ def get_book(book):
'name': book.title,
'url': book.absolute_id,
'sort_title': book.sort_title,
'subtitle': book.subtitle,
'openlibrary_key': book.openlibrary_key,
'librarything_key': book.librarything_key,
'fedireads_key': book.fedireads_key,
'misc_identifiers': book.misc_identifiers,
'source_url': book.source_url,
'sync': book.sync,
'last_sync_date': book.last_sync_date,
'description': book.description,
'language': book.language,
'series': book.series,
'series_number': book.series_number,
'authors': [get_author(a) for a in book.authors.all()],
'first_published_date': book.first_published_date.isoformat() if \
book.first_published_date else None,
'published_date': book.published_date.isoformat() if \
book.published_date else None,
'parent_work': book.parent_work.absolute_id if \
book.parent_work else None,
'authors': [get_author(a) for a in book.authors.all()],
}
for field in fields:
if hasattr(book, field):
activity[field] = book.__getattribute__(field)
if book.cover:
image_path = book.cover.url

View file

@ -26,6 +26,15 @@ class AbstractConnector(ABC):
return False
return True
def has_attr(self, obj, key):
    ''' helper function to check if a model object has a key

    hasattr() only hides AttributeError, so a ValueError raised while the
    attribute is being looked up is reported here as the key being absent.
    '''
    try:
        present = hasattr(obj, key)
    except ValueError:
        present = False
    return present
@abstractmethod
def search(self, query):
''' free text search '''

View file

@ -1,4 +1,5 @@
''' openlibrary data connector '''
from datetime import datetime
from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
import re
@ -60,14 +61,28 @@ class Connector(AbstractConnector):
data = response.json()
# great, we can update our book.
book.title = data['title']
description = data.get('description')
if description:
if isinstance(description, dict):
description = description.get('value')
book.description = description
book.pages = data.get('pages')
#book.published_date = data.get('publish_date')
noop = lambda x: x
mappings = {
'publish_date': ('published_date', get_date),
'first_publish_date': ('first_published_date', get_date),
'description': ('description', get_description),
'isbn_13': ('isbn', noop),
'oclc_numbers': ('oclc_number', lambda a: a[0]),
'lccn': ('lccn', lambda a: a[0]),
}
for (key, value) in data.items():
if key in mappings:
key, formatter = mappings[key]
else:
key = key
formatter = noop
if self.has_attr(book, key):
book.__setattr__(key, formatter(value))
if 'identifiers' in data:
if 'goodreads' in data['identifiers']:
book.goodreads_key = data['identifiers']['goodreads']
book.save()
@ -140,3 +155,24 @@ class Connector(AbstractConnector):
def update_book(self, book_obj):
    ''' currently a no-op: the book object is accepted and left untouched '''
    return None
def get_date(date_string):
    ''' helper function to try to interpret dates

    Tries each known format in turn and returns the first successful parse
    as a naive datetime. Returns None when the value is not a string or
    matches none of the formats; None is used rather than False because the
    result is stored directly as a date value by the caller.
    '''
    # non-string input (None, numbers, lists) would make strptime raise
    # TypeError, so treat it as "no date" up front
    if not isinstance(date_string, str):
        return None

    # most specific format first
    formats = (
        '%B %d, %Y',
        '%B %Y',
        '%Y',
    )
    # strptime rejects leading/trailing whitespace the format doesn't mention
    cleaned = date_string.strip()
    for date_format in formats:
        try:
            return datetime.strptime(cleaned, date_format)
        except ValueError:
            continue
    return None
def get_description(description_blob):
    ''' pull the text out of a description

    a dict yields whatever is stored under its 'value' key (None if the key
    is missing); anything else is handed back unchanged
    '''
    if not isinstance(description_blob, dict):
        return description_blob
    return description_blob.get('value')

View file

@ -0,0 +1,44 @@
# Generated by Django 3.0.3 on 2020-03-28 04:28
from django.db import migrations, models
import fedireads.utils.fields
class Migration(migrations.Migration):
    ''' adds goodreads_key, subjects and subject_places to book, adds
    physical_format and publishers to edition, and alters
    connector.connector_file '''

    dependencies = [
        ('fedireads', '0020_auto_20200327_2335'),
    ]

    operations = [
        migrations.AddField(
            model_name='book',
            name='goodreads_key',
            field=models.CharField(
                max_length=255,
                null=True,
                unique=True,
            ),
        ),
        migrations.AddField(
            model_name='book',
            name='subject_places',
            field=fedireads.utils.fields.ArrayField(
                base_field=models.CharField(max_length=255),
                blank=True,
                default=list,
                size=None,
            ),
        ),
        migrations.AddField(
            model_name='book',
            name='subjects',
            field=fedireads.utils.fields.ArrayField(
                base_field=models.CharField(max_length=255),
                blank=True,
                default=list,
                size=None,
            ),
        ),
        migrations.AddField(
            model_name='edition',
            name='physical_format',
            field=models.CharField(
                max_length=255,
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='edition',
            name='publishers',
            field=fedireads.utils.fields.ArrayField(
                base_field=models.CharField(max_length=255),
                blank=True,
                default=list,
                size=None,
            ),
        ),
        migrations.AlterField(
            model_name='connector',
            name='connector_file',
            field=models.CharField(
                choices=[
                    ('openlibrary', 'Openlibrary'),
                    ('fedireads_connector', 'Fedireads Connector'),
                ],
                default='openlibrary',
                max_length=255,
            ),
        ),
    ]

View file

@ -53,6 +53,7 @@ class Book(FedireadsModel):
openlibrary_key = models.CharField(max_length=255, unique=True, null=True)
librarything_key = models.CharField(max_length=255, unique=True, null=True)
fedireads_key = models.CharField(max_length=255, unique=True, default=uuid4)
goodreads_key = models.CharField(max_length=255, unique=True, null=True)
misc_identifiers = JSONField(null=True)
# info about where the data comes from and where/if to sync
@ -72,6 +73,12 @@ class Book(FedireadsModel):
language = models.CharField(max_length=255, null=True)
series = models.CharField(max_length=255, blank=True, null=True)
series_number = models.CharField(max_length=255, blank=True, null=True)
subjects = ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
subject_places = ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
# TODO: include an annotation about the type of authorship (ie, translator)
authors = models.ManyToManyField('Author')
# TODO: also store cover thumbnail
@ -95,11 +102,10 @@ class Book(FedireadsModel):
return '%s/%s/%s' % (base_path, model_name, self.openlibrary_key)
def __repr__(self):
    ''' debugging representation: the class, openlibrary key and title

    the author is deliberately left out: the model exposes a many-to-many
    `authors` relation, not a single `author` attribute
    '''
    return "<{} key={!r} title={!r}>".format(
        self.__class__,
        self.openlibrary_key,
        self.title,
    )
@ -115,6 +121,10 @@ class Edition(Book):
isbn = models.CharField(max_length=255, unique=True, null=True)
oclc_number = models.CharField(max_length=255, unique=True, null=True)
pages = models.IntegerField(null=True)
physical_format = models.CharField(max_length=255, null=True)
publishers = ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
class Author(FedireadsModel):