Get more data out of openlibrary

This commit is contained in:
Mouse Reeve 2020-03-27 21:28:52 -07:00
parent 5c475e448a
commit 51e7a50b86
5 changed files with 143 additions and 28 deletions

View file

@ -4,6 +4,35 @@ from fedireads.settings import DOMAIN
def get_book(book): def get_book(book):
''' activitypub serialize a book ''' ''' activitypub serialize a book '''
# optional attributes that get copied onto the serialized activity when the
# book object has them. Each name appears exactly once; the order of first
# appearance is kept so the resulting dict is laid out the same way.
fields = [
    'sort_title',
    'subtitle',
    'isbn',
    'oclc_number',
    'openlibrary_key',
    'librarything_key',
    'fedireads_key',
    'lccn',
    'pages',
    'physical_format',
    'misc_identifiers',
    'source_url',
    'sync',
    'last_sync_date',
    'description',
    'language',
    'series',
    'series_number',
    'subjects',
    'subject_places',
]
activity = { activity = {
'@context': 'https://www.w3.org/ns/activitystreams', '@context': 'https://www.w3.org/ns/activitystreams',
'type': 'Document', 'type': 'Document',
@ -11,30 +40,17 @@ def get_book(book):
'name': book.title, 'name': book.title,
'url': book.absolute_id, 'url': book.absolute_id,
'sort_title': book.sort_title, 'authors': [get_author(a) for a in book.authors.all()],
'subtitle': book.subtitle,
'openlibrary_key': book.openlibrary_key,
'librarything_key': book.librarything_key,
'fedireads_key': book.fedireads_key,
'misc_identifiers': book.misc_identifiers,
'source_url': book.source_url,
'sync': book.sync,
'last_sync_date': book.last_sync_date,
'description': book.description,
'language': book.language,
'series': book.series,
'series_number': book.series_number,
'first_published_date': book.first_published_date.isoformat() if \ 'first_published_date': book.first_published_date.isoformat() if \
book.first_published_date else None, book.first_published_date else None,
'published_date': book.published_date.isoformat() if \ 'published_date': book.published_date.isoformat() if \
book.published_date else None, book.published_date else None,
'parent_work': book.parent_work.absolute_id if \ 'parent_work': book.parent_work.absolute_id if \
book.parent_work else None, book.parent_work else None,
'authors': [get_author(a) for a in book.authors.all()],
} }
# copy over whichever of the optional fields this particular object provides
for field in fields:
    if hasattr(book, field):
        # getattr() follows the same lookup path as the hasattr() check
        # above, so the two can never disagree about a field
        activity[field] = getattr(book, field)
if book.cover: if book.cover:
image_path = book.cover.url image_path = book.cover.url
@ -45,7 +61,7 @@ def get_book(book):
'url': 'https://%s%s' % (DOMAIN, image_path), 'url': 'https://%s%s' % (DOMAIN, image_path),
'name': 'Cover of "%s"' % book.title, 'name': 'Cover of "%s"' % book.title,
}] }]
return {k: v for (k, v) in activity.items() if v} return {k: v for (k, v) in activity.items() if v}
def get_author(author): def get_author(author):

View file

@ -26,6 +26,15 @@ class AbstractConnector(ABC):
return False return False
return True return True
def has_attr(self, obj, key):
    ''' report whether a model object exposes the attribute named `key` '''
    try:
        found = hasattr(obj, key)
    except ValueError:
        # the lookup itself blew up; treat the attribute as absent
        found = False
    return found
@abstractmethod @abstractmethod
def search(self, query): def search(self, query):
''' free text search ''' ''' free text search '''

View file

@ -1,4 +1,5 @@
''' openlibrary data connector ''' ''' openlibrary data connector '''
from datetime import datetime
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
import re import re
@ -60,14 +61,28 @@ class Connector(AbstractConnector):
data = response.json() data = response.json()
# great, we can update our book. # great, we can update our book.
book.title = data['title'] noop = lambda x: x
description = data.get('description') mappings = {
if description: 'publish_date': ('published_date', get_date),
if isinstance(description, dict): 'first_publish_date': ('first_published_date', get_date),
description = description.get('value') 'description': ('description', get_description),
book.description = description 'isbn_13': ('isbn', noop),
book.pages = data.get('pages') 'oclc_numbers': ('oclc_number', lambda a: a[0]),
#book.published_date = data.get('publish_date') 'lccn': ('lccn', lambda a: a[0]),
}
for (key, value) in data.items():
if key in mappings:
key, formatter = mappings[key]
else:
key = key
formatter = noop
if self.has_attr(book, key):
book.__setattr__(key, formatter(value))
if 'identifiers' in data:
if 'goodreads' in data['identifiers']:
book.goodreads_key = data['identifiers']['goodreads']
book.save() book.save()
@ -140,3 +155,24 @@ class Connector(AbstractConnector):
def update_book(self, book_obj): def update_book(self, book_obj):
pass pass
def get_date(date_string):
    ''' helper function to try to interpret dates

    Tries each known format in turn ("March 2020", then "2020") and returns
    the first successful parse as a naive datetime. Returns None when the
    input is not a string or matches none of the formats, so the result can
    be stored directly as an empty date value.
    '''
    if not isinstance(date_string, str):
        return None
    # surrounding whitespace would otherwise make every format fail
    date_string = date_string.strip()

    formats = [
        '%B %Y',
        '%Y',
    ]
    for date_format in formats:
        try:
            return datetime.strptime(date_string, date_format)
        except ValueError:
            # not this format; fall through to the next candidate
            continue
    return None
def get_description(description_blob):
    ''' unwrap a description that arrives either as text or as a dict '''
    is_wrapped = isinstance(description_blob, dict)
    # dict-shaped descriptions keep their text under the 'value' key
    return description_blob.get('value') if is_wrapped else description_blob

View file

@ -0,0 +1,44 @@
# Generated by Django 3.0.3 on 2020-03-28 04:28
from django.db import migrations, models
import fedireads.utils.fields
# Schema migration: adds new metadata fields to the book and edition models
# and updates the choices on connector.connector_file.
class Migration(migrations.Migration):
# must be applied after migration 0020 of the fedireads app
dependencies = [
('fedireads', '0020_auto_20200327_2335'),
]
operations = [
# book.goodreads_key: nullable, unique identifier string
migrations.AddField(
model_name='book',
name='goodreads_key',
field=models.CharField(max_length=255, null=True, unique=True),
),
# book.subject_places: array of strings, defaults to an empty list
migrations.AddField(
model_name='book',
name='subject_places',
field=fedireads.utils.fields.ArrayField(base_field=models.CharField(max_length=255), blank=True, default=list, size=None),
),
# book.subjects: array of strings, defaults to an empty list
migrations.AddField(
model_name='book',
name='subjects',
field=fedireads.utils.fields.ArrayField(base_field=models.CharField(max_length=255), blank=True, default=list, size=None),
),
# edition.physical_format: nullable string
migrations.AddField(
model_name='edition',
name='physical_format',
field=models.CharField(max_length=255, null=True),
),
# edition.publishers: array of strings, defaults to an empty list
migrations.AddField(
model_name='edition',
name='publishers',
field=fedireads.utils.fields.ArrayField(base_field=models.CharField(max_length=255), blank=True, default=list, size=None),
),
# connector.connector_file: choices are 'openlibrary' and
# 'fedireads_connector', defaulting to 'openlibrary'
migrations.AlterField(
model_name='connector',
name='connector_file',
field=models.CharField(choices=[('openlibrary', 'Openlibrary'), ('fedireads_connector', 'Fedireads Connector')], default='openlibrary', max_length=255),
),
]

View file

@ -53,6 +53,7 @@ class Book(FedireadsModel):
openlibrary_key = models.CharField(max_length=255, unique=True, null=True) openlibrary_key = models.CharField(max_length=255, unique=True, null=True)
librarything_key = models.CharField(max_length=255, unique=True, null=True) librarything_key = models.CharField(max_length=255, unique=True, null=True)
fedireads_key = models.CharField(max_length=255, unique=True, default=uuid4) fedireads_key = models.CharField(max_length=255, unique=True, default=uuid4)
goodreads_key = models.CharField(max_length=255, unique=True, null=True)
misc_identifiers = JSONField(null=True) misc_identifiers = JSONField(null=True)
# info about where the data comes from and where/if to sync # info about where the data comes from and where/if to sync
@ -72,6 +73,12 @@ class Book(FedireadsModel):
language = models.CharField(max_length=255, null=True) language = models.CharField(max_length=255, null=True)
series = models.CharField(max_length=255, blank=True, null=True) series = models.CharField(max_length=255, blank=True, null=True)
series_number = models.CharField(max_length=255, blank=True, null=True) series_number = models.CharField(max_length=255, blank=True, null=True)
subjects = ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
subject_places = ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
# TODO: include an annotation about the type of authorship (ie, translator) # TODO: include an annotation about the type of authorship (ie, translator)
authors = models.ManyToManyField('Author') authors = models.ManyToManyField('Author')
# TODO: also store cover thumbnail # TODO: also store cover thumbnail
@ -95,11 +102,10 @@ class Book(FedireadsModel):
return '%s/%s/%s' % (base_path, model_name, self.openlibrary_key) return '%s/%s/%s' % (base_path, model_name, self.openlibrary_key)
def __repr__(self): def __repr__(self):
return "<{} key={!r} title={!r} author={!r}>".format( return "<{} key={!r} title={!r}>".format(
self.__class__, self.__class__,
self.openlibrary_key, self.openlibrary_key,
self.title, self.title,
self.author
) )
@ -115,6 +121,10 @@ class Edition(Book):
isbn = models.CharField(max_length=255, unique=True, null=True) isbn = models.CharField(max_length=255, unique=True, null=True)
oclc_number = models.CharField(max_length=255, unique=True, null=True) oclc_number = models.CharField(max_length=255, unique=True, null=True)
pages = models.IntegerField(null=True) pages = models.IntegerField(null=True)
physical_format = models.CharField(max_length=255, null=True)
publishers = ArrayField(
models.CharField(max_length=255), blank=True, default=list
)
class Author(FedireadsModel): class Author(FedireadsModel):