More connectors more problems

This commit is contained in:
Mouse Reeve 2020-03-28 12:55:53 -07:00
parent 6fadf22a54
commit d94dbf3974
6 changed files with 214 additions and 33 deletions

View file

@ -27,14 +27,6 @@ class AbstractConnector(ABC):
return True return True
def has_attr(self, obj, key):
    ''' report whether a model object exposes an attribute named key;
    a ValueError raised while looking the attribute up counts as "no" '''
    try:
        found = hasattr(obj, key)
    except ValueError:
        found = False
    return found
@abstractmethod @abstractmethod
def search(self, query): def search(self, query):
''' free text search ''' ''' free text search '''
@ -63,6 +55,29 @@ class AbstractConnector(ABC):
pass pass
def update_from_mappings(obj, data, mappings):
    ''' assign data to model with mappings

    obj: the object whose attributes get filled in
    data: dict of incoming field name -> raw value
    mappings: dict of incoming field name -> (attribute name, formatter);
        a falsy formatter means the raw value is stored untouched, and
        fields missing from mappings keep their own name and raw value

    only attributes that has_attr() reports on obj are written.
    returns the same obj (it is modified in place, not saved). '''
    for (key, value) in data.items():
        # unmapped fields fall through under their own name, unformatted
        attr_name, formatter = mappings.get(key, (key, None))
        if not has_attr(obj, attr_name):
            continue
        setattr(obj, attr_name, formatter(value) if formatter else value)
    return obj
def has_attr(obj, key):
    ''' report whether a model object exposes an attribute named key;
    a ValueError raised while looking the attribute up counts as "no" '''
    try:
        present = hasattr(obj, key)
    except ValueError:
        present = False
    return present
class SearchResult(object): class SearchResult(object):
''' standardized search result object ''' ''' standardized search result object '''
def __init__(self, title, key, author, year, raw_data): def __init__(self, title, key, author, year, raw_data):
@ -75,3 +90,4 @@ class SearchResult(object):
def __repr__(self): def __repr__(self):
return "<SearchResult key={!r} title={!r} author={!r}>".format( return "<SearchResult key={!r} title={!r} author={!r}>".format(
self.key, self.title, self.author) self.key, self.title, self.author)

View file

@ -0,0 +1,130 @@
''' using another fedireads instance as a source of book data '''
from datetime import datetime
from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
import requests
from fedireads import models
from .abstract_connector import AbstractConnector, update_from_mappings
class Connector(AbstractConnector):
    ''' connector that loads book data from another fedireads instance '''

    def __init__(self, identifier):
        super().__init__(identifier)

    def search(self, query):
        ''' send a free text query to the remote search url

        the query is url-encoded before being appended to search_url, so
        characters such as "&" or "#" stay part of the search term.
        returns the decoded json body; raises requests.HTTPError on a
        4xx/5xx response. '''
        # function-scope import so this block does not depend on a
        # change to the file's import section
        from urllib.parse import quote_plus

        resp = requests.get(
            '%s%s' % (self.search_url, quote_plus(query)),
            headers={
                'Accept': 'application/activity+json; charset=utf-8',
            },
        )
        # raise_for_status is a no-op for successful responses
        resp.raise_for_status()
        return resp.json()

    def get_or_create_book(self, fedireads_key):
        ''' pull up a book record by whatever means possible

        looks for a local book with this fedireads_key first; if there
        is none, fetches the record from base_url, copies its fields
        onto a new Book, then links parent work, authors and cover.
        returns the book, or None when the book is missing locally and
        this connector describes the local instance.
        raises requests.HTTPError on a 4xx/5xx response. '''
        try:
            return models.Book.objects.select_subclasses().get(
                fedireads_key=fedireads_key
            )
        except ObjectDoesNotExist:
            # NOTE(review): relies on self.model.is_self being provided
            # by the base class / connector model -- confirm
            if self.model.is_self:
                # we can't load a book from a remote server, this is it
                return None

        # no book was found, so we start creating a new one
        book = models.Book(fedireads_key=fedireads_key)
        response = requests.get(
            '%s/%s' % (self.base_url, fedireads_key),
            headers={
                'Accept': 'application/activity+json; charset=utf-8',
            },
        )
        response.raise_for_status()
        data = response.json()

        # great, we can update our book. date fields keep their name but
        # need parsing; every other field is copied over as-is
        mappings = {
            'published_date': ('published_date', get_date),
            'first_published_date': ('first_published_date', get_date),
        }
        book = update_from_mappings(book, data, mappings)
        # the book needs to be saved before authors can be attached
        book.save()

        if data.get('parent_work'):
            book.parent_work = self.get_or_create_book(data['parent_work'])
            # persist the link; without this it was only stored when a
            # cover happened to be saved afterwards
            book.save()

        for author_blob in data.get('authors', []):
            # the author may be nested under an 'author' key
            author_blob = author_blob.get('author', author_blob)
            # only the last path segment of the key is the identifier
            author_id = author_blob['key'].split('/')[-1]
            book.authors.add(self.get_or_create_author(author_id))

        if data.get('covers'):
            book.cover.save(*self.get_cover(data['covers'][0]), save=True)

        return book

    def get_or_create_author(self, fedireads_key):
        ''' load that author

        returns the local Author with this fedireads_key if there is
        one, otherwise fetches the author json from the remote server,
        saves a new Author built from it and returns that.
        raises requests.HTTPError on a 4xx/5xx response. '''
        try:
            return models.Author.objects.get(fedireads_key=fedireads_key)
        except ObjectDoesNotExist:
            pass

        resp = requests.get('%s/authors/%s.json' % (self.url, fedireads_key))
        resp.raise_for_status()
        data = resp.json()

        # ingest a new author
        author = models.Author(fedireads_key=fedireads_key)
        mappings = {
            'born': ('born', get_date),
            'died': ('died', get_date),
        }
        author = update_from_mappings(author, data, mappings)
        author.save()
        return author

    def get_cover(self, cover_url):
        ''' download a cover image from the given url

        returns [file name, ContentFile], where the file name is the
        last path segment of cover_url.
        raises requests.HTTPError on a 4xx/5xx response. '''
        image_name = cover_url.split('/')[-1]
        response = requests.get(cover_url)
        response.raise_for_status()
        image_content = ContentFile(response.content)
        return [image_name, image_content]

    def update_book(self, book_obj):
        ''' not implemented for this connector '''
        pass
def get_date(date_string):
    ''' helper function to try to interpret dates

    date_string is expected in the form "YYYY-MM-DDTHH:MM:SS".
    returns the parsed datetime, or False when the value is not a
    string in that format. '''
    try:
        return datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S")
    except (TypeError, ValueError):
        # TypeError covers non-string input such as None
        return False

View file

@ -6,7 +6,8 @@ import re
import requests import requests
from fedireads import models from fedireads import models
from .abstract_connector import AbstractConnector, SearchResult from .abstract_connector import AbstractConnector, SearchResult, \
update_from_mappings
class Connector(AbstractConnector): class Connector(AbstractConnector):
@ -17,7 +18,12 @@ class Connector(AbstractConnector):
def search(self, query): def search(self, query):
''' query openlibrary search ''' ''' query openlibrary search '''
resp = requests.get('%s/search.json' % self.url, params={'q': query}) resp = requests.get(
'%s%s' % (self.search_url, query),
headers={
'Accept': 'application/json; charset=utf-8',
},
)
if not resp.ok: if not resp.ok:
resp.raise_for_status() resp.raise_for_status()
data = resp.json() data = resp.json()
@ -61,24 +67,15 @@ class Connector(AbstractConnector):
data = response.json() data = response.json()
# great, we can update our book. # great, we can update our book.
noop = lambda x: x
mappings = { mappings = {
'publish_date': ('published_date', get_date), 'publish_date': ('published_date', get_date),
'first_publish_date': ('first_published_date', get_date), 'first_publish_date': ('first_published_date', get_date),
'description': ('description', get_description), 'description': ('description', get_description),
'isbn_13': ('isbn', noop), 'isbn_13': ('isbn', None),
'oclc_numbers': ('oclc_number', lambda a: a[0]), 'oclc_numbers': ('oclc_number', lambda a: a[0]),
'lccn': ('lccn', lambda a: a[0]), 'lccn': ('lccn', lambda a: a[0]),
} }
for (key, value) in data.items(): book = update_from_mappings(book, data, mappings)
if key in mappings:
key, formatter = mappings[key]
else:
key = key
formatter = noop
if self.has_attr(book, key):
book.__setattr__(key, formatter(value))
if 'identifiers' in data: if 'identifiers' in data:
if 'goodreads' in data['identifiers']: if 'goodreads' in data['identifiers']:
@ -123,18 +120,16 @@ class Connector(AbstractConnector):
data = response.json() data = response.json()
author = models.Author(openlibrary_key=olkey) author = models.Author(openlibrary_key=olkey)
bio = data.get('bio') mappings = {
if bio: 'birth_date': ('born', get_date),
if isinstance(bio, dict): 'death_date': ('died', get_date),
bio = bio.get('value') 'bio': ('bio', get_description),
author.bio = bio }
name = data['name'] author = update_from_mappings(author, data, mappings)
author.name = name
# TODO this is making some BOLD assumption # TODO this is making some BOLD assumption
name = data['name']
author.last_name = name.split(' ')[-1] author.last_name = name.split(' ')[-1]
author.first_name = ' '.join(name.split(' ')[:-1]) author.first_name = ' '.join(name.split(' ')[:-1])
#author.born = data.get('birth_date')
#author.died = data.get('death_date')
author.save() author.save()
return author return author

View file

@ -0,0 +1,41 @@
''' using a fedireads instance as a source of book data '''
from django.core.exceptions import ObjectDoesNotExist
from fedireads import models
from .abstract_connector import AbstractConnector
class Connector(AbstractConnector):
    ''' connector that reads books and authors from the local database '''

    def __init__(self, identifier):
        super().__init__(identifier)

    def search(self, query):
        ''' right now you can't search fedireads sorry, but when
        that gets implemented it will totally rule

        always returns an empty list. '''
        return []

    def get_or_create_book(self, fedireads_key):
        ''' since this is querying its own data source, it can only
        get a book, not load one from an external source

        returns the matching book, or None if there is none. '''
        try:
            return models.Book.objects.select_subclasses().get(
                fedireads_key=fedireads_key
            )
        except ObjectDoesNotExist:
            return None

    def get_or_create_author(self, fedireads_key):
        ''' load that author

        returns the matching Author, or None if there is none. '''
        try:
            # the lookup kwarg has to match the model field name
            return models.Author.objects.get(fedireads_key=fedireads_key)
        except ObjectDoesNotExist:
            return None

    def update_book(self, book_obj):
        ''' not implemented for this connector '''
        pass

View file

@ -1,3 +1,3 @@
''' settings book data connectors ''' ''' settings book data connectors '''
CONNECTORS = ['openlibrary', 'fedireads_connector'] CONNECTORS = ['openlibrary', 'self_connector', 'fedireads_connector']

View file

@ -22,12 +22,11 @@ Connector.objects.create(
Connector.objects.create( Connector.objects.create(
identifier=DOMAIN, identifier=DOMAIN,
connector_file='fedireads_connector', connector_file='self_connector',
base_url='https://%s/book' % DOMAIN, base_url='https://%s/book' % DOMAIN,
covers_url='https://%s/images/covers' % DOMAIN, covers_url='https://%s/images/covers' % DOMAIN,
search_url='https://%s/search?q=' % DOMAIN, search_url='https://%s/search?q=' % DOMAIN,
key_name='openlibrary_key', key_name='openlibrary_key',
is_self=True
) )