From d94dbf3974260b5b33f546d9e8d9b5b5484b048f Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 28 Mar 2020 12:55:53 -0700 Subject: [PATCH] More connectors more problems --- fedireads/connectors/abstract_connector.py | 32 +++-- fedireads/connectors/fedireads_connector.py | 130 ++++++++++++++++++++ fedireads/connectors/openlibrary.py | 39 +++--- fedireads/connectors/self_connector.py | 41 ++++++ fedireads/connectors/settings.py | 2 +- init_db.py | 3 +- 6 files changed, 214 insertions(+), 33 deletions(-) create mode 100644 fedireads/connectors/fedireads_connector.py create mode 100644 fedireads/connectors/self_connector.py diff --git a/fedireads/connectors/abstract_connector.py b/fedireads/connectors/abstract_connector.py index 49b12e7f8..afa06397c 100644 --- a/fedireads/connectors/abstract_connector.py +++ b/fedireads/connectors/abstract_connector.py @@ -27,14 +27,6 @@ class AbstractConnector(ABC): return True - def has_attr(self, obj, key): - ''' helper function to check if a model object has a key ''' - try: - return hasattr(obj, key) - except ValueError: - return False - - @abstractmethod def search(self, query): ''' free text search ''' @@ -63,6 +55,29 @@ class AbstractConnector(ABC): pass +def update_from_mappings(obj, data, mappings): + ''' assign data to model with mappings ''' + noop = lambda x: x + for (key, value) in data.items(): + formatter = None + if key in mappings: + key, formatter = mappings[key] + if not formatter: + formatter = noop + + if has_attr(obj, key): + obj.__setattr__(key, formatter(value)) + return obj + + +def has_attr(obj, key): + ''' helper function to check if a model object has a key ''' + try: + return hasattr(obj, key) + except ValueError: + return False + + class SearchResult(object): ''' standardized search result object ''' def __init__(self, title, key, author, year, raw_data): @@ -75,3 +90,4 @@ class SearchResult(object): def __repr__(self): return "".format( self.key, self.title, self.author) + diff --git a/fedireads/connectors/fedireads_connector.py b/fedireads/connectors/fedireads_connector.py new file mode 100644 index 000000000..c3956c054 --- /dev/null +++ b/fedireads/connectors/fedireads_connector.py @@ -0,0 +1,130 @@ +''' using another fedireads instance as a source of book data ''' +from datetime import datetime +from django.core.exceptions import ObjectDoesNotExist +from django.core.files.base import ContentFile +import requests + +from fedireads import models +from .abstract_connector import AbstractConnector, update_from_mappings + + +class Connector(AbstractConnector): + ''' instantiate a connector ''' + def __init__(self, identifier): + super().__init__(identifier) + + + def search(self, query): + ''' right now you can't search fedireads, but... ''' + resp = requests.get( + '%s%s' % (self.search_url, query), + headers={ + 'Accept': 'application/activity+json; charset=utf-8', + }, + ) + if not resp.ok: + resp.raise_for_status() + + return resp.json() + + + def get_or_create_book(self, fedireads_key): + ''' pull up a book record by whatever means possible ''' + try: + book = models.Book.objects.select_subclasses().get( + fedireads_key=fedireads_key + ) + return book + except ObjectDoesNotExist: + if self.model.is_self: + # we can't load a book from a remote server, this is it + return None + # no book was found, so we start creating a new one + book = models.Book(fedireads_key=fedireads_key) + + response = requests.get( + '%s/%s' % (self.base_url, fedireads_key), + headers={ + 'Accept': 'application/activity+json; charset=utf-8', + }, + ) + if not response.ok: + response.raise_for_status() + + data = response.json() + + # great, we can update our book. + noop = lambda x: x + formatters = { + 'published_date': get_date, + 'first_published_date': get_date, + } + for (key, value) in data.items(): + formatter = formatters[key] if key in formatters else noop + + if self.has_attr(book, key): + book.__setattr__(key, formatter(value)) + book.save() + + if data.get('parent_work'): + work = self.get_or_create_book(data.get('parent_work')) + + book.parent_work = work + + for author_blob in data.get('authors', []): + author_blob = author_blob.get('author', author_blob) + author_id = author_blob['key'] + author_id = author_id.split('/')[-1] + book.authors.add(self.get_or_create_author(author_id)) + + if data.get('covers') and len(data['covers']): + book.cover.save(*self.get_cover(data['covers'][0]), save=True) + + return book + + + def get_or_create_author(self, fedireads_key): + ''' load that author ''' + try: + return models.Author.objects.get(fedireads_key=fedireads_key) + except ObjectDoesNotExist: + pass + + resp = requests.get('%s/authors/%s.json' % (self.url, fedireads_key)) + if not resp.ok: + resp.raise_for_status() + + data = resp.json() + + # ingest a new author + author = models.Author(fedireads_key=fedireads_key) + mappings = { + 'born': ('born', get_date), + 'died': ('died', get_date), + } + author = update_from_mappings(author, data, mappings) + author.save() + + return author + + + def get_cover(self, cover_url): + ''' ask openlibrary for the cover ''' + image_name = cover_url.split('/')[-1] + response = requests.get(cover_url) + if not response.ok: + response.raise_for_status() + image_content = ContentFile(response.content) + return [image_name, image_content] + + + def update_book(self, book_obj): + pass + + +def get_date(date_string): + ''' helper function to try to interpret dates ''' + try: + datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S") + except ValueError: + return False diff --git a/fedireads/connectors/openlibrary.py b/fedireads/connectors/openlibrary.py index 1bd2d2930..c52364295 100644 --- a/fedireads/connectors/openlibrary.py +++ b/fedireads/connectors/openlibrary.py @@ -6,7 +6,8 @@ import re import requests from fedireads import models -from .abstract_connector import AbstractConnector, SearchResult +from .abstract_connector import AbstractConnector, SearchResult, \ + update_from_mappings class Connector(AbstractConnector): @@ -17,7 +18,12 @@ class Connector(AbstractConnector): def search(self, query): ''' query openlibrary search ''' - resp = requests.get('%s/search.json' % self.url, params={'q': query}) + resp = requests.get( + '%s%s' % (self.search_url, query), + headers={ + 'Accept': 'application/json; charset=utf-8', + }, + ) if not resp.ok: resp.raise_for_status() data = resp.json() @@ -61,24 +67,15 @@ class Connector(AbstractConnector): data = response.json() # great, we can update our book. - noop = lambda x: x mappings = { 'publish_date': ('published_date', get_date), 'first_publish_date': ('first_published_date', get_date), 'description': ('description', get_description), - 'isbn_13': ('isbn', noop), + 'isbn_13': ('isbn', None), 'oclc_numbers': ('oclc_number', lambda a: a[0]), 'lccn': ('lccn', lambda a: a[0]), } - for (key, value) in data.items(): - if key in mappings: - key, formatter = mappings[key] - else: - key = key - formatter = noop - - if self.has_attr(book, key): - book.__setattr__(key, formatter(value)) + book = update_from_mappings(book, data, mappings) if 'identifiers' in data: if 'goodreads' in data['identifiers']: @@ -123,18 +120,16 @@ class Connector(AbstractConnector): data = response.json() author = models.Author(openlibrary_key=olkey) - bio = data.get('bio') - if bio: - if isinstance(bio, dict): - bio = bio.get('value') - author.bio = bio - name = data['name'] - author.name = name + mappings = { + 'birth_date': ('born', get_date), + 'death_date': ('died', get_date), + 'bio': ('bio', get_description), + } + author = update_from_mappings(author, data, mappings) # TODO this is making some BOLD assumption + name = data['name'] author.last_name = name.split(' ')[-1] author.first_name = ' '.join(name.split(' ')[:-1]) - #author.born = data.get('birth_date') - #author.died = data.get('death_date') author.save() return author diff --git a/fedireads/connectors/self_connector.py b/fedireads/connectors/self_connector.py new file mode 100644 index 000000000..ab701b38d --- /dev/null +++ b/fedireads/connectors/self_connector.py @@ -0,0 +1,41 @@ +''' using a fedireads instance as a source of book data ''' +from django.core.exceptions import ObjectDoesNotExist + +from fedireads import models +from .abstract_connector import AbstractConnector + + +class Connector(AbstractConnector): + ''' instantiate a connector ''' + def __init__(self, identifier): + super().__init__(identifier) + + + def search(self, query): + ''' right now you can't search fedireads sorry, but when + that gets implemented it will totally rule ''' + return [] + + + def get_or_create_book(self, fedireads_key): + ''' since this is querying its own data source, it can only + get a book, not load one from an external source ''' + try: + return models.Book.objects.select_subclasses().get( + fedireads_key=fedireads_key + ) + except ObjectDoesNotExist: + return None + + + def get_or_create_author(self, fedireads_key): + ''' load that author ''' + try: + return models.Author.objects.get(fedreads_key=fedireads_key) + except ObjectDoesNotExist: + pass + + + def update_book(self, book_obj): + pass + diff --git a/fedireads/connectors/settings.py b/fedireads/connectors/settings.py index 425e56ef7..52ba6662b 100644 --- a/fedireads/connectors/settings.py +++ b/fedireads/connectors/settings.py @@ -1,3 +1,3 @@ ''' settings book data connectors ''' -CONNECTORS = ['openlibrary', 'fedireads_connector'] +CONNECTORS = ['openlibrary', 'self_connector', 'fedireads_connector'] diff --git a/init_db.py b/init_db.py index 1487d356f..13f93f016 100644 --- a/init_db.py +++ b/init_db.py @@ -22,12 +22,11 @@ Connector.objects.create( Connector.objects.create( identifier=DOMAIN, - connector_file='fedireads_connector', + connector_file='self_connector', base_url='https://%s/book' % DOMAIN, covers_url='https://%s/images/covers' % DOMAIN, search_url='https://%s/search?q=' % DOMAIN, key_name='openlibrary_key', - is_self=True )