Adds generalized book data connectors

2024-11-25 11:01:12 +00:00 · 2020-03-07 12:22:28 -08:00 · 2020-03-07 12:22:28 -08:00 · 6f765bd6f4
commit 6f765bd6f4
parent d501e707ee
11 changed files with 249 additions and 144 deletions
--- a/fedireads/books_manager.py
+++ b/fedireads/books_manager.py
@ -0,0 +1,11 @@
+''' select and call a connector for whatever book task needs doing '''
+from fedireads.connectors import OpenLibraryConnector
+
+openlibrary = OpenLibraryConnector()  # shared instance used by the functions below
+def get_or_create_book(key):
+    ''' load a book by key; delegates to the openlibrary connector '''
+    return openlibrary.get_or_create_book(key)
+
+def search(query):
+    ''' free text book search; delegates to the openlibrary connector '''
+    return openlibrary.search(query)
--- a/fedireads/connectors/__init__.py
+++ b/fedireads/connectors/__init__.py
@ -0,0 +1,3 @@
+''' bring connectors into the namespace '''
+from .settings import CONNECTORS
+from .openlibrary import OpenLibraryConnector
--- a/fedireads/connectors/abstract_connector.py
+++ b/fedireads/connectors/abstract_connector.py
@ -0,0 +1,60 @@
+''' functionality outline for a book data connector '''
+from abc import ABC, abstractmethod
+
+from fedireads.connectors import CONNECTORS
+
+
+class AbstractConnector(ABC):
+    ''' base class for book data connectors; subclasses do the lookups '''
+
+    def __init__(self, connector_name):
+        # look up this connector's entry in CONNECTORS; all four keys below are required
+        settings = CONNECTORS.get(connector_name)
+        if not settings:
+            raise ValueError('No connector with name "%s"' % connector_name)
+
+        try:
+            self.url = settings['BASE_URL']
+            self.covers_url = settings['COVERS_URL']
+            self.db_field = settings['DB_KEY_FIELD']
+            self.key_name = settings['KEY_NAME']
+        except KeyError:
+            raise KeyError('Invalid connector settings')
+        # TODO: politeness settings
+
+
+    @abstractmethod
+    def search(self, query):
+        ''' free text search '''
+        # implementations return a list of SearchResult objects
+        pass
+
+
+    @abstractmethod
+    def get_or_create_book(self, book_id):
+        ''' request and format a book given an identifier '''
+        # implementations return a book model obj
+        pass
+
+
+    @abstractmethod
+    def get_or_create_author(self, book_id):
+        ''' request and format an author given an identifier '''
+        # implementations return an author model obj; book_id is the author's identifier
+        pass
+
+
+    @abstractmethod
+    def update_book(self, book_obj):
+        ''' sync a book with the canonical remote copy '''
+        # implementations return a book model obj
+        pass
+
+
+class SearchResult(object):
+    ''' standardized search result object built by a connector's search() '''
+    def __init__(self, title, key, author, year):
+        self.title = title
+        self.key = key  # book_results.html links to /book/<key>
+        self.author = author
+        self.year = year
--- a/fedireads/connectors/openlibrary.py
+++ b/fedireads/connectors/openlibrary.py
@ -0,0 +1,138 @@
+''' openlibrary data connector '''
+from django.core.exceptions import ObjectDoesNotExist
+from django.core.files.base import ContentFile
+import re
+import requests
+
+from fedireads import models
+from .abstract_connector import AbstractConnector, SearchResult
+
+
+class OpenLibraryConnector(AbstractConnector):
+    ''' book data connector that reads from the openlibrary json api '''
+    def __init__(self):
+        super().__init__('openlibrary')
+
+
+    def search(self, query):
+        ''' free text search on OL; returns at most 5 SearchResult objects '''
+        resp = requests.get('%s/search.json' % self.url, params={'q': query})
+        if not resp.ok:
+            resp.raise_for_status()
+        data = resp.json()
+        results = []
+
+        for doc in data['docs'][:5]:
+            key = doc['key']
+            key = key.split('/')[-1]  # keep only the last path segment
+            author = doc.get('author_name') or ['Unknown']
+            results.append(SearchResult(
+                doc.get('title'),
+                key,
+                author[0],
+                doc.get('first_publish_year'),
+            ))
+        return results
+
+
+    def get_or_create_book(self, olkey):
+        ''' return the stored book for an OL work/edition key, or fetch it '''
+        if re.match(r'^OL\d+W$', olkey):
+            model = models.Work
+        elif re.match(r'^OL\d+M$', olkey):
+            model = models.Edition
+        else:
+            raise ValueError('Invalid OpenLibrary ID')
+
+        try:
+            book = models.Book.objects.get(openlibrary_key=olkey)
+            return book
+        except ObjectDoesNotExist:
+            # no book was found, so we start creating a new one
+            book = model(openlibrary_key=olkey)
+
+        # load the book json from openlibrary.org
+        response = requests.get('%s/works/%s.json' % (self.url, olkey))
+        if not response.ok:
+            response.raise_for_status()
+
+        data = response.json()
+
+        # great, we can update our book.
+        book.title = data['title']
+        description = data.get('description')
+        if description:
+            if isinstance(description, dict):  # text may be wrapped in a dict
+                description = description.get('value')
+            book.description = description
+        book.pages = data.get('pages')
+        #book.published_date = data.get('publish_date')
+
+        # a "works" entry means this is an edition: link it to its parent work
+        if data.get('works'):
+            key = data.get('works')[0]['key']
+            key = key.split('/')[-1]
+            work = self.get_or_create_book(key)
+            book.parent_work = work
+        book.save()
+
+        # we also need to know the authors and get the cover
+        for author_blob in data.get('authors') or []:
+            # this id is "/authors/OL1234567A" and we want just "OL1234567A"
+            author_blob = author_blob.get('author', author_blob)
+            author_id = author_blob['key']
+            author_id = author_id.split('/')[-1]
+            book.authors.add(self.get_or_create_author(author_id))
+
+        if data.get('covers'):
+            book.cover.save(*self.get_cover(data['covers'][0]), save=True)
+
+        return book
+
+
+    def get_or_create_author(self, olkey):
+        ''' return the stored author for an OL author key, or fetch it '''
+        if not re.match(r'^OL\d+A$', olkey):
+            raise ValueError('Invalid OpenLibrary author ID')
+        try:
+            return models.Author.objects.get(openlibrary_key=olkey)
+        except ObjectDoesNotExist:
+            pass  # not in our db yet, so build it from openlibrary below
+
+        response = requests.get('%s/authors/%s.json' % (self.url, olkey))
+        if not response.ok:
+            response.raise_for_status()
+
+        data = response.json()
+        author = models.Author(openlibrary_key=olkey)
+        bio = data.get('bio')
+        if bio:
+            if isinstance(bio, dict):  # text may be wrapped in a dict
+                bio = bio.get('value')
+            author.bio = bio
+        name = data['name']
+        author.name = name
+        # TODO this is making some BOLD assumption
+        author.last_name = name.split(' ')[-1]
+        author.first_name = ' '.join(name.split(' ')[:-1])
+        #author.born = data.get('birth_date')
+        #author.died = data.get('death_date')
+        author.save()
+
+        return author
+
+
+    def get_cover(self, cover_id):
+        ''' download the "-M" cover image; returns [file name, ContentFile] '''
+        # TODO: get medium and small versions
+        image_name = '%s-M.jpg' % cover_id
+        url = '%s/b/id/%s' % (self.covers_url, image_name)
+        response = requests.get(url)
+        if not response.ok:
+            response.raise_for_status()
+        image_content = ContentFile(response.content)  # reuse the download
+        return [image_name, image_content]
+
+
+    def update_book(self, book_obj):
+        ''' not implemented yet: does nothing and returns None '''
--- a/fedireads/connectors/settings.py
+++ b/fedireads/connectors/settings.py
@ -0,0 +1,28 @@
+''' settings for book data connectors '''
+CONNECTORS = {
+    'openlibrary': {
+        'KEY_NAME': 'olkey',  # required; becomes AbstractConnector.key_name
+        'DB_KEY_FIELD': 'openlibrary_key',  # required; becomes .db_field
+        'POLITENESS_DELAY': 0,  # not read by AbstractConnector yet
+        'MAX_DAILY_QUERIES': -1,  # not read by AbstractConnector yet
+        'BASE_URL': 'https://openlibrary.org',  # required; becomes .url
+        'COVERS_URL': 'https://covers.openlibrary.org',  # required; becomes .covers_url
+    },
+}
+
+''' not implemented yet:
+    'librarything': {
+        'KEY_NAME': 'ltkey',
+        'DB_KEY_FIELD': 'librarything_key',
+        'POLITENESS_DELAY': 1,
+        'MAX_DAILY_QUERIES': 1000,
+        'BASE_URL': 'https://librarything.com',
+    },
+    'worldcat': {
+        'KEY_NAME': 'ocn',
+        'DB_KEY_FIELD': 'oclc_number',
+        'POLITENESS_DELAY': 0,
+        'MAX_DAILY_QUERIES': -1,
+        'BASE_URL': 'https://worldcat.org',
+    },
+'''
--- a/fedireads/openlibrary.py
+++ b/fedireads/openlibrary.py
@ -1,135 +0,0 @@
-''' activitystream api and books '''
-from django.core.exceptions import ObjectDoesNotExist
-from django.core.files.base import ContentFile
-import re
-import requests
-
-from fedireads import models
-from fedireads.settings import OL_URL
-
-
-def book_search(query):
-    ''' look up a book '''
-    response = requests.get('%s/search.json' % OL_URL, params={'q': query})
-    if not response.ok:
-        response.raise_for_status()
-    data = response.json()
-    results = []
-
-    for doc in data['docs'][:5]:
-        key = doc['key']
-        key = key.split('/')[-1]
-        author = doc.get('author_name') or ['Unknown']
-        results.append({
-            'title': doc.get('title'),
-            'olkey': key,
-            'year': doc.get('first_publish_year'),
-            'author': author[0],
-        })
-    return results
-
-
-def get_or_create_book(olkey, update=False):
-    ''' create a book or work '''
-    # check if this is in the format of an OL book identifier
-    if re.match(r'^OL\d+W$', olkey):
-        model = models.Work
-    elif re.match(r'^OL\d+M$', olkey):
-        model = models.Edition
-    else:
-        raise ValueError('Invalid OpenLibrary ID')
-
-    # get the existing entry from our db, if it exists
-    try:
-        book = model.objects.get(openlibrary_key=olkey)
-        if not update:
-            return book
-        # we have the book, but still want to update it from OL
-    except ObjectDoesNotExist:
-        # no book was found, so we start creating a new one
-        book = model(openlibrary_key=olkey)
-
-    # load the book json from openlibrary.org
-    response = requests.get('%s/works/%s.json' % (OL_URL, olkey))
-    if not response.ok:
-        response.raise_for_status()
-
-    data = response.json()
-
-    # great, we can update our book.
-    book.title = data['title']
-    description = data.get('description')
-    if description:
-        if isinstance(description, dict):
-            description = description.get('value')
-        book.description = description
-    book.pages = data.get('pages')
-    #book.published_date = data.get('publish_date')
-
-    # this book sure as heck better be an edition
-    if data.get('works'):
-        key = data.get('works')[0]['key']
-        key = key.split('/')[-1]
-        work = get_or_create_book(key)
-        book.parent_work = work
-    book.save()
-
-    # we also need to know the author get the cover
-    for author_blob in data.get('authors'):
-        # this id starts as "/authors/OL1234567A" and we want just "OL1234567A"
-        author_blob = author_blob.get('author', author_blob)
-        author_id = author_blob['key']
-        author_id = author_id.split('/')[-1]
-        book.authors.add(get_or_create_author(author_id))
-
-    if data.get('covers') and len(data['covers']):
-        book.cover.save(*get_cover(data['covers'][0]), save=True)
-
-    return book
-
-
-def get_cover(cover_id):
-    ''' ask openlibrary for the cover '''
-    # TODO: get medium and small versions
-    image_name = '%s-M.jpg' % cover_id
-    url = 'https://covers.openlibrary.org/b/id/%s' % image_name
-    response = requests.get(url)
-    if not response.ok:
-        response.raise_for_status()
-    image_content = ContentFile(requests.get(url).content)
-    return [image_name, image_content]
-
-
-def get_or_create_author(olkey, update=False):
-    ''' load that author '''
-    if not re.match(r'^OL\d+A$', olkey):
-        raise ValueError('Invalid OpenLibrary author ID')
-    try:
-        author = models.Author.objects.get(openlibrary_key=olkey)
-        if not update:
-            return author
-    except ObjectDoesNotExist:
-        pass
-
-    response = requests.get('%s/authors/%s.json' % (OL_URL, olkey))
-    if not response.ok:
-        response.raise_for_status()
-
-    data = response.json()
-    author = models.Author(openlibrary_key=olkey)
-    bio = data.get('bio')
-    if bio:
-        if isinstance(bio, dict):
-            bio = bio.get('value')
-        author.bio = bio
-    name = data['name']
-    author.name = name
-    # TODO this is making some BOLD assumption
-    author.last_name = name.split(' ')[-1]
-    author.first_name = ' '.join(name.split(' ')[:-1])
-    #author.born = data.get('birth_date')
-    #author.died = data.get('death_date')
-    author.save()
-
-    return author
-
--- a/fedireads/status.py
+++ b/fedireads/status.py
@ -1,6 +1,6 @@
 ''' Handle user activity '''
 from fedireads import models
-from fedireads.openlibrary import get_or_create_book
+from fedireads.books_manager import get_or_create_book
 from fedireads.sanitize_html import InputHtmlParser
 from django.db import IntegrityError

--- a/fedireads/templates/book_results.html
+++ b/fedireads/templates/book_results.html
@ -5,7 +5,7 @@
        <h1>Search results</h1>
        {% for result in results %}
        <div>
-            <a href="/book/{{ result.olkey }}">{{ result.title }}</a> by {{ result.author }} ({{ result.year }})
+            <a href="/book/{{ result.key }}">{{ result.title }}</a> by {{ result.author }} ({{ result.year }})
        </div>
        {% endfor %}
    </div>
--- a/fedireads/view_actions.py
+++ b/fedireads/view_actions.py
@ -5,7 +5,7 @@ from django.shortcuts import redirect
 from django.template.response import TemplateResponse
 import re

-from fedireads import forms, models, openlibrary, outgoing
+from fedireads import forms, models, books_manager, outgoing
 from fedireads.views import get_user_from_username


@ -150,8 +150,8 @@ def search(request):
        results = [outgoing.handle_account_search(query)]
        template = 'user_results.html'
    else:
-        # just send the question over to openlibrary for book search
-        results = openlibrary.book_search(query)
+        # just send the question over to book search
+        results = books_manager.search(query)
        template = 'book_results.html'

    return TemplateResponse(request, template, {'results': results})
--- a/fedireads/views.py
+++ b/fedireads/views.py
@ -6,7 +6,7 @@ from django.http import HttpResponseNotFound
 from django.shortcuts import redirect
 from django.template.response import TemplateResponse

-from fedireads import forms, models, openlibrary, incoming
+from fedireads import forms, models, books_manager, incoming
 from fedireads.settings import DOMAIN


@ -188,7 +188,7 @@ def edit_profile_page(request, username):
@login_required
 def book_page(request, book_identifier, tab='friends'):
    ''' info about a book '''
-    book = openlibrary.get_or_create_book(book_identifier)
+    book = books_manager.get_or_create_book(book_identifier)

    if isinstance(book, models.Work):
        book_reviews = models.Review.objects.filter(
@ -258,7 +258,7 @@ def book_page(request, book_identifier, tab='friends'):
 def author_page(request, author_identifier):
    ''' landing page for an author '''
     try:
-        author = models.Author.objects.get(openlibrary_key=author_identifier)
+        author = models.Author.objects.get(openlibrary_key=author_identifier)
     except ValueError:
         return HttpResponseNotFound()

--- a/init_db.py
+++ b/init_db.py
@ -1,5 +1,5 @@
 from fedireads.models import User
-from fedireads.openlibrary import get_or_create_book
+from fedireads.books_manager import get_or_create_book

 User.objects.create_user('mouse', 'mouse.reeve@gmail.com', 'password123')
 User.objects.create_user('rat', 'rat@rat.com', 'ratword')