Merge pull request #697 from mmai/feature/isbnSearch

ISBN search
2021-03-06 13:32:31 -08:00 · 2021-03-06 13:32:31 -08:00 · 13531f107d
commit 13531f107d
parent 5c9a3a5667 703ff60271
18 changed files with 358 additions and 4 deletions
--- a/bookwyrm/connectors/abstract_connector.py
+++ b/bookwyrm/connectors/abstract_connector.py
@ -26,6 +26,7 @@ class AbstractMinimalConnector(ABC):
            'books_url',
            'covers_url',
            'search_url',
+            'isbn_search_url',
            'max_query_count',
            'name',
            'identifier',
@ -61,6 +62,30 @@ class AbstractMinimalConnector(ABC):
            results.append(self.format_search_result(doc))
        return results

+    def isbn_search(self, query):
+        ''' look up books by isbn on the remote connector
+
+        appends `query` to `isbn_search_url`, parses the json response and
+        returns the list of formatted results. raises an HTTPError when the
+        response is an error and a ConnectorException when the body is not
+        valid json. '''
+        resp = requests.get(
+            '%s%s' % (self.isbn_search_url, query),
+            headers={
+                'Accept': 'application/json; charset=utf-8',
+                'User-Agent': settings.USER_AGENT,
+            },
+        )
+        # does nothing for a successful response, raises otherwise
+        resp.raise_for_status()
+        try:
+            data = resp.json()
+        except ValueError as e:
+            logger.exception(e)
+            # keep the parse error attached as the explicit cause
+            raise ConnectorException(
+                'Unable to parse json response', e) from e
+
+        return [
+            self.format_isbn_search_result(doc)
+            for doc in self.parse_isbn_search_data(data)
+        ]
+
    @abstractmethod
    def get_or_create_book(self, remote_id):
        ''' pull up a book record by whatever means possible '''
@ -73,6 +98,14 @@ class AbstractMinimalConnector(ABC):
    def format_search_result(self, search_result):
        ''' create a SearchResult obj from json '''

+    @abstractmethod
+    def parse_isbn_search_data(self, data):
+        ''' turn the result json from an isbn search into a list of entries,
+        each of which is passed to format_isbn_search_result '''
+
+    @abstractmethod
+    def format_isbn_search_result(self, search_result):
+        ''' create a SearchResult obj from one parsed isbn search entry '''
+

 class AbstractConnector(AbstractMinimalConnector):
    ''' generic book data connector '''
--- a/bookwyrm/connectors/bookwyrm_connector.py
+++ b/bookwyrm/connectors/bookwyrm_connector.py
@ -19,3 +19,11 @@ class Connector(AbstractMinimalConnector):
    def format_search_result(self, search_result):
        search_result['connector'] = self
        return SearchResult(**search_result)
+
+    def parse_isbn_search_data(self, data):
+        ''' the response is used as is, nothing to unpack '''
+        return data
+
+    def format_isbn_search_result(self, search_result):
+        ''' isbn results are handled exactly like regular search results '''
+        return self.format_search_result(search_result)
+
--- a/bookwyrm/connectors/connector_manager.py
+++ b/bookwyrm/connectors/connector_manager.py
@ -1,5 +1,6 @@
 ''' interface with whatever connectors the app has '''
 import importlib
+import re
 from urllib.parse import urlparse

 from requests import HTTPError
@ -15,9 +16,27 @@ class ConnectorException(HTTPError):
 def search(query, min_confidence=0.1):
    ''' find books based on arbitary keywords '''
    results = []
+
+    # Does the query look like an ISBN? Drop every separator (anything that
+    # is not a letter or digit) and check what is left for an ISBN length
+    isbn = re.sub(r'[\W_]', '', query)
+    maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
+
    dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
    result_index = set()
    for connector in get_connectors():
+        result_set = None
+        if maybe_isbn:
+            # Search on ISBN
+            if not connector.isbn_search_url:
+                # this connector has no isbn endpoint configured
+                result_set = []
+            else:
+                try:
+                    result_set = connector.isbn_search(isbn)
+                except (HTTPError, ConnectorException):
+                    # a failed isbn lookup is not fatal: result_set stays
+                    # None and the generic search below takes over
+                    pass
+
+        # if no isbn search or results, we fallback to generic search
+        if not result_set:
            try:
                result_set = connector.search(query, min_confidence=min_confidence)
            except (HTTPError, ConnectorException):
@ -41,6 +60,12 @@ def local_search(query, min_confidence=0.1, raw=False):
    return connector.search(query, min_confidence=min_confidence, raw=raw)


+def isbn_local_search(query, raw=False):
+    ''' run an isbn search against the local connector only '''
+    local_connector = models.Connector.objects.get(local=True)
+    return load_connector(local_connector).isbn_search(query, raw=raw)
+
+
 def first_search_result(query, min_confidence=0.1):
    ''' search until you find a result that fits '''
    for connector in get_connectors():
--- a/bookwyrm/connectors/openlibrary.py
+++ b/bookwyrm/connectors/openlibrary.py
@ -129,6 +129,22 @@ class Connector(AbstractConnector):
        )


+    def parse_isbn_search_data(self, data):
+        ''' the response is a mapping; only its values are kept, as a list '''
+        return [*data.values()]
+
+    def format_isbn_search_result(self, search_result):
+        ''' build a SearchResult from one openlibrary isbn lookup entry '''
+        # the remote id is the books url followed by the openlibrary key
+        remote_id = self.books_url + search_result['key']
+        # entries without authors get a single placeholder author
+        author_list = search_result.get('authors') or [{'name': 'Unknown'}]
+        joined_names = ', '.join(
+            author.get('name') for author in author_list)
+        return SearchResult(
+            title=search_result.get('title'),
+            key=remote_id,
+            author=joined_names,
+            connector=self,
+            year=search_result.get('publish_date'),
+        )
+
    def load_edition_data(self, olkey):
        ''' query openlibrary for editions of a work '''
        url = '%s/works/%s/editions' % (self.books_url, olkey)
--- a/bookwyrm/connectors/self_connector.py
+++ b/bookwyrm/connectors/self_connector.py
@ -33,6 +33,31 @@ class Connector(AbstractConnector):
            search_results.sort(key=lambda r: r.confidence, reverse=True)
        return search_results

+    def isbn_search(self, query, raw=False):
+        ''' search your local database for editions with a matching isbn
+
+        returns at most 10 results: the Edition objects themselves when
+        `raw` is set, otherwise one SearchResult per edition. an empty
+        query returns an empty list. '''
+        if not query:
+            return []
+
+        # the query may match either isbn field
+        results = models.Edition.objects.filter(
+            Q(isbn_10=query) | Q(isbn_13=query)
+        ).distinct()
+
+        # when there are multiple editions of the same work, pick the default.
+        # it would be odd for this to happen.
+        results = results.filter(parent_work__default_edition__id=F('id')) \
+                or results
+
+        results = results[:10]
+        if raw:
+            return list(results)
+        # use the isbn formatter, as the abstract connector's isbn_search does
+        return [self.format_isbn_search_result(r) for r in results]
+

    def format_search_result(self, search_result):
        return SearchResult(
@ -47,6 +72,19 @@ class Connector(AbstractConnector):
        )


+    def format_isbn_search_result(self, search_result):
+        ''' wrap a local isbn match in a SearchResult '''
+        published = search_result.published_date
+        # results that carry no rank get a confidence of 1
+        confidence = getattr(search_result, 'rank', 1)
+        return SearchResult(
+            title=search_result.title,
+            key=search_result.remote_id,
+            author=search_result.author_text,
+            year=published.year if published else None,
+            connector=self,
+            confidence=confidence,
+        )
+
+
    def is_work_data(self, data):
        pass

@ -59,6 +97,10 @@ class Connector(AbstractConnector):
    def get_authors_from_data(self, data):
        return None

+    def parse_isbn_search_data(self, data):
+        ''' nothing to parse for the local connector, the data is handed
+        back unchanged '''
+        return data
+
    def parse_search_data(self, data):
        ''' it's already in the right format, don't even worry about it '''
        return data
--- a/bookwyrm/management/commands/initdb.py
+++ b/bookwyrm/management/commands/initdb.py
@ -66,6 +66,7 @@ def init_connectors():
        books_url='https://%s/book' % DOMAIN,
        covers_url='https://%s/images/covers' % DOMAIN,
        search_url='https://%s/search?q=' % DOMAIN,
+        isbn_search_url='https://%s/isbn/' % DOMAIN,
        priority=1,
    )

@ -77,6 +78,7 @@ def init_connectors():
        books_url='https://bookwyrm.social/book',
        covers_url='https://bookwyrm.social/images/covers',
        search_url='https://bookwyrm.social/search?q=',
+        isbn_search_url='https://bookwyrm.social/isbn/',
        priority=2,
    )

@ -88,6 +90,7 @@ def init_connectors():
        books_url='https://openlibrary.org',
        covers_url='https://covers.openlibrary.org',
        search_url='https://openlibrary.org/search?q=',
+        isbn_search_url='https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:',
        priority=3,
    )

--- a/bookwyrm/migrations/0047_connector_isbn_search_url.py
+++ b/bookwyrm/migrations/0047_connector_isbn_search_url.py
@ -0,0 +1,18 @@
+# Generated by Django 3.0.7 on 2021-02-28 16:41
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    ''' add the optional isbn_search_url field to the connector model '''
+
+    dependencies = [
+        ('bookwyrm', '0046_sitesettings_privacy_policy'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='connector',
+            name='isbn_search_url',
+            field=models.CharField(blank=True, max_length=255, null=True),
+        ),
+    ]
--- a/bookwyrm/models/connector.py
+++ b/bookwyrm/models/connector.py
@ -22,6 +22,7 @@ class Connector(BookWyrmModel):
    books_url = models.CharField(max_length=255)
    covers_url = models.CharField(max_length=255)
    search_url = models.CharField(max_length=255, null=True, blank=True)
+    isbn_search_url = models.CharField(max_length=255, null=True, blank=True)

    politeness_delay = models.IntegerField(null=True, blank=True) #seconds
    max_query_count = models.IntegerField(null=True, blank=True)
--- a/bookwyrm/templates/isbn_search_results.html
+++ b/bookwyrm/templates/isbn_search_results.html
@ -0,0 +1,33 @@
+{% extends 'layout.html' %}
+{% load i18n %}
+
+{% block title %}{% trans "Search Results" %}{% endblock %}
+
+{% block content %}
+{% with book_results|first as local_results %}
+<div class="block">
+    <h1 class="title">{% blocktrans %}Search Results for "{{ query }}"{% endblocktrans %}</h1>
+</div>
+
+<div class="block columns">
+    <div class="column">
+        <h2 class="title">{% trans "Matching Books" %}</h2>
+        <section class="block">
+            {% if not results %}
+            <p>{% blocktrans %}No books found for "{{ query }}"{% endblocktrans %}</p>
+            {% else %}
+            <ul>
+            {% for result in results %}
+                <li class="pd-4">
+                    <a href="{{ result.key }}">{% include 'snippets/search_result_text.html' with result=result link=True %}</a>
+                </li>
+            {% endfor %}
+            </ul>
+            {% endif %}
+        </section>
+    </div>
+
+    <div class="column">
+    </div>
+</div>
+{% endwith %}
+{% endblock %}
--- a/bookwyrm/tests/connectors/test_abstract_connector.py
+++ b/bookwyrm/tests/connectors/test_abstract_connector.py
@ -42,6 +42,10 @@ class AbstractConnector(TestCase):
                return search_result
            def parse_search_data(self, data):
                return data
+            def format_isbn_search_result(self, search_result):
+                return search_result
+            def parse_isbn_search_data(self, data):
+                return data
            def is_work_data(self, data):
                return data['type'] == 'work'
            def get_edition_from_work_data(self, data):
--- a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py
+++ b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py
@ -18,6 +18,7 @@ class AbstractConnector(TestCase):
            books_url='https://example.com/books',
            covers_url='https://example.com/covers',
            search_url='https://example.com/search?q=',
+            isbn_search_url='https://example.com/isbn',
        )

        class TestConnector(abstract_connector.AbstractMinimalConnector):
@ -28,6 +29,10 @@ class AbstractConnector(TestCase):
                pass
            def parse_search_data(self, data):
                return data
+            def format_isbn_search_result(self, search_result):
+                return search_result
+            def parse_isbn_search_data(self, data):
+                return data
        self.test_connector = TestConnector('example.com')


@ -39,6 +44,7 @@ class AbstractConnector(TestCase):
        self.assertEqual(connector.books_url, 'https://example.com/books')
        self.assertEqual(connector.covers_url, 'https://example.com/covers')
        self.assertEqual(connector.search_url, 'https://example.com/search?q=')
+        self.assertEqual(connector.isbn_search_url, 'https://example.com/isbn')
        self.assertIsNone(connector.name)
        self.assertEqual(connector.identifier, 'example.com')
        self.assertIsNone(connector.max_query_count)
--- a/bookwyrm/tests/connectors/test_openlibrary_connector.py
+++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py
@ -27,6 +27,7 @@ class Openlibrary(TestCase):
            books_url='https://openlibrary.org',
            covers_url='https://covers.openlibrary.org',
            search_url='https://openlibrary.org/search?q=',
+            isbn_search_url='https://openlibrary.org/isbn',
        )
        self.connector = Connector('openlibrary.org')

@ -149,6 +150,34 @@ class Openlibrary(TestCase):
        self.assertEqual(result.connector, self.connector)


+    def test_parse_isbn_search_result(self):
+        ''' the isbn lookup response is unpacked into a list of entries '''
+        fixture = pathlib.Path(__file__).parent.joinpath(
+            '../data/ol_isbn_search.json')
+        payload = json.loads(fixture.read_bytes())
+        parsed = self.connector.parse_isbn_search_data(payload)
+        self.assertIsInstance(parsed, list)
+        self.assertEqual(len(parsed), 1)
+
+
+    def test_format_isbn_search_result(self):
+        ''' an openlibrary isbn entry becomes a SearchResult '''
+        fixture = pathlib.Path(__file__).parent.joinpath(
+            '../data/ol_isbn_search.json')
+        payload = json.loads(fixture.read_bytes())
+        entries = self.connector.parse_isbn_search_data(payload)
+        self.assertIsInstance(entries, list)
+
+        formatted = self.connector.format_isbn_search_result(entries[0])
+        self.assertIsInstance(formatted, SearchResult)
+        self.assertEqual(formatted.title, 'Les ombres errantes')
+        self.assertEqual(
+            formatted.key, 'https://openlibrary.org/books/OL16262504M')
+        self.assertEqual(formatted.author, 'Pascal Quignard')
+        self.assertEqual(formatted.year, '2002')
+        self.assertEqual(formatted.connector, self.connector)
+
+
    @responses.activate
    def test_load_edition_data(self):
        ''' format url from key and make request '''
--- a/bookwyrm/tests/data/ol_isbn_search.json
+++ b/bookwyrm/tests/data/ol_isbn_search.json
@ -0,0 +1,45 @@
+{
+  "ISBN:9782070427796": {
+    "url": "https://openlibrary.org/books/OL16262504M/Les_ombres_errantes",
+    "key": "/books/OL16262504M",
+    "title": "Les ombres errantes",
+    "authors": [
+      {
+        "url": "https://openlibrary.org/authors/OL269675A/Pascal_Quignard",
+        "name": "Pascal Quignard"
+      }
+    ],
+    "by_statement": "Pascal Quignard.",
+    "identifiers": {
+      "goodreads": [
+        "1835483"
+      ],
+      "librarything": [
+        "983474"
+      ],
+      "isbn_10": [
+        "207042779X"
+      ],
+      "openlibrary": [
+        "OL16262504M"
+      ]
+    },
+    "classifications": {
+      "dewey_decimal_class": [
+        "848/.91403"
+      ]
+    },
+    "publishers": [
+      {
+        "name": "Gallimard"
+      }
+    ],
+    "publish_places": [
+      {
+        "name": "Paris"
+      }
+    ],
+    "publish_date": "2002",
+    "notes": "Hardback published Grasset, 2002."
+  }
+}
--- a/bookwyrm/tests/views/test_isbn.py
+++ b/bookwyrm/tests/views/test_isbn.py
@ -0,0 +1,54 @@
+''' test for app action functionality '''
+import json
+from unittest.mock import patch
+
+from django.http import JsonResponse
+from django.template.response import TemplateResponse
+from django.test import TestCase
+from django.test.client import RequestFactory
+
+from bookwyrm import models, views
+from bookwyrm.connectors import abstract_connector
+from bookwyrm.settings import DOMAIN
+
+
+class IsbnViews(TestCase):
+    ''' isbn views '''
+    def setUp(self):
+        ''' we need basic test data and mocks '''
+        self.factory = RequestFactory()
+        self.local_user = models.User.objects.create_user(
+            'mouse@local.com', 'mouse@mouse.com', 'mouseword',
+            local=True, localname='mouse',
+            remote_id='https://example.com/users/mouse',
+        )
+        self.work = models.Work.objects.create(title='Test Work')
+        self.book = models.Edition.objects.create(
+            title='Test Book',
+            isbn_13='1234567890123',
+            remote_id='https://example.com/book/1',
+            parent_work=self.work
+        )
+        models.Connector.objects.create(
+            identifier='self',
+            connector_file='self_connector',
+            local=True
+        )
+        models.SiteSettings.objects.create()
+
+
+    def test_isbn_json_response(self):
+        ''' searches local data only and returns book data in json format '''
+        view = views.Isbn.as_view()
+        request = self.factory.get('')
+        # force the api branch of the view so a JsonResponse comes back
+        with patch('bookwyrm.views.isbn.is_api_request') as is_api:
+            is_api.return_value = True
+            response = view(request, isbn='1234567890123')
+        self.assertIsInstance(response, JsonResponse)
+
+        data = json.loads(response.content)
+        self.assertEqual(len(data), 1)
+        self.assertEqual(data[0]['title'], 'Test Book')
+        self.assertEqual(
+            data[0]['key'], 'https://%s/book/%d' % (DOMAIN, self.book.id))
+
--- a/bookwyrm/tests/views/test_search.py
+++ b/bookwyrm/tests/views/test_search.py
@ -64,6 +64,10 @@ class ShelfViews(TestCase):
                pass
            def parse_search_data(self, data):
                pass
+            def format_isbn_search_result(self, search_result):
+                return search_result
+            def parse_isbn_search_data(self, data):
+                return data
        models.Connector.objects.create(
            identifier='example.com',
            connector_file='openlibrary',
--- a/bookwyrm/urls.py
+++ b/bookwyrm/urls.py
@ -135,6 +135,9 @@ urlpatterns = [
    re_path(r'^resolve-book/?$', views.resolve_book),
    re_path(r'^switch-edition/?$', views.switch_edition),

+    # isbn (the check digit of an ISBN-10 may be the letter X)
+    re_path(r'^isbn/(?P<isbn>[\dxX]+)(.json)?/?$', views.Isbn.as_view()),
+
    # author
    re_path(r'^author/(?P<author_id>\d+)(.json)?/?$', views.Author.as_view()),
    re_path(r'^author/(?P<author_id>\d+)/edit/?$', views.EditAuthor.as_view()),
--- a/bookwyrm/views/init.py
+++ b/bookwyrm/views/init.py
@ -31,3 +31,4 @@ from .site import Site
 from .status import CreateStatus, DeleteStatus
 from .updates import Updates
 from .user import User, EditUser, Followers, Following
+from .isbn import Isbn
--- a/bookwyrm/views/isbn.py
+++ b/bookwyrm/views/isbn.py
@ -0,0 +1,29 @@
+''' isbn search view '''
+from django.http import HttpResponseNotFound
+from django.http import JsonResponse
+from django.shortcuts import get_object_or_404, redirect
+from django.template.response import TemplateResponse
+from django.utils.decorators import method_decorator
+from django.views import View
+from django.views.decorators.http import require_POST
+
+from bookwyrm import forms, models
+from bookwyrm.connectors import connector_manager
+from .helpers import is_api_request
+
+# pylint: disable= no-self-use
+class Isbn(View):
+    ''' look up a book by its isbn '''
+    def get(self, request, isbn):
+        ''' local isbn matches, as json for api requests or html otherwise '''
+        matches = connector_manager.isbn_local_search(isbn)
+
+        if not is_api_request(request):
+            context = {
+                'title': 'ISBN Search Results',
+                'results': matches,
+                'query': isbn,
+            }
+            return TemplateResponse(
+                request, 'isbn_search_results.html', context)
+
+        return JsonResponse([match.json() for match in matches], safe=False)