From 703ff60271fe4ad36bf2210949e0b375998df084 Mon Sep 17 00:00:00 2001 From: Henri Bourcereau Date: Mon, 1 Mar 2021 21:09:21 +0100 Subject: [PATCH] isbn search --- bookwyrm/connectors/abstract_connector.py | 33 ++++++++++++ bookwyrm/connectors/bookwyrm_connector.py | 8 +++ bookwyrm/connectors/connector_manager.py | 33 ++++++++++-- bookwyrm/connectors/openlibrary.py | 16 ++++++ bookwyrm/connectors/self_connector.py | 42 +++++++++++++++ bookwyrm/management/commands/initdb.py | 3 ++ .../0047_connector_isbn_search_url.py | 18 +++++++ bookwyrm/models/connector.py | 1 + bookwyrm/templates/isbn_search_results.html | 33 ++++++++++++ .../connectors/test_abstract_connector.py | 4 ++ .../test_abstract_minimal_connector.py | 6 +++ .../connectors/test_openlibrary_connector.py | 29 ++++++++++ bookwyrm/tests/data/ol_isbn_search.json | 45 ++++++++++++++++ bookwyrm/tests/views/test_isbn.py | 54 +++++++++++++++++++ bookwyrm/tests/views/test_search.py | 4 ++ bookwyrm/urls.py | 3 ++ bookwyrm/views/__init__.py | 1 + bookwyrm/views/isbn.py | 29 ++++++++++ 18 files changed, 358 insertions(+), 4 deletions(-) create mode 100644 bookwyrm/migrations/0047_connector_isbn_search_url.py create mode 100644 bookwyrm/templates/isbn_search_results.html create mode 100644 bookwyrm/tests/data/ol_isbn_search.json create mode 100644 bookwyrm/tests/views/test_isbn.py create mode 100644 bookwyrm/views/isbn.py diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 68ff2a483..e6372438e 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -26,6 +26,7 @@ class AbstractMinimalConnector(ABC): 'books_url', 'covers_url', 'search_url', + 'isbn_search_url', 'max_query_count', 'name', 'identifier', @@ -61,6 +62,30 @@ class AbstractMinimalConnector(ABC): results.append(self.format_search_result(doc)) return results + def isbn_search(self, query): + ''' isbn search ''' + params = {} + resp = requests.get( + '%s%s' % (self.isbn_search_url, query), + params=params, + headers={ + 'Accept': 'application/json; charset=utf-8', + 'User-Agent': settings.USER_AGENT, + }, + ) + if not resp.ok: + resp.raise_for_status() + try: + data = resp.json() + except ValueError as e: + logger.exception(e) + raise ConnectorException('Unable to parse json response', e) + results = [] + + for doc in self.parse_isbn_search_data(data): + results.append(self.format_isbn_search_result(doc)) + return results + @abstractmethod def get_or_create_book(self, remote_id): ''' pull up a book record by whatever means possible ''' @@ -73,6 +98,14 @@ class AbstractMinimalConnector(ABC): def format_search_result(self, search_result): ''' create a SearchResult obj from json ''' + @abstractmethod + def parse_isbn_search_data(self, data): + ''' turn the result json from a search into a list ''' + + @abstractmethod + def format_isbn_search_result(self, search_result): + ''' create a SearchResult obj from json ''' + class AbstractConnector(AbstractMinimalConnector): ''' generic book data connector ''' diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py index 00e6c62f1..96b72f267 100644 --- a/bookwyrm/connectors/bookwyrm_connector.py +++ b/bookwyrm/connectors/bookwyrm_connector.py @@ -19,3 +19,11 @@ class Connector(AbstractMinimalConnector): def format_search_result(self, search_result): search_result['connector'] = self return SearchResult(**search_result) + + def parse_isbn_search_data(self, data): + return data + + def format_isbn_search_result(self, search_result): + search_result['connector'] = self + return SearchResult(**search_result) + diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py index a63a788eb..053e1f9ef 100644 --- a/bookwyrm/connectors/connector_manager.py +++ b/bookwyrm/connectors/connector_manager.py @@ -1,5 +1,6 @@ ''' interface with whatever connectors the app has ''' import importlib +import re from urllib.parse import urlparse from requests import HTTPError @@ -15,13 +16,31 @@ class ConnectorException(HTTPError): def search(query, min_confidence=0.1): ''' find books based on arbitary keywords ''' results = [] + + # Have we got a ISBN ? + isbn = re.sub('[\W_]', '', query) + maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13 + dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year) result_index = set() for connector in get_connectors(): - try: - result_set = connector.search(query, min_confidence=min_confidence) - except (HTTPError, ConnectorException): - continue + result_set = None + if maybe_isbn: + # Search on ISBN + if not connector.isbn_search_url or connector.isbn_search_url == '': + result_set = [] + else: + try: + result_set = connector.isbn_search(isbn) + except (HTTPError, ConnectorException): + pass + + # if no isbn search or results, we fallback to generic search + if result_set == None or result_set == []: + try: + result_set = connector.search(query, min_confidence=min_confidence) + except (HTTPError, ConnectorException): + continue result_set = [r for r in result_set \ if dedup_slug(r) not in result_index] @@ -41,6 +60,12 @@ def local_search(query, min_confidence=0.1, raw=False): return connector.search(query, min_confidence=min_confidence, raw=raw) +def isbn_local_search(query, raw=False): + ''' only look at local search results ''' + connector = load_connector(models.Connector.objects.get(local=True)) + return connector.isbn_search(query, raw=raw) + + def first_search_result(query, min_confidence=0.1): ''' search until you find a result that fits ''' for connector in get_connectors(): diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index a767a45ac..8d227eef1 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -129,6 +129,22 @@ class Connector(AbstractConnector): ) + def parse_isbn_search_data(self, data): + return list(data.values()) + + def format_isbn_search_result(self, search_result): + # build the remote id from the openlibrary key + key = self.books_url + search_result['key'] + authors = search_result.get('authors') or [{'name': 'Unknown'}] + author_names = [ author.get('name') for author in authors] + return SearchResult( + title=search_result.get('title'), + key=key, + author=', '.join(author_names), + connector=self, + year=search_result.get('publish_date'), + ) + def load_edition_data(self, olkey): ''' query openlibrary for editions of a work ''' url = '%s/works/%s/editions' % (self.books_url, olkey) diff --git a/bookwyrm/connectors/self_connector.py b/bookwyrm/connectors/self_connector.py index f57fbc1cc..b3a4d6f9f 100644 --- a/bookwyrm/connectors/self_connector.py +++ b/bookwyrm/connectors/self_connector.py @@ -33,6 +33,31 @@ class Connector(AbstractConnector): search_results.sort(key=lambda r: r.confidence, reverse=True) return search_results + def isbn_search(self, query, raw=False): + ''' search your local database ''' + if not query: + return [] + + filters = [{f: query} for f in ['isbn_10', 'isbn_13']] + results = models.Edition.objects.filter( + reduce(operator.or_, (Q(**f) for f in filters)) + ).distinct() + + # when there are multiple editions of the same work, pick the default. + # it would be odd for this to happen. + results = results.filter(parent_work__default_edition__id=F('id')) \ + or results + + search_results = [] + for result in results: + if raw: + search_results.append(result) + else: + search_results.append(self.format_search_result(result)) + if len(search_results) >= 10: + break + return search_results + def format_search_result(self, search_result): return SearchResult( @@ -47,6 +72,19 @@ class Connector(AbstractConnector): ) + def format_isbn_search_result(self, search_result): + return SearchResult( + title=search_result.title, + key=search_result.remote_id, + author=search_result.author_text, + year=search_result.published_date.year if \ + search_result.published_date else None, + connector=self, + confidence=search_result.rank if \ + hasattr(search_result, 'rank') else 1, + ) + + def is_work_data(self, data): pass @@ -59,6 +97,10 @@ class Connector(AbstractConnector): def get_authors_from_data(self, data): return None + def parse_isbn_search_data(self, data): + ''' it's already in the right format, don't even worry about it ''' + return data + def parse_search_data(self, data): ''' it's already in the right format, don't even worry about it ''' return data diff --git a/bookwyrm/management/commands/initdb.py b/bookwyrm/management/commands/initdb.py index 9fd117871..5759abfcc 100644 --- a/bookwyrm/management/commands/initdb.py +++ b/bookwyrm/management/commands/initdb.py @@ -66,6 +66,7 @@ def init_connectors(): books_url='https://%s/book' % DOMAIN, covers_url='https://%s/images/covers' % DOMAIN, search_url='https://%s/search?q=' % DOMAIN, + isbn_search_url='https://%s/isbn/' % DOMAIN, priority=1, ) @@ -77,6 +78,7 @@ def init_connectors(): books_url='https://bookwyrm.social/book', covers_url='https://bookwyrm.social/images/covers', search_url='https://bookwyrm.social/search?q=', + isbn_search_url='https://bookwyrm.social/isbn/', priority=2, ) @@ -88,6 +90,7 @@ def init_connectors(): books_url='https://openlibrary.org', covers_url='https://covers.openlibrary.org', search_url='https://openlibrary.org/search?q=', + isbn_search_url='https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:', priority=3, ) diff --git a/bookwyrm/migrations/0047_connector_isbn_search_url.py b/bookwyrm/migrations/0047_connector_isbn_search_url.py new file mode 100644 index 000000000..617a89d9d --- /dev/null +++ b/bookwyrm/migrations/0047_connector_isbn_search_url.py @@ -0,0 +1,18 @@ +# Generated by Django 3.0.7 on 2021-02-28 16:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('bookwyrm', '0046_sitesettings_privacy_policy'), + ] + + operations = [ + migrations.AddField( + model_name='connector', + name='isbn_search_url', + field=models.CharField(blank=True, max_length=255, null=True), + ), + ] diff --git a/bookwyrm/models/connector.py b/bookwyrm/models/connector.py index 6f64cdf3e..c1fbf58bc 100644 --- a/bookwyrm/models/connector.py +++ b/bookwyrm/models/connector.py @@ -22,6 +22,7 @@ class Connector(BookWyrmModel): books_url = models.CharField(max_length=255) covers_url = models.CharField(max_length=255) search_url = models.CharField(max_length=255, null=True, blank=True) + isbn_search_url = models.CharField(max_length=255, null=True, blank=True) politeness_delay = models.IntegerField(null=True, blank=True) #seconds max_query_count = models.IntegerField(null=True, blank=True) diff --git a/bookwyrm/templates/isbn_search_results.html b/bookwyrm/templates/isbn_search_results.html new file mode 100644 index 000000000..a3861a68a --- /dev/null +++ b/bookwyrm/templates/isbn_search_results.html @@ -0,0 +1,33 @@ +{% extends 'layout.html' %} +{% load i18n %} + +{% block title %}{% trans "Search Results" %}{% endblock %} + +{% block content %} +{% with book_results|first as local_results %} +
+

{% blocktrans %}Search Results for "{{ query }}"{% endblocktrans %}

+
+ +
+
+

{% trans "Matching Books" %}

+
+ {% if not results %} +

{% blocktrans %}No books found for "{{ query }}"{% endblocktrans %}

+ {% else %} + + {% endif %} +
+ +
+
+
+{% endwith %} +{% endblock %} diff --git a/bookwyrm/tests/connectors/test_abstract_connector.py b/bookwyrm/tests/connectors/test_abstract_connector.py index 6e912858b..1b3821040 100644 --- a/bookwyrm/tests/connectors/test_abstract_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_connector.py @@ -42,6 +42,10 @@ class AbstractConnector(TestCase): return search_result def parse_search_data(self, data): return data + def format_isbn_search_result(self, search_result): + return search_result + def parse_isbn_search_data(self, data): + return data def is_work_data(self, data): return data['type'] == 'work' def get_edition_from_work_data(self, data): diff --git a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py index 0c6d25350..9b939067b 100644 --- a/bookwyrm/tests/connectors/test_abstract_minimal_connector.py +++ b/bookwyrm/tests/connectors/test_abstract_minimal_connector.py @@ -18,6 +18,7 @@ class AbstractConnector(TestCase): books_url='https://example.com/books', covers_url='https://example.com/covers', search_url='https://example.com/search?q=', + isbn_search_url='https://example.com/isbn', ) class TestConnector(abstract_connector.AbstractMinimalConnector): @@ -28,6 +29,10 @@ class AbstractConnector(TestCase): pass def parse_search_data(self, data): return data + def format_isbn_search_result(self, search_result): + return search_result + def parse_isbn_search_data(self, data): + return data self.test_connector = TestConnector('example.com') @@ -39,6 +44,7 @@ class AbstractConnector(TestCase): self.assertEqual(connector.books_url, 'https://example.com/books') self.assertEqual(connector.covers_url, 'https://example.com/covers') self.assertEqual(connector.search_url, 'https://example.com/search?q=') + self.assertEqual(connector.isbn_search_url, 'https://example.com/isbn') self.assertIsNone(connector.name) self.assertEqual(connector.identifier, 'example.com') self.assertIsNone(connector.max_query_count) diff --git a/bookwyrm/tests/connectors/test_openlibrary_connector.py b/bookwyrm/tests/connectors/test_openlibrary_connector.py index 576e353bf..a174300a9 100644 --- a/bookwyrm/tests/connectors/test_openlibrary_connector.py +++ b/bookwyrm/tests/connectors/test_openlibrary_connector.py @@ -27,6 +27,7 @@ class Openlibrary(TestCase): books_url='https://openlibrary.org', covers_url='https://covers.openlibrary.org', search_url='https://openlibrary.org/search?q=', + isbn_search_url='https://openlibrary.org/isbn', ) self.connector = Connector('openlibrary.org') @@ -149,6 +150,34 @@ class Openlibrary(TestCase): self.assertEqual(result.connector, self.connector) + def test_parse_isbn_search_result(self): + ''' extract the results from the search json response ''' + datafile = pathlib.Path(__file__).parent.joinpath( + '../data/ol_isbn_search.json') + search_data = json.loads(datafile.read_bytes()) + result = self.connector.parse_isbn_search_data(search_data) + self.assertIsInstance(result, list) + self.assertEqual(len(result), 1) + + + def test_format_isbn_search_result(self): + ''' translate json from openlibrary into SearchResult ''' + datafile = pathlib.Path(__file__).parent.joinpath( + '../data/ol_isbn_search.json') + search_data = json.loads(datafile.read_bytes()) + results = self.connector.parse_isbn_search_data(search_data) + self.assertIsInstance(results, list) + + result = self.connector.format_isbn_search_result(results[0]) + self.assertIsInstance(result, SearchResult) + self.assertEqual(result.title, 'Les ombres errantes') + self.assertEqual( + result.key, 'https://openlibrary.org/books/OL16262504M') + self.assertEqual(result.author, 'Pascal Quignard') + self.assertEqual(result.year, '2002') + self.assertEqual(result.connector, self.connector) + + @responses.activate def test_load_edition_data(self): ''' format url from key and make request ''' diff --git a/bookwyrm/tests/data/ol_isbn_search.json b/bookwyrm/tests/data/ol_isbn_search.json new file mode 100644 index 000000000..8516ff069 --- /dev/null +++ b/bookwyrm/tests/data/ol_isbn_search.json @@ -0,0 +1,45 @@ +{ + "ISBN:9782070427796": { + "url": "https://openlibrary.org/books/OL16262504M/Les_ombres_errantes", + "key": "/books/OL16262504M", + "title": "Les ombres errantes", + "authors": [ + { + "url": "https://openlibrary.org/authors/OL269675A/Pascal_Quignard", + "name": "Pascal Quignard" + } + ], + "by_statement": "Pascal Quignard.", + "identifiers": { + "goodreads": [ + "1835483" + ], + "librarything": [ + "983474" + ], + "isbn_10": [ + "207042779X" + ], + "openlibrary": [ + "OL16262504M" + ] + }, + "classifications": { + "dewey_decimal_class": [ + "848/.91403" + ] + }, + "publishers": [ + { + "name": "Gallimard" + } + ], + "publish_places": [ + { + "name": "Paris" + } + ], + "publish_date": "2002", + "notes": "Hardback published Grasset, 2002." + } +} diff --git a/bookwyrm/tests/views/test_isbn.py b/bookwyrm/tests/views/test_isbn.py new file mode 100644 index 000000000..1966702b4 --- /dev/null +++ b/bookwyrm/tests/views/test_isbn.py @@ -0,0 +1,54 @@ +''' test for app action functionality ''' +import json +from unittest.mock import patch + +from django.http import JsonResponse +from django.template.response import TemplateResponse +from django.test import TestCase +from django.test.client import RequestFactory + +from bookwyrm import models, views +from bookwyrm.connectors import abstract_connector +from bookwyrm.settings import DOMAIN + + +class IsbnViews(TestCase): + ''' tag views''' + def setUp(self): + ''' we need basic test data and mocks ''' + self.factory = RequestFactory() + self.local_user = models.User.objects.create_user( + 'mouse@local.com', 'mouse@mouse.com', 'mouseword', + local=True, localname='mouse', + remote_id='https://example.com/users/mouse', + ) + self.work = models.Work.objects.create(title='Test Work') + self.book = models.Edition.objects.create( + title='Test Book', + isbn_13='1234567890123', + remote_id='https://example.com/book/1', + parent_work=self.work + ) + models.Connector.objects.create( + identifier='self', + connector_file='self_connector', + local=True + ) + models.SiteSettings.objects.create() + + + def test_isbn_json_response(self): + ''' searches local data only and returns book data in json format ''' + view = views.Isbn.as_view() + request = self.factory.get('') + with patch('bookwyrm.views.isbn.is_api_request') as is_api: + is_api.return_value = True + response = view(request, isbn='1234567890123') + self.assertIsInstance(response, JsonResponse) + + data = json.loads(response.content) + self.assertEqual(len(data), 1) + self.assertEqual(data[0]['title'], 'Test Book') + self.assertEqual( + data[0]['key'], 'https://%s/book/%d' % (DOMAIN, self.book.id)) + diff --git a/bookwyrm/tests/views/test_search.py b/bookwyrm/tests/views/test_search.py index 655b4563a..5d7109e71 100644 --- a/bookwyrm/tests/views/test_search.py +++ b/bookwyrm/tests/views/test_search.py @@ -64,6 +64,10 @@ class ShelfViews(TestCase): pass def parse_search_data(self, data): pass + def format_isbn_search_result(self, search_result): + return search_result + def parse_isbn_search_data(self, data): + return data models.Connector.objects.create( identifier='example.com', connector_file='openlibrary', diff --git a/bookwyrm/urls.py b/bookwyrm/urls.py index a741088a2..1c3da3016 100644 --- a/bookwyrm/urls.py +++ b/bookwyrm/urls.py @@ -135,6 +135,9 @@ urlpatterns = [ re_path(r'^resolve-book/?$', views.resolve_book), re_path(r'^switch-edition/?$', views.switch_edition), + # isbn + re_path(r'^isbn/(?P\d+)(.json)?/?$', views.Isbn.as_view()), + # author re_path(r'^author/(?P\d+)(.json)?/?$', views.Author.as_view()), re_path(r'^author/(?P\d+)/edit/?$', views.EditAuthor.as_view()), diff --git a/bookwyrm/views/__init__.py b/bookwyrm/views/__init__.py index 2c7cdc461..dd601b28b 100644 --- a/bookwyrm/views/__init__.py +++ b/bookwyrm/views/__init__.py @@ -31,3 +31,4 @@ from .site import Site from .status import CreateStatus, DeleteStatus from .updates import Updates from .user import User, EditUser, Followers, Following +from .isbn import Isbn diff --git a/bookwyrm/views/isbn.py b/bookwyrm/views/isbn.py new file mode 100644 index 000000000..e5539ba3a --- /dev/null +++ b/bookwyrm/views/isbn.py @@ -0,0 +1,29 @@ +''' isbn search view ''' +from django.http import HttpResponseNotFound +from django.http import JsonResponse +from django.shortcuts import get_object_or_404, redirect +from django.template.response import TemplateResponse +from django.utils.decorators import method_decorator +from django.views import View +from django.views.decorators.http import require_POST + +from bookwyrm import forms, models +from bookwyrm.connectors import connector_manager +from .helpers import is_api_request + +# pylint: disable= no-self-use +class Isbn(View): + ''' search a book by isbn ''' + def get(self, request, isbn): + ''' info about a book ''' + book_results = connector_manager.isbn_local_search(isbn) + + if is_api_request(request): + return JsonResponse([r.json() for r in book_results], safe=False) + + data = { + 'title': 'ISBN Search Results', + 'results': book_results, + 'query': isbn, + } + return TemplateResponse(request, 'isbn_search_results.html', data)