isbn search

This commit is contained in:
Henri Bourcereau 2021-03-01 21:09:21 +01:00
parent 79c3ae1c44
commit 703ff60271
18 changed files with 358 additions and 4 deletions

View file

@ -26,6 +26,7 @@ class AbstractMinimalConnector(ABC):
'books_url',
'covers_url',
'search_url',
'isbn_search_url',
'max_query_count',
'name',
'identifier',
@ -61,6 +62,30 @@ class AbstractMinimalConnector(ABC):
results.append(self.format_search_result(doc))
return results
def isbn_search(self, query):
''' isbn search '''
params = {}
resp = requests.get(
'%s%s' % (self.isbn_search_url, query),
params=params,
headers={
'Accept': 'application/json; charset=utf-8',
'User-Agent': settings.USER_AGENT,
},
)
if not resp.ok:
resp.raise_for_status()
try:
data = resp.json()
except ValueError as e:
logger.exception(e)
raise ConnectorException('Unable to parse json response', e)
results = []
for doc in self.parse_isbn_search_data(data):
results.append(self.format_isbn_search_result(doc))
return results
@abstractmethod
def get_or_create_book(self, remote_id):
''' pull up a book record by whatever means possible '''
@ -73,6 +98,14 @@ class AbstractMinimalConnector(ABC):
def format_search_result(self, search_result):
''' create a SearchResult obj from json '''
@abstractmethod
def parse_isbn_search_data(self, data):
''' turn the result json from a search into a list '''
@abstractmethod
def format_isbn_search_result(self, search_result):
''' create a SearchResult obj from json '''
class AbstractConnector(AbstractMinimalConnector):
''' generic book data connector '''

View file

@ -19,3 +19,11 @@ class Connector(AbstractMinimalConnector):
def format_search_result(self, search_result):
search_result['connector'] = self
return SearchResult(**search_result)
def parse_isbn_search_data(self, data):
return data
def format_isbn_search_result(self, search_result):
search_result['connector'] = self
return SearchResult(**search_result)

View file

@ -1,5 +1,6 @@
''' interface with whatever connectors the app has '''
import importlib
import re
from urllib.parse import urlparse
from requests import HTTPError
@ -15,13 +16,31 @@ class ConnectorException(HTTPError):
def search(query, min_confidence=0.1):
''' find books based on arbitary keywords '''
results = []
# Have we got a ISBN ?
isbn = re.sub('[\W_]', '', query)
maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
dedup_slug = lambda r: '%s/%s/%s' % (r.title, r.author, r.year)
result_index = set()
for connector in get_connectors():
try:
result_set = connector.search(query, min_confidence=min_confidence)
except (HTTPError, ConnectorException):
continue
result_set = None
if maybe_isbn:
# Search on ISBN
if not connector.isbn_search_url or connector.isbn_search_url == '':
result_set = []
else:
try:
result_set = connector.isbn_search(isbn)
except (HTTPError, ConnectorException):
pass
# if no isbn search or results, we fallback to generic search
if result_set == None or result_set == []:
try:
result_set = connector.search(query, min_confidence=min_confidence)
except (HTTPError, ConnectorException):
continue
result_set = [r for r in result_set \
if dedup_slug(r) not in result_index]
@ -41,6 +60,12 @@ def local_search(query, min_confidence=0.1, raw=False):
return connector.search(query, min_confidence=min_confidence, raw=raw)
def isbn_local_search(query, raw=False):
''' only look at local search results '''
connector = load_connector(models.Connector.objects.get(local=True))
return connector.isbn_search(query, raw=raw)
def first_search_result(query, min_confidence=0.1):
''' search until you find a result that fits '''
for connector in get_connectors():

View file

@ -129,6 +129,22 @@ class Connector(AbstractConnector):
)
def parse_isbn_search_data(self, data):
return list(data.values())
def format_isbn_search_result(self, search_result):
# build the remote id from the openlibrary key
key = self.books_url + search_result['key']
authors = search_result.get('authors') or [{'name': 'Unknown'}]
author_names = [ author.get('name') for author in authors]
return SearchResult(
title=search_result.get('title'),
key=key,
author=', '.join(author_names),
connector=self,
year=search_result.get('publish_date'),
)
def load_edition_data(self, olkey):
''' query openlibrary for editions of a work '''
url = '%s/works/%s/editions' % (self.books_url, olkey)

View file

@ -33,6 +33,31 @@ class Connector(AbstractConnector):
search_results.sort(key=lambda r: r.confidence, reverse=True)
return search_results
def isbn_search(self, query, raw=False):
''' search your local database '''
if not query:
return []
filters = [{f: query} for f in ['isbn_10', 'isbn_13']]
results = models.Edition.objects.filter(
reduce(operator.or_, (Q(**f) for f in filters))
).distinct()
# when there are multiple editions of the same work, pick the default.
# it would be odd for this to happen.
results = results.filter(parent_work__default_edition__id=F('id')) \
or results
search_results = []
for result in results:
if raw:
search_results.append(result)
else:
search_results.append(self.format_search_result(result))
if len(search_results) >= 10:
break
return search_results
def format_search_result(self, search_result):
return SearchResult(
@ -47,6 +72,19 @@ class Connector(AbstractConnector):
)
def format_isbn_search_result(self, search_result):
return SearchResult(
title=search_result.title,
key=search_result.remote_id,
author=search_result.author_text,
year=search_result.published_date.year if \
search_result.published_date else None,
connector=self,
confidence=search_result.rank if \
hasattr(search_result, 'rank') else 1,
)
def is_work_data(self, data):
pass
@ -59,6 +97,10 @@ class Connector(AbstractConnector):
def get_authors_from_data(self, data):
return None
def parse_isbn_search_data(self, data):
''' it's already in the right format, don't even worry about it '''
return data
def parse_search_data(self, data):
''' it's already in the right format, don't even worry about it '''
return data

View file

@ -66,6 +66,7 @@ def init_connectors():
books_url='https://%s/book' % DOMAIN,
covers_url='https://%s/images/covers' % DOMAIN,
search_url='https://%s/search?q=' % DOMAIN,
isbn_search_url='https://%s/isbn/' % DOMAIN,
priority=1,
)
@ -77,6 +78,7 @@ def init_connectors():
books_url='https://bookwyrm.social/book',
covers_url='https://bookwyrm.social/images/covers',
search_url='https://bookwyrm.social/search?q=',
isbn_search_url='https://bookwyrm.social/isbn/',
priority=2,
)
@ -88,6 +90,7 @@ def init_connectors():
books_url='https://openlibrary.org',
covers_url='https://covers.openlibrary.org',
search_url='https://openlibrary.org/search?q=',
isbn_search_url='https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:',
priority=3,
)

View file

@ -0,0 +1,18 @@
# Generated by Django 3.0.7 on 2021-02-28 16:41
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('bookwyrm', '0046_sitesettings_privacy_policy'),
]
operations = [
migrations.AddField(
model_name='connector',
name='isbn_search_url',
field=models.CharField(blank=True, max_length=255, null=True),
),
]

View file

@ -22,6 +22,7 @@ class Connector(BookWyrmModel):
books_url = models.CharField(max_length=255)
covers_url = models.CharField(max_length=255)
search_url = models.CharField(max_length=255, null=True, blank=True)
isbn_search_url = models.CharField(max_length=255, null=True, blank=True)
politeness_delay = models.IntegerField(null=True, blank=True) #seconds
max_query_count = models.IntegerField(null=True, blank=True)

View file

@ -0,0 +1,33 @@
{% extends 'layout.html' %}
{% load i18n %}
{% block title %}{% trans "Search Results" %}{% endblock %}
{% block content %}
{% with book_results|first as local_results %}
<div class="block">
<h1 class="title">{% blocktrans %}Search Results for "{{ query }}"{% endblocktrans %}</h1>
</div>
<div class="block columns">
<div class="column">
<h2 class="title">{% trans "Matching Books" %}</h2>
<section class="block">
{% if not results %}
<p>{% blocktrans %}No books found for "{{ query }}"{% endblocktrans %}</p>
{% else %}
<ul>
{% for result in results %}
<li class="pd-4">
<a href="{{ result.key }}">{% include 'snippets/search_result_text.html' with result=result link=True %}</a>
</li>
{% endfor %}
</ul>
{% endif %}
</section>
<div class="column">
</div>
</div>
{% endwith %}
{% endblock %}

View file

@ -42,6 +42,10 @@ class AbstractConnector(TestCase):
return search_result
def parse_search_data(self, data):
return data
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data):
return data
def is_work_data(self, data):
return data['type'] == 'work'
def get_edition_from_work_data(self, data):

View file

@ -18,6 +18,7 @@ class AbstractConnector(TestCase):
books_url='https://example.com/books',
covers_url='https://example.com/covers',
search_url='https://example.com/search?q=',
isbn_search_url='https://example.com/isbn',
)
class TestConnector(abstract_connector.AbstractMinimalConnector):
@ -28,6 +29,10 @@ class AbstractConnector(TestCase):
pass
def parse_search_data(self, data):
return data
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data):
return data
self.test_connector = TestConnector('example.com')
@ -39,6 +44,7 @@ class AbstractConnector(TestCase):
self.assertEqual(connector.books_url, 'https://example.com/books')
self.assertEqual(connector.covers_url, 'https://example.com/covers')
self.assertEqual(connector.search_url, 'https://example.com/search?q=')
self.assertEqual(connector.isbn_search_url, 'https://example.com/isbn')
self.assertIsNone(connector.name)
self.assertEqual(connector.identifier, 'example.com')
self.assertIsNone(connector.max_query_count)

View file

@ -27,6 +27,7 @@ class Openlibrary(TestCase):
books_url='https://openlibrary.org',
covers_url='https://covers.openlibrary.org',
search_url='https://openlibrary.org/search?q=',
isbn_search_url='https://openlibrary.org/isbn',
)
self.connector = Connector('openlibrary.org')
@ -149,6 +150,34 @@ class Openlibrary(TestCase):
self.assertEqual(result.connector, self.connector)
def test_parse_isbn_search_result(self):
''' extract the results from the search json response '''
datafile = pathlib.Path(__file__).parent.joinpath(
'../data/ol_isbn_search.json')
search_data = json.loads(datafile.read_bytes())
result = self.connector.parse_isbn_search_data(search_data)
self.assertIsInstance(result, list)
self.assertEqual(len(result), 1)
def test_format_isbn_search_result(self):
''' translate json from openlibrary into SearchResult '''
datafile = pathlib.Path(__file__).parent.joinpath(
'../data/ol_isbn_search.json')
search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_isbn_search_data(search_data)
self.assertIsInstance(results, list)
result = self.connector.format_isbn_search_result(results[0])
self.assertIsInstance(result, SearchResult)
self.assertEqual(result.title, 'Les ombres errantes')
self.assertEqual(
result.key, 'https://openlibrary.org/books/OL16262504M')
self.assertEqual(result.author, 'Pascal Quignard')
self.assertEqual(result.year, '2002')
self.assertEqual(result.connector, self.connector)
@responses.activate
def test_load_edition_data(self):
''' format url from key and make request '''

View file

@ -0,0 +1,45 @@
{
"ISBN:9782070427796": {
"url": "https://openlibrary.org/books/OL16262504M/Les_ombres_errantes",
"key": "/books/OL16262504M",
"title": "Les ombres errantes",
"authors": [
{
"url": "https://openlibrary.org/authors/OL269675A/Pascal_Quignard",
"name": "Pascal Quignard"
}
],
"by_statement": "Pascal Quignard.",
"identifiers": {
"goodreads": [
"1835483"
],
"librarything": [
"983474"
],
"isbn_10": [
"207042779X"
],
"openlibrary": [
"OL16262504M"
]
},
"classifications": {
"dewey_decimal_class": [
"848/.91403"
]
},
"publishers": [
{
"name": "Gallimard"
}
],
"publish_places": [
{
"name": "Paris"
}
],
"publish_date": "2002",
"notes": "Hardback published Grasset, 2002."
}
}

View file

@ -0,0 +1,54 @@
''' test for app action functionality '''
import json
from unittest.mock import patch
from django.http import JsonResponse
from django.template.response import TemplateResponse
from django.test import TestCase
from django.test.client import RequestFactory
from bookwyrm import models, views
from bookwyrm.connectors import abstract_connector
from bookwyrm.settings import DOMAIN
class IsbnViews(TestCase):
''' tag views'''
def setUp(self):
''' we need basic test data and mocks '''
self.factory = RequestFactory()
self.local_user = models.User.objects.create_user(
'mouse@local.com', 'mouse@mouse.com', 'mouseword',
local=True, localname='mouse',
remote_id='https://example.com/users/mouse',
)
self.work = models.Work.objects.create(title='Test Work')
self.book = models.Edition.objects.create(
title='Test Book',
isbn_13='1234567890123',
remote_id='https://example.com/book/1',
parent_work=self.work
)
models.Connector.objects.create(
identifier='self',
connector_file='self_connector',
local=True
)
models.SiteSettings.objects.create()
def test_isbn_json_response(self):
''' searches local data only and returns book data in json format '''
view = views.Isbn.as_view()
request = self.factory.get('')
with patch('bookwyrm.views.isbn.is_api_request') as is_api:
is_api.return_value = True
response = view(request, isbn='1234567890123')
self.assertIsInstance(response, JsonResponse)
data = json.loads(response.content)
self.assertEqual(len(data), 1)
self.assertEqual(data[0]['title'], 'Test Book')
self.assertEqual(
data[0]['key'], 'https://%s/book/%d' % (DOMAIN, self.book.id))

View file

@ -64,6 +64,10 @@ class ShelfViews(TestCase):
pass
def parse_search_data(self, data):
pass
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data):
return data
models.Connector.objects.create(
identifier='example.com',
connector_file='openlibrary',

View file

@ -135,6 +135,9 @@ urlpatterns = [
re_path(r'^resolve-book/?$', views.resolve_book),
re_path(r'^switch-edition/?$', views.switch_edition),
# isbn
re_path(r'^isbn/(?P<isbn>\d+)(.json)?/?$', views.Isbn.as_view()),
# author
re_path(r'^author/(?P<author_id>\d+)(.json)?/?$', views.Author.as_view()),
re_path(r'^author/(?P<author_id>\d+)/edit/?$', views.EditAuthor.as_view()),

View file

@ -31,3 +31,4 @@ from .site import Site
from .status import CreateStatus, DeleteStatus
from .updates import Updates
from .user import User, EditUser, Followers, Following
from .isbn import Isbn

29
bookwyrm/views/isbn.py Normal file
View file

@ -0,0 +1,29 @@
''' isbn search view '''
from django.http import HttpResponseNotFound
from django.http import JsonResponse
from django.shortcuts import get_object_or_404, redirect
from django.template.response import TemplateResponse
from django.utils.decorators import method_decorator
from django.views import View
from django.views.decorators.http import require_POST
from bookwyrm import forms, models
from bookwyrm.connectors import connector_manager
from .helpers import is_api_request
# pylint: disable= no-self-use
class Isbn(View):
''' search a book by isbn '''
def get(self, request, isbn):
''' info about a book '''
book_results = connector_manager.isbn_local_search(isbn)
if is_api_request(request):
return JsonResponse([r.json() for r in book_results], safe=False)
data = {
'title': 'ISBN Search Results',
'results': book_results,
'query': isbn,
}
return TemplateResponse(request, 'isbn_search_results.html', data)