Merge pull request #2282 from hughrun/normalise-isbn

Normalise ISBNs for searching
This commit is contained in:
Mouse Reeve 2022-09-08 10:14:30 -07:00 committed by GitHub
commit 834c7e9cd5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 33 additions and 5 deletions

View file

@ -7,6 +7,7 @@ from django.contrib.postgres.search import SearchRank, SearchQuery
from django.db.models import OuterRef, Subquery, F, Q
from bookwyrm import models
from bookwyrm import connectors
from bookwyrm.settings import MEDIA_FULL_URL
@ -30,7 +31,9 @@ def isbn_search(query):
"""search your local database"""
if not query:
return []
# Up-case the ISBN string to ensure any 'X' check-digit is correct
# If the ISBN has only 9 characters, prepend missing zero
query = query.strip().upper().rjust(10, "0")
filters = [{f: query} for f in ["isbn_10", "isbn_13"]]
results = models.Edition.objects.filter(
reduce(operator.or_, (Q(**f) for f in filters))
@ -72,6 +75,10 @@ def format_search_result(search_result):
def search_identifiers(query, *filters, return_first=False):
"""tries remote_id, isbn; defined as dedupe fields on the model"""
if connectors.maybe_isbn(query):
# ISBNs arrive in inconsistent forms: trim whitespace, upper-case any 'x'
# check digit, and left-pad with zeros to 10 characters
normalized_isbn = query.strip().upper().rjust(10, "0")
query = normalized_isbn
# pylint: disable=W0212
or_filters = [
{f.name: query}

View file

@ -1,6 +1,6 @@
""" bring connectors into the namespace """
from .settings import CONNECTORS
from .abstract_connector import ConnectorException
from .abstract_connector import get_data, get_image
from .abstract_connector import get_data, get_image, maybe_isbn
from .connector_manager import search, first_search_result

View file

@ -42,8 +42,10 @@ class AbstractMinimalConnector(ABC):
"""format the query url"""
# Check if the query resembles an ISBN
if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
return f"{self.isbn_search_url}{query}"
# Up-case the ISBN string to ensure any 'X' check-digit is correct
# If the ISBN has only 9 characters, prepend missing zero
normalized_query = query.strip().upper().rjust(10, "0")
return f"{self.isbn_search_url}{normalized_query}"
# NOTE: previously, we tried searching isbn and if that produces no results,
# searched as free text. This, instead, only searches isbn if it's isbn-y
return f"{self.search_url}{query}"
@ -325,4 +327,11 @@ def unique_physical_format(format_text):
def maybe_isbn(query):
    """check if a query looks like an isbn

    Filler characters (anything that is not a letter or digit, such as
    hyphens, spaces and underscores) are ignored, and the check is
    case-insensitive.

    Accepted shapes, after removing filler:
    - ISBN13: exactly 13 digits
    - ISBN10: 9 digits followed by a check character (a digit or 'X')
    - ISBN10 missing its leading zero: 8 digits plus the check character

    Returns True if the query has one of these shapes, otherwise False.
    Only the shape is checked; the check digit itself is not verified.
    """
    isbn = re.sub(r"[\W_]", "", query).upper()  # removes filler characters
    # ISBNs must be numeric except an ISBN10 checkdigit can be 'X'. Matching
    # the whole string allows a single 'X' only in the final position of a
    # 9 or 10 character value, and never in an ISBN13.
    return re.fullmatch(r"[0-9]{8,9}[0-9X]|[0-9]{13}", isbn) is not None

View file

@ -28,6 +28,12 @@ class BookSearch(TestCase):
openlibrary_key="hello",
)
self.third_edition = models.Edition.objects.create(
title="Edition with annoying ISBN",
parent_work=self.work,
isbn_10="022222222X",
)
def test_search(self):
"""search for a book in the db"""
# title/author
@ -57,6 +63,12 @@ class BookSearch(TestCase):
self.assertEqual(len(results), 1)
self.assertEqual(results[0], self.second_edition)
def test_search_identifiers_isbn_search(self):
"""search by unique ID with slightly wonky ISBN"""
# The query is nine characters long with a lower-case 'x' check digit,
# i.e. it differs from a ten-character, upper-case ISBN10 by a missing
# leading zero and by letter case.
results = book_search.search_identifiers("22222222x")
# Exactly one edition should be found, and it should be third_edition.
self.assertEqual(len(results), 1)
self.assertEqual(results[0], self.third_edition)
def test_search_identifiers_return_first(self):
"""search by unique identifiers"""
result = book_search.search_identifiers("hello", return_first=True)