mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-31 20:38:45 +00:00
Merge pull request #2282 from hughrun/normalise-isbn
Normalise ISBNs for searching
This commit is contained in:
commit
834c7e9cd5
4 changed files with 33 additions and 5 deletions
|
@ -7,6 +7,7 @@ from django.contrib.postgres.search import SearchRank, SearchQuery
|
|||
from django.db.models import OuterRef, Subquery, F, Q
|
||||
|
||||
from bookwyrm import models
|
||||
from bookwyrm import connectors
|
||||
from bookwyrm.settings import MEDIA_FULL_URL
|
||||
|
||||
|
||||
|
@ -30,7 +31,9 @@ def isbn_search(query):
|
|||
"""search your local database"""
|
||||
if not query:
|
||||
return []
|
||||
|
||||
# Up-case the ISBN string to ensure any 'X' check-digit is correct
|
||||
# If the ISBN has only 9 characters, prepend missing zero
|
||||
query = query.strip().upper().rjust(10, "0")
|
||||
filters = [{f: query} for f in ["isbn_10", "isbn_13"]]
|
||||
results = models.Edition.objects.filter(
|
||||
reduce(operator.or_, (Q(**f) for f in filters))
|
||||
|
@ -72,6 +75,10 @@ def format_search_result(search_result):
|
|||
|
||||
def search_identifiers(query, *filters, return_first=False):
|
||||
"""tries remote_id, isbn; defined as dedupe fields on the model"""
|
||||
if connectors.maybe_isbn(query):
|
||||
# Oh did you think the 'S' in ISBN stood for 'standard'?
|
||||
normalized_isbn = query.strip().upper().rjust(10, "0")
|
||||
query = normalized_isbn
|
||||
# pylint: disable=W0212
|
||||
or_filters = [
|
||||
{f.name: query}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
""" bring connectors into the namespace """
|
||||
from .settings import CONNECTORS
|
||||
from .abstract_connector import ConnectorException
|
||||
from .abstract_connector import get_data, get_image
|
||||
from .abstract_connector import get_data, get_image, maybe_isbn
|
||||
|
||||
from .connector_manager import search, first_search_result
|
||||
|
|
|
@ -42,8 +42,10 @@ class AbstractMinimalConnector(ABC):
|
|||
"""format the query url"""
|
||||
# Check if the query resembles an ISBN
|
||||
if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
|
||||
return f"{self.isbn_search_url}{query}"
|
||||
|
||||
# Up-case the ISBN string to ensure any 'X' check-digit is correct
|
||||
# If the ISBN has only 9 characters, prepend missing zero
|
||||
normalized_query = query.strip().upper().rjust(10, "0")
|
||||
return f"{self.isbn_search_url}{normalized_query}"
|
||||
# NOTE: previously, we tried searching isbn and if that produces no results,
|
||||
# searched as free text. This, instead, only searches isbn if it's isbn-y
|
||||
return f"{self.search_url}{query}"
|
||||
|
@ -325,4 +327,11 @@ def unique_physical_format(format_text):
|
|||
def maybe_isbn(query):
|
||||
"""check if a query looks like an isbn"""
|
||||
isbn = re.sub(r"[\W_]", "", query) # removes filler characters
|
||||
return len(isbn) in [10, 13] # ISBN10 or ISBN13
|
||||
# ISBNs must be numeric except an ISBN10 checkdigit can be 'X'
|
||||
if not isbn.upper().rstrip("X").isnumeric():
|
||||
return False
|
||||
return len(isbn) in [
|
||||
9,
|
||||
10,
|
||||
13,
|
||||
] # ISBN10 or ISBN13, or maybe ISBN10 missing a leading zero
|
||||
|
|
|
@ -28,6 +28,12 @@ class BookSearch(TestCase):
|
|||
openlibrary_key="hello",
|
||||
)
|
||||
|
||||
self.third_edition = models.Edition.objects.create(
|
||||
title="Edition with annoying ISBN",
|
||||
parent_work=self.work,
|
||||
isbn_10="022222222X",
|
||||
)
|
||||
|
||||
def test_search(self):
|
||||
"""search for a book in the db"""
|
||||
# title/author
|
||||
|
@ -57,6 +63,12 @@ class BookSearch(TestCase):
|
|||
self.assertEqual(len(results), 1)
|
||||
self.assertEqual(results[0], self.second_edition)
|
||||
|
||||
def test_search_identifiers_isbn_search(self):
|
||||
"""search by unique ID with slightly wonky ISBN"""
|
||||
results = book_search.search_identifiers("22222222x")
|
||||
self.assertEqual(len(results), 1)
|
||||
self.assertEqual(results[0], self.third_edition)
|
||||
|
||||
def test_search_identifiers_return_first(self):
|
||||
"""search by unique identifiers"""
|
||||
result = book_search.search_identifiers("hello", return_first=True)
|
||||
|
|
Loading…
Reference in a new issue