normalise isbn searching

ISBNs are always numeric, except that the check digit of an ISBN-10 is written as a capital 'X' when its value is ten.
These changes ensure that ISBNs are always upper-cased, so that a lower-case 'x' is never used when searching.

Additionally, some older ISBNs were printed without the leading zero (i.e. only 9 characters appear on the physical book). This change prepends a zero if a query looks like an ISBN but has only 9 characters.
This commit is contained in:
Hugh Rundle 2022-08-28 11:05:40 +10:00
parent c902301d82
commit da5fd32196
2 changed files with 10 additions and 2 deletions

View file

@ -30,7 +30,9 @@ def isbn_search(query):
"""search your local database""" """search your local database"""
if not query: if not query:
return [] return []
# Up-case the ISBN string to ensure any 'X' check-digit is correct
# If the ISBN has only 9 characters, prepend missing zero
query = query.upper().rjust(10, '0')
filters = [{f: query} for f in ["isbn_10", "isbn_13"]] filters = [{f: query} for f in ["isbn_10", "isbn_13"]]
results = models.Edition.objects.filter( results = models.Edition.objects.filter(
reduce(operator.or_, (Q(**f) for f in filters)) reduce(operator.or_, (Q(**f) for f in filters))

View file

@ -42,6 +42,9 @@ class AbstractMinimalConnector(ABC):
"""format the query url""" """format the query url"""
# Check if the query resembles an ISBN # Check if the query resembles an ISBN
if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "": if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
# Up-case the ISBN string to ensure any 'X' check-digit is correct
# If the ISBN has only 9 characters, prepend missing zero
query = query.upper().rjust(10, '0')
return f"{self.isbn_search_url}{query}" return f"{self.isbn_search_url}{query}"
# NOTE: previously, we tried searching isbn and if that produces no results, # NOTE: previously, we tried searching isbn and if that produces no results,
@ -325,4 +328,7 @@ def unique_physical_format(format_text):
def maybe_isbn(query):
    """check if a query looks like an isbn

    Filler characters (anything matching ``[\\W_]``, e.g. hyphens and spaces)
    are ignored and the check is case-insensitive, so ``020161622x`` is
    treated the same as ``020161622X``.

    Returns True when the remaining characters are one of:
      * 13 ASCII digits (ISBN-13; its check digit is always numeric),
      * 10 characters: 9 ASCII digits followed by a digit or 'X' (ISBN-10),
      * 9 characters of the same shape (ISBN-10 printed without its
        leading zero).
    Returns False for anything else, including the empty string.
    """
    # removes filler characters; up-case first so that a lower-case 'x'
    # check digit is recognised instead of being rejected as non-numeric
    isbn = re.sub(r"[\W_]", "", query).upper()
    # 'X' is only valid as the single, final check character of a 9/10
    # character candidate, so match the whole shape rather than stripping
    # an arbitrary run of trailing X's. [0-9] (not \d) keeps this ASCII-only.
    return re.fullmatch(r"[0-9]{13}|[0-9]{8,9}[0-9X]", isbn) is not None