Merge pull request #2962 from jderuiter/mypy-isbn

Type annotations and tests for isbn
This commit is contained in:
Mouse Reeve 2023-09-01 17:05:01 -07:00 committed by GitHub
commit bc870a305f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 100 additions and 19 deletions

View file

@ -1,11 +1,20 @@
""" Use the range message from isbn-international to hyphenate ISBNs """ """ Use the range message from isbn-international to hyphenate ISBNs """
import os import os
from typing import Optional
from xml.etree import ElementTree from xml.etree import ElementTree
from xml.etree.ElementTree import Element
import requests import requests
from bookwyrm import settings from bookwyrm import settings
def _get_rules(element: Element) -> list[Element]:
    """Return the ``Rule`` children of the ``Rules`` child of *element*

    An empty list is returned when *element* has no ``Rules`` child, so callers
    can iterate over the result without a separate ``None`` check.
    """
    if (rules_el := element.find("Rules")) is not None:
        return rules_el.findall("Rule")

    return []
class IsbnHyphenator: class IsbnHyphenator:
"""Class to manage the range message xml file and use it to hyphenate ISBNs""" """Class to manage the range message xml file and use it to hyphenate ISBNs"""
@ -15,58 +24,94 @@ class IsbnHyphenator:
) )
__element_tree = None __element_tree = None
def update_range_message(self) -> None:
    """Download the range message xml file and save it locally

    Raises ``requests.HTTPError`` when the server answers with an error
    status, so that an error page is never written over the local file.
    """
    # without a timeout a stalled connection would block the caller forever
    response = requests.get(self.__range_message_url, timeout=15)
    response.raise_for_status()

    with open(self.__range_file_path, "w", encoding="utf-8") as file:
        file.write(response.text)

    # drop the cached tree; hyphenate() re-parses the file when this is None
    self.__element_tree = None
def hyphenate(self, isbn_13: Optional[str]) -> Optional[str]:
    """hyphenate the given ISBN-13 number using the range message

    Returns ``None`` when given ``None``. The input is returned unchanged when
    it is not made of exactly 13 ASCII digits, or when no registration group
    or registrant range matches it.
    """
    if isbn_13 is None:
        return None

    # the range lookups slice the string and call int() on the pieces, so
    # anything that is not 13 plain digits (e.g. an already hyphenated ISBN)
    # is handed back as-is instead of raising ValueError further down
    if len(isbn_13) != 13 or not (isbn_13.isascii() and isbn_13.isdigit()):
        return isbn_13  # failed to hyphenate

    if self.__element_tree is None:
        self.__element_tree = ElementTree.parse(self.__range_file_path)

    gs1_prefix = isbn_13[:3]
    reg_group = self.__find_reg_group(isbn_13, gs1_prefix)
    if reg_group is None:
        return isbn_13  # failed to hyphenate

    registrant = self.__find_registrant(isbn_13, gs1_prefix, reg_group)
    if registrant is None:
        return isbn_13  # failed to hyphenate

    # whatever sits between the registrant and the final check digit
    publication = isbn_13[len(gs1_prefix) + len(reg_group) + len(registrant) : -1]
    check_digit = isbn_13[-1:]
    return "-".join((gs1_prefix, reg_group, registrant, publication, check_digit))
def __find_reg_group(self, isbn_13: str, gs1_prefix: str) -> Optional[str]:
    """Find the registration group digits of isbn_13 for the given GS1 prefix

    Returns the digits that follow the prefix when they fall inside one of the
    ranges listed for that prefix, otherwise ``None``.
    """
    if self.__element_tree is None:
        self.__element_tree = ElementTree.parse(self.__range_file_path)

    ucc_prefixes_el = self.__element_tree.find("EAN.UCCPrefixes")
    if ucc_prefixes_el is None:
        return None

    for ean_ucc_el in ucc_prefixes_el.findall("EAN.UCC"):
        if (
            prefix_el := ean_ucc_el.find("Prefix")
        ) is not None and prefix_el.text == gs1_prefix:
            for rule_el in _get_rules(ean_ucc_el):
                length_el = rule_el.find("Length")
                if length_el is None:
                    continue
                # a missing or empty Length text is treated like length 0
                length = int(text) if (text := length_el.text) else 0
                if length == 0:
                    continue

                range_el = rule_el.find("Range")
                if range_el is None or range_el.text is None:
                    continue

                # only the first `length` digits of each bound are compared
                reg_grp_range = [int(x[:length]) for x in range_el.text.split("-")]
                reg_group = isbn_13[len(gs1_prefix) : len(gs1_prefix) + length]
                if reg_grp_range[0] <= int(reg_group) <= reg_grp_range[1]:
                    return reg_group
            # the prefix matched but none of its rules did
            return None
    return None
def __find_registrant(self, isbn_13, gs1_prefix, reg_group): def __find_registrant(
self, isbn_13: str, gs1_prefix: str, reg_group: str
) -> Optional[str]:
from_ind = len(gs1_prefix) + len(reg_group) from_ind = len(gs1_prefix) + len(reg_group)
for group_el in self.__element_tree.find("RegistrationGroups").findall("Group"):
if group_el.find("Prefix").text == "-".join((gs1_prefix, reg_group)): if self.__element_tree is None:
for rule_el in group_el.find("Rules").findall("Rule"): self.__element_tree = ElementTree.parse(self.__range_file_path)
length = int(rule_el.find("Length").text)
reg_groups_el = self.__element_tree.find("RegistrationGroups")
if reg_groups_el is None:
return None
for group_el in reg_groups_el.findall("Group"):
if (
prefix_el := group_el.find("Prefix")
) is not None and prefix_el.text == "-".join((gs1_prefix, reg_group)):
for rule_el in _get_rules(group_el):
length_el = rule_el.find("Length")
if length_el is None:
continue
length = int(text) if (text := length_el.text) else 0
if length == 0: if length == 0:
continue continue
range_el = rule_el.find("Range")
if range_el is None or range_el.text is None:
continue
registrant_range = [ registrant_range = [
int(x[:length]) for x in rule_el.find("Range").text.split("-") int(x[:length]) for x in range_el.text.split("-")
] ]
registrant = isbn_13[from_ind : from_ind + length] registrant = isbn_13[from_ind : from_ind + length]
if registrant_range[0] <= int(registrant) <= registrant_range[1]: if registrant_range[0] <= int(registrant) <= registrant_range[1]:

View file

@ -1,5 +1,7 @@
""" bookwyrm settings and configuration """ """ bookwyrm settings and configuration """
import os import os
from typing import AnyStr
from environs import Env from environs import Env
import requests import requests
@ -37,7 +39,7 @@ EMAIL_SENDER_DOMAIN = env("EMAIL_SENDER_DOMAIN", DOMAIN)
EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}" EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}"
# Build paths inside the project like this: os.path.join(BASE_DIR, ...) # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) BASE_DIR: AnyStr = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LOCALE_PATHS = [ LOCALE_PATHS = [
os.path.join(BASE_DIR, "locale"), os.path.join(BASE_DIR, "locale"),
] ]

View file

@ -0,0 +1,31 @@
""" test ISBN hyphenator for books """
from django.test import TestCase
from bookwyrm.isbn.isbn import hyphenator_singleton as hyphenator
class TestISBN(TestCase):
    """isbn hyphenator"""

    def test_isbn_hyphenation(self):
        """different isbn hyphenations"""
        # nothing
        self.assertIsNone(hyphenator.hyphenate(None))
        # 978-0 (English language) 3700000-6389999
        self.assertEqual(hyphenator.hyphenate("9780439554930"), "978-0-439-55493-0")
        # 978-2 (French language) 0000000-1999999
        self.assertEqual(hyphenator.hyphenate("9782070100927"), "978-2-07-010092-7")
        # 978-3 (German language) 2000000-6999999
        self.assertEqual(hyphenator.hyphenate("9783518188125"), "978-3-518-18812-5")
        # 978-4 (Japan) 0000000-1999999
        self.assertEqual(hyphenator.hyphenate("9784101050454"), "978-4-10-105045-4")
        # 978-626 (Taiwan) 9500000-9999999
        self.assertEqual(hyphenator.hyphenate("9786269533251"), "978-626-95332-5-1")
        # 979-8 (United States) 4000000-8499999
        self.assertEqual(hyphenator.hyphenate("9798627974040"), "979-8-6279-7404-0")
        # the remaining inputs fall in unassigned ranges and come back unchanged
        # 978-626 (Taiwan) 8000000-9499999 (unassigned)
        self.assertEqual(hyphenator.hyphenate("9786268533251"), "9786268533251")
        # 978 range 6600000-6999999 (unassigned)
        self.assertEqual(hyphenator.hyphenate("9786769533251"), "9786769533251")
        # 979-8 (United States) 2300000-3499999 (unassigned)
        self.assertEqual(hyphenator.hyphenate("9798311111111"), "9798311111111")

View file

@ -16,6 +16,9 @@ ignore_errors = False
[mypy-bookwyrm.importers.*] [mypy-bookwyrm.importers.*]
ignore_errors = False ignore_errors = False
[mypy-bookwyrm.isbn.*]
ignore_errors = False
[mypy-celerywyrm.*] [mypy-celerywyrm.*]
ignore_errors = False ignore_errors = False