mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-29 04:51:11 +00:00
Merge pull request #2962 from jderuiter/mypy-isbn
Type annotations and tests for isbn
This commit is contained in:
commit
bc870a305f
4 changed files with 100 additions and 19 deletions
|
@ -1,11 +1,20 @@
|
||||||
""" Use the range message from isbn-international to hyphenate ISBNs """
|
""" Use the range message from isbn-international to hyphenate ISBNs """
|
||||||
import os
|
import os
|
||||||
|
from typing import Optional
|
||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
|
from xml.etree.ElementTree import Element
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from bookwyrm import settings
|
from bookwyrm import settings
|
||||||
|
|
||||||
|
|
||||||
|
def _get_rules(element: Element) -> list[Element]:
    """Return the ``Rule`` elements nested under ``element``'s ``Rules`` child.

    An empty list is returned when ``element`` has no direct ``Rules`` child.
    """
    rules_container = element.find("Rules")
    if rules_container is None:
        return []
    return rules_container.findall("Rule")
|
||||||
|
|
||||||
|
|
||||||
class IsbnHyphenator:
|
class IsbnHyphenator:
|
||||||
"""Class to manage the range message xml file and use it to hyphenate ISBNs"""
|
"""Class to manage the range message xml file and use it to hyphenate ISBNs"""
|
||||||
|
|
||||||
|
@ -15,58 +24,94 @@ class IsbnHyphenator:
|
||||||
)
|
)
|
||||||
__element_tree = None
|
__element_tree = None
|
||||||
|
|
||||||
def update_range_message(self) -> None:
    """Download the range message xml file and save it locally

    Raises ``requests.RequestException`` when the download fails, times out
    or answers with an HTTP error status; in that case the local file is
    left untouched.
    """
    # bound the wait so a stalled server cannot block the caller forever
    response = requests.get(self.__range_message_url, timeout=15)
    # never overwrite a usable range file with the body of an error page
    response.raise_for_status()
    with open(self.__range_file_path, "w", encoding="utf-8") as file:
        file.write(response.text)
    # drop the cached tree so the next lookup parses the freshly written file
    self.__element_tree = None
|
||||||
|
|
||||||
def hyphenate(self, isbn_13: Optional[str]) -> Optional[str]:
    """hyphenate the given ISBN-13 number using the range message

    Returns ``None`` when ``isbn_13`` is ``None``. Returns the input
    unchanged when it is not exactly 13 ASCII digits or when no registration
    group / registrant range matches. Otherwise returns the five parts
    (GS1 prefix, registration group, registrant, publication, check digit)
    joined with "-".
    """
    if isbn_13 is None:
        return None

    # the range lookup slices by position and feeds the slices to int(), so
    # anything that is not 13 plain digits (e.g. an already hyphenated ISBN)
    # cannot be processed and is handed back as-is
    if len(isbn_13) != 13 or not (isbn_13.isascii() and isbn_13.isdigit()):
        return isbn_13  # failed to hyphenate

    if self.__element_tree is None:
        self.__element_tree = ElementTree.parse(self.__range_file_path)

    gs1_prefix = isbn_13[:3]
    reg_group = self.__find_reg_group(isbn_13, gs1_prefix)
    if reg_group is None:
        return isbn_13  # failed to hyphenate

    registrant = self.__find_registrant(isbn_13, gs1_prefix, reg_group)
    if registrant is None:
        return isbn_13  # failed to hyphenate

    # everything between the registrant and the final check digit
    publication_start = len(gs1_prefix) + len(reg_group) + len(registrant)
    publication = isbn_13[publication_start:-1]
    check_digit = isbn_13[-1:]
    return "-".join((gs1_prefix, reg_group, registrant, publication, check_digit))
|
||||||
|
|
||||||
def __find_reg_group(self, isbn_13: str, gs1_prefix: str) -> Optional[str]:
    """Find the registration group part of ``isbn_13``

    Looks up the first ``EAN.UCC`` entry whose ``Prefix`` equals
    ``gs1_prefix`` and walks its rules. For each rule the digits following
    the prefix (as many as the rule's ``Length``) are compared against the
    rule's ``Range``; the first slice that falls inside a range is returned.

    Returns ``None`` when the prefix is unknown or no rule matches. Rules
    with a missing, empty, zero or malformed ``Length`` / ``Range`` are
    skipped, as are slices that are too short or not numeric.
    """
    if self.__element_tree is None:
        self.__element_tree = ElementTree.parse(self.__range_file_path)

    ucc_prefixes_el = self.__element_tree.find("EAN.UCCPrefixes")
    if ucc_prefixes_el is None:
        return None

    start = len(gs1_prefix)
    for ean_ucc_el in ucc_prefixes_el.findall("EAN.UCC"):
        prefix_el = ean_ucc_el.find("Prefix")
        if prefix_el is None or prefix_el.text != gs1_prefix:
            continue

        for rule_el in _get_rules(ean_ucc_el):
            length_el = rule_el.find("Length")
            range_el = rule_el.find("Range")
            if length_el is None or range_el is None:
                continue
            if not length_el.text or not range_el.text:
                continue

            try:
                length = int(length_el.text)
                if length <= 0:
                    # length 0 marks a range that is not assigned
                    continue
                bounds = [int(x[:length]) for x in range_el.text.split("-")]
            except ValueError:
                continue  # malformed rule, ignore it
            if len(bounds) < 2:
                continue

            reg_group = isbn_13[start : start + length]
            # a truncated or non-numeric slice can never be inside a range
            if len(reg_group) != length or not (
                reg_group.isascii() and reg_group.isdigit()
            ):
                continue
            if bounds[0] <= int(reg_group) <= bounds[1]:
                return reg_group

        # only the first entry with a matching prefix is consulted
        return None
    return None
|
||||||
|
|
||||||
def __find_registrant(self, isbn_13, gs1_prefix, reg_group):
|
def __find_registrant(
|
||||||
|
self, isbn_13: str, gs1_prefix: str, reg_group: str
|
||||||
|
) -> Optional[str]:
|
||||||
from_ind = len(gs1_prefix) + len(reg_group)
|
from_ind = len(gs1_prefix) + len(reg_group)
|
||||||
for group_el in self.__element_tree.find("RegistrationGroups").findall("Group"):
|
|
||||||
if group_el.find("Prefix").text == "-".join((gs1_prefix, reg_group)):
|
if self.__element_tree is None:
|
||||||
for rule_el in group_el.find("Rules").findall("Rule"):
|
self.__element_tree = ElementTree.parse(self.__range_file_path)
|
||||||
length = int(rule_el.find("Length").text)
|
|
||||||
|
reg_groups_el = self.__element_tree.find("RegistrationGroups")
|
||||||
|
if reg_groups_el is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
for group_el in reg_groups_el.findall("Group"):
|
||||||
|
if (
|
||||||
|
prefix_el := group_el.find("Prefix")
|
||||||
|
) is not None and prefix_el.text == "-".join((gs1_prefix, reg_group)):
|
||||||
|
for rule_el in _get_rules(group_el):
|
||||||
|
length_el = rule_el.find("Length")
|
||||||
|
if length_el is None:
|
||||||
|
continue
|
||||||
|
length = int(text) if (text := length_el.text) else 0
|
||||||
if length == 0:
|
if length == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
range_el = rule_el.find("Range")
|
||||||
|
if range_el is None or range_el.text is None:
|
||||||
|
continue
|
||||||
registrant_range = [
|
registrant_range = [
|
||||||
int(x[:length]) for x in rule_el.find("Range").text.split("-")
|
int(x[:length]) for x in range_el.text.split("-")
|
||||||
]
|
]
|
||||||
registrant = isbn_13[from_ind : from_ind + length]
|
registrant = isbn_13[from_ind : from_ind + length]
|
||||||
if registrant_range[0] <= int(registrant) <= registrant_range[1]:
|
if registrant_range[0] <= int(registrant) <= registrant_range[1]:
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
""" bookwyrm settings and configuration """
|
""" bookwyrm settings and configuration """
|
||||||
import os
|
import os
|
||||||
|
from typing import AnyStr
|
||||||
|
|
||||||
from environs import Env
|
from environs import Env
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
@ -37,7 +39,7 @@ EMAIL_SENDER_DOMAIN = env("EMAIL_SENDER_DOMAIN", DOMAIN)
|
||||||
EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}"
|
EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}"
|
||||||
|
|
||||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
||||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
BASE_DIR: AnyStr = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
LOCALE_PATHS = [
|
LOCALE_PATHS = [
|
||||||
os.path.join(BASE_DIR, "locale"),
|
os.path.join(BASE_DIR, "locale"),
|
||||||
]
|
]
|
||||||
|
|
31
bookwyrm/tests/test_isbn.py
Normal file
31
bookwyrm/tests/test_isbn.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
""" test ISBN hyphenator for books """
|
||||||
|
from django.test import TestCase
|
||||||
|
|
||||||
|
from bookwyrm.isbn.isbn import hyphenator_singleton as hyphenator
|
||||||
|
|
||||||
|
|
||||||
|
class TestISBN(TestCase):
    """isbn hyphenator"""

    def test_isbn_hyphenation(self):
        """different isbn hyphenations"""
        # nothing
        self.assertIsNone(hyphenator.hyphenate(None))

        cases = (
            # 978-0 (English language) 3700000-6389999
            ("9780439554930", "978-0-439-55493-0"),
            # 978-2 (French language) 0000000-1999999
            ("9782070100927", "978-2-07-010092-7"),
            # 978-3 (German language) 2000000-6999999
            ("9783518188125", "978-3-518-18812-5"),
            # 978-4 (Japan) 0000000-1999999
            ("9784101050454", "978-4-10-105045-4"),
            # 978-626 (Taiwan) 9500000-9999999
            ("9786269533251", "978-626-95332-5-1"),
            # 979-8 (United States) 4000000-8499999
            ("9798627974040", "979-8-6279-7404-0"),
            # 978-626 (Taiwan) 8000000-9499999 (unassigned)
            ("9786268533251", "9786268533251"),
            # 978 range 6600000-6999999 (unassigned)
            ("9786769533251", "9786769533251"),
            # 979-8 (United States) 2300000-3499999 (unassigned)
            ("9798311111111", "9798311111111"),
        )
        # subTest reports every failing ISBN instead of stopping at the first
        for isbn, expected in cases:
            with self.subTest(isbn=isbn):
                self.assertEqual(hyphenator.hyphenate(isbn), expected)
|
3
mypy.ini
3
mypy.ini
|
@ -16,6 +16,9 @@ ignore_errors = False
|
||||||
[mypy-bookwyrm.importers.*]
|
[mypy-bookwyrm.importers.*]
|
||||||
ignore_errors = False
|
ignore_errors = False
|
||||||
|
|
||||||
|
[mypy-bookwyrm.isbn.*]
|
||||||
|
ignore_errors = False
|
||||||
|
|
||||||
[mypy-celerywyrm.*]
|
[mypy-celerywyrm.*]
|
||||||
ignore_errors = False
|
ignore_errors = False
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue