Type annotations for bookwyrm.isbn

This commit is contained in:
Joeri de Ruiter 2023-08-21 15:46:50 +02:00
parent 3760e3b45c
commit f6d8786179
3 changed files with 69 additions and 19 deletions

View file

@ -1,11 +1,20 @@
""" Use the range message from isbn-international to hyphenate ISBNs """
import os
from typing import Optional
from xml.etree import ElementTree
from xml.etree.ElementTree import Element
import requests
from bookwyrm import settings
def _get_rules(element: Element) -> list[Element]:
if (rules_el := element.find("Rules")) is not None:
return rules_el.findall("Rule")
return []
class IsbnHyphenator:
"""Class to manage the range message xml file and use it to hyphenate ISBNs"""
@ -15,58 +24,94 @@ class IsbnHyphenator:
)
__element_tree = None
def update_range_message(self):
def update_range_message(self) -> None:
"""Download the range message xml file and save it locally"""
response = requests.get(self.__range_message_url)
with open(self.__range_file_path, "w", encoding="utf-8") as file:
file.write(response.text)
self.__element_tree = None
def hyphenate(self, isbn_13):
def hyphenate(self, isbn_13: Optional[str]) -> Optional[str]:
"""hyphenate the given ISBN-13 number using the range message"""
if isbn_13 is None:
return None
if self.__element_tree is None:
self.__element_tree = ElementTree.parse(self.__range_file_path)
gs1_prefix = isbn_13[:3]
reg_group = self.__find_reg_group(isbn_13, gs1_prefix)
if reg_group is None:
return isbn_13 # failed to hyphenate
registrant = self.__find_registrant(isbn_13, gs1_prefix, reg_group)
if registrant is None:
return isbn_13 # failed to hyphenate
publication = isbn_13[len(gs1_prefix) + len(reg_group) + len(registrant) : -1]
check_digit = isbn_13[-1:]
return "-".join((gs1_prefix, reg_group, registrant, publication, check_digit))
def __find_reg_group(self, isbn_13, gs1_prefix):
for ean_ucc_el in self.__element_tree.find("EAN.UCCPrefixes").findall(
"EAN.UCC"
):
if ean_ucc_el.find("Prefix").text == gs1_prefix:
for rule_el in ean_ucc_el.find("Rules").findall("Rule"):
length = int(rule_el.find("Length").text)
def __find_reg_group(self, isbn_13: str, gs1_prefix: str) -> Optional[str]:
if self.__element_tree is None:
self.__element_tree = ElementTree.parse(self.__range_file_path)
ucc_prefixes_el = self.__element_tree.find("EAN.UCCPrefixes")
if ucc_prefixes_el is None:
return None
for ean_ucc_el in ucc_prefixes_el.findall("EAN.UCC"):
if (
prefix_el := ean_ucc_el.find("Prefix")
) is not None and prefix_el.text == gs1_prefix:
for rule_el in _get_rules(ean_ucc_el):
length_el = rule_el.find("Length")
if length_el is None:
continue
length = int(text) if (text := length_el.text) else 0
if length == 0:
continue
reg_grp_range = [
int(x[:length]) for x in rule_el.find("Range").text.split("-")
]
range_el = rule_el.find("Range")
if range_el is None or range_el.text is None:
continue
reg_grp_range = [int(x[:length]) for x in range_el.text.split("-")]
reg_group = isbn_13[len(gs1_prefix) : len(gs1_prefix) + length]
if reg_grp_range[0] <= int(reg_group) <= reg_grp_range[1]:
return reg_group
return None
return None
def __find_registrant(self, isbn_13, gs1_prefix, reg_group):
def __find_registrant(
self, isbn_13: str, gs1_prefix: str, reg_group: str
) -> Optional[str]:
from_ind = len(gs1_prefix) + len(reg_group)
for group_el in self.__element_tree.find("RegistrationGroups").findall("Group"):
if group_el.find("Prefix").text == "-".join((gs1_prefix, reg_group)):
for rule_el in group_el.find("Rules").findall("Rule"):
length = int(rule_el.find("Length").text)
if self.__element_tree is None:
self.__element_tree = ElementTree.parse(self.__range_file_path)
reg_groups_el = self.__element_tree.find("RegistrationGroups")
if reg_groups_el is None:
return None
for group_el in reg_groups_el.findall("Group"):
if (
prefix_el := group_el.find("Prefix")
) is not None and prefix_el.text == "-".join((gs1_prefix, reg_group)):
for rule_el in _get_rules(group_el):
length_el = rule_el.find("Length")
if length_el is None:
continue
length = int(text) if (text := length_el.text) else 0
if length == 0:
continue
range_el = rule_el.find("Range")
if range_el is None or range_el.text is None:
continue
registrant_range = [
int(x[:length]) for x in rule_el.find("Range").text.split("-")
int(x[:length]) for x in range_el.text.split("-")
]
registrant = isbn_13[from_ind : from_ind + length]
if registrant_range[0] <= int(registrant) <= registrant_range[1]:

View file

@ -1,5 +1,7 @@
""" bookwyrm settings and configuration """
import os
from typing import AnyStr
from environs import Env
import requests
@ -37,7 +39,7 @@ EMAIL_SENDER_DOMAIN = env("EMAIL_SENDER_DOMAIN", DOMAIN)
EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}"
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
BASE_DIR: AnyStr = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LOCALE_PATHS = [
os.path.join(BASE_DIR, "locale"),
]

View file

@ -13,6 +13,9 @@ implicit_reexport = True
[mypy-bookwyrm.connectors.*]
ignore_errors = False
[mypy-bookwyrm.isbn.*]
ignore_errors = False
[mypy-celerywyrm.*]
ignore_errors = False