diff --git a/bookwyrm/isbn/isbn.py b/bookwyrm/isbn/isbn.py index e07d2100d..4cc7f47dd 100644 --- a/bookwyrm/isbn/isbn.py +++ b/bookwyrm/isbn/isbn.py @@ -1,11 +1,20 @@ """ Use the range message from isbn-international to hyphenate ISBNs """ import os +from typing import Optional from xml.etree import ElementTree +from xml.etree.ElementTree import Element + import requests from bookwyrm import settings +def _get_rules(element: Element) -> list[Element]: + if (rules_el := element.find("Rules")) is not None: + return rules_el.findall("Rule") + return [] + + class IsbnHyphenator: """Class to manage the range message xml file and use it to hyphenate ISBNs""" @@ -15,58 +24,94 @@ class IsbnHyphenator: ) __element_tree = None - def update_range_message(self): + def update_range_message(self) -> None: """Download the range message xml file and save it locally""" response = requests.get(self.__range_message_url) with open(self.__range_file_path, "w", encoding="utf-8") as file: file.write(response.text) self.__element_tree = None - def hyphenate(self, isbn_13): + def hyphenate(self, isbn_13: Optional[str]) -> Optional[str]: """hyphenate the given ISBN-13 number using the range message""" if isbn_13 is None: return None + if self.__element_tree is None: self.__element_tree = ElementTree.parse(self.__range_file_path) + gs1_prefix = isbn_13[:3] reg_group = self.__find_reg_group(isbn_13, gs1_prefix) if reg_group is None: return isbn_13 # failed to hyphenate + registrant = self.__find_registrant(isbn_13, gs1_prefix, reg_group) if registrant is None: return isbn_13 # failed to hyphenate + publication = isbn_13[len(gs1_prefix) + len(reg_group) + len(registrant) : -1] check_digit = isbn_13[-1:] return "-".join((gs1_prefix, reg_group, registrant, publication, check_digit)) - def __find_reg_group(self, isbn_13, gs1_prefix): - for ean_ucc_el in self.__element_tree.find("EAN.UCCPrefixes").findall( - "EAN.UCC" - ): - if ean_ucc_el.find("Prefix").text == gs1_prefix: - for rule_el in ean_ucc_el.find("Rules").findall("Rule"): - length = int(rule_el.find("Length").text) + def __find_reg_group(self, isbn_13: str, gs1_prefix: str) -> Optional[str]: + if self.__element_tree is None: + self.__element_tree = ElementTree.parse(self.__range_file_path) + + ucc_prefixes_el = self.__element_tree.find("EAN.UCCPrefixes") + if ucc_prefixes_el is None: + return None + + for ean_ucc_el in ucc_prefixes_el.findall("EAN.UCC"): + if ( + prefix_el := ean_ucc_el.find("Prefix") + ) is not None and prefix_el.text == gs1_prefix: + for rule_el in _get_rules(ean_ucc_el): + length_el = rule_el.find("Length") + if length_el is None: + continue + length = int(text) if (text := length_el.text) else 0 if length == 0: continue - reg_grp_range = [ - int(x[:length]) for x in rule_el.find("Range").text.split("-") - ] + + range_el = rule_el.find("Range") + if range_el is None or range_el.text is None: + continue + + reg_grp_range = [int(x[:length]) for x in range_el.text.split("-")] reg_group = isbn_13[len(gs1_prefix) : len(gs1_prefix) + length] if reg_grp_range[0] <= int(reg_group) <= reg_grp_range[1]: return reg_group return None return None - def __find_registrant(self, isbn_13, gs1_prefix, reg_group): + def __find_registrant( + self, isbn_13: str, gs1_prefix: str, reg_group: str + ) -> Optional[str]: from_ind = len(gs1_prefix) + len(reg_group) - for group_el in self.__element_tree.find("RegistrationGroups").findall("Group"): - if group_el.find("Prefix").text == "-".join((gs1_prefix, reg_group)): - for rule_el in group_el.find("Rules").findall("Rule"): - length = int(rule_el.find("Length").text) + + if self.__element_tree is None: + self.__element_tree = ElementTree.parse(self.__range_file_path) + + reg_groups_el = self.__element_tree.find("RegistrationGroups") + if reg_groups_el is None: + return None + + for group_el in reg_groups_el.findall("Group"): + if ( + prefix_el := group_el.find("Prefix") + ) is not None and prefix_el.text == "-".join((gs1_prefix, reg_group)): + for rule_el in _get_rules(group_el): + length_el = rule_el.find("Length") + if length_el is None: + continue + length = int(text) if (text := length_el.text) else 0 if length == 0: continue + + range_el = rule_el.find("Range") + if range_el is None or range_el.text is None: + continue registrant_range = [ - int(x[:length]) for x in rule_el.find("Range").text.split("-") + int(x[:length]) for x in range_el.text.split("-") ] registrant = isbn_13[from_ind : from_ind + length] if registrant_range[0] <= int(registrant) <= registrant_range[1]: diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 8cb47e5c8..d0c3c7fd3 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -217,6 +217,13 @@ class Book(BookDataModel): """editions and works both use "book" instead of model_name""" return f"https://{DOMAIN}/book/{self.id}" + def guess_sort_title(self): + """Get a best-guess sort title for the current book""" + articles = chain( + *(LANGUAGE_ARTICLES.get(language, ()) for language in tuple(self.languages)) + ) + return re.sub(f'^{" |^".join(articles)} ', "", str(self.title).lower()) + def __repr__(self): # pylint: disable=consider-using-f-string return "<{} key={!r} title={!r}>".format( @@ -374,16 +381,7 @@ class Edition(Book): # Create sort title by removing articles from title if self.sort_title in [None, ""]: - if self.sort_title in [None, ""]: - articles = chain( - *( - LANGUAGE_ARTICLES.get(language, ()) - for language in tuple(self.languages) - ) - ) - self.sort_title = re.sub( - f'^{" |^".join(articles)} ', "", str(self.title).lower() - ) + self.sort_title = self.guess_sort_title() return super().save(*args, **kwargs) diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index e51f2ba07..5d6109468 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -1,5 +1,6 @@ """ models for storing different kinds of Activities """ from dataclasses import MISSING +from typing import Optional import re from django.apps import apps @@ -269,7 +270,7 @@ class GeneratedNote(Status): """indicate the book in question for mastodon (or w/e) users""" message = self.content books = ", ".join( - f'"{book.title}"' + f'{book.title}' for book in self.mention_books.all() ) return f"{self.user.display_name} {message} {books}" @@ -320,17 +321,14 @@ class Comment(BookStatus): @property def pure_content(self): """indicate the book in question for mastodon (or w/e) users""" - if self.progress_mode == "PG" and self.progress and (self.progress > 0): - return_value = ( - f'{self.content}
(comment on ' - f'"{self.book.title}", page {self.progress})
' - ) - else: - return_value = ( - f'{self.content}(comment on ' - f'"{self.book.title}")
' - ) - return return_value + progress = self.progress or 0 + citation = ( + f'comment on ' + f"{self.book.title}" + ) + if self.progress_mode == "PG" and progress > 0: + citation += f", p. {progress}" + return f"{self.content}({citation})
" activity_serializer = activitypub.Comment @@ -354,22 +352,24 @@ class Quotation(BookStatus): blank=True, ) + def _format_position(self) -> Optional[str]: + """serialize page position""" + beg = self.position + end = self.endposition or 0 + if self.position_mode != "PG" or not beg: + return None + return f"pp. {beg}-{end}" if end > beg else f"p. {beg}" + @property def pure_content(self): """indicate the book in question for mastodon (or w/e) users""" quote = re.sub(r"^", '
"', self.quote) quote = re.sub(r"
$", '"', quote) - if self.position_mode == "PG" and self.position and (self.position > 0): - return_value = ( - f'{quote}-- ' - f'"{self.book.title}", page {self.position}
{self.content}' - ) - else: - return_value = ( - f'{quote} {self.content}' - ) - return return_value + title, href = self.book.title, self.book.remote_id + citation = f'— {title}' + if position := self._format_position(): + citation += f", {position}" + return f"{quote}{citation}
{self.content}" activity_serializer = activitypub.Quotation diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index 829ddaef7..9a4c9b5a4 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -1,5 +1,7 @@ """ bookwyrm settings and configuration """ import os +from typing import AnyStr + from environs import Env import requests @@ -12,7 +14,7 @@ from django.core.exceptions import ImproperlyConfigured env = Env() env.read_env() DOMAIN = env("DOMAIN") -VERSION = "0.6.4" +VERSION = "0.6.5" RELEASE_API = env( "RELEASE_API", @@ -37,7 +39,7 @@ EMAIL_SENDER_DOMAIN = env("EMAIL_SENDER_DOMAIN", DOMAIN) EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}" # Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +BASE_DIR: AnyStr = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) LOCALE_PATHS = [ os.path.join(BASE_DIR, "locale"), ] diff --git a/bookwyrm/static/js/autocomplete.js b/bookwyrm/static/js/autocomplete.js index 84474e43c..a98cd9634 100644 --- a/bookwyrm/static/js/autocomplete.js +++ b/bookwyrm/static/js/autocomplete.js @@ -106,7 +106,7 @@ const tries = { e: { p: { u: { - b: "ePub", + b: "EPUB", }, }, }, diff --git a/bookwyrm/templates/book/edit/edit_book_form.html b/bookwyrm/templates/book/edit/edit_book_form.html index 23cc6d097..4cc3965e7 100644 --- a/bookwyrm/templates/book/edit/edit_book_form.html +++ b/bookwyrm/templates/book/edit/edit_book_form.html @@ -10,7 +10,7 @@ {% csrf_token %} -{% if form.parent_work %} +{% if book.parent_work.id or form.parent_work %} {% endif %} diff --git a/bookwyrm/templates/book/editions/editions.html b/bookwyrm/templates/book/editions/editions.html index aa2b68bdb..e1766d1e1 100644 --- a/bookwyrm/templates/book/editions/editions.html +++ b/bookwyrm/templates/book/editions/editions.html @@ -5,7 +5,7 @@ {% block content %}(comment on "Test Edition")
', + ( + "test content" + f'(comment on ' + "Test Edition, p. 27)
" + ), ) self.assertEqual(activity["attachment"][0]["type"], "Document") # self.assertTrue( @@ -295,7 +299,11 @@ class Status(TestCase): self.assertEqual(activity["type"], "Note") self.assertEqual( activity["content"], - f'a sickening sense test content', + ( + "a sickening sense " + f'test content" + ), ) self.assertEqual(activity["attachment"][0]["type"], "Document") self.assertTrue( @@ -306,6 +314,29 @@ class Status(TestCase): ) self.assertEqual(activity["attachment"][0]["name"], "Test Edition") + def test_quotation_page_serialization(self, *_): + """serialization of quotation page position""" + tests = [ + ("single pos", 7, None, "p. 7"), + ("page range", 7, 10, "pp. 7-10"), + ] + for desc, beg, end, pages in tests: + with self.subTest(desc): + status = models.Quotation.objects.create( + quote="my quote
", + content="", + user=self.local_user, + book=self.book, + position=beg, + endposition=end, + position_mode="PG", + ) + activity = status.to_activity(pure=True) + self.assertRegex( + activity["content"], + f'^"my quote"
$', + ) + def test_review_to_activity(self, *_): """subclass of the base model version with a "pure" serializer""" status = models.Review.objects.create( diff --git a/bookwyrm/tests/test_isbn.py b/bookwyrm/tests/test_isbn.py new file mode 100644 index 000000000..b528e9210 --- /dev/null +++ b/bookwyrm/tests/test_isbn.py @@ -0,0 +1,31 @@ +""" test ISBN hyphenator for books """ +from django.test import TestCase + +from bookwyrm.isbn.isbn import hyphenator_singleton as hyphenator + + +class TestISBN(TestCase): + """isbn hyphenator""" + + def test_isbn_hyphenation(self): + """different isbn hyphenations""" + # nothing + self.assertEqual(hyphenator.hyphenate(None), None) + # 978-0 (English language) 3700000-6389999 + self.assertEqual(hyphenator.hyphenate("9780439554930"), "978-0-439-55493-0") + # 978-2 (French language) 0000000-1999999 + self.assertEqual(hyphenator.hyphenate("9782070100927"), "978-2-07-010092-7") + # 978-3 (German language) 2000000-6999999 + self.assertEqual(hyphenator.hyphenate("9783518188125"), "978-3-518-18812-5") + # 978-4 (Japan) 0000000-1999999 + self.assertEqual(hyphenator.hyphenate("9784101050454"), "978-4-10-105045-4") + # 978-626 (Taiwan) 9500000-9999999 + self.assertEqual(hyphenator.hyphenate("9786269533251"), "978-626-95332-5-1") + # 979-8 (United States) 4000000-8499999 + self.assertEqual(hyphenator.hyphenate("9798627974040"), "979-8-6279-7404-0") + # 978-626 (Taiwan) 8000000-9499999 (unassigned) + self.assertEqual(hyphenator.hyphenate("9786268533251"), "9786268533251") + # 978 range 6600000-6999999 (unassigned) + self.assertEqual(hyphenator.hyphenate("9786769533251"), "9786769533251") + # 979-8 (United States) 2300000-3499999 (unassigned) + self.assertEqual(hyphenator.hyphenate("9798311111111"), "9798311111111") diff --git a/bookwyrm/urls.py b/bookwyrm/urls.py index 255acc911..d126df080 100644 --- a/bookwyrm/urls.py +++ b/bookwyrm/urls.py @@ -1,6 +1,7 @@ """ url routing for the app and api """ from django.conf.urls.static import static from django.contrib import admin +from django.contrib.staticfiles.urls import staticfiles_urlpatterns from django.urls import path, re_path from django.views.generic.base import TemplateView @@ -780,5 +781,8 @@ urlpatterns = [ path("guided-tour/