From 0354e53eea491c9aeff605133d709c67879f86cf Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Sun, 23 Jul 2023 20:50:44 +0200 Subject: [PATCH 01/40] Type annotations for utils --- bookwyrm/activitypub/base_activity.py | 21 +++-- bookwyrm/activitypub/book.py | 34 ++++---- bookwyrm/models/author.py | 4 +- bookwyrm/utils/cache.py | 9 +- bookwyrm/utils/isni.py | 116 ++++++++++++++++++-------- bookwyrm/utils/log.py | 2 +- bookwyrm/utils/sanitizer.py | 2 +- bookwyrm/utils/validate.py | 4 +- mypy.ini | 4 + 9 files changed, 130 insertions(+), 66 deletions(-) diff --git a/bookwyrm/activitypub/base_activity.py b/bookwyrm/activitypub/base_activity.py index c78b4f195..9e2545fc4 100644 --- a/bookwyrm/activitypub/base_activity.py +++ b/bookwyrm/activitypub/base_activity.py @@ -1,7 +1,10 @@ """ basics for an activitypub serializer """ +from __future__ import annotations from dataclasses import dataclass, fields, MISSING from json import JSONEncoder import logging +from typing import Optional, Type, TypeVar + import requests from django.apps import apps @@ -10,6 +13,7 @@ from django.utils.http import http_date from bookwyrm import models from bookwyrm.connectors import ConnectorException, get_data +from bookwyrm.models.base_model import BookWyrmModel from bookwyrm.signatures import make_signature from bookwyrm.settings import DOMAIN, INSTANCE_ACTOR_USERNAME from bookwyrm.tasks import app, MISC @@ -58,6 +62,9 @@ def naive_parse(activity_objects, activity_json, serializer=None): return serializer(activity_objects=activity_objects, **activity_json) +TModel = TypeVar("TModel", bound=BookWyrmModel) + + @dataclass(init=False) class ActivityObject: """actor activitypub json""" @@ -101,13 +108,13 @@ class ActivityObject: # pylint: disable=too-many-locals,too-many-branches,too-many-arguments def to_model( self, - model=None, - instance=None, - allow_create=True, - save=True, - overwrite=True, - allow_external_connections=True, - ): + model: Optional[Type[TModel]] = None, + instance: Optional[TModel] = None, + allow_create: bool = True, + save: bool = True, + overwrite: bool = True, + allow_external_connections: bool = True, + ) -> Optional[TModel]: """convert from an activity to a model instance. Args: model: the django model that this object is being converted to (will guess if not known) diff --git a/bookwyrm/activitypub/book.py b/bookwyrm/activitypub/book.py index d3aca4471..31543fcac 100644 --- a/bookwyrm/activitypub/book.py +++ b/bookwyrm/activitypub/book.py @@ -1,6 +1,6 @@ """ book and author data """ from dataclasses import dataclass, field -from typing import List +from typing import List, Optional from .base_activity import ActivityObject from .image import Document @@ -11,17 +11,17 @@ from .image import Document class BookData(ActivityObject): """shared fields for all book data and authors""" - openlibraryKey: str = None - inventaireId: str = None - librarythingKey: str = None - goodreadsKey: str = None - bnfId: str = None - viaf: str = None - wikidata: str = None - asin: str = None - aasin: str = None - isfdb: str = None - lastEditedBy: str = None + openlibraryKey: Optional[str] = None + inventaireId: Optional[str] = None + librarythingKey: Optional[str] = None + goodreadsKey: Optional[str] = None + bnfId: Optional[str] = None + viaf: Optional[str] = None + wikidata: Optional[str] = None + asin: Optional[str] = None + aasin: Optional[str] = None + isfdb: Optional[str] = None + lastEditedBy: Optional[str] = None links: List[str] = field(default_factory=lambda: []) fileLinks: List[str] = field(default_factory=lambda: []) @@ -83,11 +83,11 @@ class Author(BookData): """author of a book""" name: str - isni: str = None - viafId: str = None - gutenbergId: str = None - born: str = None - died: str = None + isni: Optional[str] = None + viafId: Optional[str] = None + gutenbergId: Optional[str] = None + born: Optional[str] = None + died: Optional[str] = None aliases: List[str] = field(default_factory=lambda: []) bio: str = "" wikipediaLink: str = "" diff --git a/bookwyrm/models/author.py b/bookwyrm/models/author.py index 5c0c087b2..981e3c0cc 100644 --- a/bookwyrm/models/author.py +++ b/bookwyrm/models/author.py @@ -1,5 +1,7 @@ """ database schema for info about authors """ import re +from typing import Tuple, Any + from django.contrib.postgres.indexes import GinIndex from django.db import models @@ -38,7 +40,7 @@ class Author(BookDataModel): ) bio = fields.HtmlField(null=True, blank=True) - def save(self, *args, **kwargs): + def save(self, *args: Tuple[Any, ...], **kwargs: dict[str, Any]) -> None: """normalize isni format""" if self.isni: self.isni = re.sub(r"\s", "", self.isni) diff --git a/bookwyrm/utils/cache.py b/bookwyrm/utils/cache.py index aebb8e754..5e896e621 100644 --- a/bookwyrm/utils/cache.py +++ b/bookwyrm/utils/cache.py @@ -1,8 +1,15 @@ """ Custom handler for caching """ +from typing import Any, Callable, Tuple, Union + from django.core.cache import cache -def get_or_set(cache_key, function, *args, timeout=None): +def get_or_set( + cache_key: str, + function: Callable[..., Any], + *args: Tuple[Any, ...], + timeout: Union[float, None] = None +) -> Any: """Django's built-in get_or_set isn't cutting it""" value = cache.get(cache_key) if value is None: diff --git a/bookwyrm/utils/isni.py b/bookwyrm/utils/isni.py index 318de30ef..6c4200660 100644 --- a/bookwyrm/utils/isni.py +++ b/bookwyrm/utils/isni.py @@ -1,15 +1,24 @@ """ISNI author checking utilities""" import xml.etree.ElementTree as ET +from typing import Union, Optional + import requests from bookwyrm import activitypub, models -def request_isni_data(search_index, search_term, max_records=5): +def get_element_text(element: Optional[ET.Element]) -> str: + """If the element is not None and there is a text attribute return this""" + if element is not None and element.text is not None: + return element.text + return "" + + +def request_isni_data(search_index: str, search_term: str, max_records: int = 5) -> str: """Request data from the ISNI API""" search_string = f'{search_index}="{search_term}"' - query_params = { + query_params: dict[str, Union[str, int]] = { "query": search_string, "version": "1.1", "operation": "searchRetrieve", @@ -26,41 +35,52 @@ def request_isni_data(search_index, search_term, max_records=5): return result.text -def make_name_string(element): +def make_name_string(element: ET.Element) -> str: """create a string of form 'personal_name surname'""" # NOTE: this will often be incorrect, many naming systems # list "surname" before personal name forename = element.find(".//forename") surname = element.find(".//surname") - if forename is not None: - return "".join([forename.text, " ", surname.text]) - return surname.text + + forename_text = get_element_text(forename) + surname_text = get_element_text(surname) + + return "".join( + [forename_text, " " if forename_text and surname_text else "", surname_text] + ) -def get_other_identifier(element, code): +def get_other_identifier(element: ET.Element, code: str) -> str: """Get other identifiers associated with an author from their ISNI record""" identifiers = element.findall(".//otherIdentifierOfIdentity") for section_head in identifiers: if ( - section_head.find(".//type") is not None - and section_head.find(".//type").text == code - and section_head.find(".//identifier") is not None + (section_type := section_head.find(".//type")) is not None + and section_type.text is not None + and section_type.text == code + and (identifier := section_head.find(".//identifier")) is not None + and identifier.text is not None ): - return section_head.find(".//identifier").text + return identifier.text # if we can't find it in otherIdentifierOfIdentity, # try sources for source in element.findall(".//sources"): - code_of_source = source.find(".//codeOfSource") - if code_of_source is not None and code_of_source.text.lower() == code.lower(): - return source.find(".//sourceIdentifier").text + if ( + (code_of_source := source.find(".//codeOfSource")) is not None + and code_of_source.text is not None + and code_of_source.text.lower() == code.lower() + and (source_identifier := source.find(".//sourceIdentifier")) is not None + and source_identifier.text is not None + ): + return source_identifier.text return "" -def get_external_information_uri(element, match_string): +def get_external_information_uri(element: ET.Element, match_string: str) -> str: """Get URLs associated with an author from their ISNI record""" sources = element.findall(".//externalInformation") @@ -69,14 +89,18 @@ def get_external_information_uri(element, match_string): uri = source.find(".//URI") if ( uri is not None + and uri.text is not None and information is not None + and information.text is not None and information.text.lower() == match_string.lower() ): return uri.text return "" -def find_authors_by_name(name_string, description=False): +def find_authors_by_name( + name_string: str, description: bool = False +) -> list[activitypub.Author]: """Query the ISNI database for possible author matches by name""" payload = request_isni_data("pica.na", name_string) @@ -92,7 +116,11 @@ def find_authors_by_name(name_string, description=False): if not personal_name: continue - author = get_author_from_isni(element.find(".//isniUnformatted").text) + author = get_author_from_isni( + get_element_text(element.find(".//isniUnformatted")) + ) + if author is None: + continue if bool(description): @@ -111,22 +139,23 @@ def find_authors_by_name(name_string, description=False): # some of the "titles" in ISNI are a little ...iffy # @ is used by ISNI/OCLC to index the starting point ignoring stop words # (e.g. "The @Government of no one") - title_elements = [ - e - for e in titles - if hasattr(e, "text") and not e.text.replace("@", "").isnumeric() - ] - if len(title_elements): - author.bio = title_elements[0].text.replace("@", "") - else: - author.bio = None + author.bio = "" + for title in titles: + if ( + title is not None + and hasattr(title, "text") + and title.text is not None + and not title.text.replace("@", "").isnumeric() + ): + author.bio = title.text.replace("@", "") + break possible_authors.append(author) return possible_authors -def get_author_from_isni(isni): +def get_author_from_isni(isni: str) -> Optional[activitypub.Author]: """Find data to populate a new author record from their ISNI""" payload = request_isni_data("pica.isn", isni) @@ -135,25 +164,32 @@ def get_author_from_isni(isni): # there should only be a single responseRecord # but let's use the first one just in case element = root.find(".//responseRecord") - name = make_name_string(element.find(".//forename/..")) + if element is None: + # TODO What if there is no responseRecord? + # return empty Author or None, or raise Exception? + return None + + name = ( + make_name_string(forename) + if (forename := element.find(".//forename/..")) is not None + else "" + ) viaf = get_other_identifier(element, "viaf") # use a set to dedupe aliases in ISNI aliases = set() aliases_element = element.findall(".//personalNameVariant") for entry in aliases_element: aliases.add(make_name_string(entry)) - # aliases needs to be list not set - aliases = list(aliases) - bio = element.find(".//nameTitle") - bio = bio.text if bio is not None else "" + bio = get_element_text(element.find(".//nameTitle")) wikipedia = get_external_information_uri(element, "Wikipedia") author = activitypub.Author( - id=element.find(".//isniURI").text, + id=get_element_text(element.find(".//isniURI")), name=name, isni=isni, viafId=viaf, - aliases=aliases, + # aliases needs to be list not set + aliases=list(aliases), bio=bio, wikipediaLink=wikipedia, ) @@ -161,21 +197,27 @@ def get_author_from_isni(isni): return author -def build_author_from_isni(match_value): +def build_author_from_isni(match_value: str) -> dict[str, activitypub.Author]: """Build basic author class object from ISNI URL""" # if it is an isni value get the data if match_value.startswith("https://isni.org/isni/"): isni = match_value.replace("https://isni.org/isni/", "") - return {"author": get_author_from_isni(isni)} + author = get_author_from_isni(isni) + if author is not None: + return {"author": author} # otherwise it's a name string return {} -def augment_author_metadata(author, isni): +def augment_author_metadata(author: models.Author, isni: str) -> None: """Update any missing author fields from ISNI data""" isni_author = get_author_from_isni(isni) + if isni_author is None: + # TODO Should we just return or raise an exception to indicate a problem? + return + isni_author.to_model(model=models.Author, instance=author, overwrite=False) # we DO want to overwrite aliases because we're adding them to the diff --git a/bookwyrm/utils/log.py b/bookwyrm/utils/log.py index 70f32ef03..7a4a2f898 100644 --- a/bookwyrm/utils/log.py +++ b/bookwyrm/utils/log.py @@ -10,7 +10,7 @@ class IgnoreVariableDoesNotExist(logging.Filter): these errors are not useful to us. """ - def filter(self, record): + def filter(self, record: logging.LogRecord) -> bool: if record.exc_info: (_, err_value, _) = record.exc_info while err_value: diff --git a/bookwyrm/utils/sanitizer.py b/bookwyrm/utils/sanitizer.py index 2fd0ff4ff..4f5132c9e 100644 --- a/bookwyrm/utils/sanitizer.py +++ b/bookwyrm/utils/sanitizer.py @@ -2,7 +2,7 @@ import bleach -def clean(input_text): +def clean(input_text: str) -> str: """Run through "bleach" """ return bleach.clean( input_text, diff --git a/bookwyrm/utils/validate.py b/bookwyrm/utils/validate.py index b91add3ad..ed1b00b0e 100644 --- a/bookwyrm/utils/validate.py +++ b/bookwyrm/utils/validate.py @@ -1,8 +1,10 @@ """Validations""" +from typing import Optional + from bookwyrm.settings import DOMAIN, USE_HTTPS -def validate_url_domain(url): +def validate_url_domain(url: str) -> Optional[str]: """Basic check that the URL starts with the instance domain name""" if not url: return None diff --git a/mypy.ini b/mypy.ini index 39f6863fe..ed7ad018d 100644 --- a/mypy.ini +++ b/mypy.ini @@ -10,6 +10,10 @@ django_settings_module = "bookwyrm.settings" ignore_errors = True implicit_reexport = True +[mypy-bookwyrm.utils.*] +ignore_errors = False +disallow_untyped_calls = False + [mypy-celerywyrm.*] ignore_errors = False From 66250e0dd8e3faf55d692f5e1f5b72c4b18b2289 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 6 Aug 2023 15:36:56 -0700 Subject: [PATCH 02/40] Consistent null states and page titles in user profile views --- bookwyrm/templates/lists/list_items.html | 3 +++ bookwyrm/templates/lists/lists.html | 2 -- bookwyrm/templates/user/goal.html | 6 +++++- bookwyrm/templates/user/groups.html | 9 +++++++++ bookwyrm/templates/user/lists.html | 5 +++++ bookwyrm/templates/user/reviews_comments.html | 6 ++++-- bookwyrm/templates/user/user.html | 2 +- 7 files changed, 27 insertions(+), 6 deletions(-) diff --git a/bookwyrm/templates/lists/list_items.html b/bookwyrm/templates/lists/list_items.html index 1191a6264..2c4ca1227 100644 --- a/bookwyrm/templates/lists/list_items.html +++ b/bookwyrm/templates/lists/list_items.html @@ -47,4 +47,7 @@ {% endfor %} + {% if not lists or not lists.exists %} +

{% trans "No lists found." %}

+ {% endif %} diff --git a/bookwyrm/templates/lists/lists.html b/bookwyrm/templates/lists/lists.html index db6cc45f3..9103d4705 100644 --- a/bookwyrm/templates/lists/lists.html +++ b/bookwyrm/templates/lists/lists.html @@ -43,7 +43,6 @@ {% endif %} -{% if lists %}
{% include 'lists/list_items.html' with lists=lists %}
@@ -51,7 +50,6 @@
{% include 'snippets/pagination.html' with page=lists path=path %}
-{% endif %} {% endblock %} diff --git a/bookwyrm/templates/user/goal.html b/bookwyrm/templates/user/goal.html index f3547243a..b032cacaa 100644 --- a/bookwyrm/templates/user/goal.html +++ b/bookwyrm/templates/user/goal.html @@ -1,6 +1,10 @@ {% extends 'user/layout.html' %} -{% load i18n %} {% load utilities %} +{% load i18n %} + +{% block title %} +{% trans "Reading Goal" %} - {{ user|username }} +{% endblock %} {% block header %}
diff --git a/bookwyrm/templates/user/groups.html b/bookwyrm/templates/user/groups.html index 133739149..1e00bfe33 100644 --- a/bookwyrm/templates/user/groups.html +++ b/bookwyrm/templates/user/groups.html @@ -1,6 +1,11 @@ {% extends 'user/layout.html' %} +{% load utilities %} {% load i18n %} +{% block title %} +{% trans "Groups" %} - {{ user|username }} +{% endblock %} + {% block header %}
@@ -30,6 +35,10 @@
{% include 'groups/user_groups.html' with memberships=memberships %} + + {% if not memberships or not memberships.exists %} +

{% trans "No groups found." %}

+ {% endif %}
{% include 'snippets/pagination.html' with page=user.memberships path=path %} diff --git a/bookwyrm/templates/user/lists.html b/bookwyrm/templates/user/lists.html index aecd644a6..8a99d757e 100755 --- a/bookwyrm/templates/user/lists.html +++ b/bookwyrm/templates/user/lists.html @@ -1,6 +1,11 @@ {% extends 'user/layout.html' %} +{% load utilities %} {% load i18n %} +{% block title %} +{% trans "Lists" %} - {{ user|username }} +{% endblock %} + {% block header %}
diff --git a/bookwyrm/templates/user/reviews_comments.html b/bookwyrm/templates/user/reviews_comments.html index d85f8f512..22ae12fa7 100644 --- a/bookwyrm/templates/user/reviews_comments.html +++ b/bookwyrm/templates/user/reviews_comments.html @@ -2,7 +2,9 @@ {% load i18n %} {% load utilities %} -{% block title %}{{ user.display_name }}{% endblock %} +{% block title %} +{% trans "Reviews and Comments" %} - {{ user|username }} +{% endblock %} {% block header %}
@@ -21,7 +23,7 @@ {% endfor %} {% if not activities %}
-

{% trans "No reviews or comments yet!" %}

+

{% trans "No reviews or comments yet!" %}

{% endif %} diff --git a/bookwyrm/templates/user/user.html b/bookwyrm/templates/user/user.html index 0d015760c..135ace04f 100755 --- a/bookwyrm/templates/user/user.html +++ b/bookwyrm/templates/user/user.html @@ -126,7 +126,7 @@ {% endfor %} {% if not activities %}
-

{% trans "No activities yet!" %}

+

{% trans "No activities yet!" %}

{% endif %} From 27c40ccf20c6d10c0b258e5db716bc1a61399c69 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 6 Aug 2023 14:39:40 -0700 Subject: [PATCH 03/40] Uses comma formatting on user follower/following display values --- bookwyrm/templates/user/user_preview.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bookwyrm/templates/user/user_preview.html b/bookwyrm/templates/user/user_preview.html index 4ae4e117f..97dfe4109 100755 --- a/bookwyrm/templates/user/user_preview.html +++ b/bookwyrm/templates/user/user_preview.html @@ -23,12 +23,12 @@

{% if request.user.id == user.id or admin_mode %} - {% blocktrans trimmed count counter=user.followers.count %} - {{ counter }} follower + {% blocktrans trimmed count counter=user.followers.count with display_count=user.followers.count|intcomma %} + {{ display_count }} follower {% plural %} - {{ counter }} followers + {{ display_count }} followers {% endblocktrans %}, - {% blocktrans trimmed with counter=user.following.count %} + {% blocktrans trimmed with counter=user.following.count|intcomma %} {{ counter }} following {% endblocktrans %} From e8949bbffdd0063068a44e75e433e8128750b3bd Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 6 Aug 2023 15:17:29 -0700 Subject: [PATCH 04/40] Make sure defaults are set on directory filters --- bookwyrm/templates/directory/community_filter.html | 4 ++-- bookwyrm/templates/directory/sort_filter.html | 4 ++-- bookwyrm/views/directory.py | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/bookwyrm/templates/directory/community_filter.html b/bookwyrm/templates/directory/community_filter.html index 91783fd36..3d101fcee 100644 --- a/bookwyrm/templates/directory/community_filter.html +++ b/bookwyrm/templates/directory/community_filter.html @@ -4,11 +4,11 @@ {% block filter %} {% trans "Community" %} {% endblock %} diff --git a/bookwyrm/templates/directory/sort_filter.html b/bookwyrm/templates/directory/sort_filter.html index 344366016..5de7d6a10 100644 --- a/bookwyrm/templates/directory/sort_filter.html +++ b/bookwyrm/templates/directory/sort_filter.html @@ -6,8 +6,8 @@

diff --git a/bookwyrm/views/directory.py b/bookwyrm/views/directory.py index 81898af26..7b2ee78b5 100644 --- a/bookwyrm/views/directory.py +++ b/bookwyrm/views/directory.py @@ -19,7 +19,7 @@ class Directory(View): software = request.GET.get("software") if not software or software == "bookwyrm": filters["bookwyrm_user"] = True - scope = request.GET.get("scope") + scope = request.GET.get("scope", "federated") if scope == "local": filters["local"] = True @@ -38,6 +38,8 @@ class Directory(View): page.number, on_each_side=2, on_ends=1 ), "users": page, + "sort": sort, + "scope": scope, } return TemplateResponse(request, "directory/directory.html", data) From d9f6449767bfc1f9fea9e8fc353d856bdd5d8ab8 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 6 Aug 2023 17:52:23 -0700 Subject: [PATCH 05/40] Pre-populate sort title in edit book form if not provided It's confusing to edit a book when this isn't set, so this provides the best-guess version of the sort title if there isn't one provided, and allows the user to change it as needed. --- bookwyrm/models/book.py | 17 ++++++++--------- bookwyrm/views/books/edit_book.py | 4 ++++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 8cb47e5c8..a53321b26 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -217,6 +217,13 @@ class Book(BookDataModel): """editions and works both use "book" instead of model_name""" return f"https://{DOMAIN}/book/{self.id}" + def guess_sort_title(self): + """Get a best-guess sort title for the current book""" + articles = chain( + *(LANGUAGE_ARTICLES.get(language, ()) for language in tuple(self.languages)) + ) + return re.sub(f'^{" |^".join(articles)} ', "", str(self.title).lower()) + def __repr__(self): # pylint: disable=consider-using-f-string return "<{} key={!r} title={!r}>".format( @@ -375,15 +382,7 @@ class Edition(Book): # Create sort title by removing articles from title if self.sort_title in [None, ""]: if self.sort_title in [None, ""]: - articles = chain( - *( - LANGUAGE_ARTICLES.get(language, ()) - for language in tuple(self.languages) - ) - ) - self.sort_title = re.sub( - f'^{" |^".join(articles)} ', "", str(self.title).lower() - ) + self.sort_title = self.guess_sort_title() return super().save(*args, **kwargs) diff --git a/bookwyrm/views/books/edit_book.py b/bookwyrm/views/books/edit_book.py index 2a7f36dbb..ae492374f 100644 --- a/bookwyrm/views/books/edit_book.py +++ b/bookwyrm/views/books/edit_book.py @@ -32,6 +32,9 @@ class EditBook(View): def get(self, request, book_id): """info about a book""" book = get_edition(book_id) + # This doesn't update the sort title, just pre-populates it in the form + if book.sort_title in ["", None]: + book.sort_title = book.guess_sort_title() if not book.description: book.description = book.parent_work.description data = {"book": book, "form": forms.EditionForm(instance=book)} @@ -40,6 +43,7 @@ class EditBook(View): def post(self, request, book_id): """edit a book cool""" book = get_object_or_404(models.Edition, id=book_id) + form = forms.EditionForm(request.POST, request.FILES, instance=book) data = {"book": book, "form": form} From a05942fe15981f77761822ca1cc691d4658d76e5 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 6 Aug 2023 18:23:57 -0700 Subject: [PATCH 06/40] Allow searching for local users when logged out --- bookwyrm/views/search.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/bookwyrm/views/search.py b/bookwyrm/views/search.py index bc3b2aa57..2b7303fd7 100644 --- a/bookwyrm/views/search.py +++ b/bookwyrm/views/search.py @@ -91,18 +91,15 @@ def book_search(request): def user_search(request): - """cool kids members only user search""" + """user search: search for a user""" viewer = request.user query = request.GET.get("q") query = query.strip() data = {"type": "user", "query": query} - # logged out viewers can't search users - if not viewer.is_authenticated: - return TemplateResponse(request, "search/user.html", data) # use webfinger for mastodon style account@domain.com username to load the user if # they don't exist locally (handle_remote_webfinger will check the db) - if re.match(regex.FULL_USERNAME, query): + if re.match(regex.FULL_USERNAME, query) and viewer.is_authenticated: handle_remote_webfinger(query) results = ( @@ -118,6 +115,11 @@ def user_search(request): ) .order_by("-similarity") ) + + # don't expose remote users + if not viewer.is_authenticated: + results = results.filter(local=True) + paginated = Paginator(results, PAGE_LENGTH) page = paginated.get_page(request.GET.get("page")) data["results"] = page From 185486c6fcae240ca279c366e65454176d9e5510 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 6 Aug 2023 19:35:50 -0700 Subject: [PATCH 07/40] Uses {% empty %} instead of if statements --- bookwyrm/templates/groups/user_groups.html | 2 ++ bookwyrm/templates/lists/list_items.html | 5 ++--- bookwyrm/templates/user/groups.html | 4 ---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/bookwyrm/templates/groups/user_groups.html b/bookwyrm/templates/groups/user_groups.html index cc27ce42d..fb07e238a 100644 --- a/bookwyrm/templates/groups/user_groups.html +++ b/bookwyrm/templates/groups/user_groups.html @@ -31,5 +31,7 @@
+ {% empty %} +

{% trans "No groups found." %}

{% endfor %}
diff --git a/bookwyrm/templates/lists/list_items.html b/bookwyrm/templates/lists/list_items.html index 2c4ca1227..cff4c16c1 100644 --- a/bookwyrm/templates/lists/list_items.html +++ b/bookwyrm/templates/lists/list_items.html @@ -46,8 +46,7 @@
- {% endfor %} - {% if not lists or not lists.exists %} + {% empty %}

{% trans "No lists found." %}

- {% endif %} + {% endfor %} diff --git a/bookwyrm/templates/user/groups.html b/bookwyrm/templates/user/groups.html index 1e00bfe33..d3b48c849 100644 --- a/bookwyrm/templates/user/groups.html +++ b/bookwyrm/templates/user/groups.html @@ -35,10 +35,6 @@ {% include 'groups/user_groups.html' with memberships=memberships %} - - {% if not memberships or not memberships.exists %} -

{% trans "No groups found." %}

- {% endif %}
{% include 'snippets/pagination.html' with page=user.memberships path=path %} From 63b60ad62c56f6db3d0d38cf9c47ef8d0cd4182c Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sun, 6 Aug 2023 19:40:59 -0700 Subject: [PATCH 08/40] Removes "all books" link from profile when there are none --- bookwyrm/templates/user/user.html | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bookwyrm/templates/user/user.html b/bookwyrm/templates/user/user.html index 135ace04f..05a2eeabc 100755 --- a/bookwyrm/templates/user/user.html +++ b/bookwyrm/templates/user/user.html @@ -51,9 +51,15 @@ {% endfor %}
+ {% empty %} +

+ No books found. +

{% endfor %} + {% if shelves.exists %} {% trans "View all books" %} + {% endif %} {% endif %} @@ -119,16 +125,16 @@ {% endif %} + {% for activity in activities %}
{% include 'snippets/status/status.html' with status=activity %}
- {% endfor %} - {% if not activities %} + {% empty %}

{% trans "No activities yet!" %}

- {% endif %} + {% endfor %} {% include 'snippets/pagination.html' with page=activities path=user.local_path anchor="#feed" mode="chronological" %} From d7adada29c1f39a11983823fe533478ce0dfa257 Mon Sep 17 00:00:00 2001 From: Margaret Fero Date: Tue, 15 Aug 2023 22:53:43 -0700 Subject: [PATCH 09/40] Add Spanish Articles Added articles for spanish language to list of articles in settings --- bookwyrm/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index 829ddaef7..6c627c5c0 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -315,6 +315,7 @@ LANGUAGES = [ LANGUAGE_ARTICLES = { "English": {"the", "a", "an"}, + "Español (Spanish)": {"un", "una", "unos", "unas", "el", "la", "los", "las"}, } TIME_ZONE = "UTC" From 5ed1441ddb7e3a6e3bf364018d7d3ac52e4021fc Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sat, 19 Aug 2023 08:34:03 +1000 Subject: [PATCH 10/40] fix illegal values in redis jobs 1. populate_streams_get_audience This tries to set status_reply_parent_privacy as None if there is no status.reply_parent, but None is not a valid value for privacy. This doesn't appear to be breaking anything but does result in a lot of error messages in the logs. I have set this to equal the original status.privacy - this won't realy have any effect since it only happens when there is no parent, however we could set this to "direct" if we want to be highly cautious. 2. rerank_user_task Again, this doesn't seem to caused major issues, but is throwing errors if the user in question no longer exists for some reason. This commit checks whether 'user' exists before attempting to rerank. --- bookwyrm/activitystreams.py | 2 +- bookwyrm/suggested_users.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index 71f9e42d7..42f99e209 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -112,7 +112,7 @@ class ActivityStream(RedisStore): trace.get_current_span().set_attribute("status_privacy", status.privacy) trace.get_current_span().set_attribute( "status_reply_parent_privacy", - status.reply_parent.privacy if status.reply_parent else None, + status.reply_parent.privacy if status.reply_parent else status.privacy, ) # direct messages don't appear in feeds, direct comments/reviews/etc do if status.privacy == "direct" and status.status_type == "Note": diff --git a/bookwyrm/suggested_users.py b/bookwyrm/suggested_users.py index d897feff7..3e9bef9c4 100644 --- a/bookwyrm/suggested_users.py +++ b/bookwyrm/suggested_users.py @@ -254,7 +254,8 @@ def rerank_suggestions_task(user_id): def rerank_user_task(user_id, update_only=False): """do the hard work in celery""" user = models.User.objects.get(id=user_id) - suggested_users.rerank_obj(user, update_only=update_only) + if user: + suggested_users.rerank_obj(user, update_only=update_only) @app.task(queue=SUGGESTED_USERS) From 1e0fe6d7c8b7fb58a02dec6b7f5070f8fd3b28e9 Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Sat, 19 Aug 2023 15:06:57 -0700 Subject: [PATCH 11/40] Remove duplicate if statement --- bookwyrm/models/book.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index a53321b26..d0c3c7fd3 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -381,8 +381,7 @@ class Edition(Book): # Create sort title by removing articles from title if self.sort_title in [None, ""]: - if self.sort_title in [None, ""]: - self.sort_title = self.guess_sort_title() + self.sort_title = self.guess_sort_title() return super().save(*args, **kwargs) From ff8e4597e5192f197c15b02500d6d1b22afe25cb Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Sun, 23 Jul 2023 20:50:44 +0200 Subject: [PATCH 12/40] Type annotations for utils --- bookwyrm/models/author.py | 4 +- bookwyrm/utils/cache.py | 9 ++- bookwyrm/utils/isni.py | 116 +++++++++++++++++++++++++------------ bookwyrm/utils/log.py | 2 +- bookwyrm/utils/validate.py | 4 +- mypy.ini | 4 ++ 6 files changed, 98 insertions(+), 41 deletions(-) diff --git a/bookwyrm/models/author.py b/bookwyrm/models/author.py index 5c0c087b2..981e3c0cc 100644 --- a/bookwyrm/models/author.py +++ b/bookwyrm/models/author.py @@ -1,5 +1,7 @@ """ database schema for info about authors """ import re +from typing import Tuple, Any + from django.contrib.postgres.indexes import GinIndex from django.db import models @@ -38,7 +40,7 @@ class Author(BookDataModel): ) bio = fields.HtmlField(null=True, blank=True) - def save(self, *args, **kwargs): + def save(self, *args: Tuple[Any, ...], **kwargs: dict[str, Any]) -> None: """normalize isni format""" if self.isni: self.isni = re.sub(r"\s", "", self.isni) diff --git a/bookwyrm/utils/cache.py b/bookwyrm/utils/cache.py index aebb8e754..5e896e621 100644 --- a/bookwyrm/utils/cache.py +++ b/bookwyrm/utils/cache.py @@ -1,8 +1,15 @@ """ Custom handler for caching """ +from typing import Any, Callable, Tuple, Union + from django.core.cache import cache -def get_or_set(cache_key, function, *args, timeout=None): +def get_or_set( + cache_key: str, + function: Callable[..., Any], + *args: Tuple[Any, ...], + timeout: Union[float, None] = None +) -> Any: """Django's built-in get_or_set isn't cutting it""" value = cache.get(cache_key) if value is None: diff --git a/bookwyrm/utils/isni.py b/bookwyrm/utils/isni.py index 318de30ef..6c4200660 100644 --- a/bookwyrm/utils/isni.py +++ b/bookwyrm/utils/isni.py @@ -1,15 +1,24 @@ """ISNI author checking utilities""" import xml.etree.ElementTree as ET +from typing import Union, Optional + import requests from bookwyrm import activitypub, models -def request_isni_data(search_index, search_term, max_records=5): +def get_element_text(element: Optional[ET.Element]) -> str: + """If the element is not None and there is a text attribute return this""" + if element is not None and element.text is not None: + return element.text + return "" + + +def request_isni_data(search_index: str, search_term: str, max_records: int = 5) -> str: """Request data from the ISNI API""" search_string = f'{search_index}="{search_term}"' - query_params = { + query_params: dict[str, Union[str, int]] = { "query": search_string, "version": "1.1", "operation": "searchRetrieve", @@ -26,41 +35,52 @@ def request_isni_data(search_index, search_term, max_records=5): return result.text -def make_name_string(element): +def make_name_string(element: ET.Element) -> str: """create a string of form 'personal_name surname'""" # NOTE: this will often be incorrect, many naming systems # list "surname" before personal name forename = element.find(".//forename") surname = element.find(".//surname") - if forename is not None: - return "".join([forename.text, " ", surname.text]) - return surname.text + + forename_text = get_element_text(forename) + surname_text = get_element_text(surname) + + return "".join( + [forename_text, " " if forename_text and surname_text else "", surname_text] + ) -def get_other_identifier(element, code): +def get_other_identifier(element: ET.Element, code: str) -> str: """Get other identifiers associated with an author from their ISNI record""" identifiers = element.findall(".//otherIdentifierOfIdentity") for section_head in identifiers: if ( - section_head.find(".//type") is not None - and section_head.find(".//type").text == code - and section_head.find(".//identifier") is not None + (section_type := section_head.find(".//type")) is not None + and section_type.text is not None + and section_type.text == code + and (identifier := section_head.find(".//identifier")) is not None + and identifier.text is not None ): - return section_head.find(".//identifier").text + return identifier.text # if we can't find it in otherIdentifierOfIdentity, # try sources for source in element.findall(".//sources"): - code_of_source = source.find(".//codeOfSource") - if code_of_source is not None and code_of_source.text.lower() == code.lower(): - return source.find(".//sourceIdentifier").text + if ( + (code_of_source := source.find(".//codeOfSource")) is not None + and code_of_source.text is not None + and code_of_source.text.lower() == code.lower() + and (source_identifier := source.find(".//sourceIdentifier")) is not None + and source_identifier.text is not None + ): + return source_identifier.text return "" -def get_external_information_uri(element, match_string): +def get_external_information_uri(element: ET.Element, match_string: str) -> str: """Get URLs associated with an author from their ISNI record""" sources = element.findall(".//externalInformation") @@ -69,14 +89,18 @@ def get_external_information_uri(element, match_string): uri = source.find(".//URI") if ( uri is not None + and uri.text is not None and information is not None + and information.text is not None and information.text.lower() == match_string.lower() ): return uri.text return "" -def find_authors_by_name(name_string, description=False): +def find_authors_by_name( + name_string: str, description: bool = False +) -> list[activitypub.Author]: """Query the ISNI database for possible author matches by name""" payload = request_isni_data("pica.na", name_string) @@ -92,7 +116,11 @@ def find_authors_by_name(name_string, description=False): if not personal_name: continue - author = get_author_from_isni(element.find(".//isniUnformatted").text) + author = get_author_from_isni( + get_element_text(element.find(".//isniUnformatted")) + ) + if author is None: + continue if bool(description): @@ -111,22 +139,23 @@ def find_authors_by_name(name_string, description=False): # some of the "titles" in ISNI are a little ...iffy # @ is used by ISNI/OCLC to index the starting point ignoring stop words # (e.g. "The @Government of no one") - title_elements = [ - e - for e in titles - if hasattr(e, "text") and not e.text.replace("@", "").isnumeric() - ] - if len(title_elements): - author.bio = title_elements[0].text.replace("@", "") - else: - author.bio = None + author.bio = "" + for title in titles: + if ( + title is not None + and hasattr(title, "text") + and title.text is not None + and not title.text.replace("@", "").isnumeric() + ): + author.bio = title.text.replace("@", "") + break possible_authors.append(author) return possible_authors -def get_author_from_isni(isni): +def get_author_from_isni(isni: str) -> Optional[activitypub.Author]: """Find data to populate a new author record from their ISNI""" payload = request_isni_data("pica.isn", isni) @@ -135,25 +164,32 @@ def get_author_from_isni(isni): # there should only be a single responseRecord # but let's use the first one just in case element = root.find(".//responseRecord") - name = make_name_string(element.find(".//forename/..")) + if element is None: + # TODO What if there is no responseRecord? + # return empty Author or None, or raise Exception? + return None + + name = ( + make_name_string(forename) + if (forename := element.find(".//forename/..")) is not None + else "" + ) viaf = get_other_identifier(element, "viaf") # use a set to dedupe aliases in ISNI aliases = set() aliases_element = element.findall(".//personalNameVariant") for entry in aliases_element: aliases.add(make_name_string(entry)) - # aliases needs to be list not set - aliases = list(aliases) - bio = element.find(".//nameTitle") - bio = bio.text if bio is not None else "" + bio = get_element_text(element.find(".//nameTitle")) wikipedia = get_external_information_uri(element, "Wikipedia") author = activitypub.Author( - id=element.find(".//isniURI").text, + id=get_element_text(element.find(".//isniURI")), name=name, isni=isni, viafId=viaf, - aliases=aliases, + # aliases needs to be list not set + aliases=list(aliases), bio=bio, wikipediaLink=wikipedia, ) @@ -161,21 +197,27 @@ def get_author_from_isni(isni): return author -def build_author_from_isni(match_value): +def build_author_from_isni(match_value: str) -> dict[str, activitypub.Author]: """Build basic author class object from ISNI URL""" # if it is an isni value get the data if match_value.startswith("https://isni.org/isni/"): isni = match_value.replace("https://isni.org/isni/", "") - return {"author": get_author_from_isni(isni)} + author = get_author_from_isni(isni) + if author is not None: + return {"author": author} # otherwise it's a name string return {} -def augment_author_metadata(author, isni): +def augment_author_metadata(author: models.Author, isni: str) -> None: """Update any missing author fields from ISNI data""" isni_author = get_author_from_isni(isni) + if isni_author is None: + # TODO Should we just return or raise an exception to indicate a problem? + return + isni_author.to_model(model=models.Author, instance=author, overwrite=False) # we DO want to overwrite aliases because we're adding them to the diff --git a/bookwyrm/utils/log.py b/bookwyrm/utils/log.py index 70f32ef03..7a4a2f898 100644 --- a/bookwyrm/utils/log.py +++ b/bookwyrm/utils/log.py @@ -10,7 +10,7 @@ class IgnoreVariableDoesNotExist(logging.Filter): these errors are not useful to us. """ - def filter(self, record): + def filter(self, record: logging.LogRecord) -> bool: if record.exc_info: (_, err_value, _) = record.exc_info while err_value: diff --git a/bookwyrm/utils/validate.py b/bookwyrm/utils/validate.py index b91add3ad..ed1b00b0e 100644 --- a/bookwyrm/utils/validate.py +++ b/bookwyrm/utils/validate.py @@ -1,8 +1,10 @@ """Validations""" +from typing import Optional + from bookwyrm.settings import DOMAIN, USE_HTTPS -def validate_url_domain(url): +def validate_url_domain(url: str) -> Optional[str]: """Basic check that the URL starts with the instance domain name""" if not url: return None diff --git a/mypy.ini b/mypy.ini index 2a29e314f..f938cc7b6 100644 --- a/mypy.ini +++ b/mypy.ini @@ -13,6 +13,10 @@ implicit_reexport = True [mypy-bookwyrm.connectors.*] ignore_errors = False +[mypy-bookwyrm.utils.*] +ignore_errors = False +disallow_untyped_calls = False + [mypy-celerywyrm.*] ignore_errors = False From 767cd146399fb40086eb2cd93bc2f7aa8512610b Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Mon, 21 Aug 2023 13:10:12 +0200 Subject: [PATCH 13/40] Stricter checks for bookwyrm.utils --- mypy.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/mypy.ini b/mypy.ini index f938cc7b6..8eee0ba62 100644 --- a/mypy.ini +++ b/mypy.ini @@ -15,7 +15,6 @@ ignore_errors = False [mypy-bookwyrm.utils.*] ignore_errors = False -disallow_untyped_calls = False [mypy-celerywyrm.*] ignore_errors = False From 0f2c0c034db515cb8201d09143fb264797e6d6e7 Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Mon, 21 Aug 2023 13:28:08 +0200 Subject: [PATCH 14/40] Removed TODOs. When data is invalid return None. --- bookwyrm/utils/isni.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bookwyrm/utils/isni.py b/bookwyrm/utils/isni.py index 6c4200660..e3b2c65f3 100644 --- a/bookwyrm/utils/isni.py +++ b/bookwyrm/utils/isni.py @@ -165,8 +165,6 @@ def get_author_from_isni(isni: str) -> Optional[activitypub.Author]: # but let's use the first one just in case element = root.find(".//responseRecord") if element is None: - # TODO What if there is no responseRecord? - # return empty Author or None, or raise Exception? return None name = ( @@ -215,8 +213,7 @@ def augment_author_metadata(author: models.Author, isni: str) -> None: isni_author = get_author_from_isni(isni) if isni_author is None: - # TODO Should we just return or raise an exception to indicate a problem? - return + return None isni_author.to_model(model=models.Author, instance=author, overwrite=False) From 2e88e73509f6b226e564697e2203981542603689 Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Mon, 21 Aug 2023 14:00:09 +0200 Subject: [PATCH 15/40] Remove returned None to make pylint happy --- bookwyrm/utils/isni.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/utils/isni.py b/bookwyrm/utils/isni.py index e3b2c65f3..0d0a86887 100644 --- a/bookwyrm/utils/isni.py +++ b/bookwyrm/utils/isni.py @@ -213,7 +213,7 @@ def augment_author_metadata(author: models.Author, isni: str) -> None: isni_author = get_author_from_isni(isni) if isni_author is None: - return None + return isni_author.to_model(model=models.Author, instance=author, overwrite=False) From 3760e3b45c0d1d81f07a501025f3d769f995b36a Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Mon, 21 Aug 2023 15:46:24 +0200 Subject: [PATCH 16/40] Tests for ISBN hyphenation --- bookwyrm/tests/test_isbn.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 bookwyrm/tests/test_isbn.py diff --git a/bookwyrm/tests/test_isbn.py b/bookwyrm/tests/test_isbn.py new file mode 100644 index 000000000..b528e9210 --- /dev/null +++ b/bookwyrm/tests/test_isbn.py @@ -0,0 +1,31 @@ +""" test ISBN hyphenator for books """ +from django.test import TestCase + +from bookwyrm.isbn.isbn import hyphenator_singleton as hyphenator + + +class TestISBN(TestCase): + """isbn hyphenator""" + + def test_isbn_hyphenation(self): + """different isbn hyphenations""" + # nothing + self.assertEqual(hyphenator.hyphenate(None), None) + # 978-0 (English language) 3700000-6389999 + self.assertEqual(hyphenator.hyphenate("9780439554930"), "978-0-439-55493-0") + # 978-2 (French language) 0000000-1999999 + self.assertEqual(hyphenator.hyphenate("9782070100927"), "978-2-07-010092-7") + # 978-3 (German language) 2000000-6999999 + self.assertEqual(hyphenator.hyphenate("9783518188125"), "978-3-518-18812-5") + # 978-4 (Japan) 0000000-1999999 + self.assertEqual(hyphenator.hyphenate("9784101050454"), "978-4-10-105045-4") + # 978-626 (Taiwan) 9500000-9999999 + self.assertEqual(hyphenator.hyphenate("9786269533251"), "978-626-95332-5-1") + # 979-8 (United States) 4000000-8499999 + self.assertEqual(hyphenator.hyphenate("9798627974040"), "979-8-6279-7404-0") + # 978-626 (Taiwan) 8000000-9499999 (unassigned) + self.assertEqual(hyphenator.hyphenate("9786268533251"), "9786268533251") + # 978 range 6600000-6999999 (unassigned) + self.assertEqual(hyphenator.hyphenate("9786769533251"), "9786769533251") + # 979-8 (United States) 2300000-3499999 (unassigned) + self.assertEqual(hyphenator.hyphenate("9798311111111"), "9798311111111") From f6d87861792e67bbc4819adeaf40f024e8502354 Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Mon, 21 Aug 2023 15:46:50 +0200 Subject: [PATCH 17/40] Type annotations for bookwyrm.isbn --- bookwyrm/isbn/isbn.py | 81 +++++++++++++++++++++++++++++++++---------- bookwyrm/settings.py | 4 ++- mypy.ini | 3 ++ 3 files changed, 69 insertions(+), 19 deletions(-) diff --git a/bookwyrm/isbn/isbn.py b/bookwyrm/isbn/isbn.py index e07d2100d..4cc7f47dd 100644 --- a/bookwyrm/isbn/isbn.py +++ b/bookwyrm/isbn/isbn.py @@ -1,11 +1,20 @@ """ Use the range message from isbn-international to hyphenate ISBNs """ import os +from typing import Optional from xml.etree import ElementTree +from xml.etree.ElementTree import Element + import requests from bookwyrm import settings +def _get_rules(element: Element) -> list[Element]: + if (rules_el := element.find("Rules")) is not None: + return rules_el.findall("Rule") + return [] + + class IsbnHyphenator: """Class to manage the range message xml file and use it to hyphenate ISBNs""" @@ -15,58 +24,94 @@ class IsbnHyphenator: ) __element_tree = None - def update_range_message(self): + def update_range_message(self) -> None: """Download the range message xml file and save it locally""" response = requests.get(self.__range_message_url) with open(self.__range_file_path, "w", encoding="utf-8") as file: file.write(response.text) self.__element_tree = None - def hyphenate(self, isbn_13): + def hyphenate(self, isbn_13: Optional[str]) -> Optional[str]: """hyphenate the given ISBN-13 number using the range message""" if isbn_13 is None: return None + if self.__element_tree is None: self.__element_tree = ElementTree.parse(self.__range_file_path) + gs1_prefix = isbn_13[:3] reg_group = self.__find_reg_group(isbn_13, gs1_prefix) if reg_group is None: return isbn_13 # failed to hyphenate + registrant = self.__find_registrant(isbn_13, gs1_prefix, reg_group) if registrant is None: return isbn_13 # failed to hyphenate + publication = isbn_13[len(gs1_prefix) + len(reg_group) + len(registrant) : -1] check_digit = isbn_13[-1:] return "-".join((gs1_prefix, reg_group, registrant, publication, check_digit)) - def __find_reg_group(self, isbn_13, gs1_prefix): - for ean_ucc_el in self.__element_tree.find("EAN.UCCPrefixes").findall( - "EAN.UCC" - ): - if ean_ucc_el.find("Prefix").text == gs1_prefix: - for rule_el in ean_ucc_el.find("Rules").findall("Rule"): - length = int(rule_el.find("Length").text) + def __find_reg_group(self, isbn_13: str, gs1_prefix: str) -> Optional[str]: + if self.__element_tree is None: + self.__element_tree = ElementTree.parse(self.__range_file_path) + + ucc_prefixes_el = self.__element_tree.find("EAN.UCCPrefixes") + if ucc_prefixes_el is None: + return None + + for ean_ucc_el in ucc_prefixes_el.findall("EAN.UCC"): + if ( + prefix_el := ean_ucc_el.find("Prefix") + ) is not None and prefix_el.text == gs1_prefix: + for rule_el in _get_rules(ean_ucc_el): + length_el = rule_el.find("Length") + if length_el is None: + continue + length = int(text) if (text := length_el.text) else 0 if length == 0: continue - reg_grp_range = [ - int(x[:length]) for x in rule_el.find("Range").text.split("-") - ] + + range_el = rule_el.find("Range") + if range_el is None or range_el.text is None: + continue + + reg_grp_range = [int(x[:length]) for x in range_el.text.split("-")] reg_group = isbn_13[len(gs1_prefix) : len(gs1_prefix) + length] if reg_grp_range[0] <= int(reg_group) <= reg_grp_range[1]: return reg_group return None return None - def __find_registrant(self, isbn_13, gs1_prefix, reg_group): + def __find_registrant( + self, isbn_13: str, gs1_prefix: str, reg_group: str + ) -> Optional[str]: from_ind = len(gs1_prefix) + len(reg_group) - for group_el in self.__element_tree.find("RegistrationGroups").findall("Group"): - if group_el.find("Prefix").text == "-".join((gs1_prefix, reg_group)): - for rule_el in group_el.find("Rules").findall("Rule"): - length = int(rule_el.find("Length").text) + + if self.__element_tree is None: + self.__element_tree = ElementTree.parse(self.__range_file_path) + + reg_groups_el = self.__element_tree.find("RegistrationGroups") + if reg_groups_el is None: + return None + + for group_el in reg_groups_el.findall("Group"): + if ( + prefix_el := group_el.find("Prefix") + ) is not None and prefix_el.text == "-".join((gs1_prefix, reg_group)): + for rule_el in _get_rules(group_el): + length_el = rule_el.find("Length") + if length_el is None: + continue + length = int(text) if (text := length_el.text) else 0 if length == 0: continue + + range_el = rule_el.find("Range") + if range_el is None or range_el.text is None: + continue registrant_range = [ - int(x[:length]) for x in rule_el.find("Range").text.split("-") + int(x[:length]) for x in range_el.text.split("-") ] registrant = isbn_13[from_ind : from_ind + length] if registrant_range[0] <= int(registrant) <= registrant_range[1]: diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index 829ddaef7..751ab5687 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -1,5 +1,7 @@ """ bookwyrm settings and configuration """ import os +from typing import AnyStr + from environs import Env import requests @@ -37,7 +39,7 @@ EMAIL_SENDER_DOMAIN = env("EMAIL_SENDER_DOMAIN", DOMAIN) EMAIL_SENDER = f"{EMAIL_SENDER_NAME}@{EMAIL_SENDER_DOMAIN}" # Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +BASE_DIR: AnyStr = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) LOCALE_PATHS = [ os.path.join(BASE_DIR, "locale"), ] diff --git a/mypy.ini b/mypy.ini index 2a29e314f..27891f501 100644 --- a/mypy.ini +++ b/mypy.ini @@ -13,6 +13,9 @@ implicit_reexport = True [mypy-bookwyrm.connectors.*] ignore_errors = False +[mypy-bookwyrm.isbn.*] +ignore_errors = False + [mypy-celerywyrm.*] ignore_errors = False From 0686926048de1238f99062affe235690c02668b5 Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Mon, 21 Aug 2023 16:58:16 +0200 Subject: [PATCH 18/40] Type annotations for bookwyrm.importers --- bookwyrm/importers/calibre_import.py | 6 ++- bookwyrm/importers/importer.py | 51 ++++++++++++++++------- bookwyrm/importers/librarything_import.py | 20 ++++++--- bookwyrm/importers/openlibrary_import.py | 4 +- bookwyrm/models/import_job.py | 6 +-- mypy.ini | 3 ++ 6 files changed, 62 insertions(+), 28 deletions(-) diff --git a/bookwyrm/importers/calibre_import.py b/bookwyrm/importers/calibre_import.py index 5426e9333..5c22a539d 100644 --- a/bookwyrm/importers/calibre_import.py +++ b/bookwyrm/importers/calibre_import.py @@ -1,4 +1,6 @@ """ handle reading a csv from calibre """ +from typing import Any, Optional + from bookwyrm.models import Shelf from . import Importer @@ -9,7 +11,7 @@ class CalibreImporter(Importer): service = "Calibre" - def __init__(self, *args, **kwargs): + def __init__(self, *args: Any, **kwargs: Any): # Add timestamp to row_mappings_guesses for date_added to avoid # integrity error row_mappings_guesses = [] @@ -23,6 +25,6 @@ class CalibreImporter(Importer): self.row_mappings_guesses = row_mappings_guesses super().__init__(*args, **kwargs) - def get_shelf(self, normalized_row): + def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]: # Calibre export does not indicate which shelf to use. Use a default one for now return Shelf.TO_READ diff --git a/bookwyrm/importers/importer.py b/bookwyrm/importers/importer.py index 4c2abb521..5b3192fa5 100644 --- a/bookwyrm/importers/importer.py +++ b/bookwyrm/importers/importer.py @@ -1,8 +1,10 @@ """ handle reading a csv from an external service, defaults are from Goodreads """ import csv from datetime import timedelta +from typing import Iterable, Optional + from django.utils import timezone -from bookwyrm.models import ImportJob, ImportItem, SiteSettings +from bookwyrm.models import ImportJob, ImportItem, SiteSettings, User class Importer: @@ -35,19 +37,26 @@ class Importer: } # pylint: disable=too-many-locals - def create_job(self, user, csv_file, include_reviews, privacy): + def create_job( + self, user: User, csv_file: Iterable[str], include_reviews: bool, privacy: str + ) -> ImportJob: """check over a csv and creates a database entry for the job""" csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter) rows = list(csv_reader) if len(rows) < 1: raise ValueError("CSV file is empty") - rows = enumerate(rows) + + mappings = ( + self.create_row_mappings(list(fieldnames)) + if (fieldnames := csv_reader.fieldnames) + else {} + ) job = ImportJob.objects.create( user=user, include_reviews=include_reviews, privacy=privacy, - mappings=self.create_row_mappings(csv_reader.fieldnames), + mappings=mappings, source=self.service, ) @@ -55,16 +64,20 @@ class Importer: if enforce_limit and allowed_imports <= 0: job.complete_job() return job - for index, entry in rows: + for index, entry in enumerate(rows): if enforce_limit and index >= allowed_imports: break self.create_item(job, index, entry) return job - def update_legacy_job(self, job): + def update_legacy_job(self, job: ImportJob) -> None: """patch up a job that was in the old format""" items = job.items - headers = list(items.first().data.keys()) + first_item = items.first() + if first_item is None: + return + + headers = list(first_item.data.keys()) job.mappings = self.create_row_mappings(headers) job.updated_date = timezone.now() job.save() @@ -75,24 +88,24 @@ class Importer: item.normalized_data = normalized item.save() - def create_row_mappings(self, headers): + def create_row_mappings(self, headers: list[str]) -> dict[str, Optional[str]]: """guess what the headers mean""" mappings = {} for (key, guesses) in self.row_mappings_guesses: - value = [h for h in headers if h.lower() in guesses] - value = value[0] if len(value) else None + values = [h for h in headers if h.lower() in guesses] + value = values[0] if len(values) else None if value: headers.remove(value) mappings[key] = value return mappings - def create_item(self, job, index, data): + def create_item(self, job: ImportJob, index: int, data: dict[str, str]) -> None: """creates and saves an import item""" normalized = self.normalize_row(data, job.mappings) normalized["shelf"] = self.get_shelf(normalized) ImportItem(job=job, index=index, data=data, normalized_data=normalized).save() - def get_shelf(self, normalized_row): + def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]: """determine which shelf to use""" shelf_name = normalized_row.get("shelf") if not shelf_name: @@ -103,11 +116,15 @@ class Importer: ] return shelf[0] if shelf else None - def normalize_row(self, entry, mappings): # pylint: disable=no-self-use + # pylint: disable=no-self-use + def normalize_row( + self, entry: dict[str, str], mappings: dict[str, Optional[str]] + ) -> dict[str, Optional[str]]: """use the dataclass to create the formatted row of data""" - return {k: entry.get(v) for k, v in mappings.items()} + return {k: entry.get(v) if v else None for k, v in mappings.items()} - def get_import_limit(self, user): # pylint: disable=no-self-use + # pylint: disable=no-self-use + def get_import_limit(self, user: User) -> tuple[int, int]: """check if import limit is set and return how many imports are left""" site_settings = SiteSettings.objects.get() import_size_limit = site_settings.import_size_limit @@ -125,7 +142,9 @@ class Importer: allowed_imports = import_size_limit - imported_books return enforce_limit, allowed_imports - def create_retry_job(self, user, original_job, items): + def create_retry_job( + self, user: User, original_job: ImportJob, items: list[ImportItem] + ) -> ImportJob: """retry items that didn't import""" job = ImportJob.objects.create( user=user, diff --git a/bookwyrm/importers/librarything_import.py b/bookwyrm/importers/librarything_import.py index ea31b46eb..145657ba0 100644 --- a/bookwyrm/importers/librarything_import.py +++ b/bookwyrm/importers/librarything_import.py @@ -1,11 +1,16 @@ """ handle reading a tsv from librarything """ import re +from typing import Optional from bookwyrm.models import Shelf from . import Importer +def _remove_brackets(value: Optional[str]) -> Optional[str]: + return re.sub(r"\[|\]", "", value) if value else None + + class LibrarythingImporter(Importer): """csv downloads from librarything""" @@ -13,16 +18,19 @@ class LibrarythingImporter(Importer): delimiter = "\t" encoding = "ISO-8859-1" - def normalize_row(self, entry, mappings): # pylint: disable=no-self-use + def normalize_row( + self, entry: dict[str, str], mappings: dict[str, Optional[str]] + ) -> dict[str, Optional[str]]: # pylint: disable=no-self-use """use the dataclass to create the formatted row of data""" - remove_brackets = lambda v: re.sub(r"\[|\]", "", v) if v else None - normalized = {k: remove_brackets(entry.get(v)) for k, v in mappings.items()} - isbn_13 = normalized.get("isbn_13") - isbn_13 = isbn_13.split(", ") if isbn_13 else [] + normalized = { + k: _remove_brackets(entry.get(v) if v else None) + for k, v in mappings.items() + } + isbn_13 = value.split(", ") if (value := normalized.get("isbn_13")) else [] normalized["isbn_13"] = isbn_13[1] if len(isbn_13) > 1 else None return normalized - def get_shelf(self, normalized_row): + def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]: if normalized_row["date_finished"]: return Shelf.READ_FINISHED if normalized_row["date_started"]: diff --git a/bookwyrm/importers/openlibrary_import.py b/bookwyrm/importers/openlibrary_import.py index ef1030609..6a954ed3c 100644 --- a/bookwyrm/importers/openlibrary_import.py +++ b/bookwyrm/importers/openlibrary_import.py @@ -1,4 +1,6 @@ """ handle reading a csv from openlibrary""" +from typing import Any + from . import Importer @@ -7,7 +9,7 @@ class OpenLibraryImporter(Importer): service = "OpenLibrary" - def __init__(self, *args, **kwargs): + def __init__(self, *args: Any, **kwargs: Any): self.row_mappings_guesses.append(("openlibrary_key", ["edition id"])) self.row_mappings_guesses.append(("openlibrary_work_key", ["work id"])) super().__init__(*args, **kwargs) diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index bb5144297..8929e9037 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -54,10 +54,10 @@ ImportStatuses = [ class ImportJob(models.Model): """entry for a specific request for book data import""" - user = models.ForeignKey(User, on_delete=models.CASCADE) + user: User = models.ForeignKey(User, on_delete=models.CASCADE) created_date = models.DateTimeField(default=timezone.now) updated_date = models.DateTimeField(default=timezone.now) - include_reviews = models.BooleanField(default=True) + include_reviews: bool = models.BooleanField(default=True) mappings = models.JSONField() source = models.CharField(max_length=100) privacy = models.CharField(max_length=255, default="public", choices=PrivacyLevels) @@ -76,7 +76,7 @@ class ImportJob(models.Model): self.save(update_fields=["task_id"]) - def complete_job(self): + def complete_job(self) -> None: """Report that the job has completed""" self.status = "complete" self.complete = True diff --git a/mypy.ini b/mypy.ini index 2a29e314f..fe181e365 100644 --- a/mypy.ini +++ b/mypy.ini @@ -13,6 +13,9 @@ implicit_reexport = True [mypy-bookwyrm.connectors.*] ignore_errors = False +[mypy-bookwyrm.importers.*] +ignore_errors = False + [mypy-celerywyrm.*] ignore_errors = False From 1c9da7b84b31b7e2403242fee9332a6be31bbee4 Mon Sep 17 00:00:00 2001 From: 0x29a Date: Fri, 25 Aug 2023 14:11:29 +0200 Subject: [PATCH 19/40] chore: bump version to match the latest tag --- bookwyrm/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index 829ddaef7..5c562ba26 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -12,7 +12,7 @@ from django.core.exceptions import ImproperlyConfigured env = Env() env.read_env() DOMAIN = env("DOMAIN") -VERSION = "0.6.4" +VERSION = "0.6.5" RELEASE_API = env( "RELEASE_API", From dfa935bd72828b5560ca1887937ddaa1a4c309e3 Mon Sep 17 00:00:00 2001 From: Abraham Toriz Date: Thu, 20 Jul 2023 22:16:28 -0600 Subject: [PATCH 20/40] fix pointed ids of labels in invite request form --- bookwyrm/templates/settings/registration.html | 2 +- bookwyrm/templates/settings/registration_limited.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bookwyrm/templates/settings/registration.html b/bookwyrm/templates/settings/registration.html index 3ebfff9ae..2de131811 100644 --- a/bookwyrm/templates/settings/registration.html +++ b/bookwyrm/templates/settings/registration.html @@ -75,7 +75,7 @@ {% include 'snippets/form_errors.html' with errors_list=form.invite_request_text.errors id="desc_invite_request_text" %}
-
-
-
-
-