From 2920973961f5828fd7db450a0e1198c40106a134 Mon Sep 17 00:00:00 2001 From: Joeri de Ruiter Date: Fri, 28 Jul 2023 20:54:03 +0200 Subject: [PATCH] Some small improvements to annotations --- bookwyrm/book_search.py | 33 ++++++++++++++-- bookwyrm/connectors/abstract_connector.py | 46 +++++++++++------------ bookwyrm/connectors/connector_manager.py | 29 +++++++++++--- bookwyrm/connectors/inventaire.py | 38 ++++++++++--------- bookwyrm/connectors/openlibrary.py | 30 +++++++-------- 5 files changed, 111 insertions(+), 65 deletions(-) diff --git a/bookwyrm/book_search.py b/bookwyrm/book_search.py index b7521a5d4..ceb228f40 100644 --- a/bookwyrm/book_search.py +++ b/bookwyrm/book_search.py @@ -3,7 +3,7 @@ from __future__ import annotations from dataclasses import asdict, dataclass from functools import reduce import operator -from typing import Optional, Union, Any +from typing import Optional, Union, Any, Literal, overload from django.contrib.postgres.search import SearchRank, SearchQuery from django.db.models import F, Q @@ -14,17 +14,40 @@ from bookwyrm import connectors from bookwyrm.settings import MEDIA_FULL_URL +@overload +def search( + query: str, + *, + min_confidence: float = 0, + filters: Optional[list[Any]] = None, + return_first: Literal[False], +) -> QuerySet[models.Edition]: + ... + + +@overload +def search( + query: str, + *, + min_confidence: float = 0, + filters: Optional[list[Any]] = None, + return_first: Literal[True], +) -> Optional[models.Edition]: + ... + + # pylint: disable=arguments-differ def search( query: str, + *, min_confidence: float = 0, filters: Optional[list[Any]] = None, return_first: bool = False, -): +) -> Union[Optional[models.Edition], QuerySet[models.Edition]]: """search your local database""" filters = filters or [] if not query: - return [] + return None if return_first else [] query = query.strip() results = None @@ -97,7 +120,9 @@ def search_identifiers( return results -def search_title_author(query, min_confidence, *filters, return_first=False): +def search_title_author( + query, min_confidence, *filters, return_first=False +) -> QuerySet[models.Edition]: """searches for title and author""" query = SearchQuery(query, config="simple") | SearchQuery(query, config="english") results = ( diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index c287ddb5b..8b6dcb885 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -22,15 +22,14 @@ from ..book_search import SearchResult logger = logging.getLogger(__name__) +JsonDict = dict[str, Any] + class ConnectorResults(TypedDict): """TypedDict for results returned by connector""" connector: AbstractMinimalConnector - results: list[Any] - - -Formatter = Callable[[Any], Optional[Union[str, list[str], list[Optional[str]]]]] + results: list[SearchResult] class AbstractMinimalConnector(ABC): @@ -98,12 +97,12 @@ class AbstractMinimalConnector(ABC): logger.exception(err) return None - return { - "connector": self, - "results": self.process_search_response( + return ConnectorResults( + connector=self, + results=self.process_search_response( query, raw_data, min_confidence ), - } + ) except asyncio.TimeoutError: logger.info("Connection timed out for url: %s", url) except aiohttp.ClientError as err: @@ -188,14 +187,14 @@ class AbstractConnector(AbstractMinimalConnector): load_more_data.delay(self.connector.id, work.id) return edition - def get_book_data(self, remote_id: str) -> BookData: # pylint: disable=no-self-use + def get_book_data(self, remote_id: str) -> JsonDict: # pylint: disable=no-self-use """this allows connectors to override the default behavior""" return get_data(remote_id) def create_edition_from_data( self, work: models.Work, - edition_data: Union[str, BookData], + edition_data: Union[str, JsonDict], instance: Optional[models.Edition] = None, ) -> Optional[models.Edition]: """if we already have the work, we're ready""" @@ -272,19 +271,19 @@ class AbstractConnector(AbstractMinimalConnector): return self.create_edition_from_data(obj.parent_work, data, instance=obj) @abstractmethod - def is_work_data(self, data: BookData) -> bool: + def is_work_data(self, data: JsonDict) -> bool: """differentiate works and editions""" @abstractmethod - def get_edition_from_work_data(self, data: BookData) -> Optional[BookData]: + def get_edition_from_work_data(self, data: JsonDict) -> JsonDict: """every work needs at least one edition""" @abstractmethod - def get_work_from_edition_data(self, data: BookData) -> BookData: + def get_work_from_edition_data(self, data: JsonDict) -> JsonDict: """every edition needs a work""" @abstractmethod - def get_authors_from_data(self, data: BookData) -> Iterator[models.Author]: + def get_authors_from_data(self, data: JsonDict) -> Iterator[models.Author]: """load author data""" @abstractmethod @@ -292,10 +291,10 @@ class AbstractConnector(AbstractMinimalConnector): """get more info on a book""" -def dict_from_mappings(data: BookData, mappings: list[Mapping]) -> dict[str, Any]: +def dict_from_mappings(data: JsonDict, mappings: list[Mapping]) -> JsonDict: """create a dict in Activitypub format, using mappings supplies by the subclass""" - result: dict[str, Any] = {} + result: JsonDict = {} for mapping in mappings: # sometimes there are multiple mappings for one field, don't # overwrite earlier writes in that case @@ -309,7 +308,7 @@ def get_data( url: str, params: Optional[dict[str, str]] = None, timeout: int = settings.QUERY_TIMEOUT, -) -> BookData: +) -> JsonDict: """wrapper for request.get""" # check if the url is blocked raise_not_valid_url(url) @@ -342,8 +341,10 @@ def get_data( logger.info(err) raise ConnectorException(err) - # For now assume the returned data is a correctly formatted dict - return data # type: ignore[no-any-return] + if not isinstance(data, dict): + raise ConnectorException("Unexpected data format") + + return data def get_image( @@ -382,7 +383,7 @@ class Mapping: self, local_field: str, remote_field: Optional[str] = None, - formatter: Optional[Formatter] = None, + formatter: Optional[Callable[[Any], Any]] = None, ): noop = lambda x: x @@ -390,7 +391,7 @@ class Mapping: self.remote_field = remote_field or local_field self.formatter = formatter or noop - def get_value(self, data: BookData) -> Optional[Any]: + def get_value(self, data: JsonDict) -> Optional[Any]: """pull a field from incoming json and return the formatted version""" value = data.get(self.remote_field) if not value: @@ -434,6 +435,3 @@ def maybe_isbn(query: str) -> bool: 10, 13, ] # ISBN10 or ISBN13, or maybe ISBN10 missing a leading zero - - -BookData = dict[str, Any] diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py index e28c54dec..444a626ba 100644 --- a/bookwyrm/connectors/connector_manager.py +++ b/bookwyrm/connectors/connector_manager.py @@ -5,7 +5,7 @@ import importlib import ipaddress import logging from asyncio import Future -from typing import Iterator, Any, Optional, Union +from typing import Iterator, Any, Optional, Union, overload, Literal from urllib.parse import urlparse import aiohttp @@ -15,6 +15,7 @@ from django.db.models import signals from requests import HTTPError from bookwyrm import book_search, models +from bookwyrm.book_search import SearchResult from bookwyrm.connectors import abstract_connector from bookwyrm.settings import SEARCH_TIMEOUT from bookwyrm.tasks import app, CONNECTORS @@ -46,10 +47,26 @@ async def async_connector_search( return list(results) -def search(query: str, min_confidence: float = 0.1, return_first: bool = False) -> Any: +@overload +def search( + query: str, *, min_confidence: float = 0.1, return_first: Literal[False] +) -> list[abstract_connector.ConnectorResults]: + ... + + +@overload +def search( + query: str, *, min_confidence: float = 0.1, return_first: Literal[True] +) -> Optional[SearchResult]: + ... + + +def search( + query: str, *, min_confidence: float = 0.1, return_first: bool = False +) -> Union[list[abstract_connector.ConnectorResults], Optional[SearchResult]]: """find books based on arbitrary keywords""" if not query: - return [] + return None if return_first else [] items = [] for connector in get_connectors(): @@ -80,7 +97,9 @@ def search(query: str, min_confidence: float = 0.1, return_first: bool = False) return results -def first_search_result(query: str, min_confidence: float = 0.1) -> Any: +def first_search_result( + query: str, min_confidence: float = 0.1 +) -> Union[models.Edition, SearchResult, None]: """search until you find a result that fits""" # try local search first result = book_search.search(query, min_confidence=min_confidence, return_first=True) @@ -130,7 +149,7 @@ def load_more_data(connector_id: str, book_id: str) -> None: @app.task(queue=CONNECTORS) def create_edition_task( - connector_id: int, work_id: int, data: Union[str, abstract_connector.BookData] + connector_id: int, work_id: int, data: Union[str, abstract_connector.JsonDict] ) -> None: """separate task for each of the 10,000 editions of LoTR""" connector_info = models.Connector.objects.get(id=connector_id) diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py index 8fa6b01d4..c08bcdee1 100644 --- a/bookwyrm/connectors/inventaire.py +++ b/bookwyrm/connectors/inventaire.py @@ -4,7 +4,7 @@ from typing import Any, Union, Optional, Iterator, Iterable from bookwyrm import models from bookwyrm.book_search import SearchResult -from .abstract_connector import AbstractConnector, Mapping, BookData +from .abstract_connector import AbstractConnector, Mapping, JsonDict from .abstract_connector import get_data from .connector_manager import ConnectorException, create_edition_task @@ -65,9 +65,9 @@ class Connector(AbstractConnector): """convert an id/uri into a url""" return f"{self.books_url}?action=by-uris&uris={value}" - def get_book_data(self, remote_id: str) -> BookData: + def get_book_data(self, remote_id: str) -> JsonDict: data = get_data(remote_id) - extracted = list(data.get("entities", []).values()) + extracted = list(data.get("entities", {}).values()) try: data = extracted[0] except (KeyError, IndexError): @@ -75,11 +75,15 @@ class Connector(AbstractConnector): # flatten the data so that images, uri, and claims are on the same level return { **data.get("claims", {}), - **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]}, + **{ + k: data.get(k) + for k in ["uri", "image", "labels", "sitelinks", "type"] + if k in data + }, } def parse_search_data( - self, data: dict[str, Any], min_confidence: float + self, data: JsonDict, min_confidence: float ) -> Iterator[SearchResult]: for search_result in data.get("results", []): images = search_result.get("image") @@ -99,7 +103,7 @@ class Connector(AbstractConnector): connector=self, ) - def parse_isbn_search_data(self, data: dict[str, Any]) -> Iterator[SearchResult]: + def parse_isbn_search_data(self, data: JsonDict) -> Iterator[SearchResult]: """got some data""" results = data.get("entities") if not results: @@ -117,16 +121,16 @@ class Connector(AbstractConnector): connector=self, ) - def is_work_data(self, data: BookData) -> bool: + def is_work_data(self, data: JsonDict) -> bool: return data.get("type") == "work" - def load_edition_data(self, work_uri: str) -> BookData: + def load_edition_data(self, work_uri: str) -> JsonDict: """get a list of editions for a work""" # pylint: disable=line-too-long url = f"{self.books_url}?action=reverse-claims&property=wdt:P629&value={work_uri}&sort=true" return get_data(url) - def get_edition_from_work_data(self, data: BookData) -> Optional[BookData]: + def get_edition_from_work_data(self, data: JsonDict) -> JsonDict: work_uri = data.get("uri") if not work_uri: raise ConnectorException("Invalid URI") @@ -137,7 +141,7 @@ class Connector(AbstractConnector): raise ConnectorException("Invalid book data") return self.get_book_data(self.get_remote_id(uri)) - def get_work_from_edition_data(self, data: BookData) -> BookData: + def get_work_from_edition_data(self, data: JsonDict) -> JsonDict: try: uri = data.get("wdt:P629", [])[0] except IndexError: @@ -147,7 +151,7 @@ class Connector(AbstractConnector): raise ConnectorException("Invalid book data") return self.get_book_data(self.get_remote_id(uri)) - def get_authors_from_data(self, data: BookData) -> Iterator[models.Author]: + def get_authors_from_data(self, data: JsonDict) -> Iterator[models.Author]: authors = data.get("wdt:P50", []) for author in authors: model = self.get_or_create_author(self.get_remote_id(author)) @@ -173,7 +177,7 @@ class Connector(AbstractConnector): def create_edition_from_data( self, work: models.Work, - edition_data: Union[str, BookData], + edition_data: Union[str, JsonDict], instance: Optional[models.Edition] = None, ) -> Optional[models.Edition]: """pass in the url as data and then call the version in abstract connector""" @@ -186,7 +190,7 @@ class Connector(AbstractConnector): return super().create_edition_from_data(work, edition_data, instance=instance) def get_cover_url( - self, cover_blob: Union[list[dict[str, str]], dict[str, str]], *_: Any + self, cover_blob: Union[list[JsonDict], JsonDict], *_: Any ) -> Optional[str]: """format the relative cover url into an absolute one: {"url": "/img/entities/e794783f01b9d4f897a1ea9820b96e00d346994f"} @@ -197,7 +201,7 @@ class Connector(AbstractConnector): return None cover_blob = cover_blob[0] cover_id = cover_blob.get("url") - if not cover_id: + if not isinstance(cover_id, str): return None # cover may or may not be an absolute url already if re.match(r"^http", cover_id): @@ -215,7 +219,7 @@ class Connector(AbstractConnector): results.append(get_language_code(data.get("labels", {}))) return results - def get_description(self, links: dict[str, str]) -> Optional[str]: + def get_description(self, links: JsonDict) -> str: """grab an extracted excerpt from wikipedia""" link = links.get("enwiki") if not link: @@ -225,7 +229,7 @@ class Connector(AbstractConnector): data = get_data(url) except ConnectorException: return "" - return data.get("extract") + return data.get("extract", "") def get_remote_id_from_model(self, obj: models.BookDataModel) -> str: """use get_remote_id to figure out the link from a model obj""" @@ -233,7 +237,7 @@ class Connector(AbstractConnector): return self.get_remote_id(remote_id_value) -def get_language_code(options: dict[str, Any], code: str = "en") -> Any: +def get_language_code(options: JsonDict, code: str = "en") -> Any: """when there are a bunch of translation but we need a single field""" result = options.get(code) if result: diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py index 40bab5a67..98c1b2b7c 100644 --- a/bookwyrm/connectors/openlibrary.py +++ b/bookwyrm/connectors/openlibrary.py @@ -4,7 +4,7 @@ from typing import Any, Optional, Union, Iterator, Iterable from bookwyrm import models from bookwyrm.book_search import SearchResult -from .abstract_connector import AbstractConnector, Mapping, BookData +from .abstract_connector import AbstractConnector, Mapping, JsonDict from .abstract_connector import get_data, infer_physical_format, unique_physical_format from .connector_manager import ConnectorException, create_edition_task from .openlibrary_languages import languages @@ -95,14 +95,14 @@ class Connector(AbstractConnector): Mapping("inventaire_id", remote_field="links", formatter=get_inventaire_id), ] - def get_book_data(self, remote_id: str) -> dict[str, Any]: + def get_book_data(self, remote_id: str) -> JsonDict: data = get_data(remote_id) if data.get("type", {}).get("key") == "/type/redirect": remote_id = self.base_url + data.get("location", "") return get_data(remote_id) return data - def get_remote_id_from_data(self, data: dict[str, Any]) -> str: + def get_remote_id_from_data(self, data: JsonDict) -> str: """format a url from an openlibrary id field""" try: key = data["key"] @@ -110,10 +110,10 @@ class Connector(AbstractConnector): raise ConnectorException("Invalid book data") return f"{self.books_url}{key}" - def is_work_data(self, data: dict[str, Any]) -> bool: + def is_work_data(self, data: JsonDict) -> bool: return bool(re.match(r"^[\/\w]+OL\d+W$", data["key"])) - def get_edition_from_work_data(self, data: dict[str, Any]) -> Optional[BookData]: + def get_edition_from_work_data(self, data: JsonDict) -> JsonDict: try: key = data["key"] except KeyError: @@ -125,7 +125,7 @@ class Connector(AbstractConnector): raise ConnectorException("No editions for work") return edition - def get_work_from_edition_data(self, data: BookData) -> BookData: + def get_work_from_edition_data(self, data: JsonDict) -> JsonDict: try: key = data["works"][0]["key"] except (IndexError, KeyError): @@ -133,7 +133,7 @@ class Connector(AbstractConnector): url = f"{self.books_url}{key}" return self.get_book_data(url) - def get_authors_from_data(self, data: BookData) -> Iterator[models.Author]: + def get_authors_from_data(self, data: JsonDict) -> Iterator[models.Author]: """parse author json and load or create authors""" for author_blob in data.get("authors", []): author_blob = author_blob.get("author", author_blob) @@ -154,7 +154,7 @@ class Connector(AbstractConnector): return f"{self.covers_url}/b/id/{image_name}" def parse_search_data( - self, data: dict[str, Any], min_confidence: float + self, data: JsonDict, min_confidence: float ) -> Iterator[SearchResult]: for idx, search_result in enumerate(data.get("docs", [])): # build the remote id from the openlibrary key @@ -177,7 +177,7 @@ class Connector(AbstractConnector): confidence=confidence, ) - def parse_isbn_search_data(self, data: dict[str, Any]) -> Iterator[SearchResult]: + def parse_isbn_search_data(self, data: JsonDict) -> Iterator[SearchResult]: for search_result in list(data.values()): # build the remote id from the openlibrary key key = self.books_url + search_result["key"] @@ -191,7 +191,7 @@ class Connector(AbstractConnector): year=search_result.get("publish_date"), ) - def load_edition_data(self, olkey: str) -> BookData: + def load_edition_data(self, olkey: str) -> JsonDict: """query openlibrary for editions of a work""" url = f"{self.books_url}/works/{olkey}/editions" return self.get_book_data(url) @@ -216,7 +216,7 @@ class Connector(AbstractConnector): create_edition_task.delay(self.connector.id, work.id, edition_data) -def ignore_edition(edition_data: BookData) -> bool: +def ignore_edition(edition_data: JsonDict) -> bool: """don't load a million editions that have no metadata""" # an isbn, we love to see it if edition_data.get("isbn_13") or edition_data.get("isbn_10"): @@ -235,7 +235,7 @@ def ignore_edition(edition_data: BookData) -> bool: return True -def get_description(description_blob: Union[dict[str, Any], str]) -> Optional[str]: +def get_description(description_blob: Union[JsonDict, str]) -> Optional[str]: """descriptions can be a string or a dict""" if isinstance(description_blob, dict): return description_blob.get("value") @@ -247,7 +247,7 @@ def get_openlibrary_key(key: str) -> str: return key.split("/")[-1] -def get_languages(language_blob: Iterable[dict[str, str]]) -> list[Optional[str]]: +def get_languages(language_blob: Iterable[JsonDict]) -> list[Optional[str]]: """/language/eng -> English""" langs = [] for lang in language_blob: @@ -255,7 +255,7 @@ def get_languages(language_blob: Iterable[dict[str, str]]) -> list[Optional[str] return langs -def get_dict_field(blob: Optional[dict[str, Any]], field_name: str) -> Optional[Any]: +def get_dict_field(blob: Optional[JsonDict], field_name: str) -> Optional[Any]: """extract the isni from the remote id data for the author""" if not blob or not isinstance(blob, dict): return None @@ -291,7 +291,7 @@ def get_inventaire_id(links: list[Any]) -> Optional[str]: return None -def pick_default_edition(options: list[BookData]) -> Optional[BookData]: +def pick_default_edition(options: list[JsonDict]) -> Optional[JsonDict]: """favor physical copies with covers in english""" if not options: return None