Merge pull request #2925 from jderuiter/mypy-connectors

Type annotations and related changes for connectors
This commit is contained in:
Mouse Reeve 2023-08-01 20:46:56 -07:00 committed by GitHub
commit acafa0b417
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 412 additions and 185 deletions

View file

@ -2,6 +2,8 @@
from dataclasses import dataclass, fields, MISSING from dataclasses import dataclass, fields, MISSING
from json import JSONEncoder from json import JSONEncoder
import logging import logging
from typing import Optional, Union, TypeVar, overload, Any
import requests import requests
from django.apps import apps from django.apps import apps
@ -10,12 +12,15 @@ from django.utils.http import http_date
from bookwyrm import models from bookwyrm import models
from bookwyrm.connectors import ConnectorException, get_data from bookwyrm.connectors import ConnectorException, get_data
from bookwyrm.models import base_model
from bookwyrm.signatures import make_signature from bookwyrm.signatures import make_signature
from bookwyrm.settings import DOMAIN, INSTANCE_ACTOR_USERNAME from bookwyrm.settings import DOMAIN, INSTANCE_ACTOR_USERNAME
from bookwyrm.tasks import app, MISC from bookwyrm.tasks import app, MISC
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
TBookWyrmModel = TypeVar("TBookWyrmModel", bound=base_model.BookWyrmModel)
class ActivitySerializerError(ValueError): class ActivitySerializerError(ValueError):
"""routine problems serializing activitypub json""" """routine problems serializing activitypub json"""
@ -65,7 +70,11 @@ class ActivityObject:
id: str id: str
type: str type: str
def __init__(self, activity_objects=None, **kwargs): def __init__(
self,
activity_objects: Optional[list[str, base_model.BookWyrmModel]] = None,
**kwargs: dict[str, Any],
):
"""this lets you pass in an object with fields that aren't in the """this lets you pass in an object with fields that aren't in the
dataclass, which it ignores. Any field in the dataclass is required or dataclass, which it ignores. Any field in the dataclass is required or
has a default value""" has a default value"""
@ -101,13 +110,13 @@ class ActivityObject:
# pylint: disable=too-many-locals,too-many-branches,too-many-arguments # pylint: disable=too-many-locals,too-many-branches,too-many-arguments
def to_model( def to_model(
self, self,
model=None, model: Optional[type[TBookWyrmModel]] = None,
instance=None, instance: Optional[TBookWyrmModel] = None,
allow_create=True, allow_create: bool = True,
save=True, save: bool = True,
overwrite=True, overwrite: bool = True,
allow_external_connections=True, allow_external_connections: bool = True,
): ) -> Optional[TBookWyrmModel]:
"""convert from an activity to a model instance. Args: """convert from an activity to a model instance. Args:
model: the django model that this object is being converted to model: the django model that this object is being converted to
(will guess if not known) (will guess if not known)
@ -296,14 +305,40 @@ def get_model_from_type(activity_type):
# pylint: disable=too-many-arguments # pylint: disable=too-many-arguments
@overload
def resolve_remote_id( def resolve_remote_id(
remote_id, remote_id: str,
model=None, model: type[TBookWyrmModel],
refresh=False, refresh: bool = False,
save=True, save: bool = True,
get_activity=False, get_activity: bool = False,
allow_external_connections=True, allow_external_connections: bool = True,
): ) -> TBookWyrmModel:
...
# pylint: disable=too-many-arguments
@overload
def resolve_remote_id(
remote_id: str,
model: Optional[str] = None,
refresh: bool = False,
save: bool = True,
get_activity: bool = False,
allow_external_connections: bool = True,
) -> base_model.BookWyrmModel:
...
# pylint: disable=too-many-arguments
def resolve_remote_id(
remote_id: str,
model: Optional[Union[str, type[base_model.BookWyrmModel]]] = None,
refresh: bool = False,
save: bool = True,
get_activity: bool = False,
allow_external_connections: bool = True,
) -> base_model.BookWyrmModel:
"""take a remote_id and return an instance, creating if necessary. Args: """take a remote_id and return an instance, creating if necessary. Args:
remote_id: the unique url for looking up the object in the db or by http remote_id: the unique url for looking up the object in the db or by http
model: a string or object representing the model that corresponds to the object model: a string or object representing the model that corresponds to the object

View file

@ -1,6 +1,6 @@
""" book and author data """ """ book and author data """
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List from typing import Optional
from .base_activity import ActivityObject from .base_activity import ActivityObject
from .image import Document from .image import Document
@ -11,19 +11,19 @@ from .image import Document
class BookData(ActivityObject): class BookData(ActivityObject):
"""shared fields for all book data and authors""" """shared fields for all book data and authors"""
openlibraryKey: str = None openlibraryKey: Optional[str] = None
inventaireId: str = None inventaireId: Optional[str] = None
librarythingKey: str = None librarythingKey: Optional[str] = None
goodreadsKey: str = None goodreadsKey: Optional[str] = None
bnfId: str = None bnfId: Optional[str] = None
viaf: str = None viaf: Optional[str] = None
wikidata: str = None wikidata: Optional[str] = None
asin: str = None asin: Optional[str] = None
aasin: str = None aasin: Optional[str] = None
isfdb: str = None isfdb: Optional[str] = None
lastEditedBy: str = None lastEditedBy: Optional[str] = None
links: List[str] = field(default_factory=lambda: []) links: list[str] = field(default_factory=list)
fileLinks: List[str] = field(default_factory=lambda: []) fileLinks: list[str] = field(default_factory=list)
# pylint: disable=invalid-name # pylint: disable=invalid-name
@ -35,17 +35,17 @@ class Book(BookData):
sortTitle: str = None sortTitle: str = None
subtitle: str = None subtitle: str = None
description: str = "" description: str = ""
languages: List[str] = field(default_factory=lambda: []) languages: list[str] = field(default_factory=list)
series: str = "" series: str = ""
seriesNumber: str = "" seriesNumber: str = ""
subjects: List[str] = field(default_factory=lambda: []) subjects: list[str] = field(default_factory=list)
subjectPlaces: List[str] = field(default_factory=lambda: []) subjectPlaces: list[str] = field(default_factory=list)
authors: List[str] = field(default_factory=lambda: []) authors: list[str] = field(default_factory=list)
firstPublishedDate: str = "" firstPublishedDate: str = ""
publishedDate: str = "" publishedDate: str = ""
cover: Document = None cover: Optional[Document] = None
type: str = "Book" type: str = "Book"
@ -58,10 +58,10 @@ class Edition(Book):
isbn10: str = "" isbn10: str = ""
isbn13: str = "" isbn13: str = ""
oclcNumber: str = "" oclcNumber: str = ""
pages: int = None pages: Optional[int] = None
physicalFormat: str = "" physicalFormat: str = ""
physicalFormatDetail: str = "" physicalFormatDetail: str = ""
publishers: List[str] = field(default_factory=lambda: []) publishers: list[str] = field(default_factory=list)
editionRank: int = 0 editionRank: int = 0
type: str = "Edition" type: str = "Edition"
@ -73,7 +73,7 @@ class Work(Book):
"""work instance of a book object""" """work instance of a book object"""
lccn: str = "" lccn: str = ""
editions: List[str] = field(default_factory=lambda: []) editions: list[str] = field(default_factory=list)
type: str = "Work" type: str = "Work"
@ -83,12 +83,12 @@ class Author(BookData):
"""author of a book""" """author of a book"""
name: str name: str
isni: str = None isni: Optional[str] = None
viafId: str = None viafId: Optional[str] = None
gutenbergId: str = None gutenbergId: Optional[str] = None
born: str = None born: Optional[str] = None
died: str = None died: Optional[str] = None
aliases: List[str] = field(default_factory=lambda: []) aliases: list[str] = field(default_factory=list)
bio: str = "" bio: str = ""
wikipediaLink: str = "" wikipediaLink: str = ""
type: str = "Author" type: str = "Author"

View file

@ -1,22 +1,53 @@
""" using a bookwyrm instance as a source of book data """ """ using a bookwyrm instance as a source of book data """
from __future__ import annotations
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
from functools import reduce from functools import reduce
import operator import operator
from typing import Optional, Union, Any, Literal, overload
from django.contrib.postgres.search import SearchRank, SearchQuery from django.contrib.postgres.search import SearchRank, SearchQuery
from django.db.models import F, Q from django.db.models import F, Q
from django.db.models.query import QuerySet
from bookwyrm import models from bookwyrm import models
from bookwyrm import connectors from bookwyrm import connectors
from bookwyrm.settings import MEDIA_FULL_URL from bookwyrm.settings import MEDIA_FULL_URL
@overload
def search(
query: str,
*,
min_confidence: float = 0,
filters: Optional[list[Any]] = None,
return_first: Literal[False],
) -> QuerySet[models.Edition]:
...
@overload
def search(
query: str,
*,
min_confidence: float = 0,
filters: Optional[list[Any]] = None,
return_first: Literal[True],
) -> Optional[models.Edition]:
...
# pylint: disable=arguments-differ # pylint: disable=arguments-differ
def search(query, min_confidence=0, filters=None, return_first=False): def search(
query: str,
*,
min_confidence: float = 0,
filters: Optional[list[Any]] = None,
return_first: bool = False,
) -> Union[Optional[models.Edition], QuerySet[models.Edition]]:
"""search your local database""" """search your local database"""
filters = filters or [] filters = filters or []
if not query: if not query:
return [] return None if return_first else []
query = query.strip() query = query.strip()
results = None results = None
@ -66,7 +97,9 @@ def format_search_result(search_result):
).json() ).json()
def search_identifiers(query, *filters, return_first=False): def search_identifiers(
query, *filters, return_first=False
) -> Union[Optional[models.Edition], QuerySet[models.Edition]]:
"""tries remote_id, isbn; defined as dedupe fields on the model""" """tries remote_id, isbn; defined as dedupe fields on the model"""
if connectors.maybe_isbn(query): if connectors.maybe_isbn(query):
# Oh did you think the 'S' in ISBN stood for 'standard'? # Oh did you think the 'S' in ISBN stood for 'standard'?
@ -87,7 +120,9 @@ def search_identifiers(query, *filters, return_first=False):
return results return results
def search_title_author(query, min_confidence, *filters, return_first=False): def search_title_author(
query, min_confidence, *filters, return_first=False
) -> QuerySet[models.Edition]:
"""searches for title and author""" """searches for title and author"""
query = SearchQuery(query, config="simple") | SearchQuery(query, config="english") query = SearchQuery(query, config="simple") | SearchQuery(query, config="english")
results = ( results = (
@ -122,11 +157,11 @@ class SearchResult:
title: str title: str
key: str key: str
connector: object connector: object
view_link: str = None view_link: Optional[str] = None
author: str = None author: Optional[str] = None
year: str = None year: Optional[str] = None
cover: str = None cover: Optional[str] = None
confidence: int = 1 confidence: float = 1.0
def __repr__(self): def __repr__(self):
# pylint: disable=consider-using-f-string # pylint: disable=consider-using-f-string

View file

@ -1,5 +1,7 @@
""" functionality outline for a book data connector """ """ functionality outline for a book data connector """
from __future__ import annotations
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Optional, TypedDict, Any, Callable, Union, Iterator
from urllib.parse import quote_plus from urllib.parse import quote_plus
import imghdr import imghdr
import logging import logging
@ -16,33 +18,38 @@ from bookwyrm import activitypub, models, settings
from bookwyrm.settings import USER_AGENT from bookwyrm.settings import USER_AGENT
from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
from .format_mappings import format_mappings from .format_mappings import format_mappings
from ..book_search import SearchResult
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
JsonDict = dict[str, Any]
class ConnectorResults(TypedDict):
"""TypedDict for results returned by connector"""
connector: AbstractMinimalConnector
results: list[SearchResult]
class AbstractMinimalConnector(ABC): class AbstractMinimalConnector(ABC):
"""just the bare bones, for other bookwyrm instances""" """just the bare bones, for other bookwyrm instances"""
def __init__(self, identifier): def __init__(self, identifier: str):
# load connector settings # load connector settings
info = models.Connector.objects.get(identifier=identifier) info = models.Connector.objects.get(identifier=identifier)
self.connector = info self.connector = info
# the things in the connector model to copy over # the things in the connector model to copy over
self_fields = [ self.base_url = info.base_url
"base_url", self.books_url = info.books_url
"books_url", self.covers_url = info.covers_url
"covers_url", self.search_url = info.search_url
"search_url", self.isbn_search_url = info.isbn_search_url
"isbn_search_url", self.name = info.name
"name", self.identifier = info.identifier
"identifier",
]
for field in self_fields:
setattr(self, field, getattr(info, field))
def get_search_url(self, query): def get_search_url(self, query: str) -> str:
"""format the query url""" """format the query url"""
# Check if the query resembles an ISBN # Check if the query resembles an ISBN
if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "": if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
@ -54,13 +61,21 @@ class AbstractMinimalConnector(ABC):
# searched as free text. This, instead, only searches isbn if it's isbn-y # searched as free text. This, instead, only searches isbn if it's isbn-y
return f"{self.search_url}{quote_plus(query)}" return f"{self.search_url}{quote_plus(query)}"
def process_search_response(self, query, data, min_confidence): def process_search_response(
self, query: str, data: Any, min_confidence: float
) -> list[SearchResult]:
"""Format the search results based on the format of the query""" """Format the search results based on the format of the query"""
if maybe_isbn(query): if maybe_isbn(query):
return list(self.parse_isbn_search_data(data))[:10] return list(self.parse_isbn_search_data(data))[:10]
return list(self.parse_search_data(data, min_confidence))[:10] return list(self.parse_search_data(data, min_confidence))[:10]
async def get_results(self, session, url, min_confidence, query): async def get_results(
self,
session: aiohttp.ClientSession,
url: str,
min_confidence: float,
query: str,
) -> Optional[ConnectorResults]:
"""try this specific connector""" """try this specific connector"""
# pylint: disable=line-too-long # pylint: disable=line-too-long
headers = { headers = {
@ -74,55 +89,63 @@ class AbstractMinimalConnector(ABC):
async with session.get(url, headers=headers, params=params) as response: async with session.get(url, headers=headers, params=params) as response:
if not response.ok: if not response.ok:
logger.info("Unable to connect to %s: %s", url, response.reason) logger.info("Unable to connect to %s: %s", url, response.reason)
return return None
try: try:
raw_data = await response.json() raw_data = await response.json()
except aiohttp.client_exceptions.ContentTypeError as err: except aiohttp.client_exceptions.ContentTypeError as err:
logger.exception(err) logger.exception(err)
return return None
return { return ConnectorResults(
"connector": self, connector=self,
"results": self.process_search_response( results=self.process_search_response(
query, raw_data, min_confidence query, raw_data, min_confidence
), ),
} )
except asyncio.TimeoutError: except asyncio.TimeoutError:
logger.info("Connection timed out for url: %s", url) logger.info("Connection timed out for url: %s", url)
except aiohttp.ClientError as err: except aiohttp.ClientError as err:
logger.info(err) logger.info(err)
return None
@abstractmethod @abstractmethod
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id: str) -> Optional[models.Book]:
"""pull up a book record by whatever means possible""" """pull up a book record by whatever means possible"""
@abstractmethod @abstractmethod
def parse_search_data(self, data, min_confidence): def parse_search_data(
self, data: Any, min_confidence: float
) -> Iterator[SearchResult]:
"""turn the result json from a search into a list""" """turn the result json from a search into a list"""
@abstractmethod @abstractmethod
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data: Any) -> Iterator[SearchResult]:
"""turn the result json from a search into a list""" """turn the result json from a search into a list"""
class AbstractConnector(AbstractMinimalConnector): class AbstractConnector(AbstractMinimalConnector):
"""generic book data connector""" """generic book data connector"""
def __init__(self, identifier): generated_remote_link_field = ""
def __init__(self, identifier: str):
super().__init__(identifier) super().__init__(identifier)
# fields we want to look for in book data to copy over # fields we want to look for in book data to copy over
# title we handle separately. # title we handle separately.
self.book_mappings = [] self.book_mappings: list[Mapping] = []
self.author_mappings: list[Mapping] = []
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id: str) -> Optional[models.Book]:
"""translate arbitrary json into an Activitypub dataclass""" """translate arbitrary json into an Activitypub dataclass"""
# first, check if we have the origin_id saved # first, check if we have the origin_id saved
existing = models.Edition.find_existing_by_remote_id( existing = models.Edition.find_existing_by_remote_id(
remote_id remote_id
) or models.Work.find_existing_by_remote_id(remote_id) ) or models.Work.find_existing_by_remote_id(remote_id)
if existing: if existing:
if hasattr(existing, "default_edition"): if hasattr(existing, "default_edition") and isinstance(
existing.default_edition, models.Edition
):
return existing.default_edition return existing.default_edition
return existing return existing
@ -154,6 +177,9 @@ class AbstractConnector(AbstractMinimalConnector):
) )
# this will dedupe automatically # this will dedupe automatically
work = work_activity.to_model(model=models.Work, overwrite=False) work = work_activity.to_model(model=models.Work, overwrite=False)
if not work:
return None
for author in self.get_authors_from_data(work_data): for author in self.get_authors_from_data(work_data):
work.authors.add(author) work.authors.add(author)
@ -161,12 +187,21 @@ class AbstractConnector(AbstractMinimalConnector):
load_more_data.delay(self.connector.id, work.id) load_more_data.delay(self.connector.id, work.id)
return edition return edition
def get_book_data(self, remote_id): # pylint: disable=no-self-use def get_book_data(self, remote_id: str) -> JsonDict: # pylint: disable=no-self-use
"""this allows connectors to override the default behavior""" """this allows connectors to override the default behavior"""
return get_data(remote_id) return get_data(remote_id)
def create_edition_from_data(self, work, edition_data, instance=None): def create_edition_from_data(
self,
work: models.Work,
edition_data: Union[str, JsonDict],
instance: Optional[models.Edition] = None,
) -> Optional[models.Edition]:
"""if we already have the work, we're ready""" """if we already have the work, we're ready"""
if isinstance(edition_data, str):
# We don't expect a string here
return None
mapped_data = dict_from_mappings(edition_data, self.book_mappings) mapped_data = dict_from_mappings(edition_data, self.book_mappings)
mapped_data["work"] = work.remote_id mapped_data["work"] = work.remote_id
edition_activity = activitypub.Edition(**mapped_data) edition_activity = activitypub.Edition(**mapped_data)
@ -174,6 +209,9 @@ class AbstractConnector(AbstractMinimalConnector):
model=models.Edition, overwrite=False, instance=instance model=models.Edition, overwrite=False, instance=instance
) )
if not edition:
return None
# if we're updating an existing instance, we don't need to load authors # if we're updating an existing instance, we don't need to load authors
if instance: if instance:
return edition return edition
@ -190,7 +228,9 @@ class AbstractConnector(AbstractMinimalConnector):
return edition return edition
def get_or_create_author(self, remote_id, instance=None): def get_or_create_author(
self, remote_id: str, instance: Optional[models.Author] = None
) -> Optional[models.Author]:
"""load that author""" """load that author"""
if not instance: if not instance:
existing = models.Author.find_existing_by_remote_id(remote_id) existing = models.Author.find_existing_by_remote_id(remote_id)
@ -210,46 +250,51 @@ class AbstractConnector(AbstractMinimalConnector):
model=models.Author, overwrite=False, instance=instance model=models.Author, overwrite=False, instance=instance
) )
def get_remote_id_from_model(self, obj): def get_remote_id_from_model(self, obj: models.BookDataModel) -> Optional[str]:
"""given the data stored, how can we look this up""" """given the data stored, how can we look this up"""
return getattr(obj, getattr(self, "generated_remote_link_field")) remote_id: Optional[str] = getattr(obj, self.generated_remote_link_field)
return remote_id
def update_author_from_remote(self, obj): def update_author_from_remote(self, obj: models.Author) -> Optional[models.Author]:
"""load the remote data from this connector and add it to an existing author""" """load the remote data from this connector and add it to an existing author"""
remote_id = self.get_remote_id_from_model(obj) remote_id = self.get_remote_id_from_model(obj)
if not remote_id:
return None
return self.get_or_create_author(remote_id, instance=obj) return self.get_or_create_author(remote_id, instance=obj)
def update_book_from_remote(self, obj): def update_book_from_remote(self, obj: models.Edition) -> Optional[models.Edition]:
"""load the remote data from this connector and add it to an existing book""" """load the remote data from this connector and add it to an existing book"""
remote_id = self.get_remote_id_from_model(obj) remote_id = self.get_remote_id_from_model(obj)
if not remote_id:
return None
data = self.get_book_data(remote_id) data = self.get_book_data(remote_id)
return self.create_edition_from_data(obj.parent_work, data, instance=obj) return self.create_edition_from_data(obj.parent_work, data, instance=obj)
@abstractmethod @abstractmethod
def is_work_data(self, data): def is_work_data(self, data: JsonDict) -> bool:
"""differentiate works and editions""" """differentiate works and editions"""
@abstractmethod @abstractmethod
def get_edition_from_work_data(self, data): def get_edition_from_work_data(self, data: JsonDict) -> JsonDict:
"""every work needs at least one edition""" """every work needs at least one edition"""
@abstractmethod @abstractmethod
def get_work_from_edition_data(self, data): def get_work_from_edition_data(self, data: JsonDict) -> JsonDict:
"""every edition needs a work""" """every edition needs a work"""
@abstractmethod @abstractmethod
def get_authors_from_data(self, data): def get_authors_from_data(self, data: JsonDict) -> Iterator[models.Author]:
"""load author data""" """load author data"""
@abstractmethod @abstractmethod
def expand_book_data(self, book): def expand_book_data(self, book: models.Book) -> None:
"""get more info on a book""" """get more info on a book"""
def dict_from_mappings(data, mappings): def dict_from_mappings(data: JsonDict, mappings: list[Mapping]) -> JsonDict:
"""create a dict in Activitypub format, using mappings supplies by """create a dict in Activitypub format, using mappings supplies by
the subclass""" the subclass"""
result = {} result: JsonDict = {}
for mapping in mappings: for mapping in mappings:
# sometimes there are multiple mappings for one field, don't # sometimes there are multiple mappings for one field, don't
# overwrite earlier writes in that case # overwrite earlier writes in that case
@ -259,7 +304,11 @@ def dict_from_mappings(data, mappings):
return result return result
def get_data(url, params=None, timeout=settings.QUERY_TIMEOUT): def get_data(
url: str,
params: Optional[dict[str, str]] = None,
timeout: int = settings.QUERY_TIMEOUT,
) -> JsonDict:
"""wrapper for request.get""" """wrapper for request.get"""
# check if the url is blocked # check if the url is blocked
raise_not_valid_url(url) raise_not_valid_url(url)
@ -292,10 +341,15 @@ def get_data(url, params=None, timeout=settings.QUERY_TIMEOUT):
logger.info(err) logger.info(err)
raise ConnectorException(err) raise ConnectorException(err)
if not isinstance(data, dict):
raise ConnectorException("Unexpected data format")
return data return data
def get_image(url, timeout=10): def get_image(
url: str, timeout: int = 10
) -> Union[tuple[ContentFile[bytes], str], tuple[None, None]]:
"""wrapper for requesting an image""" """wrapper for requesting an image"""
raise_not_valid_url(url) raise_not_valid_url(url)
try: try:
@ -325,14 +379,19 @@ def get_image(url, timeout=10):
class Mapping: class Mapping:
"""associate a local database field with a field in an external dataset""" """associate a local database field with a field in an external dataset"""
def __init__(self, local_field, remote_field=None, formatter=None): def __init__(
self,
local_field: str,
remote_field: Optional[str] = None,
formatter: Optional[Callable[[Any], Any]] = None,
):
noop = lambda x: x noop = lambda x: x
self.local_field = local_field self.local_field = local_field
self.remote_field = remote_field or local_field self.remote_field = remote_field or local_field
self.formatter = formatter or noop self.formatter = formatter or noop
def get_value(self, data): def get_value(self, data: JsonDict) -> Optional[Any]:
"""pull a field from incoming json and return the formatted version""" """pull a field from incoming json and return the formatted version"""
value = data.get(self.remote_field) value = data.get(self.remote_field)
if not value: if not value:
@ -343,7 +402,7 @@ class Mapping:
return None return None
def infer_physical_format(format_text): def infer_physical_format(format_text: str) -> Optional[str]:
"""try to figure out what the standardized format is from the free value""" """try to figure out what the standardized format is from the free value"""
format_text = format_text.lower() format_text = format_text.lower()
if format_text in format_mappings: if format_text in format_mappings:
@ -356,7 +415,7 @@ def infer_physical_format(format_text):
return matches[0] return matches[0]
def unique_physical_format(format_text): def unique_physical_format(format_text: str) -> Optional[str]:
"""only store the format if it isn't directly in the format mappings""" """only store the format if it isn't directly in the format mappings"""
format_text = format_text.lower() format_text = format_text.lower()
if format_text in format_mappings: if format_text in format_mappings:
@ -365,7 +424,7 @@ def unique_physical_format(format_text):
return format_text return format_text
def maybe_isbn(query): def maybe_isbn(query: str) -> bool:
"""check if a query looks like an isbn""" """check if a query looks like an isbn"""
isbn = re.sub(r"[\W_]", "", query) # removes filler characters isbn = re.sub(r"[\W_]", "", query) # removes filler characters
# ISBNs must be numeric except an ISBN10 checkdigit can be 'X' # ISBNs must be numeric except an ISBN10 checkdigit can be 'X'

View file

@ -1,4 +1,7 @@
""" using another bookwyrm instance as a source of book data """ """ using another bookwyrm instance as a source of book data """
from __future__ import annotations
from typing import Any, Iterator
from bookwyrm import activitypub, models from bookwyrm import activitypub, models
from bookwyrm.book_search import SearchResult from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractMinimalConnector from .abstract_connector import AbstractMinimalConnector
@ -7,15 +10,19 @@ from .abstract_connector import AbstractMinimalConnector
class Connector(AbstractMinimalConnector): class Connector(AbstractMinimalConnector):
"""this is basically just for search""" """this is basically just for search"""
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id: str) -> models.Edition:
return activitypub.resolve_remote_id(remote_id, model=models.Edition) return activitypub.resolve_remote_id(remote_id, model=models.Edition)
def parse_search_data(self, data, min_confidence): def parse_search_data(
self, data: list[dict[str, Any]], min_confidence: float
) -> Iterator[SearchResult]:
for search_result in data: for search_result in data:
search_result["connector"] = self search_result["connector"] = self
yield SearchResult(**search_result) yield SearchResult(**search_result)
def parse_isbn_search_data(self, data): def parse_isbn_search_data(
self, data: list[dict[str, Any]]
) -> Iterator[SearchResult]:
for search_result in data: for search_result in data:
search_result["connector"] = self search_result["connector"] = self
yield SearchResult(**search_result) yield SearchResult(**search_result)

View file

@ -1,8 +1,11 @@
""" interface with whatever connectors the app has """ """ interface with whatever connectors the app has """
from __future__ import annotations
import asyncio import asyncio
import importlib import importlib
import ipaddress import ipaddress
import logging import logging
from asyncio import Future
from typing import Iterator, Any, Optional, Union, overload, Literal
from urllib.parse import urlparse from urllib.parse import urlparse
import aiohttp import aiohttp
@ -12,6 +15,8 @@ from django.db.models import signals
from requests import HTTPError from requests import HTTPError
from bookwyrm import book_search, models from bookwyrm import book_search, models
from bookwyrm.book_search import SearchResult
from bookwyrm.connectors import abstract_connector
from bookwyrm.settings import SEARCH_TIMEOUT from bookwyrm.settings import SEARCH_TIMEOUT
from bookwyrm.tasks import app, CONNECTORS from bookwyrm.tasks import app, CONNECTORS
@ -22,11 +27,15 @@ class ConnectorException(HTTPError):
"""when the connector can't do what was asked""" """when the connector can't do what was asked"""
async def async_connector_search(query, items, min_confidence): async def async_connector_search(
query: str,
items: list[tuple[str, abstract_connector.AbstractConnector]],
min_confidence: float,
) -> list[Optional[abstract_connector.ConnectorResults]]:
"""Try a number of requests simultaneously""" """Try a number of requests simultaneously"""
timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT) timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
async with aiohttp.ClientSession(timeout=timeout) as session: async with aiohttp.ClientSession(timeout=timeout) as session:
tasks = [] tasks: list[Future[Optional[abstract_connector.ConnectorResults]]] = []
for url, connector in items: for url, connector in items:
tasks.append( tasks.append(
asyncio.ensure_future( asyncio.ensure_future(
@ -35,14 +44,29 @@ async def async_connector_search(query, items, min_confidence):
) )
results = await asyncio.gather(*tasks) results = await asyncio.gather(*tasks)
return results return list(results)
def search(query, min_confidence=0.1, return_first=False): @overload
def search(
query: str, *, min_confidence: float = 0.1, return_first: Literal[False]
) -> list[abstract_connector.ConnectorResults]:
...
@overload
def search(
query: str, *, min_confidence: float = 0.1, return_first: Literal[True]
) -> Optional[SearchResult]:
...
def search(
query: str, *, min_confidence: float = 0.1, return_first: bool = False
) -> Union[list[abstract_connector.ConnectorResults], Optional[SearchResult]]:
"""find books based on arbitrary keywords""" """find books based on arbitrary keywords"""
if not query: if not query:
return [] return None if return_first else []
results = []
items = [] items = []
for connector in get_connectors(): for connector in get_connectors():
@ -57,8 +81,12 @@ def search(query, min_confidence=0.1, return_first=False):
items.append((url, connector)) items.append((url, connector))
# load as many results as we can # load as many results as we can
results = asyncio.run(async_connector_search(query, items, min_confidence)) # failed requests will return None, so filter those out
results = [r for r in results if r] results = [
r
for r in asyncio.run(async_connector_search(query, items, min_confidence))
if r
]
if return_first: if return_first:
# find the best result from all the responses and return that # find the best result from all the responses and return that
@ -66,11 +94,12 @@ def search(query, min_confidence=0.1, return_first=False):
all_results = sorted(all_results, key=lambda r: r.confidence, reverse=True) all_results = sorted(all_results, key=lambda r: r.confidence, reverse=True)
return all_results[0] if all_results else None return all_results[0] if all_results else None
# failed requests will return None, so filter those out
return results return results
def first_search_result(query, min_confidence=0.1): def first_search_result(
query: str, min_confidence: float = 0.1
) -> Union[models.Edition, SearchResult, None]:
"""search until you find a result that fits""" """search until you find a result that fits"""
# try local search first # try local search first
result = book_search.search(query, min_confidence=min_confidence, return_first=True) result = book_search.search(query, min_confidence=min_confidence, return_first=True)
@ -80,13 +109,13 @@ def first_search_result(query, min_confidence=0.1):
return search(query, min_confidence=min_confidence, return_first=True) or None return search(query, min_confidence=min_confidence, return_first=True) or None
def get_connectors(): def get_connectors() -> Iterator[abstract_connector.AbstractConnector]:
"""load all connectors""" """load all connectors"""
for info in models.Connector.objects.filter(active=True).order_by("priority").all(): for info in models.Connector.objects.filter(active=True).order_by("priority").all():
yield load_connector(info) yield load_connector(info)
def get_or_create_connector(remote_id): def get_or_create_connector(remote_id: str) -> abstract_connector.AbstractConnector:
"""get the connector related to the object's server""" """get the connector related to the object's server"""
url = urlparse(remote_id) url = urlparse(remote_id)
identifier = url.netloc identifier = url.netloc
@ -110,7 +139,7 @@ def get_or_create_connector(remote_id):
@app.task(queue=CONNECTORS) @app.task(queue=CONNECTORS)
def load_more_data(connector_id, book_id): def load_more_data(connector_id: str, book_id: str) -> None:
"""background the work of getting all 10,000 editions of LoTR""" """background the work of getting all 10,000 editions of LoTR"""
connector_info = models.Connector.objects.get(id=connector_id) connector_info = models.Connector.objects.get(id=connector_id)
connector = load_connector(connector_info) connector = load_connector(connector_info)
@ -119,7 +148,9 @@ def load_more_data(connector_id, book_id):
@app.task(queue=CONNECTORS) @app.task(queue=CONNECTORS)
def create_edition_task(connector_id, work_id, data): def create_edition_task(
connector_id: int, work_id: int, data: Union[str, abstract_connector.JsonDict]
) -> None:
"""separate task for each of the 10,000 editions of LoTR""" """separate task for each of the 10,000 editions of LoTR"""
connector_info = models.Connector.objects.get(id=connector_id) connector_info = models.Connector.objects.get(id=connector_id)
connector = load_connector(connector_info) connector = load_connector(connector_info)
@ -127,23 +158,31 @@ def create_edition_task(connector_id, work_id, data):
connector.create_edition_from_data(work, data) connector.create_edition_from_data(work, data)
def load_connector(connector_info): def load_connector(
connector_info: models.Connector,
) -> abstract_connector.AbstractConnector:
"""instantiate the connector class""" """instantiate the connector class"""
connector = importlib.import_module( connector = importlib.import_module(
f"bookwyrm.connectors.{connector_info.connector_file}" f"bookwyrm.connectors.{connector_info.connector_file}"
) )
return connector.Connector(connector_info.identifier) return connector.Connector(connector_info.identifier) # type: ignore[no-any-return]
@receiver(signals.post_save, sender="bookwyrm.FederatedServer") @receiver(signals.post_save, sender="bookwyrm.FederatedServer")
# pylint: disable=unused-argument # pylint: disable=unused-argument
def create_connector(sender, instance, created, *args, **kwargs): def create_connector(
sender: Any,
instance: models.FederatedServer,
created: Any,
*args: Any,
**kwargs: Any,
) -> None:
"""create a connector to an external bookwyrm server""" """create a connector to an external bookwyrm server"""
if instance.application_type == "bookwyrm": if instance.application_type == "bookwyrm":
get_or_create_connector(f"https://{instance.server_name}") get_or_create_connector(f"https://{instance.server_name}")
def raise_not_valid_url(url): def raise_not_valid_url(url: str) -> None:
"""do some basic reality checks on the url""" """do some basic reality checks on the url"""
parsed = urlparse(url) parsed = urlparse(url)
if not parsed.scheme in ["http", "https"]: if not parsed.scheme in ["http", "https"]:

View file

@ -1,9 +1,10 @@
""" inventaire data connector """ """ inventaire data connector """
import re import re
from typing import Any, Union, Optional, Iterator, Iterable
from bookwyrm import models from bookwyrm import models
from bookwyrm.book_search import SearchResult from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import AbstractConnector, Mapping, JsonDict
from .abstract_connector import get_data from .abstract_connector import get_data
from .connector_manager import ConnectorException, create_edition_task from .connector_manager import ConnectorException, create_edition_task
@ -13,7 +14,7 @@ class Connector(AbstractConnector):
generated_remote_link_field = "inventaire_id" generated_remote_link_field = "inventaire_id"
def __init__(self, identifier): def __init__(self, identifier: str):
super().__init__(identifier) super().__init__(identifier)
get_first = lambda a: a[0] get_first = lambda a: a[0]
@ -60,13 +61,13 @@ class Connector(AbstractConnector):
Mapping("died", remote_field="wdt:P570", formatter=get_first), Mapping("died", remote_field="wdt:P570", formatter=get_first),
] + shared_mappings ] + shared_mappings
def get_remote_id(self, value): def get_remote_id(self, value: str) -> str:
"""convert an id/uri into a url""" """convert an id/uri into a url"""
return f"{self.books_url}?action=by-uris&uris={value}" return f"{self.books_url}?action=by-uris&uris={value}"
def get_book_data(self, remote_id): def get_book_data(self, remote_id: str) -> JsonDict:
data = get_data(remote_id) data = get_data(remote_id)
extracted = list(data.get("entities").values()) extracted = list(data.get("entities", {}).values())
try: try:
data = extracted[0] data = extracted[0]
except (KeyError, IndexError): except (KeyError, IndexError):
@ -74,10 +75,16 @@ class Connector(AbstractConnector):
# flatten the data so that images, uri, and claims are on the same level # flatten the data so that images, uri, and claims are on the same level
return { return {
**data.get("claims", {}), **data.get("claims", {}),
**{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]}, **{
k: data.get(k)
for k in ["uri", "image", "labels", "sitelinks", "type"]
if k in data
},
} }
def parse_search_data(self, data, min_confidence): def parse_search_data(
self, data: JsonDict, min_confidence: float
) -> Iterator[SearchResult]:
for search_result in data.get("results", []): for search_result in data.get("results", []):
images = search_result.get("image") images = search_result.get("image")
cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
@ -96,7 +103,7 @@ class Connector(AbstractConnector):
connector=self, connector=self,
) )
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data: JsonDict) -> Iterator[SearchResult]:
"""got some data""" """got some data"""
results = data.get("entities") results = data.get("entities")
if not results: if not results:
@ -114,35 +121,44 @@ class Connector(AbstractConnector):
connector=self, connector=self,
) )
def is_work_data(self, data): def is_work_data(self, data: JsonDict) -> bool:
return data.get("type") == "work" return data.get("type") == "work"
def load_edition_data(self, work_uri): def load_edition_data(self, work_uri: str) -> JsonDict:
"""get a list of editions for a work""" """get a list of editions for a work"""
# pylint: disable=line-too-long # pylint: disable=line-too-long
url = f"{self.books_url}?action=reverse-claims&property=wdt:P629&value={work_uri}&sort=true" url = f"{self.books_url}?action=reverse-claims&property=wdt:P629&value={work_uri}&sort=true"
return get_data(url) return get_data(url)
def get_edition_from_work_data(self, data): def get_edition_from_work_data(self, data: JsonDict) -> JsonDict:
data = self.load_edition_data(data.get("uri")) work_uri = data.get("uri")
if not work_uri:
raise ConnectorException("Invalid URI")
data = self.load_edition_data(work_uri)
try: try:
uri = data.get("uris", [])[0] uri = data.get("uris", [])[0]
except IndexError: except IndexError:
raise ConnectorException("Invalid book data") raise ConnectorException("Invalid book data")
return self.get_book_data(self.get_remote_id(uri)) return self.get_book_data(self.get_remote_id(uri))
def get_work_from_edition_data(self, data): def get_work_from_edition_data(self, data: JsonDict) -> JsonDict:
uri = data.get("wdt:P629", [None])[0] try:
uri = data.get("wdt:P629", [])[0]
except IndexError:
raise ConnectorException("Invalid book data")
if not uri: if not uri:
raise ConnectorException("Invalid book data") raise ConnectorException("Invalid book data")
return self.get_book_data(self.get_remote_id(uri)) return self.get_book_data(self.get_remote_id(uri))
def get_authors_from_data(self, data): def get_authors_from_data(self, data: JsonDict) -> Iterator[models.Author]:
authors = data.get("wdt:P50", []) authors = data.get("wdt:P50", [])
for author in authors: for author in authors:
yield self.get_or_create_author(self.get_remote_id(author)) model = self.get_or_create_author(self.get_remote_id(author))
if model:
yield model
def expand_book_data(self, book): def expand_book_data(self, book: models.Book) -> None:
work = book work = book
# go from the edition to the work, if necessary # go from the edition to the work, if necessary
if isinstance(book, models.Edition): if isinstance(book, models.Edition):
@ -154,11 +170,16 @@ class Connector(AbstractConnector):
# who knows, man # who knows, man
return return
for edition_uri in edition_options.get("uris"): for edition_uri in edition_options.get("uris", []):
remote_id = self.get_remote_id(edition_uri) remote_id = self.get_remote_id(edition_uri)
create_edition_task.delay(self.connector.id, work.id, remote_id) create_edition_task.delay(self.connector.id, work.id, remote_id)
def create_edition_from_data(self, work, edition_data, instance=None): def create_edition_from_data(
self,
work: models.Work,
edition_data: Union[str, JsonDict],
instance: Optional[models.Edition] = None,
) -> Optional[models.Edition]:
"""pass in the url as data and then call the version in abstract connector""" """pass in the url as data and then call the version in abstract connector"""
if isinstance(edition_data, str): if isinstance(edition_data, str):
try: try:
@ -168,22 +189,26 @@ class Connector(AbstractConnector):
return None return None
return super().create_edition_from_data(work, edition_data, instance=instance) return super().create_edition_from_data(work, edition_data, instance=instance)
def get_cover_url(self, cover_blob, *_): def get_cover_url(
self, cover_blob: Union[list[JsonDict], JsonDict], *_: Any
) -> Optional[str]:
"""format the relative cover url into an absolute one: """format the relative cover url into an absolute one:
{"url": "/img/entities/e794783f01b9d4f897a1ea9820b96e00d346994f"} {"url": "/img/entities/e794783f01b9d4f897a1ea9820b96e00d346994f"}
""" """
# covers may or may not be a list # covers may or may not be a list
if isinstance(cover_blob, list) and len(cover_blob) > 0: if isinstance(cover_blob, list):
if len(cover_blob) == 0:
return None
cover_blob = cover_blob[0] cover_blob = cover_blob[0]
cover_id = cover_blob.get("url") cover_id = cover_blob.get("url")
if not cover_id: if not isinstance(cover_id, str):
return None return None
# cover may or may not be an absolute url already # cover may or may not be an absolute url already
if re.match(r"^http", cover_id): if re.match(r"^http", cover_id):
return cover_id return cover_id
return f"{self.covers_url}{cover_id}" return f"{self.covers_url}{cover_id}"
def resolve_keys(self, keys): def resolve_keys(self, keys: Iterable[str]) -> list[str]:
"""cool, it's "wd:Q3156592" now what the heck does that mean""" """cool, it's "wd:Q3156592" now what the heck does that mean"""
results = [] results = []
for uri in keys: for uri in keys:
@ -191,10 +216,10 @@ class Connector(AbstractConnector):
data = self.get_book_data(self.get_remote_id(uri)) data = self.get_book_data(self.get_remote_id(uri))
except ConnectorException: except ConnectorException:
continue continue
results.append(get_language_code(data.get("labels"))) results.append(get_language_code(data.get("labels", {})))
return results return results
def get_description(self, links): def get_description(self, links: JsonDict) -> str:
"""grab an extracted excerpt from wikipedia""" """grab an extracted excerpt from wikipedia"""
link = links.get("enwiki") link = links.get("enwiki")
if not link: if not link:
@ -204,15 +229,15 @@ class Connector(AbstractConnector):
data = get_data(url) data = get_data(url)
except ConnectorException: except ConnectorException:
return "" return ""
return data.get("extract") return data.get("extract", "")
def get_remote_id_from_model(self, obj): def get_remote_id_from_model(self, obj: models.BookDataModel) -> str:
"""use get_remote_id to figure out the link from a model obj""" """use get_remote_id to figure out the link from a model obj"""
remote_id_value = obj.inventaire_id remote_id_value = obj.inventaire_id
return self.get_remote_id(remote_id_value) return self.get_remote_id(remote_id_value)
def get_language_code(options, code="en"): def get_language_code(options: JsonDict, code: str = "en") -> Any:
"""when there are a bunch of translation but we need a single field""" """when there are a bunch of translation but we need a single field"""
result = options.get(code) result = options.get(code)
if result: if result:

View file

@ -1,9 +1,10 @@
""" openlibrary data connector """ """ openlibrary data connector """
import re import re
from typing import Any, Optional, Union, Iterator, Iterable
from bookwyrm import models from bookwyrm import models
from bookwyrm.book_search import SearchResult from bookwyrm.book_search import SearchResult
from .abstract_connector import AbstractConnector, Mapping from .abstract_connector import AbstractConnector, Mapping, JsonDict
from .abstract_connector import get_data, infer_physical_format, unique_physical_format from .abstract_connector import get_data, infer_physical_format, unique_physical_format
from .connector_manager import ConnectorException, create_edition_task from .connector_manager import ConnectorException, create_edition_task
from .openlibrary_languages import languages from .openlibrary_languages import languages
@ -14,7 +15,7 @@ class Connector(AbstractConnector):
generated_remote_link_field = "openlibrary_link" generated_remote_link_field = "openlibrary_link"
def __init__(self, identifier): def __init__(self, identifier: str):
super().__init__(identifier) super().__init__(identifier)
get_first = lambda a, *args: a[0] get_first = lambda a, *args: a[0]
@ -94,14 +95,14 @@ class Connector(AbstractConnector):
Mapping("inventaire_id", remote_field="links", formatter=get_inventaire_id), Mapping("inventaire_id", remote_field="links", formatter=get_inventaire_id),
] ]
def get_book_data(self, remote_id): def get_book_data(self, remote_id: str) -> JsonDict:
data = get_data(remote_id) data = get_data(remote_id)
if data.get("type", {}).get("key") == "/type/redirect": if data.get("type", {}).get("key") == "/type/redirect":
remote_id = self.base_url + data.get("location") remote_id = self.base_url + data.get("location", "")
return get_data(remote_id) return get_data(remote_id)
return data return data
def get_remote_id_from_data(self, data): def get_remote_id_from_data(self, data: JsonDict) -> str:
"""format a url from an openlibrary id field""" """format a url from an openlibrary id field"""
try: try:
key = data["key"] key = data["key"]
@ -109,10 +110,10 @@ class Connector(AbstractConnector):
raise ConnectorException("Invalid book data") raise ConnectorException("Invalid book data")
return f"{self.books_url}{key}" return f"{self.books_url}{key}"
def is_work_data(self, data): def is_work_data(self, data: JsonDict) -> bool:
return bool(re.match(r"^[\/\w]+OL\d+W$", data["key"])) return bool(re.match(r"^[\/\w]+OL\d+W$", data["key"]))
def get_edition_from_work_data(self, data): def get_edition_from_work_data(self, data: JsonDict) -> JsonDict:
try: try:
key = data["key"] key = data["key"]
except KeyError: except KeyError:
@ -124,7 +125,7 @@ class Connector(AbstractConnector):
raise ConnectorException("No editions for work") raise ConnectorException("No editions for work")
return edition return edition
def get_work_from_edition_data(self, data): def get_work_from_edition_data(self, data: JsonDict) -> JsonDict:
try: try:
key = data["works"][0]["key"] key = data["works"][0]["key"]
except (IndexError, KeyError): except (IndexError, KeyError):
@ -132,7 +133,7 @@ class Connector(AbstractConnector):
url = f"{self.books_url}{key}" url = f"{self.books_url}{key}"
return self.get_book_data(url) return self.get_book_data(url)
def get_authors_from_data(self, data): def get_authors_from_data(self, data: JsonDict) -> Iterator[models.Author]:
"""parse author json and load or create authors""" """parse author json and load or create authors"""
for author_blob in data.get("authors", []): for author_blob in data.get("authors", []):
author_blob = author_blob.get("author", author_blob) author_blob = author_blob.get("author", author_blob)
@ -144,7 +145,7 @@ class Connector(AbstractConnector):
continue continue
yield author yield author
def get_cover_url(self, cover_blob, size="L"): def get_cover_url(self, cover_blob: list[str], size: str = "L") -> Optional[str]:
"""ask openlibrary for the cover""" """ask openlibrary for the cover"""
if not cover_blob: if not cover_blob:
return None return None
@ -152,8 +153,10 @@ class Connector(AbstractConnector):
image_name = f"{cover_id}-{size}.jpg" image_name = f"{cover_id}-{size}.jpg"
return f"{self.covers_url}/b/id/{image_name}" return f"{self.covers_url}/b/id/{image_name}"
def parse_search_data(self, data, min_confidence): def parse_search_data(
for idx, search_result in enumerate(data.get("docs")): self, data: JsonDict, min_confidence: float
) -> Iterator[SearchResult]:
for idx, search_result in enumerate(data.get("docs", [])):
# build the remote id from the openlibrary key # build the remote id from the openlibrary key
key = self.books_url + search_result["key"] key = self.books_url + search_result["key"]
author = search_result.get("author_name") or ["Unknown"] author = search_result.get("author_name") or ["Unknown"]
@ -174,7 +177,7 @@ class Connector(AbstractConnector):
confidence=confidence, confidence=confidence,
) )
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data: JsonDict) -> Iterator[SearchResult]:
for search_result in list(data.values()): for search_result in list(data.values()):
# build the remote id from the openlibrary key # build the remote id from the openlibrary key
key = self.books_url + search_result["key"] key = self.books_url + search_result["key"]
@ -188,12 +191,12 @@ class Connector(AbstractConnector):
year=search_result.get("publish_date"), year=search_result.get("publish_date"),
) )
def load_edition_data(self, olkey): def load_edition_data(self, olkey: str) -> JsonDict:
"""query openlibrary for editions of a work""" """query openlibrary for editions of a work"""
url = f"{self.books_url}/works/{olkey}/editions" url = f"{self.books_url}/works/{olkey}/editions"
return self.get_book_data(url) return self.get_book_data(url)
def expand_book_data(self, book): def expand_book_data(self, book: models.Book) -> None:
work = book work = book
# go from the edition to the work, if necessary # go from the edition to the work, if necessary
if isinstance(book, models.Edition): if isinstance(book, models.Edition):
@ -206,14 +209,14 @@ class Connector(AbstractConnector):
# who knows, man # who knows, man
return return
for edition_data in edition_options.get("entries"): for edition_data in edition_options.get("entries", []):
# does this edition have ANY interesting data? # does this edition have ANY interesting data?
if ignore_edition(edition_data): if ignore_edition(edition_data):
continue continue
create_edition_task.delay(self.connector.id, work.id, edition_data) create_edition_task.delay(self.connector.id, work.id, edition_data)
def ignore_edition(edition_data): def ignore_edition(edition_data: JsonDict) -> bool:
"""don't load a million editions that have no metadata""" """don't load a million editions that have no metadata"""
# an isbn, we love to see it # an isbn, we love to see it
if edition_data.get("isbn_13") or edition_data.get("isbn_10"): if edition_data.get("isbn_13") or edition_data.get("isbn_10"):
@ -232,19 +235,19 @@ def ignore_edition(edition_data):
return True return True
def get_description(description_blob): def get_description(description_blob: Union[JsonDict, str]) -> Optional[str]:
"""descriptions can be a string or a dict""" """descriptions can be a string or a dict"""
if isinstance(description_blob, dict): if isinstance(description_blob, dict):
return description_blob.get("value") return description_blob.get("value")
return description_blob return description_blob
def get_openlibrary_key(key): def get_openlibrary_key(key: str) -> str:
"""convert /books/OL27320736M into OL27320736M""" """convert /books/OL27320736M into OL27320736M"""
return key.split("/")[-1] return key.split("/")[-1]
def get_languages(language_blob): def get_languages(language_blob: Iterable[JsonDict]) -> list[Optional[str]]:
"""/language/eng -> English""" """/language/eng -> English"""
langs = [] langs = []
for lang in language_blob: for lang in language_blob:
@ -252,14 +255,14 @@ def get_languages(language_blob):
return langs return langs
def get_dict_field(blob, field_name): def get_dict_field(blob: Optional[JsonDict], field_name: str) -> Optional[Any]:
"""extract the isni from the remote id data for the author""" """extract the isni from the remote id data for the author"""
if not blob or not isinstance(blob, dict): if not blob or not isinstance(blob, dict):
return None return None
return blob.get(field_name) return blob.get(field_name)
def get_wikipedia_link(links): def get_wikipedia_link(links: list[Any]) -> Optional[str]:
"""extract wikipedia links""" """extract wikipedia links"""
if not isinstance(links, list): if not isinstance(links, list):
return None return None
@ -272,7 +275,7 @@ def get_wikipedia_link(links):
return None return None
def get_inventaire_id(links): def get_inventaire_id(links: list[Any]) -> Optional[str]:
"""extract and format inventaire ids""" """extract and format inventaire ids"""
if not isinstance(links, list): if not isinstance(links, list):
return None return None
@ -282,11 +285,13 @@ def get_inventaire_id(links):
continue continue
if link.get("title") == "inventaire.io": if link.get("title") == "inventaire.io":
iv_link = link.get("url") iv_link = link.get("url")
if not isinstance(iv_link, str):
return None
return iv_link.split("/")[-1] return iv_link.split("/")[-1]
return None return None
def pick_default_edition(options): def pick_default_edition(options: list[JsonDict]) -> Optional[JsonDict]:
"""favor physical copies with covers in english""" """favor physical copies with covers in english"""
if not options: if not options:
return None return None

View file

@ -6,8 +6,9 @@ from functools import reduce
import json import json
import operator import operator
import logging import logging
from typing import List from typing import Any, Optional
from uuid import uuid4 from uuid import uuid4
from typing_extensions import Self
import aiohttp import aiohttp
from Crypto.PublicKey import RSA from Crypto.PublicKey import RSA
@ -85,7 +86,7 @@ class ActivitypubMixin:
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
@classmethod @classmethod
def find_existing_by_remote_id(cls, remote_id): def find_existing_by_remote_id(cls, remote_id: str) -> Self:
"""look up a remote id in the db""" """look up a remote id in the db"""
return cls.find_existing({"id": remote_id}) return cls.find_existing({"id": remote_id})
@ -137,7 +138,7 @@ class ActivitypubMixin:
queue=queue, queue=queue,
) )
def get_recipients(self, software=None) -> List[str]: def get_recipients(self, software=None) -> list[str]:
"""figure out which inbox urls to post to""" """figure out which inbox urls to post to"""
# first we have to figure out who should receive this activity # first we have to figure out who should receive this activity
privacy = self.privacy if hasattr(self, "privacy") else "public" privacy = self.privacy if hasattr(self, "privacy") else "public"
@ -198,7 +199,14 @@ class ActivitypubMixin:
class ObjectMixin(ActivitypubMixin): class ObjectMixin(ActivitypubMixin):
"""add this mixin for object models that are AP serializable""" """add this mixin for object models that are AP serializable"""
def save(self, *args, created=None, software=None, priority=BROADCAST, **kwargs): def save(
self,
*args: Any,
created: Optional[bool] = None,
software: Any = None,
priority: str = BROADCAST,
**kwargs: Any,
) -> None:
"""broadcast created/updated/deleted objects as appropriate""" """broadcast created/updated/deleted objects as appropriate"""
broadcast = kwargs.get("broadcast", True) broadcast = kwargs.get("broadcast", True)
# this bonus kwarg would cause an error in the base save method # this bonus kwarg would cause an error in the base save method
@ -507,14 +515,14 @@ def unfurl_related_field(related_field, sort_field=None):
@app.task(queue=BROADCAST) @app.task(queue=BROADCAST)
def broadcast_task(sender_id: int, activity: str, recipients: List[str]): def broadcast_task(sender_id: int, activity: str, recipients: list[str]):
"""the celery task for broadcast""" """the celery task for broadcast"""
user_model = apps.get_model("bookwyrm.User", require_ready=True) user_model = apps.get_model("bookwyrm.User", require_ready=True)
sender = user_model.objects.select_related("key_pair").get(id=sender_id) sender = user_model.objects.select_related("key_pair").get(id=sender_id)
asyncio.run(async_broadcast(recipients, sender, activity)) asyncio.run(async_broadcast(recipients, sender, activity))
async def async_broadcast(recipients: List[str], sender, data: str): async def async_broadcast(recipients: list[str], sender, data: str):
"""Send all the broadcasts simultaneously""" """Send all the broadcasts simultaneously"""
timeout = aiohttp.ClientTimeout(total=10) timeout = aiohttp.ClientTimeout(total=10)
async with aiohttp.ClientSession(timeout=timeout) as session: async with aiohttp.ClientSession(timeout=timeout) as session:

View file

@ -1,6 +1,7 @@
""" database schema for books and shelves """ """ database schema for books and shelves """
from itertools import chain from itertools import chain
import re import re
from typing import Any
from django.contrib.postgres.search import SearchVectorField from django.contrib.postgres.search import SearchVectorField
from django.contrib.postgres.indexes import GinIndex from django.contrib.postgres.indexes import GinIndex
@ -90,7 +91,7 @@ class BookDataModel(ObjectMixin, BookWyrmModel):
abstract = True abstract = True
def save(self, *args, **kwargs): def save(self, *args: Any, **kwargs: Any) -> None:
"""ensure that the remote_id is within this instance""" """ensure that the remote_id is within this instance"""
if self.id: if self.id:
self.remote_id = self.get_remote_id() self.remote_id = self.get_remote_id()
@ -204,7 +205,7 @@ class Book(BookDataModel):
text += f" ({self.edition_info})" text += f" ({self.edition_info})"
return text return text
def save(self, *args, **kwargs): def save(self, *args: Any, **kwargs: Any) -> None:
"""can't be abstract for query reasons, but you shouldn't USE it""" """can't be abstract for query reasons, but you shouldn't USE it"""
if not isinstance(self, Edition) and not isinstance(self, Work): if not isinstance(self, Edition) and not isinstance(self, Work):
raise ValueError("Books should be added as Editions or Works") raise ValueError("Books should be added as Editions or Works")
@ -343,7 +344,7 @@ class Edition(Book):
# max rank is 9 # max rank is 9
return rank return rank
def save(self, *args, **kwargs): def save(self, *args: Any, **kwargs: Any) -> None:
"""set some fields on the edition object""" """set some fields on the edition object"""
# calculate isbn 10/13 # calculate isbn 10/13
if self.isbn_13 and self.isbn_13[:3] == "978" and not self.isbn_10: if self.isbn_13 and self.isbn_13[:3] == "978" and not self.isbn_10:

View file

@ -61,7 +61,7 @@ class FederatedServer(BookWyrmModel):
).update(active=True, deactivation_reason=None) ).update(active=True, deactivation_reason=None)
@classmethod @classmethod
def is_blocked(cls, url): def is_blocked(cls, url: str) -> bool:
"""look up if a domain is blocked""" """look up if a domain is blocked"""
url = urlparse(url) url = urlparse(url)
domain = url.netloc domain = url.netloc

View file

@ -233,3 +233,13 @@ class Openlibrary(TestCase):
self.assertFalse(ignore_edition({"languages": "languages/fr"})) self.assertFalse(ignore_edition({"languages": "languages/fr"}))
self.assertTrue(ignore_edition({"languages": "languages/eng"})) self.assertTrue(ignore_edition({"languages": "languages/eng"}))
self.assertTrue(ignore_edition({"format": "paperback"})) self.assertTrue(ignore_edition({"format": "paperback"}))
def test_remote_id_from_model(self):
"""figure out a url from an id"""
obj = models.Author.objects.create(
name="George Elliott", openlibrary_key="OL453734A"
)
self.assertEqual(
self.connector.get_remote_id_from_model(obj),
"https://openlibrary.org/authors/OL453734A",
)

View file

@ -10,6 +10,9 @@ django_settings_module = "bookwyrm.settings"
ignore_errors = True ignore_errors = True
implicit_reexport = True implicit_reexport = True
[mypy-bookwyrm.connectors.*]
ignore_errors = False
[mypy-celerywyrm.*] [mypy-celerywyrm.*]
ignore_errors = False ignore_errors = False