Thread min_confidence through the connector search pipeline.

connector_manager now passes the min_confidence value (instead of a prebuilt
params dict) down to get_results, which still sends it as the
"min_confidence" query parameter and also hands it to
connector.process_search_response. process_search_response forwards it to
parse_search_data for non-ISBN queries; the Inventaire connector uses it to
skip results whose translated confidence is below the threshold, while the
BookWyrm connector accepts the new argument without using it.

diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py
index 6184a225a..dc4be4b3d 100644
--- a/bookwyrm/connectors/abstract_connector.py
+++ b/bookwyrm/connectors/abstract_connector.py
@@ -48,20 +48,18 @@ class AbstractMinimalConnector(ABC):
         # searched as free text. This, instead, only searches isbn if it's isbn-y
         return f"{self.search_url}{query}"
 
-    def process_search_response(self, query, data):
+    def process_search_response(self, query, data, min_confidence):
         """Format the search results based on the formt of the query"""
-        # TODO: inventaire min confidence
-        parser = self.parse_search_data
         if maybe_isbn(query):
-            parser = self.parse_isbn_search_data
-        return list(parser(data))[:10]
+            return list(self.parse_isbn_search_data(data))[:10]
+        return list(self.parse_search_data(data, min_confidence))[:10]
 
     @abstractmethod
     def get_or_create_book(self, remote_id):
         """pull up a book record by whatever means possible"""
 
     @abstractmethod
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
         """turn the result json from a search into a list"""
 
     @abstractmethod
diff --git a/bookwyrm/connectors/bookwyrm_connector.py b/bookwyrm/connectors/bookwyrm_connector.py
index 05612d9eb..e07a0b281 100644
--- a/bookwyrm/connectors/bookwyrm_connector.py
+++ b/bookwyrm/connectors/bookwyrm_connector.py
@@ -10,7 +10,7 @@ class Connector(AbstractMinimalConnector):
     def get_or_create_book(self, remote_id):
         return activitypub.resolve_remote_id(remote_id, model=models.Edition)
 
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
         for search_result in data:
             search_result["connector"] = self
             yield SearchResult(**search_result)
diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py
index 69d31a7ea..0488421e9 100644
--- a/bookwyrm/connectors/connector_manager.py
+++ b/bookwyrm/connectors/connector_manager.py
@@ -22,7 +22,7 @@ class ConnectorException(HTTPError):
     """when the connector can't do what was asked"""
 
 
-async def get_results(session, url, params, query, connector):
+async def get_results(session, url, min_confidence, query, connector):
     """try this specific connector"""
     # pylint: disable=line-too-long
     headers = {
@@ -31,6 +31,7 @@ async def get_results(session, url, params, query, connector):
         ),
         "User-Agent": USER_AGENT,
     }
+    params = {"min_confidence": min_confidence}
     try:
         async with session.get(url, headers=headers, params=params) as response:
             if not response.ok:
@@ -45,7 +46,7 @@ async def get_results(session, url, params, query, connector):
 
             return {
                 "connector": connector,
-                "results": connector.process_search_response(query, raw_data),
+                "results": connector.process_search_response(query, raw_data, min_confidence),
             }
     except asyncio.TimeoutError:
         logger.info("Connection timed out for url: %s", url)
@@ -53,7 +54,7 @@ async def get_results(session, url, params, query, connector):
         logger.exception(err)
 
 
-async def async_connector_search(query, items, params):
+async def async_connector_search(query, items, min_confidence):
     """Try a number of requests simultaneously"""
     timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
     async with aiohttp.ClientSession(timeout=timeout) as session:
@@ -61,7 +62,7 @@ async def async_connector_search(query, items, params):
         for url, connector in items:
             tasks.append(
                 asyncio.ensure_future(
-                    get_results(session, url, params, query, connector)
+                    get_results(session, url, min_confidence, query, connector)
                 )
             )
 
@@ -87,8 +88,7 @@ def search(query, min_confidence=0.1, return_first=False):
         items.append((url, connector))
 
     # load as many results as we can
-    params = {"min_confidence": min_confidence}
-    results = asyncio.run(async_connector_search(query, items, params))
+    results = asyncio.run(async_connector_search(query, items, min_confidence))
 
     if return_first:
         # find the best result from all the responses and return that
diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py
index 0495d8c2f..c13f4e3e6 100644
--- a/bookwyrm/connectors/inventaire.py
+++ b/bookwyrm/connectors/inventaire.py
@@ -77,13 +77,15 @@ class Connector(AbstractConnector):
             **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
         }
 
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
         for search_result in data.get("results", []):
             images = search_result.get("image")
             cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
             # a deeply messy translation of inventaire's scores
             confidence = float(search_result.get("_score", 0.1))
             confidence = 0.1 if confidence < 150 else 0.999
+            if confidence < min_confidence:
+                continue
             yield SearchResult(
                 title=search_result.get("label"),
                 key=self.get_remote_id(search_result.get("uri")),
diff --git a/bookwyrm/connectors/openlibrary.py b/bookwyrm/connectors/openlibrary.py
index fa9aeeb30..2b625dffc 100644
--- a/bookwyrm/connectors/openlibrary.py
+++ b/bookwyrm/connectors/openlibrary.py
@@ -152,7 +152,7 @@ class Connector(AbstractConnector):
         image_name = f"{cover_id}-{size}.jpg"
         return f"{self.covers_url}/b/id/{image_name}"
 
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
         for search_result in data.get("docs"):
             # build the remote id from the openlibrary key
             key = self.books_url + search_result["key"]