From 0adda36da7a2de92ca36805b1b848a8eb100c98e Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Mon, 30 May 2022 10:34:03 -0700 Subject: [PATCH] Remove search endpoints from Connector Instead of having individual search functions that make individual requests, the connectors will always be searched asynchronously together. The process_search_response combines the parse and format functions, which could probably be merged into one overridable function. The current to-do on this is to remove Inventaire search results that are below the confidence threshold after search, which used to happen in the `search` function. --- bookwyrm/connectors/abstract_connector.py | 56 +++++++---------------- bookwyrm/connectors/connector_manager.py | 5 +- bookwyrm/connectors/inventaire.py | 8 ---- 3 files changed, 19 insertions(+), 50 deletions(-) diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 6685d5a0..fa3624f8 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -39,52 +39,24 @@ class AbstractMinimalConnector(ABC): setattr(self, field, getattr(info, field)) def get_search_url(self, query): - """ format the query url """ + """format the query url""" # Check if the query resembles an ISBN - isbn = re.sub(r"[\W_]", "", query) # removes filler characters - maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13 - if maybe_isbn and self.isbn_search_url and self.isbn_search_url != "": + if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "": return f"{self.isbn_search_url}{query}" # NOTE: previously, we tried searching isbn and if that produces no results, # searched as free text. 
This, instead, only searches isbn if it's isbn-y return f"{self.search_url}{query}" - def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT): - """free text search""" - params = {} - if min_confidence: - params["min_confidence"] = min_confidence - - data = self.get_search_data( - f"{self.search_url}{query}", - params=params, - timeout=timeout, - ) - results = [] - - for doc in self.parse_search_data(data)[:10]: - results.append(self.format_search_result(doc)) - return results - - def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT): - """isbn search""" - params = {} - data = self.get_search_data( - f"{self.isbn_search_url}{query}", - params=params, - timeout=timeout, - ) - results = [] - - # this shouldn't be returning mutliple results, but just in case - for doc in self.parse_isbn_search_data(data)[:10]: - results.append(self.format_isbn_search_result(doc)) - return results - - def get_search_data(self, remote_id, **kwargs): # pylint: disable=no-self-use - """this allows connectors to override the default behavior""" - return get_data(remote_id, **kwargs) + def process_search_response(self, query, data): + """Format the search results based on the formt of the query""" + # TODO: inventaire min confidence + parser = self.parse_search_data + formatter = self.format_search_result + if maybe_isbn(query): + parser = self.parse_isbn_search_data + formatter = self.format_isbn_search_result + return [formatter(doc) for doc in parser(data)[:10]] @abstractmethod def get_or_create_book(self, remote_id): @@ -360,3 +332,9 @@ def unique_physical_format(format_text): # try a direct match, so saving this would be redundant return None return format_text + + +def maybe_isbn(query): + """check if a query looks like an isbn""" + isbn = re.sub(r"[\W_]", "", query) # removes filler characters + return len(isbn) in [10, 13] # ISBN10 or ISBN13 diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py index 
2b5ab1c9..3c90ba5f 100644 --- a/bookwyrm/connectors/connector_manager.py +++ b/bookwyrm/connectors/connector_manager.py @@ -33,12 +33,12 @@ async def async_connector_search(query, connectors, params): async with session.get(url, params=params) as response: print("Status:", response.status) print(response.ok) - print("Content-type:", response.headers['content-type']) + print("Content-type:", response.headers["content-type"]) raw_response = await response.json() yield { "connector": connector, - "results": connector.parse_search_data(raw_response) + "results": connector.process_search_response(query, raw_response), } @@ -48,7 +48,6 @@ def search(query, min_confidence=0.1, return_first=False): return [] results = [] - connectors = list(get_connectors()) # load as many results as we can diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py index a9aeb94f..f2579612 100644 --- a/bookwyrm/connectors/inventaire.py +++ b/bookwyrm/connectors/inventaire.py @@ -77,14 +77,6 @@ class Connector(AbstractConnector): **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]}, } - def search(self, query, min_confidence=None): # pylint: disable=arguments-differ - """overrides default search function with confidence ranking""" - results = super().search(query) - if min_confidence: - # filter the search results after the fact - return [r for r in results if r.confidence >= min_confidence] - return results - def parse_search_data(self, data): return data.get("results")