Remove search endpoints from Connector

Instead of having individual search functions that make individual requests, the connectors will always be searched asynchronously together. The process_seach_response combines the parse and format functions, which could probably be merged into one over-rideable function. The current to-do on this is to remove Inventaire search results that are below the confidence threshhold after search, which used to happen in the `search` function.
2025-04-05 18:09:52 +00:00 · 2022-05-30 10:34:03 -07:00 · 2022-05-30 10:34:03 -07:00 · 0adda36da7
commit 0adda36da7
parent 9c03bf782e
3 changed files with 19 additions and 50 deletions
--- a/bookwyrm/connectors/abstract_connector.py
+++ b/bookwyrm/connectors/abstract_connector.py
@ -39,52 +39,24 @@ class AbstractMinimalConnector(ABC):
            setattr(self, field, getattr(info, field))

    def get_search_url(self, query):
-        """ format the query url """
+        """format the query url"""
        # Check if the query resembles an ISBN
-        isbn = re.sub(r"[\W_]", "", query) # removes filler characters
-        maybe_isbn = len(isbn) in [10, 13]  # ISBN10 or ISBN13
-        if maybe_isbn and self.isbn_search_url and self.isbn_search_url != "":
+        if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
            return f"{self.isbn_search_url}{query}"

        # NOTE: previously, we tried searching isbn and if that produces no results,
        # searched as free text. This, instead, only searches isbn if it's isbn-y
        return f"{self.search_url}{query}"

-    def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT):
-        """free text search"""
-        params = {}
-        if min_confidence:
-            params["min_confidence"] = min_confidence
-
-        data = self.get_search_data(
-            f"{self.search_url}{query}",
-            params=params,
-            timeout=timeout,
-        )
-        results = []
-
-        for doc in self.parse_search_data(data)[:10]:
-            results.append(self.format_search_result(doc))
-        return results
-
-    def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT):
-        """isbn search"""
-        params = {}
-        data = self.get_search_data(
-            f"{self.isbn_search_url}{query}",
-            params=params,
-            timeout=timeout,
-        )
-        results = []
-
-        # this shouldn't be returning mutliple results, but just in case
-        for doc in self.parse_isbn_search_data(data)[:10]:
-            results.append(self.format_isbn_search_result(doc))
-        return results
-
-    def get_search_data(self, remote_id, **kwargs):  # pylint: disable=no-self-use
-        """this allows connectors to override the default behavior"""
-        return get_data(remote_id, **kwargs)
+    def process_search_response(self, query, data):
+        """Format the search results based on the formt of the query"""
+        # TODO: inventaire min confidence
+        parser = self.parse_search_data
+        formatter = self.format_search_result
+        if maybe_isbn(query):
+            parser = self.parse_isbn_search_data
+            formatter = self.format_isbn_search_result
+        return [formatter(doc) for doc in parser(data)[:10]]

    @abstractmethod
    def get_or_create_book(self, remote_id):
@ -360,3 +332,9 @@ def unique_physical_format(format_text):
        # try a direct match, so saving this would be redundant
        return None
    return format_text
+
+
+def maybe_isbn(query):
+    """check if a query looks like an isbn"""
+    isbn = re.sub(r"[\W_]", "", query)  # removes filler characters
+    return len(isbn) in [10, 13]  # ISBN10 or ISBN13
--- a/bookwyrm/connectors/connector_manager.py
+++ b/bookwyrm/connectors/connector_manager.py
@ -33,12 +33,12 @@ async def async_connector_search(query, connectors, params):
            async with session.get(url, params=params) as response:
                print("Status:", response.status)
                print(response.ok)
-                print("Content-type:", response.headers['content-type'])
+                print("Content-type:", response.headers["content-type"])

                raw_response = await response.json()
                yield {
                    "connector": connector,
-                    "results": connector.parse_search_data(raw_response)
+                    "results": connector.process_search_response(query, raw_response),
                }


@ -48,7 +48,6 @@ def search(query, min_confidence=0.1, return_first=False):
        return []
    results = []

-
    connectors = list(get_connectors())

    # load as many results as we can
--- a/bookwyrm/connectors/inventaire.py
+++ b/bookwyrm/connectors/inventaire.py
@ -77,14 +77,6 @@ class Connector(AbstractConnector):
            **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
        }

-    def search(self, query, min_confidence=None):  # pylint: disable=arguments-differ
-        """overrides default search function with confidence ranking"""
-        results = super().search(query)
-        if min_confidence:
-            # filter the search results after the fact
-            return [r for r in results if r.confidence >= min_confidence]
-        return results
-
    def parse_search_data(self, data):
        return data.get("results")