From 0adda36da7a2de92ca36805b1b848a8eb100c98e Mon Sep 17 00:00:00 2001
From: Mouse Reeve <mousereeve@riseup.net>
Date: Mon, 30 May 2022 10:34:03 -0700
Subject: [PATCH] Remove search endpoints from Connector

Instead of having individual search functions that make individual
requests, the connectors will always be searched asynchronously
together. The process_search_response method combines the parse and format
functions, which could probably be merged into one overridable
function.

The current to-do on this is to remove Inventaire search results that
are below the confidence threshold after search, which used to happen
in the `search` function.
---
 bookwyrm/connectors/abstract_connector.py | 56 +++++++----------------
 bookwyrm/connectors/connector_manager.py  |  5 +-
 bookwyrm/connectors/inventaire.py         |  8 ----
 3 files changed, 19 insertions(+), 50 deletions(-)

diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py
index 6685d5a0..fa3624f8 100644
--- a/bookwyrm/connectors/abstract_connector.py
+++ b/bookwyrm/connectors/abstract_connector.py
@@ -39,52 +39,24 @@ class AbstractMinimalConnector(ABC):
             setattr(self, field, getattr(info, field))
 
     def get_search_url(self, query):
-        """ format the query url """
+        """format the query url"""
         # Check if the query resembles an ISBN
-        isbn = re.sub(r"[\W_]", "", query) # removes filler characters
-        maybe_isbn = len(isbn) in [10, 13]  # ISBN10 or ISBN13
-        if maybe_isbn and self.isbn_search_url and self.isbn_search_url != "":
+        if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
             return f"{self.isbn_search_url}{query}"
 
         # NOTE: previously, we tried searching isbn and if that produces no results,
         # searched as free text. This, instead, only searches isbn if it's isbn-y
         return f"{self.search_url}{query}"
 
-    def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT):
-        """free text search"""
-        params = {}
-        if min_confidence:
-            params["min_confidence"] = min_confidence
-
-        data = self.get_search_data(
-            f"{self.search_url}{query}",
-            params=params,
-            timeout=timeout,
-        )
-        results = []
-
-        for doc in self.parse_search_data(data)[:10]:
-            results.append(self.format_search_result(doc))
-        return results
-
-    def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT):
-        """isbn search"""
-        params = {}
-        data = self.get_search_data(
-            f"{self.isbn_search_url}{query}",
-            params=params,
-            timeout=timeout,
-        )
-        results = []
-
-        # this shouldn't be returning mutliple results, but just in case
-        for doc in self.parse_isbn_search_data(data)[:10]:
-            results.append(self.format_isbn_search_result(doc))
-        return results
-
-    def get_search_data(self, remote_id, **kwargs):  # pylint: disable=no-self-use
-        """this allows connectors to override the default behavior"""
-        return get_data(remote_id, **kwargs)
+    def process_search_response(self, query, data):
+        """Parse and format up to 10 results, based on the format of the query"""
+        # TODO: drop Inventaire results below min_confidence (was done in search())
+        parser = self.parse_search_data
+        formatter = self.format_search_result
+        if maybe_isbn(query):
+            parser = self.parse_isbn_search_data
+            formatter = self.format_isbn_search_result
+        return [formatter(doc) for doc in parser(data)[:10]]
 
     @abstractmethod
     def get_or_create_book(self, remote_id):
@@ -360,3 +332,9 @@ def unique_physical_format(format_text):
         # try a direct match, so saving this would be redundant
         return None
     return format_text
+
+
+def maybe_isbn(query):
+    """check if a query looks like an isbn (length only; digits are not checked)"""
+    isbn = re.sub(r"[\W_]", "", query)  # removes filler characters
+    return len(isbn) in [10, 13]  # ISBN10 or ISBN13
diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py
index 2b5ab1c9..3c90ba5f 100644
--- a/bookwyrm/connectors/connector_manager.py
+++ b/bookwyrm/connectors/connector_manager.py
@@ -33,12 +33,12 @@ async def async_connector_search(query, connectors, params):
             async with session.get(url, params=params) as response:
                 print("Status:", response.status)
                 print(response.ok)
-                print("Content-type:", response.headers['content-type'])
+                print("Content-type:", response.headers["content-type"])
 
                 raw_response = await response.json()
                 yield {
                     "connector": connector,
-                    "results": connector.parse_search_data(raw_response)
+                    "results": connector.process_search_response(query, raw_response),
                 }
 
 
@@ -48,7 +48,6 @@ def search(query, min_confidence=0.1, return_first=False):
         return []
     results = []
 
-
     connectors = list(get_connectors())
 
     # load as many results as we can
diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py
index a9aeb94f..f2579612 100644
--- a/bookwyrm/connectors/inventaire.py
+++ b/bookwyrm/connectors/inventaire.py
@@ -77,14 +77,6 @@ class Connector(AbstractConnector):
             **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
         }
 
-    def search(self, query, min_confidence=None):  # pylint: disable=arguments-differ
-        """overrides default search function with confidence ranking"""
-        results = super().search(query)
-        if min_confidence:
-            # filter the search results after the fact
-            return [r for r in results if r.confidence >= min_confidence]
-        return results
-
     def parse_search_data(self, data):
         return data.get("results")