Remove search endpoints from Connector

Instead of having individual search functions that make individual
requests, the connectors will always be searched asynchronously
together. The process_search_response combines the parse and format
functions, which could probably be merged into one overridable
function.

The current to-do on this is to remove Inventaire search results that
are below the confidence threshold after search, which used to happen
in the `search` function.
This commit is contained in:
Mouse Reeve 2022-05-30 10:34:03 -07:00
parent 9c03bf782e
commit 0adda36da7
3 changed files with 19 additions and 50 deletions

View file

@ -41,50 +41,22 @@ class AbstractMinimalConnector(ABC):
def get_search_url(self, query): def get_search_url(self, query):
"""format the query url""" """format the query url"""
# Check if the query resembles an ISBN # Check if the query resembles an ISBN
isbn = re.sub(r"[\W_]", "", query) # removes filler characters if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
if maybe_isbn and self.isbn_search_url and self.isbn_search_url != "":
return f"{self.isbn_search_url}{query}" return f"{self.isbn_search_url}{query}"
# NOTE: previously, we tried searching isbn and if that produces no results, # NOTE: previously, we tried searching isbn and if that produces no results,
# searched as free text. This, instead, only searches isbn if it's isbn-y # searched as free text. This, instead, only searches isbn if it's isbn-y
return f"{self.search_url}{query}" return f"{self.search_url}{query}"
def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT): def process_search_response(self, query, data):
"""free text search""" """Format the search results based on the formt of the query"""
params = {} # TODO: inventaire min confidence
if min_confidence: parser = self.parse_search_data
params["min_confidence"] = min_confidence formatter = self.format_search_result
if maybe_isbn(query):
data = self.get_search_data( parser = self.parse_isbn_search_data
f"{self.search_url}{query}", formatter = self.format_isbn_search_result
params=params, return [formatter(doc) for doc in parser(data)[:10]]
timeout=timeout,
)
results = []
for doc in self.parse_search_data(data)[:10]:
results.append(self.format_search_result(doc))
return results
def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT):
"""isbn search"""
params = {}
data = self.get_search_data(
f"{self.isbn_search_url}{query}",
params=params,
timeout=timeout,
)
results = []
# this shouldn't be returning mutliple results, but just in case
for doc in self.parse_isbn_search_data(data)[:10]:
results.append(self.format_isbn_search_result(doc))
return results
def get_search_data(self, remote_id, **kwargs): # pylint: disable=no-self-use
"""this allows connectors to override the default behavior"""
return get_data(remote_id, **kwargs)
@abstractmethod @abstractmethod
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id):
@ -360,3 +332,9 @@ def unique_physical_format(format_text):
# try a direct match, so saving this would be redundant # try a direct match, so saving this would be redundant
return None return None
return format_text return format_text
def maybe_isbn(query):
"""check if a query looks like an isbn"""
isbn = re.sub(r"[\W_]", "", query) # removes filler characters
return len(isbn) in [10, 13] # ISBN10 or ISBN13

View file

@ -33,12 +33,12 @@ async def async_connector_search(query, connectors, params):
async with session.get(url, params=params) as response: async with session.get(url, params=params) as response:
print("Status:", response.status) print("Status:", response.status)
print(response.ok) print(response.ok)
print("Content-type:", response.headers['content-type']) print("Content-type:", response.headers["content-type"])
raw_response = await response.json() raw_response = await response.json()
yield { yield {
"connector": connector, "connector": connector,
"results": connector.parse_search_data(raw_response) "results": connector.process_search_response(query, raw_response),
} }
@ -48,7 +48,6 @@ def search(query, min_confidence=0.1, return_first=False):
return [] return []
results = [] results = []
connectors = list(get_connectors()) connectors = list(get_connectors())
# load as many results as we can # load as many results as we can

View file

@ -77,14 +77,6 @@ class Connector(AbstractConnector):
**{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]}, **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
} }
def search(self, query, min_confidence=None): # pylint: disable=arguments-differ
"""overrides default search function with confidence ranking"""
results = super().search(query)
if min_confidence:
# filter the search results after the fact
return [r for r in results if r.confidence >= min_confidence]
return results
def parse_search_data(self, data): def parse_search_data(self, data):
return data.get("results") return data.get("results")