mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-12-23 08:36:32 +00:00
Remove search endpoints from Connector
Instead of having individual search functions that make individual requests, the connectors will always be searched asynchronously together. The process_search_response function combines the parse and format functions, which could probably be merged into one overridable function. The current to-do on this is to remove Inventaire search results that are below the confidence threshold after search, which used to happen in the `search` function.
This commit is contained in:
parent
9c03bf782e
commit
0adda36da7
3 changed files with 19 additions and 50 deletions
|
@ -39,52 +39,24 @@ class AbstractMinimalConnector(ABC):
|
|||
setattr(self, field, getattr(info, field))
|
||||
|
||||
def get_search_url(self, query):
|
||||
""" format the query url """
|
||||
"""format the query url"""
|
||||
# Check if the query resembles an ISBN
|
||||
isbn = re.sub(r"[\W_]", "", query) # removes filler characters
|
||||
maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
|
||||
if maybe_isbn and self.isbn_search_url and self.isbn_search_url != "":
|
||||
if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
|
||||
return f"{self.isbn_search_url}{query}"
|
||||
|
||||
# NOTE: previously, we tried searching isbn and if that produces no results,
|
||||
# searched as free text. This, instead, only searches isbn if it's isbn-y
|
||||
return f"{self.search_url}{query}"
|
||||
|
||||
def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT):
    """Run a free text query against this connector's search url.

    The query is appended to ``self.search_url``; ``min_confidence`` is only
    sent as a request parameter when it is truthy. At most the first ten
    parsed documents are formatted and returned as a list.
    """
    # only forward the confidence floor when one was actually given
    params = {"min_confidence": min_confidence} if min_confidence else {}

    data = self.get_search_data(
        f"{self.search_url}{query}",
        params=params,
        timeout=timeout,
    )

    hits = self.parse_search_data(data)[:10]
    return [self.format_search_result(hit) for hit in hits]
|
||||
|
||||
def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT):
    """Look up an isbn through this connector's isbn search url.

    The query is appended to ``self.isbn_search_url`` and requested with no
    extra parameters. At most the first ten parsed documents are formatted
    and returned as a list.
    """
    data = self.get_search_data(
        f"{self.isbn_search_url}{query}",
        params={},
        timeout=timeout,
    )

    # a single hit is expected, but multiple results are tolerated
    hits = self.parse_isbn_search_data(data)[:10]
    return [self.format_isbn_search_result(hit) for hit in hits]
|
||||
|
||||
def get_search_data(self, remote_id, **kwargs):  # pylint: disable=no-self-use
    """Load search data for ``remote_id``.

    By default this simply delegates to ``get_data``, passing every keyword
    argument through untouched. It exists as a separate method so that
    connectors can override the default behavior.
    """
    return get_data(remote_id, **kwargs)
|
||||
def process_search_response(self, query, data):
    """Format the search results based on the format of the query.

    When ``maybe_isbn(query)`` is true the isbn parser and formatter are
    used; otherwise the free text parser and formatter are used. At most the
    first ten parsed documents are formatted.

    Returns a list of formatted results, which is empty when the parser
    yields nothing.
    """
    # TODO: inventaire min confidence
    if maybe_isbn(query):
        parser = self.parse_isbn_search_data
        formatter = self.format_isbn_search_result
    else:
        parser = self.parse_search_data
        formatter = self.format_search_result

    # a parser may hand back None (for instance a ``.get()`` on a response
    # that lacks the expected key), and None cannot be sliced
    docs = parser(data) or []
    return [formatter(doc) for doc in docs[:10]]
|
||||
|
||||
@abstractmethod
|
||||
def get_or_create_book(self, remote_id):
|
||||
|
@ -360,3 +332,9 @@ def unique_physical_format(format_text):
|
|||
# try a direct match, so saving this would be redundant
|
||||
return None
|
||||
return format_text
|
||||
|
||||
|
||||
def maybe_isbn(query):
    """check if a query looks like an isbn

    Filler characters (anything that is not a letter or digit, plus
    underscores) are ignored. What remains must be either ten characters
    (nine digits followed by a digit or an "X" check character) or thirteen
    digits. Requiring digits keeps ordinary ten- or thirteen-letter words,
    such as a one-word title, from being mistaken for an isbn.

    Returns True when the query looks like an ISBN10 or ISBN13, else False.
    """
    isbn = re.sub(r"[\W_]", "", query)  # removes filler characters
    # ISBN10 (which may end in an X check character) or ISBN13
    return re.fullmatch(r"[0-9]{9}[0-9Xx]|[0-9]{13}", isbn) is not None
|
||||
|
|
|
@ -33,12 +33,12 @@ async def async_connector_search(query, connectors, params):
|
|||
async with session.get(url, params=params) as response:
|
||||
print("Status:", response.status)
|
||||
print(response.ok)
|
||||
print("Content-type:", response.headers['content-type'])
|
||||
print("Content-type:", response.headers["content-type"])
|
||||
|
||||
raw_response = await response.json()
|
||||
yield {
|
||||
"connector": connector,
|
||||
"results": connector.parse_search_data(raw_response)
|
||||
"results": connector.process_search_response(query, raw_response),
|
||||
}
|
||||
|
||||
|
||||
|
@ -48,7 +48,6 @@ def search(query, min_confidence=0.1, return_first=False):
|
|||
return []
|
||||
results = []
|
||||
|
||||
|
||||
connectors = list(get_connectors())
|
||||
|
||||
# load as many results as we can
|
||||
|
|
|
@ -77,14 +77,6 @@ class Connector(AbstractConnector):
|
|||
**{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
|
||||
}
|
||||
|
||||
def search(self, query, min_confidence=None):  # pylint: disable=arguments-differ
    """Search through the parent class, then apply the confidence floor.

    When ``min_confidence`` is truthy, results whose ``confidence`` is below
    it are dropped; otherwise the parent's results are returned unchanged.
    """
    found = super().search(query)
    if not min_confidence:
        return found
    # the filtering happens here, after the results have come back
    return [item for item in found if item.confidence >= min_confidence]
|
||||
|
||||
def parse_search_data(self, data):
    """Pull the ``"results"`` entry out of the search response.

    Returns whatever ``data.get("results")`` gives, which is None when the
    key is absent.
    """
    hits = data.get("results")
    return hits
|
||||
|
||||
|
|
Loading…
Reference in a new issue