mirror of
https://github.com/bookwyrm-social/bookwyrm.git
synced 2024-11-26 19:41:11 +00:00
Remove search endpoints from Connector
Instead of having individual search functions that make individual requests, the connectors will always be searched asynchronously together. The process_search_response function combines the parse and format functions, which could probably be merged into one overridable function. The current to-do on this is to remove Inventaire search results that are below the confidence threshold after search, which used to happen in the `search` function.
This commit is contained in:
parent
9c03bf782e
commit
0adda36da7
3 changed files with 19 additions and 50 deletions
|
@ -39,52 +39,24 @@ class AbstractMinimalConnector(ABC):
|
||||||
setattr(self, field, getattr(info, field))
|
setattr(self, field, getattr(info, field))
|
||||||
|
|
||||||
def get_search_url(self, query):
|
def get_search_url(self, query):
|
||||||
""" format the query url """
|
"""format the query url"""
|
||||||
# Check if the query resembles an ISBN
|
# Check if the query resembles an ISBN
|
||||||
isbn = re.sub(r"[\W_]", "", query) # removes filler characters
|
if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
|
||||||
maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
|
|
||||||
if maybe_isbn and self.isbn_search_url and self.isbn_search_url != "":
|
|
||||||
return f"{self.isbn_search_url}{query}"
|
return f"{self.isbn_search_url}{query}"
|
||||||
|
|
||||||
# NOTE: previously, we tried searching isbn and if that produces no results,
|
# NOTE: previously, we tried searching isbn and if that produces no results,
|
||||||
# searched as free text. This, instead, only searches isbn if it's isbn-y
|
# searched as free text. This, instead, only searches isbn if it's isbn-y
|
||||||
return f"{self.search_url}{query}"
|
return f"{self.search_url}{query}"
|
||||||
|
|
||||||
def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT):
|
def process_search_response(self, query, data):
|
||||||
"""free text search"""
|
"""Format the search results based on the formt of the query"""
|
||||||
params = {}
|
# TODO: inventaire min confidence
|
||||||
if min_confidence:
|
parser = self.parse_search_data
|
||||||
params["min_confidence"] = min_confidence
|
formatter = self.format_search_result
|
||||||
|
if maybe_isbn(query):
|
||||||
data = self.get_search_data(
|
parser = self.parse_isbn_search_data
|
||||||
f"{self.search_url}{query}",
|
formatter = self.format_isbn_search_result
|
||||||
params=params,
|
return [formatter(doc) for doc in parser(data)[:10]]
|
||||||
timeout=timeout,
|
|
||||||
)
|
|
||||||
results = []
|
|
||||||
|
|
||||||
for doc in self.parse_search_data(data)[:10]:
|
|
||||||
results.append(self.format_search_result(doc))
|
|
||||||
return results
|
|
||||||
|
|
||||||
def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT):
|
|
||||||
"""isbn search"""
|
|
||||||
params = {}
|
|
||||||
data = self.get_search_data(
|
|
||||||
f"{self.isbn_search_url}{query}",
|
|
||||||
params=params,
|
|
||||||
timeout=timeout,
|
|
||||||
)
|
|
||||||
results = []
|
|
||||||
|
|
||||||
# this shouldn't be returning mutliple results, but just in case
|
|
||||||
for doc in self.parse_isbn_search_data(data)[:10]:
|
|
||||||
results.append(self.format_isbn_search_result(doc))
|
|
||||||
return results
|
|
||||||
|
|
||||||
def get_search_data(self, remote_id, **kwargs): # pylint: disable=no-self-use
|
|
||||||
"""this allows connectors to override the default behavior"""
|
|
||||||
return get_data(remote_id, **kwargs)
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_or_create_book(self, remote_id):
|
def get_or_create_book(self, remote_id):
|
||||||
|
@ -360,3 +332,9 @@ def unique_physical_format(format_text):
|
||||||
# try a direct match, so saving this would be redundant
|
# try a direct match, so saving this would be redundant
|
||||||
return None
|
return None
|
||||||
return format_text
|
return format_text
|
||||||
|
|
||||||
|
|
||||||
|
def maybe_isbn(query):
|
||||||
|
"""check if a query looks like an isbn"""
|
||||||
|
isbn = re.sub(r"[\W_]", "", query) # removes filler characters
|
||||||
|
return len(isbn) in [10, 13] # ISBN10 or ISBN13
|
||||||
|
|
|
@ -33,12 +33,12 @@ async def async_connector_search(query, connectors, params):
|
||||||
async with session.get(url, params=params) as response:
|
async with session.get(url, params=params) as response:
|
||||||
print("Status:", response.status)
|
print("Status:", response.status)
|
||||||
print(response.ok)
|
print(response.ok)
|
||||||
print("Content-type:", response.headers['content-type'])
|
print("Content-type:", response.headers["content-type"])
|
||||||
|
|
||||||
raw_response = await response.json()
|
raw_response = await response.json()
|
||||||
yield {
|
yield {
|
||||||
"connector": connector,
|
"connector": connector,
|
||||||
"results": connector.parse_search_data(raw_response)
|
"results": connector.process_search_response(query, raw_response),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -48,7 +48,6 @@ def search(query, min_confidence=0.1, return_first=False):
|
||||||
return []
|
return []
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
|
|
||||||
connectors = list(get_connectors())
|
connectors = list(get_connectors())
|
||||||
|
|
||||||
# load as many results as we can
|
# load as many results as we can
|
||||||
|
|
|
@ -77,14 +77,6 @@ class Connector(AbstractConnector):
|
||||||
**{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
|
**{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
|
||||||
}
|
}
|
||||||
|
|
||||||
def search(self, query, min_confidence=None): # pylint: disable=arguments-differ
|
|
||||||
"""overrides default search function with confidence ranking"""
|
|
||||||
results = super().search(query)
|
|
||||||
if min_confidence:
|
|
||||||
# filter the search results after the fact
|
|
||||||
return [r for r in results if r.confidence >= min_confidence]
|
|
||||||
return results
|
|
||||||
|
|
||||||
def parse_search_data(self, data):
|
def parse_search_data(self, data):
|
||||||
return data.get("results")
|
return data.get("results")
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue