Filter inventaire results by confidence
commit 83ee5a756f
parent af19d728d2

5 changed files with 15 additions and 15 deletions
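In short: instead of building a params dict in search() and passing it opaquely down the stack, the commit threads the raw min_confidence value through every layer, from search() to async_connector_search() to get_results() to process_search_response() to each connector's parse_search_data(), so connectors that score their results can drop low-confidence hits themselves. A minimal, self-contained sketch of the new flow (names mirror the diff; the bodies are synchronous stand-ins, not BookWyrm code):

def parse_search_data(data, min_confidence):
    # connector-level filtering (see the Inventaire hunk below)
    return (r for r in data if r.get("confidence", 1.0) >= min_confidence)

def process_search_response(query, data, min_confidence):
    return list(parse_search_data(data, min_confidence))[:10]

def get_results(url, min_confidence, query):
    raw_data = [{"title": "Example", "confidence": 0.999}]  # stand-in HTTP response
    return process_search_response(query, raw_data, min_confidence)

def search(query, min_confidence=0.1):
    urls = ["https://connector.example/search"]  # stand-in connector list
    return [get_results(url, min_confidence, query) for url in urls]

print(search("the odyssey", min_confidence=0.5))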
@@ -48,20 +48,18 @@ class AbstractMinimalConnector(ABC):
         # searched as free text. This, instead, only searches isbn if it's isbn-y
         return f"{self.search_url}{query}"
 
-    def process_search_response(self, query, data):
+    def process_search_response(self, query, data, min_confidence):
         """Format the search results based on the formt of the query"""
-        # TODO: inventaire min confidence
-        parser = self.parse_search_data
         if maybe_isbn(query):
-            parser = self.parse_isbn_search_data
-        return list(parser(data))[:10]
+            return list(self.parse_isbn_search_data(data))[:10]
+        return list(self.parse_search_data(data, min_confidence))[:10]
 
     @abstractmethod
     def get_or_create_book(self, remote_id):
         """pull up a book record by whatever means possible"""
 
     @abstractmethod
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
         """turn the result json from a search into a list"""
 
     @abstractmethod
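The refactored process_search_response() drops the intermediate parser variable and returns directly from each branch. Note that the ISBN branch returns before any filtering, so min_confidence only applies to free-text queries. maybe_isbn() is referenced but not defined in this diff; a plausible stand-in, assuming it simply checks whether the query looks like an ISBN-10 or ISBN-13:

import re

def maybe_isbn(query):
    # Hypothetical stand-in for the maybe_isbn() referenced above, which is
    # not shown in this diff: treat the query as "isbn-y" if, stripped of
    # hyphens and whitespace, it is ten characters (digits plus an optional
    # X check digit) or thirteen digits.
    stripped = re.sub(r"[\s-]", "", query)
    return bool(re.fullmatch(r"\d{9}[\dXx]|\d{13}", stripped))

print(maybe_isbn("978-0-14-118280-3"))         # True: routed to parse_isbn_search_data
print(maybe_isbn("the master and margarita"))  # False: routed to parse_search_data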
@@ -10,7 +10,7 @@ class Connector(AbstractMinimalConnector):
     def get_or_create_book(self, remote_id):
         return activitypub.resolve_remote_id(remote_id, model=models.Edition)
 
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
         for search_result in data:
             search_result["connector"] = self
             yield SearchResult(**search_result)
@@ -22,7 +22,7 @@ class ConnectorException(HTTPError):
     """when the connector can't do what was asked"""
 
 
-async def get_results(session, url, params, query, connector):
+async def get_results(session, url, min_confidence, query, connector):
     """try this specific connector"""
     # pylint: disable=line-too-long
     headers = {
@@ -31,6 +31,7 @@ async def get_results(session, url, params, query, connector):
         ),
         "User-Agent": USER_AGENT,
     }
+    params = {"min_confidence": min_confidence}
     try:
         async with session.get(url, headers=headers, params=params) as response:
             if not response.ok:
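Rebuilding params inside get_results(), rather than receiving it from search(), also means the threshold is forwarded to the remote connector as a query-string parameter. aiohttp serializes the params mapping onto the URL via yarl; a standalone illustration of that encoding, not BookWyrm code:

from yarl import URL  # aiohttp builds request URLs with yarl

url = URL("https://connector.example/search/dune").with_query(
    {"min_confidence": "0.1"}  # aiohttp's params dict is encoded the same way
)
print(url)  # https://connector.example/search/dune?min_confidence=0.1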
@@ -45,7 +46,7 @@ async def get_results(session, url, params, query, connector):
 
             return {
                 "connector": connector,
-                "results": connector.process_search_response(query, raw_data),
+                "results": connector.process_search_response(query, raw_data, min_confidence),
             }
     except asyncio.TimeoutError:
         logger.info("Connection timed out for url: %s", url)
@@ -53,7 +54,7 @@ async def get_results(session, url, params, query, connector):
         logger.exception(err)
 
 
-async def async_connector_search(query, items, params):
+async def async_connector_search(query, items, min_confidence):
     """Try a number of requests simultaneously"""
     timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
     async with aiohttp.ClientSession(timeout=timeout) as session:
@@ -61,7 +62,7 @@ async def async_connector_search(query, items, params):
         for url, connector in items:
             tasks.append(
                 asyncio.ensure_future(
-                    get_results(session, url, params, query, connector)
+                    get_results(session, url, min_confidence, query, connector)
                 )
             )
 
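async_connector_search() schedules one get_results() task per connector so the network round trips overlap; the line that awaits the tasks falls outside this hunk, but it is presumably an asyncio.gather(). A self-contained sketch of the same fan-out pattern under that assumption:

import asyncio

async def get_results(url, min_confidence, query):
    await asyncio.sleep(0)  # stand-in for the HTTP round trip
    return {"url": url, "query": query, "min_confidence": min_confidence}

async def async_connector_search(query, urls, min_confidence):
    tasks = [
        asyncio.ensure_future(get_results(url, min_confidence, query))
        for url in urls
    ]
    return await asyncio.gather(*tasks)  # assumed; the await is outside the hunk

urls = ["https://a.example/search", "https://b.example/search"]
print(asyncio.run(async_connector_search("hild", urls, 0.1)))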
@@ -87,8 +88,7 @@ def search(query, min_confidence=0.1, return_first=False):
         items.append((url, connector))
 
     # load as many results as we can
-    params = {"min_confidence": min_confidence}
-    results = asyncio.run(async_connector_search(query, items, params))
+    results = asyncio.run(async_connector_search(query, items, min_confidence))
 
     if return_first:
         # find the best result from all the responses and return that
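The return_first branch named in the comment above flattens the per-connector batches and keeps the single best hit. A sketch of that selection, assuming results expose a confidence attribute as the SearchResult kwargs elsewhere in this diff suggest (the actual selection code is outside the hunks shown):

from dataclasses import dataclass

@dataclass
class Result:  # minimal stand-in for BookWyrm's SearchResult
    title: str
    confidence: float

def best_result(batches):
    # flatten every connector's result list, then take the top-confidence hit
    flattened = [result for batch in batches for result in batch]
    return max(flattened, key=lambda r: r.confidence, default=None)

batches = [[Result("a", 0.1)], [Result("b", 0.999)]]
print(best_result(batches).title)  # b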
@@ -77,13 +77,15 @@ class Connector(AbstractConnector):
             **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
         }
 
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
         for search_result in data.get("results", []):
             images = search_result.get("image")
             cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
             # a deeply messy translation of inventaire's scores
             confidence = float(search_result.get("_score", 0.1))
             confidence = 0.1 if confidence < 150 else 0.999
+            if confidence < min_confidence:
+                continue
             yield SearchResult(
                 title=search_result.get("label"),
                 key=self.get_remote_id(search_result.get("uri")),
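The Inventaire connector is the only one in this commit that actually uses the threshold. Its raw _score is collapsed onto BookWyrm's 0-to-1 scale by a blunt cutoff (below 150 becomes 0.1, anything else 0.999), and the new guard then skips anything under min_confidence; in practice, any threshold above 0.1 keeps only the high-score results. The translation and filter, extracted as a standalone function for illustration:

def kept_confidence(search_result, min_confidence):
    # mirror of the score translation in the hunk above; returns None
    # when the new guard would skip the result
    confidence = float(search_result.get("_score", 0.1))
    confidence = 0.1 if confidence < 150 else 0.999
    return confidence if confidence >= min_confidence else None

print(kept_confidence({"_score": 612.0}, 0.5))  # 0.999: kept
print(kept_confidence({"_score": 42.0}, 0.5))   # None: skipped
print(kept_confidence({}, 0.1))                 # 0.1: kept at the default threshold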
@@ -152,7 +152,7 @@ class Connector(AbstractConnector):
         image_name = f"{cover_id}-{size}.jpg"
         return f"{self.covers_url}/b/id/{image_name}"
 
-    def parse_search_data(self, data):
+    def parse_search_data(self, data, min_confidence):
         for search_result in data.get("docs"):
             # build the remote id from the openlibrary key
             key = self.books_url + search_result["key"]
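As in the BookWyrm connector above, the OpenLibrary connector accepts min_confidence only to keep parse_search_data() uniform with the abstract signature; nothing in its hunk uses the value, since OpenLibrary results carry no Inventaire-style score to translate.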