Merge pull request #2139 from bookwyrm-social/search-refactor

Search refactor
Mouse Reeve 2022-05-31 10:22:17 -07:00 committed by GitHub
commit 355e7039f0
18 changed files with 214 additions and 367 deletions


@ -148,8 +148,8 @@ class SearchResult:
def __repr__(self):
# pylint: disable=consider-using-f-string
return "<SearchResult key={!r} title={!r} author={!r}>".format(
self.key, self.title, self.author
return "<SearchResult key={!r} title={!r} author={!r} confidence={!r}>".format(
self.key, self.title, self.author, self.confidence
)
def json(self):
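For reference, a minimal standalone sketch of what the expanded repr now reports; the stripped-down class below is illustrative, not the real bookwyrm.book_search.SearchResult, and the sample values are made up:

from dataclasses import dataclass

@dataclass
class DemoSearchResult:
    key: str
    title: str
    author: str
    confidence: float = 1.0

    def __repr__(self):
        # same format string as above, with confidence appended
        return "<SearchResult key={!r} title={!r} author={!r} confidence={!r}>".format(
            self.key, self.title, self.author, self.confidence
        )

print(DemoSearchResult("https://example.com/book/1", "Dune", "Frank Herbert", 0.999))
# <SearchResult key='https://example.com/book/1' title='Dune' author='Frank Herbert' confidence=0.999>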


@ -1,9 +1,8 @@
""" functionality outline for a book data connector """
from abc import ABC, abstractmethod
import imghdr
import ipaddress
import logging
from urllib.parse import urlparse
import re
from django.core.files.base import ContentFile
from django.db import transaction
@ -11,7 +10,7 @@ import requests
from requests.exceptions import RequestException
from bookwyrm import activitypub, models, settings
from .connector_manager import load_more_data, ConnectorException
from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
from .format_mappings import format_mappings
@ -39,62 +38,34 @@ class AbstractMinimalConnector(ABC):
for field in self_fields:
setattr(self, field, getattr(info, field))
def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT):
"""free text search"""
params = {}
if min_confidence:
params["min_confidence"] = min_confidence
def get_search_url(self, query):
"""format the query url"""
# Check if the query resembles an ISBN
if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
return f"{self.isbn_search_url}{query}"
data = self.get_search_data(
f"{self.search_url}{query}",
params=params,
timeout=timeout,
)
results = []
# NOTE: previously, we tried searching isbn and, if that produced no results,
# searched as free text. This, instead, only searches isbn if it's isbn-y
return f"{self.search_url}{query}"
for doc in self.parse_search_data(data)[:10]:
results.append(self.format_search_result(doc))
return results
def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT):
"""isbn search"""
params = {}
data = self.get_search_data(
f"{self.isbn_search_url}{query}",
params=params,
timeout=timeout,
)
results = []
# this shouldn't be returning multiple results, but just in case
for doc in self.parse_isbn_search_data(data)[:10]:
results.append(self.format_isbn_search_result(doc))
return results
def get_search_data(self, remote_id, **kwargs): # pylint: disable=no-self-use
"""this allows connectors to override the default behavior"""
return get_data(remote_id, **kwargs)
def process_search_response(self, query, data, min_confidence):
"""Format the search results based on the formt of the query"""
if maybe_isbn(query):
return list(self.parse_isbn_search_data(data))[:10]
return list(self.parse_search_data(data, min_confidence))[:10]
@abstractmethod
def get_or_create_book(self, remote_id):
"""pull up a book record by whatever means possible"""
@abstractmethod
def parse_search_data(self, data):
def parse_search_data(self, data, min_confidence):
"""turn the result json from a search into a list"""
@abstractmethod
def format_search_result(self, search_result):
"""create a SearchResult obj from json"""
@abstractmethod
def parse_isbn_search_data(self, data):
"""turn the result json from a search into a list"""
@abstractmethod
def format_isbn_search_result(self, search_result):
"""create a SearchResult obj from json"""
class AbstractConnector(AbstractMinimalConnector):
"""generic book data connector"""
@ -254,9 +225,6 @@ def get_data(url, params=None, timeout=10):
# check if the url is blocked
raise_not_valid_url(url)
if models.FederatedServer.is_blocked(url):
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
try:
resp = requests.get(
url,
@ -311,20 +279,6 @@ def get_image(url, timeout=10):
return image_content, extension
def raise_not_valid_url(url):
"""do some basic reality checks on the url"""
parsed = urlparse(url)
if not parsed.scheme in ["http", "https"]:
raise ConnectorException("Invalid scheme: ", url)
try:
ipaddress.ip_address(parsed.netloc)
raise ConnectorException("Provided url is an IP address: ", url)
except ValueError:
# it's not an IP address, which is good
pass
class Mapping:
"""associate a local database field with a field in an external dataset"""
@ -366,3 +320,9 @@ def unique_physical_format(format_text):
# try a direct match, so saving this would be redundant
return None
return format_text
def maybe_isbn(query):
"""check if a query looks like an isbn"""
isbn = re.sub(r"[\W_]", "", query) # removes filler characters
return len(isbn) in [10, 13] # ISBN10 or ISBN13
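A quick standalone sketch of how this heuristic behaves; the sample queries are made up:

import re

def maybe_isbn(query):
    """check if a query looks like an isbn"""
    isbn = re.sub(r"[\W_]", "", query)  # strip hyphens, spaces, and other filler
    return len(isbn) in [10, 13]  # ISBN-10 or ISBN-13

print(maybe_isbn("978-0-441-17271-9"))          # True: 13 digits once separators are removed
print(maybe_isbn("0441172719"))                 # True: a bare ISBN-10
print(maybe_isbn("the left hand of darkness"))  # False: 21 characters remain after stripping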


@ -10,15 +10,12 @@ class Connector(AbstractMinimalConnector):
def get_or_create_book(self, remote_id):
return activitypub.resolve_remote_id(remote_id, model=models.Edition)
def parse_search_data(self, data):
return data
def format_search_result(self, search_result):
search_result["connector"] = self
return SearchResult(**search_result)
def parse_search_data(self, data, min_confidence):
for search_result in data:
search_result["connector"] = self
yield SearchResult(**search_result)
def parse_isbn_search_data(self, data):
return data
def format_isbn_search_result(self, search_result):
return self.format_search_result(search_result)
for search_result in data:
search_result["connector"] = self
yield SearchResult(**search_result)
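Because parse_search_data is now a generator, callers iterate it and apply the ten-result cap afterwards, as process_search_response does in the abstract connector. A toy sketch of that consumption pattern; the payload and the "example" connector value are made up:

def parse_search_data(data):
    # stand-in for the connector method: yield one shaped result per raw entry
    for search_result in data:
        yield {"connector": "example", **search_result}

raw = [{"title": f"Book {i}", "key": f"https://example.com/book/{i}"} for i in range(25)]
results = list(parse_search_data(raw))[:10]  # mirrors list(parse_search_data(data, min_confidence))[:10]
print(len(results), results[0]["title"])  # 10 Book 0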


@ -1,17 +1,18 @@
""" interface with whatever connectors the app has """
from datetime import datetime
import asyncio
import importlib
import ipaddress
import logging
import re
from urllib.parse import urlparse
import aiohttp
from django.dispatch import receiver
from django.db.models import signals
from requests import HTTPError
from bookwyrm import book_search, models
from bookwyrm.settings import SEARCH_TIMEOUT
from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT
from bookwyrm.tasks import app
logger = logging.getLogger(__name__)
@ -21,53 +22,85 @@ class ConnectorException(HTTPError):
"""when the connector can't do what was asked"""
async def get_results(session, url, min_confidence, query, connector):
"""try this specific connector"""
# pylint: disable=line-too-long
headers = {
"Accept": (
'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
),
"User-Agent": USER_AGENT,
}
params = {"min_confidence": min_confidence}
try:
async with session.get(url, headers=headers, params=params) as response:
if not response.ok:
logger.info("Unable to connect to %s: %s", url, response.reason)
return
try:
raw_data = await response.json()
except aiohttp.client_exceptions.ContentTypeError as err:
logger.exception(err)
return
return {
"connector": connector,
"results": connector.process_search_response(
query, raw_data, min_confidence
),
}
except asyncio.TimeoutError:
logger.info("Connection timed out for url: %s", url)
except aiohttp.ClientError as err:
logger.exception(err)
async def async_connector_search(query, items, min_confidence):
"""Try a number of requests simultaneously"""
timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
async with aiohttp.ClientSession(timeout=timeout) as session:
tasks = []
for url, connector in items:
tasks.append(
asyncio.ensure_future(
get_results(session, url, min_confidence, query, connector)
)
)
results = await asyncio.gather(*tasks)
return results
def search(query, min_confidence=0.1, return_first=False):
"""find books based on arbitary keywords"""
if not query:
return []
results = []
# Have we got an ISBN?
isbn = re.sub(r"[\W_]", "", query)
maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
start_time = datetime.now()
items = []
for connector in get_connectors():
result_set = None
if maybe_isbn and connector.isbn_search_url and connector.isbn_search_url != "":
# Search on ISBN
try:
result_set = connector.isbn_search(isbn)
except Exception as err: # pylint: disable=broad-except
logger.info(err)
# if this fails, we can still try regular search
# get the search url from the connector before sending
url = connector.get_search_url(query)
try:
raise_not_valid_url(url)
except ConnectorException:
# if this URL is invalid we should skip it and move on
logger.info("Request denied to blocked domain: %s", url)
continue
items.append((url, connector))
# if no isbn search results, we fallback to generic search
if not result_set:
try:
result_set = connector.search(query, min_confidence=min_confidence)
except Exception as err: # pylint: disable=broad-except
# we don't want *any* error to crash the whole search page
logger.info(err)
continue
if return_first and result_set:
# if we found anything, return it
return result_set[0]
if result_set:
results.append(
{
"connector": connector,
"results": result_set,
}
)
if (datetime.now() - start_time).seconds >= SEARCH_TIMEOUT:
break
# load as many results as we can
results = asyncio.run(async_connector_search(query, items, min_confidence))
results = [r for r in results if r]
if return_first:
return None
# find the best result from all the responses and return that
all_results = [r for con in results for r in con["results"]]
all_results = sorted(all_results, key=lambda r: r.confidence, reverse=True)
return all_results[0] if all_results else None
# failed requests will return None, so filter those out
return results
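The heart of the refactor is this fan-out: one aiohttp session, one task per connector URL, gathered under a single total timeout. A self-contained sketch of the same pattern; the example URL and the 8-second budget (matching the new SEARCH_TIMEOUT default) are placeholders:

import asyncio
import aiohttp

async def get_one(session, url):
    """fetch one search endpoint; swallow failures so one bad connector can't sink the rest"""
    try:
        async with session.get(url) as response:
            if not response.ok:
                return None
            return await response.json()
    except (asyncio.TimeoutError, aiohttp.ClientError):
        return None

async def search_all(urls, total_timeout=8):
    timeout = aiohttp.ClientTimeout(total=total_timeout)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [asyncio.ensure_future(get_one(session, url)) for url in urls]
        return await asyncio.gather(*tasks)

# failed or timed-out requests come back as None, so filter them out, as search() does above
# hits = [r for r in asyncio.run(search_all(["https://openlibrary.org/search.json?q=dune"])) if r]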
@ -133,3 +166,20 @@ def create_connector(sender, instance, created, *args, **kwargs):
"""create a connector to an external bookwyrm server"""
if instance.application_type == "bookwyrm":
get_or_create_connector(f"https://{instance.server_name}")
def raise_not_valid_url(url):
"""do some basic reality checks on the url"""
parsed = urlparse(url)
if not parsed.scheme in ["http", "https"]:
raise ConnectorException("Invalid scheme: ", url)
try:
ipaddress.ip_address(parsed.netloc)
raise ConnectorException("Provided url is an IP address: ", url)
except ValueError:
# it's not an IP address, which is good
pass
if models.FederatedServer.is_blocked(url):
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
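A standalone sketch of the reality checks this helper performs before any request goes out; it mirrors the scheme and IP-literal tests above but skips the FederatedServer blocklist lookup, and the example URLs are made up:

import ipaddress
from urllib.parse import urlparse

def looks_fetchable(url):
    """True if the url passes the basic scheme / IP-address checks"""
    parsed = urlparse(url)
    if parsed.scheme not in ["http", "https"]:
        return False
    try:
        ipaddress.ip_address(parsed.netloc)
        return False  # a bare IP address is rejected
    except ValueError:
        return True   # not an IP address, which is good

print(looks_fetchable("https://openlibrary.org/search?q=dune"))  # True
print(looks_fetchable("ftp://example.com/file"))                 # False: wrong scheme
print(looks_fetchable("http://203.0.113.7/search"))              # False: IP literal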


@ -77,53 +77,42 @@ class Connector(AbstractConnector):
**{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
}
def search(self, query, min_confidence=None): # pylint: disable=arguments-differ
"""overrides default search function with confidence ranking"""
results = super().search(query)
if min_confidence:
# filter the search results after the fact
return [r for r in results if r.confidence >= min_confidence]
return results
def parse_search_data(self, data):
return data.get("results")
def format_search_result(self, search_result):
images = search_result.get("image")
cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
# a deeply messy translation of inventaire's scores
confidence = float(search_result.get("_score", 0.1))
confidence = 0.1 if confidence < 150 else 0.999
return SearchResult(
title=search_result.get("label"),
key=self.get_remote_id(search_result.get("uri")),
author=search_result.get("description"),
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
cover=cover,
confidence=confidence,
connector=self,
)
def parse_search_data(self, data, min_confidence):
for search_result in data.get("results", []):
images = search_result.get("image")
cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
# a deeply messy translation of inventaire's scores
confidence = float(search_result.get("_score", 0.1))
confidence = 0.1 if confidence < 150 else 0.999
if confidence < min_confidence:
continue
yield SearchResult(
title=search_result.get("label"),
key=self.get_remote_id(search_result.get("uri")),
author=search_result.get("description"),
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
cover=cover,
confidence=confidence,
connector=self,
)
def parse_isbn_search_data(self, data):
"""got some daaaata"""
results = data.get("entities")
if not results:
return []
return list(results.values())
def format_isbn_search_result(self, search_result):
"""totally different format than a regular search result"""
title = search_result.get("claims", {}).get("wdt:P1476", [])
if not title:
return None
return SearchResult(
title=title[0],
key=self.get_remote_id(search_result.get("uri")),
author=search_result.get("description"),
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
cover=self.get_cover_url(search_result.get("image")),
connector=self,
)
return
for search_result in list(results.values()):
title = search_result.get("claims", {}).get("wdt:P1476", [])
if not title:
continue
yield SearchResult(
title=title[0],
key=self.get_remote_id(search_result.get("uri")),
author=search_result.get("description"),
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
cover=self.get_cover_url(search_result.get("image")),
connector=self,
)
def is_work_data(self, data):
return data.get("type") == "work"
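The score translation is easier to see in isolation. A toy sketch of how raw Inventaire _score values collapse into the two confidence buckets and how min_confidence then filters them (the sample scores echo the test data removed further down):

def to_confidence(raw_score):
    # same two-bucket translation as above: weak matches get 0.1, strong ones 0.999
    confidence = float(raw_score or 0.1)
    return 0.1 if confidence < 150 else 0.999

hits = [{"label": "hello", "_score": 200}, {"label": "hi", "_score": 100}]
min_confidence = 0.5
kept = [hit["label"] for hit in hits if to_confidence(hit["_score"]) >= min_confidence]
print(kept)  # ['hello']: the low-scoring match is filtered out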


@ -152,39 +152,35 @@ class Connector(AbstractConnector):
image_name = f"{cover_id}-{size}.jpg"
return f"{self.covers_url}/b/id/{image_name}"
def parse_search_data(self, data):
return data.get("docs")
def format_search_result(self, search_result):
# build the remote id from the openlibrary key
key = self.books_url + search_result["key"]
author = search_result.get("author_name") or ["Unknown"]
cover_blob = search_result.get("cover_i")
cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None
return SearchResult(
title=search_result.get("title"),
key=key,
author=", ".join(author),
connector=self,
year=search_result.get("first_publish_year"),
cover=cover,
)
def parse_search_data(self, data, min_confidence):
for search_result in data.get("docs"):
# build the remote id from the openlibrary key
key = self.books_url + search_result["key"]
author = search_result.get("author_name") or ["Unknown"]
cover_blob = search_result.get("cover_i")
cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None
yield SearchResult(
title=search_result.get("title"),
key=key,
author=", ".join(author),
connector=self,
year=search_result.get("first_publish_year"),
cover=cover,
)
def parse_isbn_search_data(self, data):
return list(data.values())
def format_isbn_search_result(self, search_result):
# build the remote id from the openlibrary key
key = self.books_url + search_result["key"]
authors = search_result.get("authors") or [{"name": "Unknown"}]
author_names = [author.get("name") for author in authors]
return SearchResult(
title=search_result.get("title"),
key=key,
author=", ".join(author_names),
connector=self,
year=search_result.get("publish_date"),
)
for search_result in list(data.values()):
# build the remote id from the openlibrary key
key = self.books_url + search_result["key"]
authors = search_result.get("authors") or [{"name": "Unknown"}]
author_names = [author.get("name") for author in authors]
yield SearchResult(
title=search_result.get("title"),
key=key,
author=", ".join(author_names),
connector=self,
year=search_result.get("publish_date"),
)
def load_edition_data(self, olkey):
"""query openlibrary for editions of a work"""


@ -89,7 +89,7 @@ def init_connectors():
covers_url="https://inventaire.io",
search_url="https://inventaire.io/api/search?types=works&types=works&search=",
isbn_search_url="https://inventaire.io/api/entities?action=by-uris&uris=isbn%3A",
priority=3,
priority=1,
)
models.Connector.objects.create(
@ -101,7 +101,7 @@ def init_connectors():
covers_url="https://covers.openlibrary.org",
search_url="https://openlibrary.org/search?q=",
isbn_search_url="https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:",
priority=3,
priority=1,
)


@ -212,7 +212,7 @@ STREAMS = [
# Search configuration
# total time in seconds that the instance will spend searching connectors
SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 15))
SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 8))
# timeout for a query to an individual connector
QUERY_TIMEOUT = int(env("QUERY_TIMEOUT", 5))


@ -36,7 +36,7 @@
{% if result_set.results %}
<section class="mb-5">
{% if not result_set.connector.local %}
<details class="details-panel box" {% if forloop.first %}open{% endif %}>
<details class="details-panel box" open>
{% endif %}
{% if not result_set.connector.local %}
<summary class="is-flex is-align-items-center is-flex-wrap-wrap is-gap-2">


@ -42,15 +42,9 @@ class AbstractConnector(TestCase):
generated_remote_link_field = "openlibrary_link"
def format_search_result(self, search_result):
return search_result
def parse_search_data(self, data):
def parse_search_data(self, data, min_confidence):
return data
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data):
return data


@ -1,6 +1,5 @@
""" testing book data connectors """
from django.test import TestCase
import responses
from bookwyrm import models
from bookwyrm.connectors import abstract_connector
@ -25,18 +24,12 @@ class AbstractConnector(TestCase):
class TestConnector(abstract_connector.AbstractMinimalConnector):
"""nothing added here"""
def format_search_result(self, search_result):
return search_result
def get_or_create_book(self, remote_id):
pass
def parse_search_data(self, data):
def parse_search_data(self, data, min_confidence):
return data
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data):
return data
@ -54,45 +47,6 @@ class AbstractConnector(TestCase):
self.assertIsNone(connector.name)
self.assertEqual(connector.identifier, "example.com")
@responses.activate
def test_search(self):
"""makes an http request to the outside service"""
responses.add(
responses.GET,
"https://example.com/search?q=a%20book%20title",
json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
status=200,
)
results = self.test_connector.search("a book title")
self.assertEqual(len(results), 10)
self.assertEqual(results[0], "a")
self.assertEqual(results[1], "b")
self.assertEqual(results[2], "c")
@responses.activate
def test_search_min_confidence(self):
"""makes an http request to the outside service"""
responses.add(
responses.GET,
"https://example.com/search?q=a%20book%20title&min_confidence=1",
json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
status=200,
)
results = self.test_connector.search("a book title", min_confidence=1)
self.assertEqual(len(results), 10)
@responses.activate
def test_isbn_search(self):
"""makes an http request to the outside service"""
responses.add(
responses.GET,
"https://example.com/isbn?q=123456",
json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
status=200,
)
results = self.test_connector.isbn_search("123456")
self.assertEqual(len(results), 10)
def test_create_mapping(self):
"""maps remote fields for book data to bookwyrm activitypub fields"""
mapping = Mapping("isbn")


@ -30,14 +30,11 @@ class BookWyrmConnector(TestCase):
result = self.connector.get_or_create_book(book.remote_id)
self.assertEqual(book, result)
def test_format_search_result(self):
def test_parse_search_data(self):
"""create a SearchResult object from search response json"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json")
search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_search_data(search_data)
self.assertIsInstance(results, list)
result = self.connector.format_search_result(results[0])
result = list(self.connector.parse_search_data(search_data, 0))[0]
self.assertIsInstance(result, SearchResult)
self.assertEqual(result.title, "Jonathan Strange and Mr Norrell")
self.assertEqual(result.key, "https://example.com/book/122")
@ -45,10 +42,9 @@ class BookWyrmConnector(TestCase):
self.assertEqual(result.year, 2017)
self.assertEqual(result.connector, self.connector)
def test_format_isbn_search_result(self):
def test_parse_isbn_search_data(self):
"""just gotta attach the connector"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json")
search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_isbn_search_data(search_data)
result = self.connector.format_isbn_search_result(results[0])
result = list(self.connector.parse_isbn_search_data(search_data))[0]
self.assertEqual(result.connector, self.connector)


@ -49,39 +49,11 @@ class ConnectorManager(TestCase):
self.assertEqual(len(connectors), 1)
self.assertIsInstance(connectors[0], BookWyrmConnector)
@responses.activate
def test_search_plaintext(self):
"""search all connectors"""
responses.add(
responses.GET,
"http://fake.ciom/search/Example?min_confidence=0.1",
json=[{"title": "Hello", "key": "https://www.example.com/search/1"}],
)
results = connector_manager.search("Example")
self.assertEqual(len(results), 1)
self.assertEqual(len(results[0]["results"]), 1)
self.assertEqual(results[0]["connector"].identifier, "test_connector_remote")
self.assertEqual(results[0]["results"][0].title, "Hello")
def test_search_empty_query(self):
"""don't panic on empty queries"""
results = connector_manager.search("")
self.assertEqual(results, [])
@responses.activate
def test_search_isbn(self):
"""special handling if a query resembles an isbn"""
responses.add(
responses.GET,
"http://fake.ciom/isbn/0000000000",
json=[{"title": "Hello", "key": "https://www.example.com/search/1"}],
)
results = connector_manager.search("0000000000")
self.assertEqual(len(results), 1)
self.assertEqual(len(results[0]["results"]), 1)
self.assertEqual(results[0]["connector"].identifier, "test_connector_remote")
self.assertEqual(results[0]["results"][0].title, "Hello")
def test_first_search_result(self):
"""only get one search result"""
result = connector_manager.first_search_result("Example")


@ -66,38 +66,14 @@ class Inventaire(TestCase):
with self.assertRaises(ConnectorException):
self.connector.get_book_data("https://test.url/ok")
@responses.activate
def test_search(self):
"""min confidence filtering"""
responses.add(
responses.GET,
"https://inventaire.io/search?q=hi",
json={
"results": [
{
"_score": 200,
"label": "hello",
},
{
"_score": 100,
"label": "hi",
},
],
},
)
results = self.connector.search("hi", min_confidence=0.5)
self.assertEqual(len(results), 1)
self.assertEqual(results[0].title, "hello")
def test_format_search_result(self):
def test_parse_search_data(self):
"""json to search result objs"""
search_file = pathlib.Path(__file__).parent.joinpath(
"../data/inventaire_search.json"
)
search_results = json.loads(search_file.read_bytes())
results = self.connector.parse_search_data(search_results)
formatted = self.connector.format_search_result(results[0])
formatted = list(self.connector.parse_search_data(search_results, 0))[0]
self.assertEqual(formatted.title, "The Stories of Vladimir Nabokov")
self.assertEqual(
@ -178,15 +154,14 @@ class Inventaire(TestCase):
result = self.connector.resolve_keys(keys)
self.assertEqual(result, ["epistolary novel", "crime novel"])
def test_isbn_search(self):
def test_parse_isbn_search_data(self):
"""another search type"""
search_file = pathlib.Path(__file__).parent.joinpath(
"../data/inventaire_isbn_search.json"
)
search_results = json.loads(search_file.read_bytes())
results = self.connector.parse_isbn_search_data(search_results)
formatted = self.connector.format_isbn_search_result(results[0])
formatted = list(self.connector.parse_isbn_search_data(search_results))[0]
self.assertEqual(formatted.title, "L'homme aux cercles bleus")
self.assertEqual(
@ -198,25 +173,12 @@ class Inventaire(TestCase):
"https://covers.inventaire.io/img/entities/12345",
)
def test_isbn_search_empty(self):
def test_parse_isbn_search_data_empty(self):
"""another search type"""
search_results = {}
results = self.connector.parse_isbn_search_data(search_results)
results = list(self.connector.parse_isbn_search_data(search_results))
self.assertEqual(results, [])
def test_isbn_search_no_title(self):
"""another search type"""
search_file = pathlib.Path(__file__).parent.joinpath(
"../data/inventaire_isbn_search.json"
)
search_results = json.loads(search_file.read_bytes())
search_results["entities"]["isbn:9782290349229"]["claims"]["wdt:P1476"] = None
result = self.connector.format_isbn_search_result(
search_results.get("entities")
)
self.assertIsNone(result)
def test_is_work_data(self):
"""is it a work"""
work_file = pathlib.Path(__file__).parent.joinpath(


@ -122,21 +122,11 @@ class Openlibrary(TestCase):
self.assertEqual(result, "https://covers.openlibrary.org/b/id/image-L.jpg")
def test_parse_search_result(self):
"""extract the results from the search json response"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json")
search_data = json.loads(datafile.read_bytes())
result = self.connector.parse_search_data(search_data)
self.assertIsInstance(result, list)
self.assertEqual(len(result), 2)
def test_format_search_result(self):
"""translate json from openlibrary into SearchResult"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json")
search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_search_data(search_data)
self.assertIsInstance(results, list)
result = list(self.connector.parse_search_data(search_data, 0))[0]
result = self.connector.format_search_result(results[0])
self.assertIsInstance(result, SearchResult)
self.assertEqual(result.title, "This Is How You Lose the Time War")
self.assertEqual(result.key, "https://openlibrary.org/works/OL20639540W")
@ -148,18 +138,10 @@ class Openlibrary(TestCase):
"""extract the results from the search json response"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json")
search_data = json.loads(datafile.read_bytes())
result = self.connector.parse_isbn_search_data(search_data)
self.assertIsInstance(result, list)
result = list(self.connector.parse_isbn_search_data(search_data))
self.assertEqual(len(result), 1)
def test_format_isbn_search_result(self):
"""translate json from openlibrary into SearchResult"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json")
search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_isbn_search_data(search_data)
self.assertIsInstance(results, list)
result = self.connector.format_isbn_search_result(results[0])
result = result[0]
self.assertIsInstance(result, SearchResult)
self.assertEqual(result.title, "Les ombres errantes")
self.assertEqual(result.key, "https://openlibrary.org/books/OL16262504M")


@ -102,18 +102,12 @@ class BookSearch(TestCase):
class TestConnector(AbstractMinimalConnector):
"""nothing added here"""
def format_search_result(self, search_result):
return search_result
def get_or_create_book(self, remote_id):
pass
def parse_search_data(self, data):
def parse_search_data(self, data, min_confidence):
return data
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data):
return data


@ -1,6 +1,5 @@
""" test for app action functionality """
import json
import pathlib
from unittest.mock import patch
from django.contrib.auth.models import AnonymousUser
@ -8,9 +7,9 @@ from django.http import JsonResponse
from django.template.response import TemplateResponse
from django.test import TestCase
from django.test.client import RequestFactory
import responses
from bookwyrm import models, views
from bookwyrm.book_search import SearchResult
from bookwyrm.settings import DOMAIN
from bookwyrm.tests.validate_html import validate_html
@ -65,12 +64,11 @@ class Views(TestCase):
self.assertIsInstance(response, TemplateResponse)
validate_html(response.render())
@responses.activate
def test_search_books(self):
"""searches remote connectors"""
view = views.Search.as_view()
models.Connector.objects.create(
connector = models.Connector.objects.create(
identifier="example.com",
connector_file="openlibrary",
base_url="https://example.com",
@ -78,26 +76,24 @@ class Views(TestCase):
covers_url="https://example.com/covers",
search_url="https://example.com/search?q=",
)
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json")
search_data = json.loads(datafile.read_bytes())
responses.add(
responses.GET, "https://example.com/search?q=Test%20Book", json=search_data
)
mock_result = SearchResult(title="Mock Book", connector=connector, key="hello")
request = self.factory.get("", {"q": "Test Book", "remote": True})
request.user = self.local_user
with patch("bookwyrm.views.search.is_api_request") as is_api:
is_api.return_value = False
response = view(request)
with patch("bookwyrm.connectors.connector_manager.search") as remote_search:
remote_search.return_value = [
{"results": [mock_result], "connector": connector}
]
response = view(request)
self.assertIsInstance(response, TemplateResponse)
validate_html(response.render())
connector_results = response.context_data["results"]
self.assertEqual(len(connector_results), 2)
self.assertEqual(connector_results[0]["results"][0].title, "Test Book")
self.assertEqual(
connector_results[1]["results"][0].title,
"This Is How You Lose the Time War",
)
self.assertEqual(connector_results[1]["results"][0].title, "Mock Book")
# don't search remote
request = self.factory.get("", {"q": "Test Book", "remote": True})
@ -106,7 +102,11 @@ class Views(TestCase):
request.user = anonymous_user
with patch("bookwyrm.views.search.is_api_request") as is_api:
is_api.return_value = False
response = view(request)
with patch("bookwyrm.connectors.connector_manager.search") as remote_search:
remote_search.return_value = [
{"results": [mock_result], "connector": connector}
]
response = view(request)
self.assertIsInstance(response, TemplateResponse)
validate_html(response.render())
connector_results = response.context_data["results"]


@ -1,3 +1,4 @@
aiohttp==3.8.1
celery==5.2.2
colorthief==0.2.1
Django==3.2.13