Merge branch 'main' into production

This commit is contained in:
Mouse Reeve 2022-05-31 10:41:58 -07:00
commit 14e73d18dd
21 changed files with 223 additions and 374 deletions

View file

@ -148,8 +148,8 @@ class SearchResult:
def __repr__(self): def __repr__(self):
# pylint: disable=consider-using-f-string # pylint: disable=consider-using-f-string
return "<SearchResult key={!r} title={!r} author={!r}>".format( return "<SearchResult key={!r} title={!r} author={!r} confidence={!r}>".format(
self.key, self.title, self.author self.key, self.title, self.author, self.confidence
) )
def json(self): def json(self):

View file

@ -1,9 +1,8 @@
""" functionality outline for a book data connector """ """ functionality outline for a book data connector """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
import imghdr import imghdr
import ipaddress
import logging import logging
from urllib.parse import urlparse import re
from django.core.files.base import ContentFile from django.core.files.base import ContentFile
from django.db import transaction from django.db import transaction
@ -11,7 +10,7 @@ import requests
from requests.exceptions import RequestException from requests.exceptions import RequestException
from bookwyrm import activitypub, models, settings from bookwyrm import activitypub, models, settings
from .connector_manager import load_more_data, ConnectorException from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url
from .format_mappings import format_mappings from .format_mappings import format_mappings
@ -39,62 +38,34 @@ class AbstractMinimalConnector(ABC):
for field in self_fields: for field in self_fields:
setattr(self, field, getattr(info, field)) setattr(self, field, getattr(info, field))
def search(self, query, min_confidence=None, timeout=settings.QUERY_TIMEOUT): def get_search_url(self, query):
"""free text search""" """format the query url"""
params = {} # Check if the query resembles an ISBN
if min_confidence: if maybe_isbn(query) and self.isbn_search_url and self.isbn_search_url != "":
params["min_confidence"] = min_confidence return f"{self.isbn_search_url}{query}"
data = self.get_search_data( # NOTE: previously, we tried searching isbn and if that produces no results,
f"{self.search_url}{query}", # searched as free text. This, instead, only searches isbn if it's isbn-y
params=params, return f"{self.search_url}{query}"
timeout=timeout,
)
results = []
for doc in self.parse_search_data(data)[:10]: def process_search_response(self, query, data, min_confidence):
results.append(self.format_search_result(doc)) """Format the search results based on the formt of the query"""
return results if maybe_isbn(query):
return list(self.parse_isbn_search_data(data))[:10]
def isbn_search(self, query, timeout=settings.QUERY_TIMEOUT): return list(self.parse_search_data(data, min_confidence))[:10]
"""isbn search"""
params = {}
data = self.get_search_data(
f"{self.isbn_search_url}{query}",
params=params,
timeout=timeout,
)
results = []
# this shouldn't be returning mutliple results, but just in case
for doc in self.parse_isbn_search_data(data)[:10]:
results.append(self.format_isbn_search_result(doc))
return results
def get_search_data(self, remote_id, **kwargs): # pylint: disable=no-self-use
"""this allows connectors to override the default behavior"""
return get_data(remote_id, **kwargs)
@abstractmethod @abstractmethod
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id):
"""pull up a book record by whatever means possible""" """pull up a book record by whatever means possible"""
@abstractmethod @abstractmethod
def parse_search_data(self, data): def parse_search_data(self, data, min_confidence):
"""turn the result json from a search into a list""" """turn the result json from a search into a list"""
@abstractmethod
def format_search_result(self, search_result):
"""create a SearchResult obj from json"""
@abstractmethod @abstractmethod
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data):
"""turn the result json from a search into a list""" """turn the result json from a search into a list"""
@abstractmethod
def format_isbn_search_result(self, search_result):
"""create a SearchResult obj from json"""
class AbstractConnector(AbstractMinimalConnector): class AbstractConnector(AbstractMinimalConnector):
"""generic book data connector""" """generic book data connector"""
@ -254,9 +225,6 @@ def get_data(url, params=None, timeout=10):
# check if the url is blocked # check if the url is blocked
raise_not_valid_url(url) raise_not_valid_url(url)
if models.FederatedServer.is_blocked(url):
raise ConnectorException(f"Attempting to load data from blocked url: {url}")
try: try:
resp = requests.get( resp = requests.get(
url, url,
@ -311,20 +279,6 @@ def get_image(url, timeout=10):
return image_content, extension return image_content, extension
def raise_not_valid_url(url):
"""do some basic reality checks on the url"""
parsed = urlparse(url)
if not parsed.scheme in ["http", "https"]:
raise ConnectorException("Invalid scheme: ", url)
try:
ipaddress.ip_address(parsed.netloc)
raise ConnectorException("Provided url is an IP address: ", url)
except ValueError:
# it's not an IP address, which is good
pass
class Mapping: class Mapping:
"""associate a local database field with a field in an external dataset""" """associate a local database field with a field in an external dataset"""
@ -366,3 +320,9 @@ def unique_physical_format(format_text):
# try a direct match, so saving this would be redundant # try a direct match, so saving this would be redundant
return None return None
return format_text return format_text
def maybe_isbn(query):
"""check if a query looks like an isbn"""
isbn = re.sub(r"[\W_]", "", query) # removes filler characters
return len(isbn) in [10, 13] # ISBN10 or ISBN13

View file

@ -10,15 +10,12 @@ class Connector(AbstractMinimalConnector):
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id):
return activitypub.resolve_remote_id(remote_id, model=models.Edition) return activitypub.resolve_remote_id(remote_id, model=models.Edition)
def parse_search_data(self, data): def parse_search_data(self, data, min_confidence):
return data for search_result in data:
search_result["connector"] = self
def format_search_result(self, search_result): yield SearchResult(**search_result)
search_result["connector"] = self
return SearchResult(**search_result)
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data):
return data for search_result in data:
search_result["connector"] = self
def format_isbn_search_result(self, search_result): yield SearchResult(**search_result)
return self.format_search_result(search_result)

View file

@ -1,17 +1,18 @@
""" interface with whatever connectors the app has """ """ interface with whatever connectors the app has """
from datetime import datetime import asyncio
import importlib import importlib
import ipaddress
import logging import logging
import re
from urllib.parse import urlparse from urllib.parse import urlparse
import aiohttp
from django.dispatch import receiver from django.dispatch import receiver
from django.db.models import signals from django.db.models import signals
from requests import HTTPError from requests import HTTPError
from bookwyrm import book_search, models from bookwyrm import book_search, models
from bookwyrm.settings import SEARCH_TIMEOUT from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT
from bookwyrm.tasks import app from bookwyrm.tasks import app
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -21,53 +22,85 @@ class ConnectorException(HTTPError):
"""when the connector can't do what was asked""" """when the connector can't do what was asked"""
async def get_results(session, url, min_confidence, query, connector):
"""try this specific connector"""
# pylint: disable=line-too-long
headers = {
"Accept": (
'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8'
),
"User-Agent": USER_AGENT,
}
params = {"min_confidence": min_confidence}
try:
async with session.get(url, headers=headers, params=params) as response:
if not response.ok:
logger.info("Unable to connect to %s: %s", url, response.reason)
return
try:
raw_data = await response.json()
except aiohttp.client_exceptions.ContentTypeError as err:
logger.exception(err)
return
return {
"connector": connector,
"results": connector.process_search_response(
query, raw_data, min_confidence
),
}
except asyncio.TimeoutError:
logger.info("Connection timed out for url: %s", url)
except aiohttp.ClientError as err:
logger.exception(err)
async def async_connector_search(query, items, min_confidence):
"""Try a number of requests simultaneously"""
timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT)
async with aiohttp.ClientSession(timeout=timeout) as session:
tasks = []
for url, connector in items:
tasks.append(
asyncio.ensure_future(
get_results(session, url, min_confidence, query, connector)
)
)
results = await asyncio.gather(*tasks)
return results
def search(query, min_confidence=0.1, return_first=False): def search(query, min_confidence=0.1, return_first=False):
"""find books based on arbitary keywords""" """find books based on arbitary keywords"""
if not query: if not query:
return [] return []
results = [] results = []
# Have we got a ISBN ? items = []
isbn = re.sub(r"[\W_]", "", query)
maybe_isbn = len(isbn) in [10, 13] # ISBN10 or ISBN13
start_time = datetime.now()
for connector in get_connectors(): for connector in get_connectors():
result_set = None # get the search url from the connector before sending
if maybe_isbn and connector.isbn_search_url and connector.isbn_search_url != "": url = connector.get_search_url(query)
# Search on ISBN try:
try: raise_not_valid_url(url)
result_set = connector.isbn_search(isbn) except ConnectorException:
except Exception as err: # pylint: disable=broad-except # if this URL is invalid we should skip it and move on
logger.info(err) logger.info("Request denied to blocked domain: %s", url)
# if this fails, we can still try regular search continue
items.append((url, connector))
# if no isbn search results, we fallback to generic search # load as many results as we can
if not result_set: results = asyncio.run(async_connector_search(query, items, min_confidence))
try: results = [r for r in results if r]
result_set = connector.search(query, min_confidence=min_confidence)
except Exception as err: # pylint: disable=broad-except
# we don't want *any* error to crash the whole search page
logger.info(err)
continue
if return_first and result_set:
# if we found anything, return it
return result_set[0]
if result_set:
results.append(
{
"connector": connector,
"results": result_set,
}
)
if (datetime.now() - start_time).seconds >= SEARCH_TIMEOUT:
break
if return_first: if return_first:
return None # find the best result from all the responses and return that
all_results = [r for con in results for r in con["results"]]
all_results = sorted(all_results, key=lambda r: r.confidence, reverse=True)
return all_results[0] if all_results else None
# failed requests will return None, so filter those out
return results return results
@ -133,3 +166,20 @@ def create_connector(sender, instance, created, *args, **kwargs):
"""create a connector to an external bookwyrm server""" """create a connector to an external bookwyrm server"""
if instance.application_type == "bookwyrm": if instance.application_type == "bookwyrm":
get_or_create_connector(f"https://{instance.server_name}") get_or_create_connector(f"https://{instance.server_name}")
def raise_not_valid_url(url):
"""do some basic reality checks on the url"""
parsed = urlparse(url)
if not parsed.scheme in ["http", "https"]:
raise ConnectorException("Invalid scheme: ", url)
try:
ipaddress.ip_address(parsed.netloc)
raise ConnectorException("Provided url is an IP address: ", url)
except ValueError:
# it's not an IP address, which is good
pass
if models.FederatedServer.is_blocked(url):
raise ConnectorException(f"Attempting to load data from blocked url: {url}")

View file

@ -77,53 +77,42 @@ class Connector(AbstractConnector):
**{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]}, **{k: data.get(k) for k in ["uri", "image", "labels", "sitelinks", "type"]},
} }
def search(self, query, min_confidence=None): # pylint: disable=arguments-differ def parse_search_data(self, data, min_confidence):
"""overrides default search function with confidence ranking""" for search_result in data.get("results", []):
results = super().search(query) images = search_result.get("image")
if min_confidence: cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None
# filter the search results after the fact # a deeply messy translation of inventaire's scores
return [r for r in results if r.confidence >= min_confidence] confidence = float(search_result.get("_score", 0.1))
return results confidence = 0.1 if confidence < 150 else 0.999
if confidence < min_confidence:
def parse_search_data(self, data): continue
return data.get("results") yield SearchResult(
title=search_result.get("label"),
def format_search_result(self, search_result): key=self.get_remote_id(search_result.get("uri")),
images = search_result.get("image") author=search_result.get("description"),
cover = f"{self.covers_url}/img/entities/{images[0]}" if images else None view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
# a deeply messy translation of inventaire's scores cover=cover,
confidence = float(search_result.get("_score", 0.1)) confidence=confidence,
confidence = 0.1 if confidence < 150 else 0.999 connector=self,
return SearchResult( )
title=search_result.get("label"),
key=self.get_remote_id(search_result.get("uri")),
author=search_result.get("description"),
view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
cover=cover,
confidence=confidence,
connector=self,
)
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data):
"""got some daaaata""" """got some daaaata"""
results = data.get("entities") results = data.get("entities")
if not results: if not results:
return [] return
return list(results.values()) for search_result in list(results.values()):
title = search_result.get("claims", {}).get("wdt:P1476", [])
def format_isbn_search_result(self, search_result): if not title:
"""totally different format than a regular search result""" continue
title = search_result.get("claims", {}).get("wdt:P1476", []) yield SearchResult(
if not title: title=title[0],
return None key=self.get_remote_id(search_result.get("uri")),
return SearchResult( author=search_result.get("description"),
title=title[0], view_link=f"{self.base_url}/entity/{search_result.get('uri')}",
key=self.get_remote_id(search_result.get("uri")), cover=self.get_cover_url(search_result.get("image")),
author=search_result.get("description"), connector=self,
view_link=f"{self.base_url}/entity/{search_result.get('uri')}", )
cover=self.get_cover_url(search_result.get("image")),
connector=self,
)
def is_work_data(self, data): def is_work_data(self, data):
return data.get("type") == "work" return data.get("type") == "work"

View file

@ -152,39 +152,35 @@ class Connector(AbstractConnector):
image_name = f"{cover_id}-{size}.jpg" image_name = f"{cover_id}-{size}.jpg"
return f"{self.covers_url}/b/id/{image_name}" return f"{self.covers_url}/b/id/{image_name}"
def parse_search_data(self, data): def parse_search_data(self, data, min_confidence):
return data.get("docs") for search_result in data.get("docs"):
# build the remote id from the openlibrary key
def format_search_result(self, search_result): key = self.books_url + search_result["key"]
# build the remote id from the openlibrary key author = search_result.get("author_name") or ["Unknown"]
key = self.books_url + search_result["key"] cover_blob = search_result.get("cover_i")
author = search_result.get("author_name") or ["Unknown"] cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None
cover_blob = search_result.get("cover_i") yield SearchResult(
cover = self.get_cover_url([cover_blob], size="M") if cover_blob else None title=search_result.get("title"),
return SearchResult( key=key,
title=search_result.get("title"), author=", ".join(author),
key=key, connector=self,
author=", ".join(author), year=search_result.get("first_publish_year"),
connector=self, cover=cover,
year=search_result.get("first_publish_year"), )
cover=cover,
)
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data):
return list(data.values()) for search_result in list(data.values()):
# build the remote id from the openlibrary key
def format_isbn_search_result(self, search_result): key = self.books_url + search_result["key"]
# build the remote id from the openlibrary key authors = search_result.get("authors") or [{"name": "Unknown"}]
key = self.books_url + search_result["key"] author_names = [author.get("name") for author in authors]
authors = search_result.get("authors") or [{"name": "Unknown"}] yield SearchResult(
author_names = [author.get("name") for author in authors] title=search_result.get("title"),
return SearchResult( key=key,
title=search_result.get("title"), author=", ".join(author_names),
key=key, connector=self,
author=", ".join(author_names), year=search_result.get("publish_date"),
connector=self, )
year=search_result.get("publish_date"),
)
def load_edition_data(self, olkey): def load_edition_data(self, olkey):
"""query openlibrary for editions of a work""" """query openlibrary for editions of a work"""

View file

@ -89,7 +89,7 @@ def init_connectors():
covers_url="https://inventaire.io", covers_url="https://inventaire.io",
search_url="https://inventaire.io/api/search?types=works&types=works&search=", search_url="https://inventaire.io/api/search?types=works&types=works&search=",
isbn_search_url="https://inventaire.io/api/entities?action=by-uris&uris=isbn%3A", isbn_search_url="https://inventaire.io/api/entities?action=by-uris&uris=isbn%3A",
priority=3, priority=1,
) )
models.Connector.objects.create( models.Connector.objects.create(
@ -101,7 +101,7 @@ def init_connectors():
covers_url="https://covers.openlibrary.org", covers_url="https://covers.openlibrary.org",
search_url="https://openlibrary.org/search?q=", search_url="https://openlibrary.org/search?q=",
isbn_search_url="https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:", isbn_search_url="https://openlibrary.org/api/books?jscmd=data&format=json&bibkeys=ISBN:",
priority=3, priority=1,
) )

View file

@ -374,7 +374,7 @@ class Review(BookStatus):
def save(self, *args, **kwargs): def save(self, *args, **kwargs):
"""clear rating caches""" """clear rating caches"""
if self.book.parent_work: if self.book.parent_work:
cache.delete(f"book-rating-{self.book.parent_work.id}-*") cache.delete(f"book-rating-{self.book.parent_work.id}")
super().save(*args, **kwargs) super().save(*args, **kwargs)

View file

@ -216,7 +216,7 @@ STREAMS = [
# Search configuration # Search configuration
# total time in seconds that the instance will spend searching connectors # total time in seconds that the instance will spend searching connectors
SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 15)) SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 8))
# timeout for a query to an individual connector # timeout for a query to an individual connector
QUERY_TIMEOUT = int(env("QUERY_TIMEOUT", 5)) QUERY_TIMEOUT = int(env("QUERY_TIMEOUT", 5))

View file

@ -36,7 +36,7 @@
{% if result_set.results %} {% if result_set.results %}
<section class="mb-5"> <section class="mb-5">
{% if not result_set.connector.local %} {% if not result_set.connector.local %}
<details class="details-panel box" {% if forloop.first %}open{% endif %}> <details class="details-panel box" open>
{% endif %} {% endif %}
{% if not result_set.connector.local %} {% if not result_set.connector.local %}
<summary class="is-flex is-align-items-center is-flex-wrap-wrap is-gap-2"> <summary class="is-flex is-align-items-center is-flex-wrap-wrap is-gap-2">

View file

@ -13,10 +13,10 @@ register = template.Library()
def get_rating(book, user): def get_rating(book, user):
"""get the overall rating of a book""" """get the overall rating of a book"""
return cache.get_or_set( return cache.get_or_set(
f"book-rating-{book.parent_work.id}-{user.id}", f"book-rating-{book.parent_work.id}",
lambda u, b: models.Review.privacy_filter(u) lambda u, b: models.Review.objects.filter(
.filter(book__parent_work__editions=b, rating__gt=0) book__parent_work__editions=b, rating__gt=0
.aggregate(Avg("rating"))["rating__avg"] ).aggregate(Avg("rating"))["rating__avg"]
or 0, or 0,
user, user,
book, book,

View file

@ -42,15 +42,9 @@ class AbstractConnector(TestCase):
generated_remote_link_field = "openlibrary_link" generated_remote_link_field = "openlibrary_link"
def format_search_result(self, search_result): def parse_search_data(self, data, min_confidence):
return search_result
def parse_search_data(self, data):
return data return data
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data):
return data return data

View file

@ -1,6 +1,5 @@
""" testing book data connectors """ """ testing book data connectors """
from django.test import TestCase from django.test import TestCase
import responses
from bookwyrm import models from bookwyrm import models
from bookwyrm.connectors import abstract_connector from bookwyrm.connectors import abstract_connector
@ -25,18 +24,12 @@ class AbstractConnector(TestCase):
class TestConnector(abstract_connector.AbstractMinimalConnector): class TestConnector(abstract_connector.AbstractMinimalConnector):
"""nothing added here""" """nothing added here"""
def format_search_result(self, search_result):
return search_result
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id):
pass pass
def parse_search_data(self, data): def parse_search_data(self, data, min_confidence):
return data return data
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data):
return data return data
@ -54,45 +47,6 @@ class AbstractConnector(TestCase):
self.assertIsNone(connector.name) self.assertIsNone(connector.name)
self.assertEqual(connector.identifier, "example.com") self.assertEqual(connector.identifier, "example.com")
@responses.activate
def test_search(self):
"""makes an http request to the outside service"""
responses.add(
responses.GET,
"https://example.com/search?q=a%20book%20title",
json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
status=200,
)
results = self.test_connector.search("a book title")
self.assertEqual(len(results), 10)
self.assertEqual(results[0], "a")
self.assertEqual(results[1], "b")
self.assertEqual(results[2], "c")
@responses.activate
def test_search_min_confidence(self):
"""makes an http request to the outside service"""
responses.add(
responses.GET,
"https://example.com/search?q=a%20book%20title&min_confidence=1",
json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
status=200,
)
results = self.test_connector.search("a book title", min_confidence=1)
self.assertEqual(len(results), 10)
@responses.activate
def test_isbn_search(self):
"""makes an http request to the outside service"""
responses.add(
responses.GET,
"https://example.com/isbn?q=123456",
json=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
status=200,
)
results = self.test_connector.isbn_search("123456")
self.assertEqual(len(results), 10)
def test_create_mapping(self): def test_create_mapping(self):
"""maps remote fields for book data to bookwyrm activitypub fields""" """maps remote fields for book data to bookwyrm activitypub fields"""
mapping = Mapping("isbn") mapping = Mapping("isbn")

View file

@ -30,14 +30,11 @@ class BookWyrmConnector(TestCase):
result = self.connector.get_or_create_book(book.remote_id) result = self.connector.get_or_create_book(book.remote_id)
self.assertEqual(book, result) self.assertEqual(book, result)
def test_format_search_result(self): def test_parse_search_data(self):
"""create a SearchResult object from search response json""" """create a SearchResult object from search response json"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json") datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json")
search_data = json.loads(datafile.read_bytes()) search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_search_data(search_data) result = list(self.connector.parse_search_data(search_data, 0))[0]
self.assertIsInstance(results, list)
result = self.connector.format_search_result(results[0])
self.assertIsInstance(result, SearchResult) self.assertIsInstance(result, SearchResult)
self.assertEqual(result.title, "Jonathan Strange and Mr Norrell") self.assertEqual(result.title, "Jonathan Strange and Mr Norrell")
self.assertEqual(result.key, "https://example.com/book/122") self.assertEqual(result.key, "https://example.com/book/122")
@ -45,10 +42,9 @@ class BookWyrmConnector(TestCase):
self.assertEqual(result.year, 2017) self.assertEqual(result.year, 2017)
self.assertEqual(result.connector, self.connector) self.assertEqual(result.connector, self.connector)
def test_format_isbn_search_result(self): def test_parse_isbn_search_data(self):
"""just gotta attach the connector""" """just gotta attach the connector"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json") datafile = pathlib.Path(__file__).parent.joinpath("../data/bw_search.json")
search_data = json.loads(datafile.read_bytes()) search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_isbn_search_data(search_data) result = list(self.connector.parse_isbn_search_data(search_data))[0]
result = self.connector.format_isbn_search_result(results[0])
self.assertEqual(result.connector, self.connector) self.assertEqual(result.connector, self.connector)

View file

@ -49,39 +49,11 @@ class ConnectorManager(TestCase):
self.assertEqual(len(connectors), 1) self.assertEqual(len(connectors), 1)
self.assertIsInstance(connectors[0], BookWyrmConnector) self.assertIsInstance(connectors[0], BookWyrmConnector)
@responses.activate
def test_search_plaintext(self):
"""search all connectors"""
responses.add(
responses.GET,
"http://fake.ciom/search/Example?min_confidence=0.1",
json=[{"title": "Hello", "key": "https://www.example.com/search/1"}],
)
results = connector_manager.search("Example")
self.assertEqual(len(results), 1)
self.assertEqual(len(results[0]["results"]), 1)
self.assertEqual(results[0]["connector"].identifier, "test_connector_remote")
self.assertEqual(results[0]["results"][0].title, "Hello")
def test_search_empty_query(self): def test_search_empty_query(self):
"""don't panic on empty queries""" """don't panic on empty queries"""
results = connector_manager.search("") results = connector_manager.search("")
self.assertEqual(results, []) self.assertEqual(results, [])
@responses.activate
def test_search_isbn(self):
"""special handling if a query resembles an isbn"""
responses.add(
responses.GET,
"http://fake.ciom/isbn/0000000000",
json=[{"title": "Hello", "key": "https://www.example.com/search/1"}],
)
results = connector_manager.search("0000000000")
self.assertEqual(len(results), 1)
self.assertEqual(len(results[0]["results"]), 1)
self.assertEqual(results[0]["connector"].identifier, "test_connector_remote")
self.assertEqual(results[0]["results"][0].title, "Hello")
def test_first_search_result(self): def test_first_search_result(self):
"""only get one search result""" """only get one search result"""
result = connector_manager.first_search_result("Example") result = connector_manager.first_search_result("Example")

View file

@ -66,38 +66,14 @@ class Inventaire(TestCase):
with self.assertRaises(ConnectorException): with self.assertRaises(ConnectorException):
self.connector.get_book_data("https://test.url/ok") self.connector.get_book_data("https://test.url/ok")
@responses.activate def test_parse_search_data(self):
def test_search(self):
"""min confidence filtering"""
responses.add(
responses.GET,
"https://inventaire.io/search?q=hi",
json={
"results": [
{
"_score": 200,
"label": "hello",
},
{
"_score": 100,
"label": "hi",
},
],
},
)
results = self.connector.search("hi", min_confidence=0.5)
self.assertEqual(len(results), 1)
self.assertEqual(results[0].title, "hello")
def test_format_search_result(self):
"""json to search result objs""" """json to search result objs"""
search_file = pathlib.Path(__file__).parent.joinpath( search_file = pathlib.Path(__file__).parent.joinpath(
"../data/inventaire_search.json" "../data/inventaire_search.json"
) )
search_results = json.loads(search_file.read_bytes()) search_results = json.loads(search_file.read_bytes())
results = self.connector.parse_search_data(search_results) formatted = list(self.connector.parse_search_data(search_results, 0))[0]
formatted = self.connector.format_search_result(results[0])
self.assertEqual(formatted.title, "The Stories of Vladimir Nabokov") self.assertEqual(formatted.title, "The Stories of Vladimir Nabokov")
self.assertEqual( self.assertEqual(
@ -178,15 +154,14 @@ class Inventaire(TestCase):
result = self.connector.resolve_keys(keys) result = self.connector.resolve_keys(keys)
self.assertEqual(result, ["epistolary novel", "crime novel"]) self.assertEqual(result, ["epistolary novel", "crime novel"])
def test_isbn_search(self): def test_pase_isbn_search_data(self):
"""another search type""" """another search type"""
search_file = pathlib.Path(__file__).parent.joinpath( search_file = pathlib.Path(__file__).parent.joinpath(
"../data/inventaire_isbn_search.json" "../data/inventaire_isbn_search.json"
) )
search_results = json.loads(search_file.read_bytes()) search_results = json.loads(search_file.read_bytes())
results = self.connector.parse_isbn_search_data(search_results) formatted = list(self.connector.parse_isbn_search_data(search_results))[0]
formatted = self.connector.format_isbn_search_result(results[0])
self.assertEqual(formatted.title, "L'homme aux cercles bleus") self.assertEqual(formatted.title, "L'homme aux cercles bleus")
self.assertEqual( self.assertEqual(
@ -198,25 +173,12 @@ class Inventaire(TestCase):
"https://covers.inventaire.io/img/entities/12345", "https://covers.inventaire.io/img/entities/12345",
) )
def test_isbn_search_empty(self): def test_parse_isbn_search_data_empty(self):
"""another search type""" """another search type"""
search_results = {} search_results = {}
results = self.connector.parse_isbn_search_data(search_results) results = list(self.connector.parse_isbn_search_data(search_results))
self.assertEqual(results, []) self.assertEqual(results, [])
def test_isbn_search_no_title(self):
"""another search type"""
search_file = pathlib.Path(__file__).parent.joinpath(
"../data/inventaire_isbn_search.json"
)
search_results = json.loads(search_file.read_bytes())
search_results["entities"]["isbn:9782290349229"]["claims"]["wdt:P1476"] = None
result = self.connector.format_isbn_search_result(
search_results.get("entities")
)
self.assertIsNone(result)
def test_is_work_data(self): def test_is_work_data(self):
"""is it a work""" """is it a work"""
work_file = pathlib.Path(__file__).parent.joinpath( work_file = pathlib.Path(__file__).parent.joinpath(

View file

@ -122,21 +122,11 @@ class Openlibrary(TestCase):
self.assertEqual(result, "https://covers.openlibrary.org/b/id/image-L.jpg") self.assertEqual(result, "https://covers.openlibrary.org/b/id/image-L.jpg")
def test_parse_search_result(self): def test_parse_search_result(self):
"""extract the results from the search json response"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json")
search_data = json.loads(datafile.read_bytes())
result = self.connector.parse_search_data(search_data)
self.assertIsInstance(result, list)
self.assertEqual(len(result), 2)
def test_format_search_result(self):
"""translate json from openlibrary into SearchResult""" """translate json from openlibrary into SearchResult"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json") datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json")
search_data = json.loads(datafile.read_bytes()) search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_search_data(search_data) result = list(self.connector.parse_search_data(search_data, 0))[0]
self.assertIsInstance(results, list)
result = self.connector.format_search_result(results[0])
self.assertIsInstance(result, SearchResult) self.assertIsInstance(result, SearchResult)
self.assertEqual(result.title, "This Is How You Lose the Time War") self.assertEqual(result.title, "This Is How You Lose the Time War")
self.assertEqual(result.key, "https://openlibrary.org/works/OL20639540W") self.assertEqual(result.key, "https://openlibrary.org/works/OL20639540W")
@ -148,18 +138,10 @@ class Openlibrary(TestCase):
"""extract the results from the search json response""" """extract the results from the search json response"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json") datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json")
search_data = json.loads(datafile.read_bytes()) search_data = json.loads(datafile.read_bytes())
result = self.connector.parse_isbn_search_data(search_data) result = list(self.connector.parse_isbn_search_data(search_data))
self.assertIsInstance(result, list)
self.assertEqual(len(result), 1) self.assertEqual(len(result), 1)
def test_format_isbn_search_result(self): result = result[0]
"""translate json from openlibrary into SearchResult"""
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_isbn_search.json")
search_data = json.loads(datafile.read_bytes())
results = self.connector.parse_isbn_search_data(search_data)
self.assertIsInstance(results, list)
result = self.connector.format_isbn_search_result(results[0])
self.assertIsInstance(result, SearchResult) self.assertIsInstance(result, SearchResult)
self.assertEqual(result.title, "Les ombres errantes") self.assertEqual(result.title, "Les ombres errantes")
self.assertEqual(result.key, "https://openlibrary.org/books/OL16262504M") self.assertEqual(result.key, "https://openlibrary.org/books/OL16262504M")

View file

@ -40,7 +40,8 @@ class RatingTags(TestCase):
@patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async") @patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async")
def test_get_rating(self, *_): def test_get_rating(self, *_):
"""privacy filtered rating""" """privacy filtered rating. Commented versions are how it ought to work with
subjective ratings, which are currently not used for performance reasons."""
# follows-only: not included # follows-only: not included
models.ReviewRating.objects.create( models.ReviewRating.objects.create(
user=self.remote_user, user=self.remote_user,
@ -48,7 +49,8 @@ class RatingTags(TestCase):
book=self.book, book=self.book,
privacy="followers", privacy="followers",
) )
self.assertEqual(rating_tags.get_rating(self.book, self.local_user), 0) # self.assertEqual(rating_tags.get_rating(self.book, self.local_user), 0)
self.assertEqual(rating_tags.get_rating(self.book, self.local_user), 5)
# public: included # public: included
models.ReviewRating.objects.create( models.ReviewRating.objects.create(

View file

@ -102,18 +102,12 @@ class BookSearch(TestCase):
class TestConnector(AbstractMinimalConnector): class TestConnector(AbstractMinimalConnector):
"""nothing added here""" """nothing added here"""
def format_search_result(self, search_result):
return search_result
def get_or_create_book(self, remote_id): def get_or_create_book(self, remote_id):
pass pass
def parse_search_data(self, data): def parse_search_data(self, data, min_confidence):
return data return data
def format_isbn_search_result(self, search_result):
return search_result
def parse_isbn_search_data(self, data): def parse_isbn_search_data(self, data):
return data return data

View file

@ -1,6 +1,5 @@
""" test for app action functionality """ """ test for app action functionality """
import json import json
import pathlib
from unittest.mock import patch from unittest.mock import patch
from django.contrib.auth.models import AnonymousUser from django.contrib.auth.models import AnonymousUser
@ -8,9 +7,9 @@ from django.http import JsonResponse
from django.template.response import TemplateResponse from django.template.response import TemplateResponse
from django.test import TestCase from django.test import TestCase
from django.test.client import RequestFactory from django.test.client import RequestFactory
import responses
from bookwyrm import models, views from bookwyrm import models, views
from bookwyrm.book_search import SearchResult
from bookwyrm.settings import DOMAIN from bookwyrm.settings import DOMAIN
from bookwyrm.tests.validate_html import validate_html from bookwyrm.tests.validate_html import validate_html
@ -65,12 +64,11 @@ class Views(TestCase):
self.assertIsInstance(response, TemplateResponse) self.assertIsInstance(response, TemplateResponse)
validate_html(response.render()) validate_html(response.render())
@responses.activate
def test_search_books(self): def test_search_books(self):
"""searches remote connectors""" """searches remote connectors"""
view = views.Search.as_view() view = views.Search.as_view()
models.Connector.objects.create( connector = models.Connector.objects.create(
identifier="example.com", identifier="example.com",
connector_file="openlibrary", connector_file="openlibrary",
base_url="https://example.com", base_url="https://example.com",
@ -78,26 +76,24 @@ class Views(TestCase):
covers_url="https://example.com/covers", covers_url="https://example.com/covers",
search_url="https://example.com/search?q=", search_url="https://example.com/search?q=",
) )
datafile = pathlib.Path(__file__).parent.joinpath("../data/ol_search.json") mock_result = SearchResult(title="Mock Book", connector=connector, key="hello")
search_data = json.loads(datafile.read_bytes())
responses.add(
responses.GET, "https://example.com/search?q=Test%20Book", json=search_data
)
request = self.factory.get("", {"q": "Test Book", "remote": True}) request = self.factory.get("", {"q": "Test Book", "remote": True})
request.user = self.local_user request.user = self.local_user
with patch("bookwyrm.views.search.is_api_request") as is_api: with patch("bookwyrm.views.search.is_api_request") as is_api:
is_api.return_value = False is_api.return_value = False
response = view(request) with patch("bookwyrm.connectors.connector_manager.search") as remote_search:
remote_search.return_value = [
{"results": [mock_result], "connector": connector}
]
response = view(request)
self.assertIsInstance(response, TemplateResponse) self.assertIsInstance(response, TemplateResponse)
validate_html(response.render()) validate_html(response.render())
connector_results = response.context_data["results"] connector_results = response.context_data["results"]
self.assertEqual(len(connector_results), 2) self.assertEqual(len(connector_results), 2)
self.assertEqual(connector_results[0]["results"][0].title, "Test Book") self.assertEqual(connector_results[0]["results"][0].title, "Test Book")
self.assertEqual( self.assertEqual(connector_results[1]["results"][0].title, "Mock Book")
connector_results[1]["results"][0].title,
"This Is How You Lose the Time War",
)
# don't search remote # don't search remote
request = self.factory.get("", {"q": "Test Book", "remote": True}) request = self.factory.get("", {"q": "Test Book", "remote": True})
@ -106,7 +102,11 @@ class Views(TestCase):
request.user = anonymous_user request.user = anonymous_user
with patch("bookwyrm.views.search.is_api_request") as is_api: with patch("bookwyrm.views.search.is_api_request") as is_api:
is_api.return_value = False is_api.return_value = False
response = view(request) with patch("bookwyrm.connectors.connector_manager.search") as remote_search:
remote_search.return_value = [
{"results": [mock_result], "connector": connector}
]
response = view(request)
self.assertIsInstance(response, TemplateResponse) self.assertIsInstance(response, TemplateResponse)
validate_html(response.render()) validate_html(response.render())
connector_results = response.context_data["results"] connector_results = response.context_data["results"]

View file

@ -1,3 +1,4 @@
aiohttp==3.8.1
celery==5.2.2 celery==5.2.2
colorthief==0.2.1 colorthief==0.2.1
Django==3.2.13 Django==3.2.13