Merge pull request #436 from mouse-reeve/search-errors

Catch error response decoding json in search connector
This commit is contained in:
Mouse Reeve 2020-12-30 12:18:43 -08:00 committed by GitHub
commit 877e3356e5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 176 additions and 10 deletions

View file

@ -5,6 +5,7 @@ from urllib.parse import urlparse
from requests import HTTPError from requests import HTTPError
from bookwyrm import models from bookwyrm import models
from bookwyrm.connectors import ConnectorException
from bookwyrm.tasks import app from bookwyrm.tasks import app
@ -55,7 +56,7 @@ def search(query, min_confidence=0.1):
for connector in get_connectors(): for connector in get_connectors():
try: try:
result_set = connector.search(query, min_confidence=min_confidence) result_set = connector.search(query, min_confidence=min_confidence)
except HTTPError: except (HTTPError, ConnectorException):
continue continue
result_set = [r for r in result_set \ result_set = [r for r in result_set \

View file

@ -1,6 +1,7 @@
''' functionality outline for a book data connector ''' ''' functionality outline for a book data connector '''
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass from dataclasses import asdict, dataclass
import logging
from urllib3.exceptions import RequestError from urllib3.exceptions import RequestError
from django.db import transaction from django.db import transaction
@ -11,6 +12,7 @@ from requests.exceptions import SSLError
from bookwyrm import activitypub, models, settings from bookwyrm import activitypub, models, settings
logger = logging.getLogger(__name__)
class ConnectorException(HTTPError): class ConnectorException(HTTPError):
''' when the connector can't do what was asked ''' ''' when the connector can't do what was asked '''
@ -47,7 +49,11 @@ class AbstractMinimalConnector(ABC):
) )
if not resp.ok: if not resp.ok:
resp.raise_for_status() resp.raise_for_status()
data = resp.json() try:
data = resp.json()
except ValueError as e:
logger.exception(e)
raise ConnectorException('Unable to parse json response', e)
results = [] results = []
for doc in self.parse_search_data(data)[:10]: for doc in self.parse_search_data(data)[:10]:
@ -242,6 +248,12 @@ class SearchResult:
return "<SearchResult key={!r} title={!r} author={!r}>".format( return "<SearchResult key={!r} title={!r} author={!r}>".format(
self.key, self.title, self.author) self.key, self.title, self.author)
def json(self):
''' serialize a connector for json response '''
serialized = asdict(self)
del serialized['connector']
return serialized
class Mapping: class Mapping:
''' associate a local database field with a field in an external dataset ''' ''' associate a local database field with a field in an external dataset '''

View file

@ -1,21 +1,25 @@
''' testing book data connectors ''' ''' testing book data connectors '''
from django.test import TestCase from django.test import TestCase
import responses
from bookwyrm import models from bookwyrm import models
from bookwyrm.connectors.abstract_connector import Mapping from bookwyrm.connectors import abstract_connector
from bookwyrm.connectors.abstract_connector import Mapping, SearchResult
from bookwyrm.connectors.openlibrary import Connector from bookwyrm.connectors.openlibrary import Connector
class AbstractConnector(TestCase): class AbstractConnector(TestCase):
''' generic code for connecting to outside data sources '''
def setUp(self): def setUp(self):
''' we need an example connector '''
self.book = models.Edition.objects.create(title='Example Edition') self.book = models.Edition.objects.create(title='Example Edition')
models.Connector.objects.create( self.connector_info = models.Connector.objects.create(
identifier='example.com', identifier='example.com',
connector_file='openlibrary', connector_file='openlibrary',
base_url='https://example.com', base_url='https://example.com',
books_url='https:/example.com', books_url='https://example.com/books',
covers_url='https://example.com', covers_url='https://example.com/covers',
search_url='https://example.com/search?q=', search_url='https://example.com/search?q=',
) )
self.connector = Connector('example.com') self.connector = Connector('example.com')
@ -37,7 +41,69 @@ class AbstractConnector(TestCase):
] ]
def test_abstract_minimal_connector_init(self):
''' barebones connector for search with defaults '''
class TestConnector(abstract_connector.AbstractMinimalConnector):
''' nothing added here '''
def format_search_result():
pass
def get_or_create_book():
pass
def parse_search_data():
pass
connector = TestConnector('example.com')
self.assertEqual(connector.connector, self.connector_info)
self.assertEqual(connector.base_url, 'https://example.com')
self.assertEqual(connector.books_url, 'https://example.com/books')
self.assertEqual(connector.covers_url, 'https://example.com/covers')
self.assertEqual(connector.search_url, 'https://example.com/search?q=')
self.assertIsNone(connector.name),
self.assertEqual(connector.identifier, 'example.com'),
self.assertIsNone(connector.max_query_count)
self.assertFalse(connector.local)
@responses.activate
def test_abstract_minimal_connector_search(self):
''' makes an http request to the outside service '''
class TestConnector(abstract_connector.AbstractMinimalConnector):
''' nothing added here '''
def format_search_result(self, data):
return data
def get_or_create_book(self, data):
pass
def parse_search_data(self, data):
return data
connector = TestConnector('example.com')
responses.add(
responses.GET,
'https://example.com/search?q=a%20book%20title',
json=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'],
status=200)
results = connector.search('a book title')
self.assertEqual(len(results), 10)
self.assertEqual(results[0], 'a')
self.assertEqual(results[1], 'b')
self.assertEqual(results[2], 'c')
def test_search_result(self):
''' a class that stores info about a search result '''
result = SearchResult(
title='Title',
key='https://example.com/book/1',
author='Author Name',
year='1850',
connector=self.connector,
)
# there's really not much to test here, it's just a dataclass
self.assertEqual(result.confidence, 1)
self.assertEqual(result.title, 'Title')
def test_create_mapping(self): def test_create_mapping(self):
''' maps remote fields for book data to bookwyrm activitypub fields '''
mapping = Mapping('isbn') mapping = Mapping('isbn')
self.assertEqual(mapping.local_field, 'isbn') self.assertEqual(mapping.local_field, 'isbn')
self.assertEqual(mapping.remote_field, 'isbn') self.assertEqual(mapping.remote_field, 'isbn')
@ -45,6 +111,7 @@ class AbstractConnector(TestCase):
def test_create_mapping_with_remote(self): def test_create_mapping_with_remote(self):
''' the remote field is different than the local field '''
mapping = Mapping('isbn', remote_field='isbn13') mapping = Mapping('isbn', remote_field='isbn13')
self.assertEqual(mapping.local_field, 'isbn') self.assertEqual(mapping.local_field, 'isbn')
self.assertEqual(mapping.remote_field, 'isbn13') self.assertEqual(mapping.remote_field, 'isbn13')
@ -52,6 +119,7 @@ class AbstractConnector(TestCase):
def test_create_mapping_with_formatter(self): def test_create_mapping_with_formatter(self):
''' a function is provided to modify the data '''
formatter = lambda x: 'aa' + x formatter = lambda x: 'aa' + x
mapping = Mapping('isbn', formatter=formatter) mapping = Mapping('isbn', formatter=formatter)
self.assertEqual(mapping.local_field, 'isbn') self.assertEqual(mapping.local_field, 'isbn')

View file

@ -31,6 +31,7 @@ class BookWyrmConnector(TestCase):
def test_format_search_result(self): def test_format_search_result(self):
''' create a SearchResult object from search response json '''
datafile = pathlib.Path(__file__).parent.joinpath( datafile = pathlib.Path(__file__).parent.joinpath(
'../data/fr_search.json') '../data/fr_search.json')
search_data = json.loads(datafile.read_bytes()) search_data = json.loads(datafile.read_bytes())
@ -43,3 +44,4 @@ class BookWyrmConnector(TestCase):
self.assertEqual(result.key, 'https://example.com/book/122') self.assertEqual(result.key, 'https://example.com/book/122')
self.assertEqual(result.author, 'Susanna Clarke') self.assertEqual(result.author, 'Susanna Clarke')
self.assertEqual(result.year, 2017) self.assertEqual(result.year, 2017)
self.assertEqual(result.connector, self.connector)

View file

@ -0,0 +1,81 @@
''' test for app action functionality '''
import json
from unittest.mock import patch
from django.http import JsonResponse
from django.template.response import TemplateResponse
from django.test import TestCase
from django.test.client import RequestFactory
from bookwyrm import models, views
from bookwyrm.connectors import abstract_connector
from bookwyrm.settings import DOMAIN
class Views(TestCase):
''' every response to a get request, html or json '''
def setUp(self):
''' we need basic test data and mocks '''
self.factory = RequestFactory()
self.book = models.Edition.objects.create(title='Test Book')
models.Connector.objects.create(
identifier='self',
connector_file='self_connector',
local=True
)
def test_search_json_response(self):
''' searches local data only and returns book data in json format '''
# we need a connector for this, sorry
request = self.factory.get('', {'q': 'Test Book'})
with patch('bookwyrm.views.is_api_request') as is_api:
is_api.return_value = True
response = views.search(request)
self.assertIsInstance(response, JsonResponse)
data = json.loads(response.content)
self.assertEqual(len(data), 1)
self.assertEqual(data[0]['title'], 'Test Book')
self.assertEqual(
data[0]['key'], 'https://%s/book/%d' % (DOMAIN, self.book.id))
def test_search_html_response(self):
''' searches remote connectors '''
class TestConnector(abstract_connector.AbstractMinimalConnector):
''' nothing added here '''
def format_search_result(self, search_result):
pass
def get_or_create_book(self, remote_id):
pass
def parse_search_data(self, data):
pass
models.Connector.objects.create(
identifier='example.com',
connector_file='openlibrary',
base_url='https://example.com',
books_url='https://example.com/books',
covers_url='https://example.com/covers',
search_url='https://example.com/search?q=',
)
connector = TestConnector('example.com')
search_result = abstract_connector.SearchResult(
key='http://www.example.com/book/1',
title='Gideon the Ninth',
author='Tamsyn Muir',
year='2019',
connector=connector
)
request = self.factory.get('', {'q': 'Test Book'})
with patch('bookwyrm.views.is_api_request') as is_api:
is_api.return_value = False
with patch('bookwyrm.books_manager.search') as manager:
manager.return_value = [search_result]
response = views.search(request)
self.assertIsInstance(response, TemplateResponse)
self.assertEqual(response.template_name, 'search_results.html')
self.assertEqual(
response.context_data['book_results'][0].title, 'Gideon the Ninth')

View file

@ -38,12 +38,14 @@ def is_api_request(request):
def server_error_page(request): def server_error_page(request):
''' 500 errors ''' ''' 500 errors '''
return TemplateResponse(request, 'error.html', {'title': 'Oops!'}) return TemplateResponse(
request, 'error.html', {'title': 'Oops!'}, status=500)
def not_found_page(request, _): def not_found_page(request, _):
''' 404s ''' ''' 404s '''
return TemplateResponse(request, 'notfound.html', {'title': 'Not found'}) return TemplateResponse(
request, 'notfound.html', {'title': 'Not found'}, status=404)
@login_required @login_required
@ -210,7 +212,7 @@ def search(request):
if is_api_request(request): if is_api_request(request):
# only return local book results via json so we don't cause a cascade # only return local book results via json so we don't cause a cascade
book_results = books_manager.local_search(query) book_results = books_manager.local_search(query)
return JsonResponse([r.__dict__ for r in book_results], safe=False) return JsonResponse([r.json() for r in book_results], safe=False)
# use webfinger for mastodon style account@domain.com username # use webfinger for mastodon style account@domain.com username
if re.match(regex.full_username, query): if re.match(regex.full_username, query):