mirror of
https://github.com/searxng/searxng.git
synced 2024-11-29 14:11:02 +00:00
Fix tineye engine url, datetime parsing, and minor refactor
Changes made to tineye engine: 1. Importing logging if TYPE_CHECKING is enabled 2. Remove unnecessary try/except around JSON parsing of the response, as this masked the original error and had no immediate benefit 3. Improve error handling explicitly for status codes 422 and 400 upfront, deferring JSON parsing only for these status codes and successful status codes 4. Unit test all new applicable changes to ensure compatibility
This commit is contained in:
parent
5be55e3309
commit
5276219b9d
2 changed files with 130 additions and 29 deletions
|
@ -14,10 +14,16 @@ billion images `[tineye.com] <https://tineye.com/how>`_.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger()
|
||||||
|
|
||||||
about = {
|
about = {
|
||||||
"website": 'https://tineye.com',
|
"website": 'https://tineye.com',
|
||||||
"wikidata_id": 'Q2382535',
|
"wikidata_id": 'Q2382535',
|
||||||
|
@ -34,7 +40,7 @@ categories = ['general']
|
||||||
paging = True
|
paging = True
|
||||||
safesearch = False
|
safesearch = False
|
||||||
base_url = 'https://tineye.com'
|
base_url = 'https://tineye.com'
|
||||||
search_string = '/result_json/?page={page}&{query}'
|
search_string = '/api/v1/result_json/?page={page}&{query}'
|
||||||
|
|
||||||
FORMAT_NOT_SUPPORTED = gettext(
|
FORMAT_NOT_SUPPORTED = gettext(
|
||||||
"Could not read that image url. This may be due to an unsupported file"
|
"Could not read that image url. This may be due to an unsupported file"
|
||||||
|
@ -120,7 +126,7 @@ def parse_tineye_match(match_json):
|
||||||
|
|
||||||
crawl_date = backlink_json.get("crawl_date")
|
crawl_date = backlink_json.get("crawl_date")
|
||||||
if crawl_date:
|
if crawl_date:
|
||||||
crawl_date = datetime.fromisoformat(crawl_date[:-3])
|
crawl_date = datetime.strptime(crawl_date, '%Y-%m-%d')
|
||||||
else:
|
else:
|
||||||
crawl_date = datetime.min
|
crawl_date = datetime.min
|
||||||
|
|
||||||
|
@ -150,29 +156,15 @@ def parse_tineye_match(match_json):
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
"""Parse HTTP response from TinEye."""
|
"""Parse HTTP response from TinEye."""
|
||||||
results = []
|
|
||||||
|
|
||||||
try:
|
# handle the 422 client side errors, and the possible 400 status code error
|
||||||
json_data = resp.json()
|
|
||||||
except Exception as exc: # pylint: disable=broad-except
|
|
||||||
msg = "can't parse JSON response // %s" % exc
|
|
||||||
logger.error(msg)
|
|
||||||
json_data = {'error': msg}
|
|
||||||
|
|
||||||
# handle error codes from Tineye
|
|
||||||
|
|
||||||
if resp.is_error:
|
|
||||||
if resp.status_code in (400, 422):
|
if resp.status_code in (400, 422):
|
||||||
|
json_data = resp.json()
|
||||||
|
suggestions = json_data.get('suggestions', {})
|
||||||
|
message = f'HTTP Status Code: {resp.status_code}'
|
||||||
|
|
||||||
message = 'HTTP status: %s' % resp.status_code
|
if resp.status_code == 422:
|
||||||
error = json_data.get('error')
|
s_key = suggestions.get('key', '')
|
||||||
s_key = json_data.get('suggestions', {}).get('key', '')
|
|
||||||
|
|
||||||
if error and s_key:
|
|
||||||
message = "%s (%s)" % (error, s_key)
|
|
||||||
elif error:
|
|
||||||
message = error
|
|
||||||
|
|
||||||
if s_key == "Invalid image URL":
|
if s_key == "Invalid image URL":
|
||||||
# test https://docs.searxng.org/_static/searxng-wordmark.svg
|
# test https://docs.searxng.org/_static/searxng-wordmark.svg
|
||||||
message = FORMAT_NOT_SUPPORTED
|
message = FORMAT_NOT_SUPPORTED
|
||||||
|
@ -182,16 +174,23 @@ def response(resp):
|
||||||
elif s_key == 'Download Error':
|
elif s_key == 'Download Error':
|
||||||
# test https://notexists
|
# test https://notexists
|
||||||
message = DOWNLOAD_ERROR
|
message = DOWNLOAD_ERROR
|
||||||
|
else:
|
||||||
|
logger.warning("Unknown suggestion key encountered: %s", s_key)
|
||||||
|
else: # 400
|
||||||
|
description = suggestions.get('description')
|
||||||
|
if isinstance(description, list):
|
||||||
|
message = ','.join(description)
|
||||||
|
|
||||||
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
|
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
|
||||||
# results.append({'answer': message})
|
# results.append({'answer': message})
|
||||||
logger.error(message)
|
logger.error(message)
|
||||||
|
return []
|
||||||
|
|
||||||
return results
|
# Raise for all other responses
|
||||||
|
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
# append results from matches
|
results = []
|
||||||
|
json_data = resp.json()
|
||||||
|
|
||||||
for match_json in json_data['matches']:
|
for match_json in json_data['matches']:
|
||||||
|
|
||||||
|
|
102
tests/unit/test_tineye.py
Normal file
102
tests/unit/test_tineye.py
Normal file
|
@ -0,0 +1,102 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# pylint: disable=missing-module-docstring
|
||||||
|
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from unittest.mock import Mock
|
||||||
|
from requests import HTTPError
|
||||||
|
from searx.engines import load_engines, tineye
|
||||||
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TinEyeTests(SearxTestCase): # pylint: disable=missing-class-docstring
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
load_engines([{'name': 'tineye', 'engine': 'tineye', 'shortcut': 'tin', 'timeout': 9.0, 'disabled': True}])
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
load_engines([])
|
||||||
|
|
||||||
|
def test_status_code_raises(self):
|
||||||
|
response = Mock()
|
||||||
|
response.status_code = 401
|
||||||
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
self.assertRaises(HTTPError, lambda: tineye.response(response))
|
||||||
|
|
||||||
|
def test_returns_empty_list_for_422(self):
|
||||||
|
response = Mock()
|
||||||
|
response.json.return_value = {}
|
||||||
|
response.status_code = 422
|
||||||
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
with self.assertLogs(tineye.logger) as _dev_null:
|
||||||
|
results = tineye.response(response)
|
||||||
|
self.assertEqual(0, len(results))
|
||||||
|
|
||||||
|
def test_logs_format_for_422(self):
|
||||||
|
response = Mock()
|
||||||
|
response.json.return_value = {"suggestions": {"key": "Invalid image URL"}}
|
||||||
|
response.status_code = 422
|
||||||
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
|
||||||
|
with self.assertLogs(tineye.logger) as assert_logs_context:
|
||||||
|
tineye.response(response)
|
||||||
|
self.assertIn(tineye.FORMAT_NOT_SUPPORTED, ','.join(assert_logs_context.output))
|
||||||
|
|
||||||
|
def test_logs_signature_for_422(self):
|
||||||
|
response = Mock()
|
||||||
|
response.json.return_value = {"suggestions": {"key": "NO_SIGNATURE_ERROR"}}
|
||||||
|
response.status_code = 422
|
||||||
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
|
||||||
|
with self.assertLogs(tineye.logger) as assert_logs_context:
|
||||||
|
tineye.response(response)
|
||||||
|
self.assertIn(tineye.NO_SIGNATURE_ERROR, ','.join(assert_logs_context.output))
|
||||||
|
|
||||||
|
def test_logs_download_for_422(self):
|
||||||
|
response = Mock()
|
||||||
|
response.json.return_value = {"suggestions": {"key": "Download Error"}}
|
||||||
|
response.status_code = 422
|
||||||
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
|
||||||
|
with self.assertLogs(tineye.logger) as assert_logs_context:
|
||||||
|
tineye.response(response)
|
||||||
|
self.assertIn(tineye.DOWNLOAD_ERROR, ','.join(assert_logs_context.output))
|
||||||
|
|
||||||
|
def test_empty_list_for_400(self):
|
||||||
|
response = Mock()
|
||||||
|
response.json.return_value = {}
|
||||||
|
response.status_code = 400
|
||||||
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
with self.assertLogs(tineye.logger) as _dev_null:
|
||||||
|
results = tineye.response(response)
|
||||||
|
self.assertEqual(0, len(results))
|
||||||
|
|
||||||
|
def test_logs_description_for_400(self):
|
||||||
|
description = 'There was a problem with that request. Error ID: ad5fc955-a934-43c1-8187-f9a61d301645'
|
||||||
|
response = Mock()
|
||||||
|
response.json.return_value = {"suggestions": {"description": [description], "title": "Oops! We're sorry!"}}
|
||||||
|
response.status_code = 400
|
||||||
|
response.raise_for_status.side_effect = HTTPError()
|
||||||
|
|
||||||
|
with self.assertLogs(tineye.logger) as assert_logs_context:
|
||||||
|
tineye.response(response)
|
||||||
|
self.assertIn(description, ','.join(assert_logs_context.output))
|
||||||
|
|
||||||
|
def test_crawl_date_parses(self):
|
||||||
|
date_str = '2020-05-25'
|
||||||
|
date = datetime.strptime(date_str, '%Y-%m-%d')
|
||||||
|
response = Mock()
|
||||||
|
response.json.return_value = {
|
||||||
|
'matches': [
|
||||||
|
{
|
||||||
|
'backlinks': [
|
||||||
|
{
|
||||||
|
'crawl_date': date_str,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
response.status_code = 200
|
||||||
|
results = tineye.response(response)
|
||||||
|
self.assertEqual(date, results[0]['publishedDate'])
|
Loading…
Reference in a new issue