mirror of
https://github.com/searxng/searxng.git
synced 2024-12-23 09:46:31 +00:00
xpath engine: change raise_for_httperror to no_result_for_http_status
no_result_for_http_status contains a list of HTTP status codes. A response with one of these status codes is treated as an empty result list. In all other cases an exception is raised as usual. Previously, setting raise_for_httperror to false ignored all HTTP errors, which made defective engines invisible in the stats.
This commit is contained in:
parent
a15dfa5ee1
commit
dd0887be18
2 changed files with 19 additions and 6 deletions
|
@ -22,6 +22,7 @@ from urllib.parse import urlencode
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
|
from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list
|
||||||
|
from searx.network import raise_for_httperror
|
||||||
|
|
||||||
search_url = None
|
search_url = None
|
||||||
"""
|
"""
|
||||||
|
@ -60,9 +61,14 @@ lang_all = 'en'
|
||||||
'''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is
|
'''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is
|
||||||
selected.
|
selected.
|
||||||
'''
|
'''
|
||||||
raise_for_httperror = True
|
|
||||||
'''True by default: raise an exception if the HTTP code of response is ``>=
|
no_result_for_http_status = []
|
||||||
300``'''
|
'''Return empty result for these HTTP status codes instead of throwing an error.
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
no_result_for_http_status: []
|
||||||
|
'''
|
||||||
|
|
||||||
soft_max_redirects = 0
|
soft_max_redirects = 0
|
||||||
'''Maximum redirects, soft limit. Record an error but don't stop the engine'''
|
'''Maximum redirects, soft limit. Record an error but don't stop the engine'''
|
||||||
|
@ -179,12 +185,19 @@ def request(query, params):
|
||||||
|
|
||||||
params['url'] = search_url.format(**fargs)
|
params['url'] = search_url.format(**fargs)
|
||||||
params['soft_max_redirects'] = soft_max_redirects
|
params['soft_max_redirects'] = soft_max_redirects
|
||||||
params['raise_for_httperror'] = raise_for_httperror
|
|
||||||
|
params['raise_for_httperror'] = False
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp): # pylint: disable=too-many-branches
|
||||||
'''Scrap *results* from the response (see :ref:`engine results`).'''
|
'''Scrap *results* from the response (see :ref:`engine results`).'''
|
||||||
|
if no_result_for_http_status and resp.status_code in no_result_for_http_status:
|
||||||
|
return []
|
||||||
|
|
||||||
|
raise_for_httperror(resp)
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
is_onion = 'onions' in categories
|
is_onion = 'onions' in categories
|
||||||
|
|
|
@ -1808,7 +1808,7 @@ engines:
|
||||||
url_xpath: //div[@class="upper-synonyms"]/a/@href
|
url_xpath: //div[@class="upper-synonyms"]/a/@href
|
||||||
content_xpath: //div[@class="synonyms-list-group"]
|
content_xpath: //div[@class="synonyms-list-group"]
|
||||||
title_xpath: //div[@class="upper-synonyms"]/a
|
title_xpath: //div[@class="upper-synonyms"]/a
|
||||||
raise_for_httperror: false
|
no_result_for_http_status: [404]
|
||||||
about:
|
about:
|
||||||
website: https://www.woxikon.de/
|
website: https://www.woxikon.de/
|
||||||
wikidata_id: # No Wikidata ID
|
wikidata_id: # No Wikidata ID
|
||||||
|
|
Loading…
Reference in a new issue