Merge pull request #2132 from dalf/update_pr_1967

search.suspended_time settings: bug fixes
This commit is contained in:
Alexandre Flament 2023-01-29 20:48:43 +01:00 committed by GitHub
commit 9d102fb08f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 63 additions and 30 deletions

View file

@ -110,6 +110,13 @@ Global Settings
default_lang: "" default_lang: ""
ban_time_on_fail: 5 ban_time_on_fail: 5
max_ban_time_on_fail: 120 max_ban_time_on_fail: 120
suspended_times:
SearxEngineAccessDenied: 86400
SearxEngineCaptcha: 86400
SearxEngineTooManyRequests: 3600
cf_SearxEngineCaptcha: 1296000
cf_SearxEngineAccessDenied: 86400
recaptcha_SearxEngineCaptcha: 604800
formats: formats:
- html - html
@ -159,6 +166,25 @@ Global Settings
``max_ban_time_on_fail``: ``max_ban_time_on_fail``:
Max ban time in seconds after engine errors. Max ban time in seconds after engine errors.
``suspended_times``:
Engine suspension time after error (in seconds; set to 0 to disable)
``SearxEngineAccessDenied``: 86400
For error "Access denied" and "HTTP error [402, 403]"
``SearxEngineCaptcha``: 86400
For error "CAPTCHA"
``SearxEngineTooManyRequests``: 3600
For error "Too many request" and "HTTP error 429"
Cloudflare CAPTCHA:
- ``cf_SearxEngineCaptcha``: 1296000
- ``cf_SearxEngineAccessDenied``: 86400
Google CAPTCHA:
- ``recaptcha_SearxEngineCaptcha``: 604800
``formats``: ``formats``:
Result formats available from web, remove format to deny access (use lower Result formats available from web, remove format to deny access (use lower
case). case).
@ -168,6 +194,7 @@ Global Settings
- ``json`` - ``json``
- ``rss`` - ``rss``
.. _settings server: .. _settings server:
``server:`` ``server:``

View file

@ -0,0 +1,8 @@
.. _searx.exceptions:
==================
SearXNG Exceptions
==================
.. automodule:: searx.exceptions
:members:

View file

@ -62,8 +62,7 @@ sc_code = ''
def raise_captcha(resp): def raise_captcha(resp):
if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
# suspend CAPTCHA for 7 days raise SearxEngineCaptchaException()
raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600)
def get_sc_code(headers): def get_sc_code(headers):

View file

@ -1,29 +1,19 @@
''' # -*- coding: utf-8 -*-
searx is free software: you can redistribute it and/or modify # SPDX-License-Identifier: AGPL-3.0-or-later
it under the terms of the GNU Affero General Public License as published by # lint: pylint
the Free Software Foundation, either version 3 of the License, or """Exception types raised by SearXNG modules.
(at your option) any later version. """
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2017- by Alexandre Flament, <alex@al-f.net>
'''
from typing import Optional, Union from typing import Optional, Union
class SearxException(Exception): class SearxException(Exception):
pass """Base SearXNG exception."""
class SearxParameterException(SearxException): class SearxParameterException(SearxException):
"""Raised when a query misses a required parameter"""
def __init__(self, name, value): def __init__(self, name, value):
if value == '' or value is None: if value == '' or value is None:
message = 'Empty ' + name + ' parameter' message = 'Empty ' + name + ' parameter'
@ -70,26 +60,35 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
"""The website is blocking the access""" """The website is blocking the access"""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
"""This setting contains the default suspended time (default 86400 sec / 1
day)."""
def __init__(self, suspended_time=None, message='Access denied'): def __init__(self, suspended_time: int = None, message: str = 'Access denied'):
"""Generic exception to raise when an engine denies access to the results.
:param suspended_time: How long the engine is going to be suspended in
seconds. Defaults to None.
:type suspended_time: int, None
:param message: Internal message. Defaults to ``Access denied``
:type message: str
"""
suspended_time = suspended_time or self._get_default_suspended_time() suspended_time = suspended_time or self._get_default_suspended_time()
super().__init__(message + ', suspended_time=' + str(suspended_time)) super().__init__(message + ', suspended_time=' + str(suspended_time))
self.suspended_time = suspended_time self.suspended_time = suspended_time
self.message = message self.message = message
def _get_default_suspended_time(self): def _get_default_suspended_time(self):
from searx import get_setting from searx import get_setting # pylint: disable=C0415
return get_setting(self.SUSPEND_TIME_SETTING) return get_setting(self.SUSPEND_TIME_SETTING)
class SearxEngineCaptchaException(SearxEngineAccessDeniedException): class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
"""The website has returned a CAPTCHA """The website has returned a CAPTCHA."""
By default, searx stops sending requests to this engine for 1 day.
"""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
"""This setting contains the default suspended time (default 86400 sec / 1
day)."""
def __init__(self, suspended_time=None, message='CAPTCHA'): def __init__(self, suspended_time=None, message='CAPTCHA'):
super().__init__(message=message, suspended_time=suspended_time) super().__init__(message=message, suspended_time=suspended_time)
@ -102,6 +101,8 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
""" """
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
"""This setting contains the default suspended time (default 3600 sec / 1
hour)."""
def __init__(self, suspended_time=None, message='Too many request'): def __init__(self, suspended_time=None, message='Too many request'):
super().__init__(message=message, suspended_time=suspended_time) super().__init__(message=message, suspended_time=suspended_time)

View file

@ -72,9 +72,7 @@ def raise_for_httperror(resp):
if resp.status_code and resp.status_code >= 400: if resp.status_code and resp.status_code >= 400:
raise_for_captcha(resp) raise_for_captcha(resp)
if resp.status_code in (402, 403): if resp.status_code in (402, 403):
raise SearxEngineAccessDeniedException( raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code))
message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24
)
if resp.status_code == 429: if resp.status_code == 429:
raise SearxEngineTooManyRequestsException() raise SearxEngineTooManyRequestsException()
resp.raise_for_status() resp.raise_for_status()

View file

@ -45,7 +45,7 @@ search:
ban_time_on_fail: 5 ban_time_on_fail: 5
# max ban time in seconds after engine errors # max ban time in seconds after engine errors
max_ban_time_on_fail: 120 max_ban_time_on_fail: 120
suspend_times: suspended_times:
# Engine suspension time after error (in seconds; set to 0 to disable) # Engine suspension time after error (in seconds; set to 0 to disable)
# For error "Access denied" and "HTTP error [402, 403]" # For error "Access denied" and "HTTP error [402, 403]"
SearxEngineAccessDenied: 86400 SearxEngineAccessDenied: 86400