From 37addec69e4211aac6b2302c82cb5c5c7b1d5e04 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sat, 28 Jan 2023 10:24:14 +0000 Subject: [PATCH 1/3] search.suspended_time settings: bug fixes * fix typo in settings.yml: replace suspend_times by suspended_times * always use delay defined in settings.yml: * HTTP status 402 and 403: read the value from settings.yml instead of using the hardcoded value of 1 day. * startpage engine: a CAPTCHA suspends the engine for one day instead of one week --- searx/engines/startpage.py | 3 +-- searx/exceptions.py | 9 ++++++++- searx/network/raise_for_httperror.py | 4 +--- searx/settings.yml | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 24aa59d03..f857f7b6d 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -62,8 +62,7 @@ sc_code = '' def raise_captcha(resp): if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): - # suspend CAPTCHA for 7 days - raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600) + raise SearxEngineCaptchaException() def get_sc_code(headers): diff --git a/searx/exceptions.py b/searx/exceptions.py index af81bfb23..b11821b17 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -70,8 +70,15 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException): """The website is blocking the access""" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied" + """This settings contains the default suspended time""" - def __init__(self, suspended_time=None, message='Access denied'): + def __init__(self, suspended_time: int = None, message: str = 'Access denied'): + """Generic exception to raise when an engine denies access to the results + + Args: + suspended_time (int, optional): How long the engine is going to be suspended in second. Defaults to None. + message (str, optional): Internal message. Defaults to 'Access denied'. 
+ """ suspended_time = suspended_time or self._get_default_suspended_time() super().__init__(message + ', suspended_time=' + str(suspended_time)) self.suspended_time = suspended_time diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py index 7fc2b7877..9f847d436 100644 --- a/searx/network/raise_for_httperror.py +++ b/searx/network/raise_for_httperror.py @@ -72,9 +72,7 @@ def raise_for_httperror(resp): if resp.status_code and resp.status_code >= 400: raise_for_captcha(resp) if resp.status_code in (402, 403): - raise SearxEngineAccessDeniedException( - message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24 - ) + raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code)) if resp.status_code == 429: raise SearxEngineTooManyRequestsException() resp.raise_for_status() diff --git a/searx/settings.yml b/searx/settings.yml index 81025d653..216cb3c82 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -45,7 +45,7 @@ search: ban_time_on_fail: 5 # max ban time in seconds after engine errors max_ban_time_on_fail: 120 - suspend_times: + suspended_times: # Engine suspension time after error (in seconds; set to 0 to disable) # For error "Access denied" and "HTTP error [402, 403]" SearxEngineAccessDenied: 86400 From feccee01c004f1e6cf2242dc33c8f3456e1b4e1a Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 29 Jan 2023 18:51:13 +0100 Subject: [PATCH 2/3] [doc] Add doc-strings to searx.exceptions Signed-off-by: Markus Heiser --- docs/src/searx.exceptions.rst | 8 ++++++ searx/exceptions.py | 50 +++++++++++++++-------------------- 2 files changed, 30 insertions(+), 28 deletions(-) create mode 100644 docs/src/searx.exceptions.rst diff --git a/docs/src/searx.exceptions.rst b/docs/src/searx.exceptions.rst new file mode 100644 index 000000000..72117e148 --- /dev/null +++ b/docs/src/searx.exceptions.rst @@ -0,0 +1,8 @@ +.. 
_searx.exceptions: + +================== +SearXNG Exceptions +================== + +.. automodule:: searx.exceptions + :members: diff --git a/searx/exceptions.py b/searx/exceptions.py index b11821b17..069be9057 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -1,29 +1,19 @@ -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. - -(C) 2017- by Alexandre Flament, -''' - +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Exception types raised by SearXNG modules. +""" from typing import Optional, Union class SearxException(Exception): - pass + """Base SearXNG exception.""" class SearxParameterException(SearxException): + """Raised when the query misses a required parameter.""" + def __init__(self, name, value): if value == '' or value is None: message = 'Empty ' + name + ' parameter' @@ -70,14 +60,17 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException): """The website is blocking the access""" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied" - """This settings contains the default suspended time""" + """This setting contains the default suspended time (default 86400 sec / 1 + day).""" def __init__(self, suspended_time: int = None, message: str = 'Access denied'): - """Generic exception to raise when an engine denies access to the results + """Generic exception to raise when an engine denies access to the results. 
+ - Args: - suspended_time (int, optional): How long the engine is going to be suspended in second. Defaults to None. - message (str, optional): Internal message. Defaults to 'Access denied'. + :param suspended_time: How long the engine is going to be suspended in + seconds. Defaults to None (use the default from the settings). + :type suspended_time: int, None + :param message: Internal message. Defaults to ``Access denied`` + :type message: str """ suspended_time = suspended_time or self._get_default_suspended_time() super().__init__(message + ', suspended_time=' + str(suspended_time)) @@ -85,18 +78,17 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException): self.message = message def _get_default_suspended_time(self): - from searx import get_setting + from searx import get_setting # pylint: disable=C0415 return get_setting(self.SUSPEND_TIME_SETTING) class SearxEngineCaptchaException(SearxEngineAccessDeniedException): - """The website has returned a CAPTCHA - - By default, searx stops sending requests to this engine for 1 day. 
- """ + """The website has returned a CAPTCHA.""" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha" + """This setting contains the default suspended time (default 86400 sec / 1 + day).""" def __init__(self, suspended_time=None, message='CAPTCHA'): super().__init__(message=message, suspended_time=suspended_time) @@ -109,6 +101,8 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException): """ SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests" + """This setting contains the default suspended time (default 3600 sec / 1 + hour).""" def __init__(self, suspended_time=None, message='Too many request'): super().__init__(message=message, suspended_time=suspended_time) From 031162be0471650c09c25954b5251d06d8c042e1 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 29 Jan 2023 19:25:07 +0100 Subject: [PATCH 3/3] [doc] settings.py document search.suspended_times Signed-off-by: Markus Heiser --- docs/admin/engines/settings.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst index c747e3f43..97f5ef63e 100644 --- a/docs/admin/engines/settings.rst +++ b/docs/admin/engines/settings.rst @@ -110,6 +110,13 @@ Global Settings default_lang: "" ban_time_on_fail: 5 max_ban_time_on_fail: 120 + suspended_times: + SearxEngineAccessDenied: 86400 + SearxEngineCaptcha: 86400 + SearxEngineTooManyRequests: 3600 + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + recaptcha_SearxEngineCaptcha: 604800 formats: - html @@ -159,6 +166,25 @@ Global Settings ``max_ban_time_on_fail``: Max ban time in seconds after engine errors. 
+``suspended_times``: + Engine suspension time after error (in seconds; set to 0 to disable) + + ``SearxEngineAccessDenied``: 86400 + For error "Access denied" and "HTTP error [402, 403]" + + ``SearxEngineCaptcha``: 86400 + For error "CAPTCHA" + + ``SearxEngineTooManyRequests``: 3600 + For error "Too many request" and "HTTP error 429" + + Cloudflare CAPTCHA: + - ``cf_SearxEngineCaptcha``: 1296000 + - ``cf_SearxEngineAccessDenied``: 86400 + + Google CAPTCHA: + - ``recaptcha_SearxEngineCaptcha``: 604800 + ``formats``: Result formats available from web, remove format to deny access (use lower case). @@ -168,6 +194,7 @@ Global Settings - ``json`` - ``rss`` + .. _settings server: ``server:``