From b513917ef94d2f18e6db3228363f8089ce1ba08a Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Mon, 6 Sep 2021 19:46:08 +0200 Subject: [PATCH 1/5] [mod] searx.metrics & searx.search: use the engine loggers metrics & processors use the engine logger --- searx/metrics/error_recorder.py | 5 ++-- searx/search/__init__.py | 2 +- searx/search/processors/__init__.py | 2 +- searx/search/processors/abstract.py | 15 ++++++------ searx/search/processors/offline.py | 6 ++--- searx/search/processors/online.py | 36 ++++++++++++++++------------- 6 files changed, 35 insertions(+), 31 deletions(-) diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index c5de008cc..6963cda2f 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -5,7 +5,8 @@ from urllib.parse import urlparse from httpx import HTTPError, HTTPStatusError from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException, SearxEngineAccessDeniedException) -from searx import logger, searx_parent_dir +from searx import searx_parent_dir +from searx.engines import engines errors_per_engines = {} @@ -47,7 +48,7 @@ class ErrorContext: def add_error_context(engine_name: str, error_context: ErrorContext) -> None: errors_for_engine = errors_per_engines.setdefault(engine_name, {}) errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1 - logger.debug('%s: %s', engine_name, str(error_context)) + engines[engine_name].logger.warning('%s', str(error_context)) def get_trace(traces): diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 69d7ffb25..97515622d 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -147,7 +147,7 @@ class Search: if th.is_alive(): th._timeout = True self.result_container.add_unresponsive_engine(th._engine_name, 'timeout') - logger.warning('engine timeout: {0}'.format(th._engine_name)) + PROCESSORS[th._engine_name].logger.error('engine timeout') 
def search_standard(self): """ diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index b3d121f0e..8108f8dfa 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -65,6 +65,6 @@ def initialize(engine_list): processor = get_processor(engine, engine_name) initialize_processor(processor) if processor is None: - logger.error('Error get processor for engine %s', engine_name) + engine.logger.error('Error get processor for engine %s', engine_name) else: PROCESSORS[engine_name] = processor diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index c8d81c026..b5fa063fd 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -9,8 +9,8 @@ import threading from abc import abstractmethod, ABC from timeit import default_timer -from searx import logger -from searx.engines import settings +from searx import settings, logger +from searx.engines import engines from searx.network import get_time_for_thread, get_network from searx.metrics import histogram_observe, counter_inc, count_exception, count_error from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException @@ -43,7 +43,7 @@ class SuspendedStatus: self.continuous_errors * settings['search']['ban_time_on_fail']) self.suspend_end_time = default_timer() + suspended_time self.suspend_reason = suspend_reason - logger.debug('Suspend engine for %i seconds', suspended_time) + logger.debug('Suspend for %i seconds', suspended_time) def resume(self): with self.lock: @@ -56,11 +56,12 @@ class SuspendedStatus: class EngineProcessor(ABC): """Base classes used for all types of reqest processores.""" - __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status' + __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger' def __init__(self, engine, engine_name): self.engine = engine self.engine_name = engine_name + self.logger = 
engines[engine_name].logger key = get_network(self.engine_name) key = id(key) if key else self.engine_name self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) @@ -69,11 +70,11 @@ class EngineProcessor(ABC): try: self.engine.init(get_engine_from_settings(self.engine_name)) except SearxEngineResponseException as exc: - logger.warn('%s engine: Fail to initialize // %s', self.engine_name, exc) + self.logger.warn('Fail to initialize // %s', exc) except Exception: # pylint: disable=broad-except - logger.exception('%s engine: Fail to initialize', self.engine_name) + self.logger.exception('Fail to initialize') else: - logger.debug('%s engine: Initialized', self.engine_name) + self.logger.debug('Initialized') @property def has_initialize_function(self): diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index f40626f39..ec7a4a36e 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -5,10 +5,8 @@ """ -from searx import logger from .abstract import EngineProcessor -logger = logger.getChild('searx.search.processor.offline') class OfflineProcessor(EngineProcessor): """Processor class used by ``offline`` engines""" @@ -24,7 +22,7 @@ class OfflineProcessor(EngineProcessor): self.extend_container(result_container, start_time, search_results) except ValueError as e: # do not record the error - logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) + self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) - logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) + self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 48a514e8a..c4ee58e11 100644 --- a/searx/search/processors/online.py +++ 
b/searx/search/processors/online.py @@ -10,7 +10,6 @@ import asyncio import httpx import searx.network -from searx import logger from searx.utils import gen_useragent from searx.exceptions import ( SearxEngineAccessDeniedException, @@ -20,7 +19,6 @@ from searx.exceptions import ( from searx.metrics.error_recorder import count_error from .abstract import EngineProcessor -logger = logger.getChild('searx.search.processor.online') def default_request_params(): """Default request parameters for ``online`` engines.""" @@ -146,31 +144,37 @@ class OnlineProcessor(EngineProcessor): except (httpx.TimeoutException, asyncio.TimeoutError) as e: # requests timeout (connect or read) self.handle_exception(result_container, e, suspend=True) - logger.error("engine {0} : HTTP requests timeout" - "(search duration : {1} s, timeout: {2} s) : {3}" - .format(self.engine_name, default_timer() - start_time, - timeout_limit, - e.__class__.__name__)) + self.logger.error( + "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}" + .format( + default_timer() - start_time, + timeout_limit, + e.__class__.__name__ + ) + ) except (httpx.HTTPError, httpx.StreamError) as e: # other requests exception self.handle_exception(result_container, e, suspend=True) - logger.exception("engine {0} : requests exception" - "(search duration : {1} s, timeout: {2} s) : {3}" - .format(self.engine_name, default_timer() - start_time, - timeout_limit, - e)) + self.logger.exception( + "requests exception (search duration : {0} s, timeout: {1} s) : {2}" + .format( + default_timer() - start_time, + timeout_limit, + e + ) + ) except SearxEngineCaptchaException as e: self.handle_exception(result_container, e, suspend=True) - logger.exception('engine {0} : CAPTCHA'.format(self.engine_name)) + self.logger.exception('CAPTCHA') except SearxEngineTooManyRequestsException as e: self.handle_exception(result_container, e, suspend=True) - logger.exception('engine {0} : Too many requests'.format(self.engine_name)) + 
self.logger.exception('Too many requests') except SearxEngineAccessDeniedException as e: self.handle_exception(result_container, e, suspend=True) - logger.exception('engine {0} : Searx is blocked'.format(self.engine_name)) + self.logger.exception('Searx is blocked') except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) - logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) + self.logger.exception('exception : {0}'.format(e)) def get_default_tests(self): tests = {} From 577998106aed4c3df5cda8f6af77d55dda8ef60c Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Mon, 6 Sep 2021 19:47:50 +0200 Subject: [PATCH 2/5] [mod] debug mode: use coloredlogs package in debug mode, add some padding to make the output more readable in production mode, add the timestamp --- Makefile | 2 +- requirements-dev.txt | 1 + searx/__init__.py | 84 +++++++++++++++++++++++++++++++++++--------- tests/__init__.py | 1 + 4 files changed, 71 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 88edef5cf..3710af4ec 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ run: install sleep 2 ; \ xdg-open http://127.0.0.1:8888/ ; \ ) & - SEARX_DEBUG=1 ./manage pyenv.cmd python ./searx/webapp.py + SEARX_DEBUG=1 ./manage pyenv.cmd python -m searx.webapp PHONY += install uninstall install uninstall: diff --git a/requirements-dev.txt b/requirements-dev.txt index ae60b6c8f..4be587f34 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -17,3 +17,4 @@ linuxdoc==20210324 aiounittest==1.4.0 yamllint==1.26.3 wlc==1.12 +coloredlogs==15.0.1 diff --git a/searx/__init__.py b/searx/__init__.py index 265f926bc..12376b0fd 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -2,13 +2,23 @@ # lint: pylint # pylint: disable=missing-module-docstring +import sys +import os from os.path import dirname, abspath + import logging import searx.unixthreadname import searx.settings_loader from searx.settings_defaults import
settings_set_defaults + +# Debug +LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s' + +# Production +LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s' + searx_dir = abspath(dirname(__file__)) searx_parent_dir = abspath(dirname(dirname(__file__))) settings, settings_load_message = searx.settings_loader.load_settings() @@ -16,22 +26,6 @@ settings, settings_load_message = searx.settings_loader.load_settings() if settings is not None: settings = settings_set_defaults(settings) -searx_debug = settings['general']['debug'] -if searx_debug: - logging.basicConfig(level=logging.DEBUG) -else: - logging.basicConfig(level=logging.WARNING) - -logger = logging.getLogger('searx') -logger.info(settings_load_message) - -# log max_request_timeout -max_request_timeout = settings['outgoing']['max_request_timeout'] -if max_request_timeout is None: - logger.info('max_request_timeout=%s', repr(max_request_timeout)) -else: - logger.info('max_request_timeout=%i second(s)', max_request_timeout) - _unset = object() def get_setting(name, default=_unset): @@ -53,3 +47,61 @@ def get_setting(name, default=_unset): break return value + + +def is_color_terminal(): + if os.getenv('TERM') in ('dumb', 'unknown'): + return False + return sys.stdout.isatty() + + +def logging_config_debug(): + try: + import coloredlogs # pylint: disable=import-outside-toplevel + except ImportError: + coloredlogs = None + + log_level = os.environ.get('SEARX_DEBUG_LOG_LEVEL', 'DEBUG') + if coloredlogs and is_color_terminal(): + level_styles = { + 'spam': {'color': 'green', 'faint': True}, + 'debug': {}, + 'notice': {'color': 'magenta'}, + 'success': {'bold': True, 'color': 'green'}, + 'info': {'bold': True, 'color': 'cyan'}, + 'warning': {'color': 'yellow'}, + 'error': {'color': 'red'}, + 'critical': {'bold': True, 'color': 'red'}, + } + field_styles = { + 'asctime': {'color': 'green'}, + 'hostname': {'color': 'magenta'}, + 'levelname': {'color': 8}, + 'name': {'color': 8}, + 
'programname': {'color': 'cyan'}, + 'username': {'color': 'yellow'} + } + coloredlogs.install( + level=log_level, + level_styles=level_styles, + field_styles=field_styles, + fmt=LOG_FORMAT_DEBUG + ) + else: + logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG) + + +searx_debug = settings['general']['debug'] +if searx_debug: + logging_config_debug() +else: + logging.basicConfig(level=logging.WARNING, format=LOG_FORMAT_PROD) +logger = logging.getLogger('searx') +logger.info(settings_load_message) + +# log max_request_timeout +max_request_timeout = settings['outgoing']['max_request_timeout'] +if max_request_timeout is None: + logger.info('max_request_timeout=%s', repr(max_request_timeout)) +else: + logger.info('max_request_timeout=%i second(s)', max_request_timeout) diff --git a/tests/__init__.py b/tests/__init__.py index d4b101cc4..23e62f040 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -3,6 +3,7 @@ import os import aiounittest os.environ['SEARX_DEBUG'] = '1' +os.environ['SEARX_DEBUG_LOG_LEVEL'] = 'WARNING' os.environ['SEARX_DISABLE_ETC_SETTINGS'] = '1' os.environ.pop('SEARX_SETTINGS_PATH', None) From 91a6d80e8291852264abdcda83c203dfb5770381 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Mon, 6 Sep 2021 21:42:30 +0200 Subject: [PATCH 3/5] [mod] debug mode: log HTTP requests with network name For example wikipedia requests use the logger name "searx.network.wikipedia" Log is disabled when searx_debug is False --- searx/network/client.py | 16 ++++++++++++--- searx/network/network.py | 42 +++++++++++++++++++++++++++----------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/searx/network/client.py b/searx/network/client.py index 187ae5366..e1abff05a 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -194,7 +194,7 @@ def new_client( # pylint: disable=too-many-arguments enable_http, verify, enable_http2, max_connections, max_keepalive_connections, keepalive_expiry, - proxies,
local_address, retries, max_redirects ): + proxies, local_address, retries, max_redirects, hook_log_response ): limit = httpx.Limits( max_connections=max_connections, max_keepalive_connections=max_keepalive_connections, @@ -221,7 +221,17 @@ def new_client( mounts['http://'] = AsyncHTTPTransportNoHttp() transport = get_transport(verify, enable_http2, local_address, None, limit, retries) - return httpx.AsyncClient(transport=transport, mounts=mounts, max_redirects=max_redirects) + + event_hooks = None + if hook_log_response: + event_hooks = {'response': [ hook_log_response ]} + + return httpx.AsyncClient( + transport=transport, + mounts=mounts, + max_redirects=max_redirects, + event_hooks=event_hooks, + ) def get_loop(): @@ -231,7 +241,7 @@ def get_loop(): def init(): # log - for logger_name in ('hpack.hpack', 'hpack.table'): + for logger_name in ('hpack.hpack', 'hpack.table', 'httpx._client'): logging.getLogger(logger_name).setLevel(logging.WARNING) # loop diff --git a/searx/network/network.py b/searx/network/network.py index d09a2ee0e..9954f0507 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -10,9 +10,11 @@ from itertools import cycle import httpx +from searx import logger, searx_debug from .client import new_client, get_loop +logger = logger.getChild('network') DEFAULT_NAME = '__DEFAULT__' NETWORKS = {} # requests compatibility when reading proxy settings from settings.yml @@ -41,7 +43,7 @@ class Network: 'enable_http', 'verify', 'enable_http2', 'max_connections', 'max_keepalive_connections', 'keepalive_expiry', 'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error', - '_local_addresses_cycle', '_proxies_cycle', '_clients' + '_local_addresses_cycle', '_proxies_cycle', '_clients', '_logger' ) def __init__( @@ -57,7 +59,8 @@ class Network: local_addresses=None, retries=0, retry_on_http_error=None, - max_redirects=30 ): + max_redirects=30, + logger_name=None): self.enable_http = enable_http self.verify = verify @@ 
-73,6 +76,7 @@ class Network: self._local_addresses_cycle = self.get_ipaddress_cycle() self._proxies_cycle = self.get_proxy_cycles() self._clients = {} + self._logger = logger.getChild(logger_name) if logger_name else logger self.check_parameters() def check_parameters(self): @@ -130,12 +134,25 @@ class Network: # pylint: disable=stop-iteration-return yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items()) + async def log_response(self, response: httpx.Response): + request = response.request + status = f"{response.status_code} {response.reason_phrase}" + response_line = f"{response.http_version} {status}" + if hasattr(response, "_elapsed"): + elapsed_time = f"{response.elapsed.total_seconds()} sec" + else: + elapsed_time = "stream" + self._logger.debug( + f'HTTP Request: {request.method} {request.url} "{response_line}" ({elapsed_time})' + ) + def get_client(self, verify=None, max_redirects=None): verify = self.verify if verify is None else verify max_redirects = self.max_redirects if max_redirects is None else max_redirects local_address = next(self._local_addresses_cycle) proxies = next(self._proxies_cycle) # is a tuple so it can be part of the key key = (verify, max_redirects, local_address, proxies) + hook_log_response = self.log_response if searx_debug else None if key not in self._clients or self._clients[key].is_closed: self._clients[key] = new_client( self.enable_http, @@ -147,7 +164,8 @@ class Network: dict(proxies), local_address, 0, - max_redirects + max_redirects, + hook_log_response ) return self._clients[key] @@ -243,11 +261,13 @@ def initialize(settings_engines=None, settings_outgoing=None): 'retry_on_http_error': None, } - def new_network(params): + def new_network(params, logger_name=None): nonlocal default_params result = {} result.update(default_params) result.update(params) + if logger_name: + result['logger_name'] = logger_name return Network(**result) def iter_networks(): @@ -263,13 +283,13 @@ def 
initialize(settings_engines=None, settings_outgoing=None): if NETWORKS: done() NETWORKS.clear() - NETWORKS[DEFAULT_NAME] = new_network({}) - NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'}) - NETWORKS['ipv6'] = new_network({'local_addresses': '::'}) + NETWORKS[DEFAULT_NAME] = new_network({}, logger_name='default') + NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'}, logger_name='ipv4') + NETWORKS['ipv6'] = new_network({'local_addresses': '::'}, logger_name='ipv6') # define networks from outgoing.networks for network_name, network in settings_outgoing['networks'].items(): - NETWORKS[network_name] = new_network(network) + NETWORKS[network_name] = new_network(network, logger_name=network_name) # define networks from engines.[i].network (except references) for engine_name, engine, network in iter_networks(): @@ -280,9 +300,9 @@ def initialize(settings_engines=None, settings_outgoing=None): network[attribute_name] = getattr(engine, attribute_name) else: network[attribute_name] = attribute_value - NETWORKS[engine_name] = new_network(network) + NETWORKS[engine_name] = new_network(network, logger_name=engine_name) elif isinstance(network, dict): - NETWORKS[engine_name] = new_network(network) + NETWORKS[engine_name] = new_network(network, logger_name=engine_name) # define networks from engines.[i].network (references) for engine_name, engine, network in iter_networks(): @@ -295,7 +315,7 @@ def initialize(settings_engines=None, settings_outgoing=None): if 'image_proxy' not in NETWORKS: image_proxy_params = default_params.copy() image_proxy_params['enable_http2'] = False - NETWORKS['image_proxy'] = new_network(image_proxy_params) + NETWORKS['image_proxy'] = new_network(image_proxy_params, logger_name='image_proxy') @atexit.register From 0e42db9da111bbf1984ae48889acee05e4dd2dd8 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Mon, 6 Sep 2021 21:43:48 +0200 Subject: [PATCH 4/5] [mod] xpath engine: remove logging of the requested URL --- 
searx/engines/xpath.py | 1 - 1 file changed, 1 deletion(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 8338d5301..08677b708 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -173,7 +173,6 @@ def request(query, params): params['url'] = search_url.format(**fargs) params['soft_max_redirects'] = soft_max_redirects - logger.debug("query_url --> %s", params['url']) return params From 6abc37efc75a472b977306e5352c10358f3a1d08 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Wed, 8 Sep 2021 08:44:40 +0200 Subject: [PATCH 5/5] [mod] production mode: make sure to set the log level to WARNING set: * the root logger level to WARNING * the werkzeug logger level to WARNING (useful when there is no uwsgi) --- searx/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/searx/__init__.py b/searx/__init__.py index 12376b0fd..93ad76bb1 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -18,6 +18,7 @@ LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s' # Production LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s' +LOG_LEVEL_PROD = logging.WARNING searx_dir = abspath(dirname(__file__)) searx_parent_dir = abspath(dirname(dirname(__file__))) @@ -95,7 +96,9 @@ searx_debug = settings['general']['debug'] if searx_debug: logging_config_debug() else: - logging.basicConfig(level=logging.WARNING, format=LOG_FORMAT_PROD) + logging.basicConfig(level=LOG_LEVEL_PROD, format=LOG_FORMAT_PROD) + logging.root.setLevel(level=LOG_LEVEL_PROD) + logging.getLogger('werkzeug').setLevel(level=LOG_LEVEL_PROD) logger = logging.getLogger('searx') logger.info(settings_load_message)