diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst
index 118e01efd..250a27461 100644
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@@ -397,14 +397,26 @@ Communication with search engines.

   Global timeout of the requests made to other engines in seconds.  A bigger
   timeout allows waiting for answers from slow engines, but in consequence
   slows SearXNG's reactivity (the result page may take the time specified in the
-  timeout to load). Can be override by :ref:`settings engine`
+  timeout to load). Can be overridden by ``timeout`` in the :ref:`settings engine`.

 ``useragent_suffix`` :
   Suffix to the user-agent SearXNG uses to send requests to other engines.  If
   an engine wishes to block you, contact info here may be useful to avoid that.

+.. _Pool limit configuration: https://www.python-httpx.org/advanced/#pool-limit-configuration
+
+``pool_maxsize`` :
+  Number of allowable keep-alive connections, or ``null`` to always allow.  The
+  default is 10.  See ``max_keepalive_connections`` in the `Pool limit
+  configuration`_.
+
+``pool_connections`` :
+  Maximum number of allowable connections, or ``null`` for no limits.  The
+  default is 100.  See ``max_connections`` in the `Pool limit configuration`_.
+
 ``keepalive_expiry`` :
-  Number of seconds to keep a connection in the pool. By default 5.0 seconds.
+  Number of seconds to keep a connection in the pool.  By default 5.0 seconds.
+  See ``keepalive_expiry`` in the `Pool limit configuration`_.
+
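For orientation: the three pool options above map one-to-one onto the pool
limits of httpx, the HTTP library SearXNG uses.  A minimal sketch of that
mapping, assuming only the documented defaults; the ``outgoing`` dict and the
client construction are illustrative, not SearXNG's actual network code::

    import httpx

    outgoing = {
        "pool_connections": 100,  # documented default -> httpx max_connections
        "pool_maxsize": 10,       # documented default -> httpx max_keepalive_connections
        "keepalive_expiry": 5.0,  # documented default -> httpx keepalive_expiry
    }

    limits = httpx.Limits(
        max_connections=outgoing["pool_connections"],
        max_keepalive_connections=outgoing["pool_maxsize"],
        keepalive_expiry=outgoing["keepalive_expiry"],
    )

    # All requests sent through this client share the pool configured above.
    client = httpx.Client(limits=limits)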
.. _httpx proxies: https://www.python-httpx.org/advanced/#http-proxying

@@ -429,15 +441,6 @@ Communication with search engines.

   Number of retries in case of an HTTP error.  On each retry, SearXNG uses a
   different proxy and source IP.

-``retry_on_http_error`` :
-  Retry request on some HTTP status code.
-
-  Example:
-
-  * ``true`` : on HTTP status code between 400 and 599.
-  * ``403`` : on HTTP status code 403.
-  * ``[403, 429]``: on HTTP status code 403 and 429.
-
 ``enable_http2`` :
   Enabled by default.  Set to ``false`` to disable HTTP/2.

@@ -455,6 +458,11 @@ Communication with search engines.

 ``max_redirects`` :
   30 by default.  Maximum number of redirects before it is an error.

+``using_tor_proxy`` :
+  Using Tor proxy (``true``) or not (``false``) for all engines.  The default
+  is ``false`` and can be overwritten in the :ref:`settings engine`.
+
+
 .. _settings categories_as_tabs:

@@ -522,13 +530,14 @@ engine is shown.  Most of the options have a default value or even are optional.

        use_official_api: true
        require_api_key: true
        results: HTML
-       enable_http: false
+
+       # overwrite values from section 'outgoing:'
        enable_http2: false
        retries: 1
-       retry_on_http_error: true # or 403 or [404, 429]
        max_connections: 100
        max_keepalive_connections: 10
        keepalive_expiry: 5.0
+       using_tor_proxy: false
        proxies:
          http:
            - http://proxy1:8080
@@ -539,6 +548,11 @@ engine is shown.  Most of the options have a default value or even are optional.

            - socks5://user:password@proxy3:1080
            - socks5h://user:password@proxy4:1080

+       # other network settings
+       enable_http: false
+       retry_on_http_error: true # or 403 or [404, 429]
+
+
 ``name`` :
   Name that will be used across SearXNG to define this engine.  In settings, on
   the result page...

@@ -579,7 +593,8 @@ engine is shown.  Most of the options have a default value or even are optional.

   query all search engines in that category (group).

 ``timeout`` : optional
-  Timeout of the search with the current search engine.  **Be careful, it will
+  Timeout of the search with the current search engine.  Overwrites
+  ``request_timeout`` from :ref:`settings outgoing`.  **Be careful, it will
   modify the global timeout of SearXNG.**

 ``api_key`` : optional
@@ -615,6 +630,37 @@ engine is shown.  Most of the options have a default value or even are optional.

   - ``ipv4`` set ``local_addresses`` to ``0.0.0.0`` (use only IPv4 local addresses)
   - ``ipv6`` set ``local_addresses`` to ``::`` (use only IPv6 local addresses)

+``enable_http`` : optional
+  Enable HTTP for this engine (by default only HTTPS is enabled).
+
+``retry_on_http_error`` : optional
+  Retry request on some HTTP status code.
+
+  Example:
+
+  * ``true`` : on HTTP status code between 400 and 599.
+  * ``403`` : on HTTP status code 403.
+  * ``[403, 429]``: on HTTP status codes 403 and 429.
+
+``proxies`` :
+  Overwrites proxy settings from :ref:`settings outgoing`.
+
+``using_tor_proxy`` :
+  Using Tor proxy (``true``) or not (``false``) for this engine.  The default
+  is taken from ``using_tor_proxy`` of the :ref:`settings outgoing`.
+
+``max_keepalive_connections`` :
+  See `Pool limit configuration`_; overwrites ``pool_maxsize`` from
+  :ref:`settings outgoing` for this engine.
+
+``max_connections`` :
+  See `Pool limit configuration`_; overwrites ``pool_connections`` from
+  :ref:`settings outgoing` for this engine.
+
+``keepalive_expiry`` :
+  See `Pool limit configuration`_; overwrites ``keepalive_expiry`` from
+  :ref:`settings outgoing` for this engine.
+
 .. note::

    A few more options are possible, but they are pretty specific to some
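Since ``retry_on_http_error`` accepts three value shapes (boolean, a single
status code, or a list of codes), its semantics can be pinned down in a small
predicate.  A minimal sketch derived only from the option description above;
``should_retry`` is a hypothetical helper, not SearXNG's actual retry logic::

    from typing import List, Union

    def should_retry(cfg: Union[bool, int, List[int]], status: int) -> bool:
        if cfg is True:                 # ``true``: any 4xx/5xx status
            return 400 <= status <= 599
        if isinstance(cfg, int) and not isinstance(cfg, bool):
            return status == cfg        # single code: exactly that status
        if isinstance(cfg, list):
            return status in cfg        # list: any of the given codes
        return False                    # ``false`` / unset: never retry

    assert should_retry(True, 503)
    assert should_retry(403, 403) and not should_retry(403, 429)
    assert should_retry([403, 429], 429)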
diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py
index 00962e215..fd3019e6c 100644
--- a/searx/enginelib/__init__.py
+++ b/searx/enginelib/__init__.py
@@ -17,7 +17,7 @@

 from __future__ import annotations

-from typing import Union, Dict, List, Callable, TYPE_CHECKING
+from typing import List, Callable, TYPE_CHECKING

 if TYPE_CHECKING:
     from searx.enginelib import traits
@@ -134,3 +134,15 @@ class Engine:  # pylint: disable=too-few-public-methods
           require_api_key: true
           results: HTML
     """
+
+    using_tor_proxy: bool
+    """Using Tor proxy (``true``) or not (``false``) for this engine."""
+
+    send_accept_language_header: bool
+    """When this option is activated, the language (locale) that is selected by
+    the user is used to build and send an ``Accept-Language`` header in the
+    request to the origin search engine."""
+
+    tokens: List[str]
+    """A list of secret tokens to make this engine *private*; for more details
+    see :ref:`private engines`."""
diff --git a/searx/enginelib/traits.py b/searx/enginelib/traits.py
index ae27d46f1..8a7356ce2 100644
--- a/searx/enginelib/traits.py
+++ b/searx/enginelib/traits.py
@@ -13,6 +13,7 @@ used.
 from __future__ import annotations
 import json
 import dataclasses
+import types
 from typing import Dict, Iterable, Union, Callable, Optional, TYPE_CHECKING
 from typing_extensions import Literal, Self

@@ -82,8 +83,7 @@ class EngineTraits:
     """

     custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
-    """A place to store engine's custom traits, not related to the SearXNG core
-
+    """A place to store engine's custom traits, not related to the SearXNG core.
     """

     def get_language(self, searxng_locale: str, default=None):
@@ -228,7 +228,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):

         return obj

-    def set_traits(self, engine: Engine):
+    def set_traits(self, engine: Engine | types.ModuleType):
         """Set traits in a :py:obj:`Engine` namespace.

         :param engine: engine instance built by :py:func:`searx.engines.load_engine`
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index a2db26816..e9e9f87c9 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -17,7 +17,9 @@
 import sys
 import copy
 from os.path import realpath, dirname

-from typing import TYPE_CHECKING, Dict, Optional
+from typing import TYPE_CHECKING, Dict
+import types
+import inspect

 from searx import logger, settings
 from searx.utils import load_module
@@ -28,21 +30,23 @@ if TYPE_CHECKING:

 logger = logger.getChild('engines')
 ENGINE_DIR = dirname(realpath(__file__))
 ENGINE_DEFAULT_ARGS = {
+    # Common options in the engine module
     "engine_type": "online",
-    "inactive": False,
-    "disabled": False,
-    "timeout": settings["outgoing"]["request_timeout"],
-    "shortcut": "-",
-    "categories": ["general"],
     "paging": False,
-    "safesearch": False,
     "time_range_support": False,
+    "safesearch": False,
+    # settings.yml
+    "categories": ["general"],
     "enable_http": False,
-    "using_tor_proxy": False,
+    "shortcut": "-",
+    "timeout": settings["outgoing"]["request_timeout"],
     "display_error_messages": True,
+    "disabled": False,
+    "inactive": False,
+    "about": {},
+    "using_tor_proxy": False,
     "send_accept_language_header": False,
     "tokens": [],
-    "about": {},
 }
 # set automatically when an engine does not have any tab category
 DEFAULT_CATEGORY = 'other'
@@ -51,7 +55,7 @@

 # Defaults for the namespace of an engine module, see :py:func:`load_engine`

 categories = {'general': []}
-engines: Dict[str, Engine] = {}
+engines: Dict[str, Engine | types.ModuleType] = {}
 engine_shortcuts = {}
 """Simple map of registered *shortcuts* to name of the engine (or ``None``).

 usage::

     engine_shortcuts[engine.shortcut] = engine.name

 """
@@ -63,7 +67,19 @@

-def load_engine(engine_data: dict) -> Optional[Engine]:
+def check_engine_module(module: types.ModuleType):
+    # Probe for unintentional name collisions, e.g. collisions caused by
+    # import statements in the engine module.
+
+    # network: https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
+    obj = getattr(module, 'network', None)
+    if obj and inspect.ismodule(obj):
+        msg = f'type of {module.__name__}.network is a module ({obj.__name__}), expected a string'
+        # logger.error(msg)
+        raise TypeError(msg)
+
+
+def load_engine(engine_data: dict) -> Engine | types.ModuleType | None:
     """Load engine from ``engine_data``.
:param dict engine_data: Attributes from YAML ``settings:engines/`` @@ -100,19 +116,20 @@ def load_engine(engine_data: dict) -> Optional[Engine]: engine_data['name'] = engine_name # load_module - engine_module = engine_data.get('engine') - if engine_module is None: + module_name = engine_data.get('engine') + if module_name is None: logger.error('The "engine" field is missing for the engine named "{}"'.format(engine_name)) return None try: - engine = load_module(engine_module + '.py', ENGINE_DIR) + engine = load_module(module_name + '.py', ENGINE_DIR) except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError): - logger.exception('Fatal exception in engine "{}"'.format(engine_module)) + logger.exception('Fatal exception in engine "{}"'.format(module_name)) sys.exit(1) except BaseException: - logger.exception('Cannot load engine "{}"'.format(engine_module)) + logger.exception('Cannot load engine "{}"'.format(module_name)) return None + check_engine_module(engine) update_engine_attributes(engine, engine_data) update_attributes_for_tor(engine) @@ -153,18 +170,18 @@ def set_loggers(engine, engine_name): and not hasattr(module, "logger") ): module_engine_name = module_name.split(".")[-1] - module.logger = logger.getChild(module_engine_name) + module.logger = logger.getChild(module_engine_name) # type: ignore -def update_engine_attributes(engine: Engine, engine_data): +def update_engine_attributes(engine: Engine | types.ModuleType, engine_data): # set engine attributes from engine_data for param_name, param_value in engine_data.items(): if param_name == 'categories': if isinstance(param_value, str): param_value = list(map(str.strip, param_value.split(','))) - engine.categories = param_value + engine.categories = param_value # type: ignore elif hasattr(engine, 'about') and param_name == 'about': - engine.about = {**engine.about, **engine_data['about']} + engine.about = {**engine.about, **engine_data['about']} # type: ignore else: setattr(engine, param_name, param_value) @@ -174,10 +191,10 @@ def update_engine_attributes(engine: Engine, engine_data): setattr(engine, arg_name, copy.deepcopy(arg_value)) -def update_attributes_for_tor(engine: Engine) -> bool: +def update_attributes_for_tor(engine: Engine | types.ModuleType): if using_tor_proxy(engine) and hasattr(engine, 'onion_url'): - engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') - engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) + engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') # type: ignore + engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) # type: ignore def is_missing_required_attributes(engine): @@ -193,12 +210,12 @@ def is_missing_required_attributes(engine): return missing -def using_tor_proxy(engine: Engine): +def using_tor_proxy(engine: Engine | types.ModuleType): """Return True if the engine configuration declares to use Tor.""" return settings['outgoing'].get('using_tor_proxy') or getattr(engine, 'using_tor_proxy', False) -def is_engine_active(engine: Engine): +def is_engine_active(engine: Engine | types.ModuleType): # check if engine is inactive if engine.inactive is True: return False @@ -210,7 +227,7 @@ def is_engine_active(engine: Engine): return True -def register_engine(engine: Engine): +def register_engine(engine: Engine | types.ModuleType): if engine.name in engines: logger.error('Engine config error: ambiguous name: {0}'.format(engine.name)) sys.exit(1) diff --git a/searx/engines/archlinux.py 
b/searx/engines/archlinux.py index 56c3b447f..17bb1b6c5 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -14,7 +14,6 @@ from urllib.parse import urlencode, urljoin, urlparse import lxml import babel -from searx import network from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex from searx.enginelib.traits import EngineTraits from searx.locales import language_tag @@ -45,13 +44,13 @@ main_wiki = 'wiki.archlinux.org' def request(query, params): sxng_lang = params['searxng_locale'].split('-')[0] - netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) - title = traits.custom['title'].get(sxng_lang, 'Special:Search') + netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) # type: ignore + title: str = traits.custom['title'].get(sxng_lang, 'Special:Search') # type: ignore base_url = 'https://' + netloc + '/index.php?' offset = (params['pageno'] - 1) * 20 if netloc == main_wiki: - eng_lang: str = traits.get_language(sxng_lang, 'English') + eng_lang: str = traits.get_language(sxng_lang, 'English') # type: ignore query += ' (' + eng_lang + ')' elif netloc == 'wiki.archlinuxcn.org': base_url = 'https://' + netloc + '/wzh/index.php?' @@ -71,11 +70,11 @@ def request(query, params): def response(resp): results = [] - dom = lxml.html.fromstring(resp.text) + dom = lxml.html.fromstring(resp.text) # type: ignore # get the base URL for the language in which request was made sxng_lang = resp.search_params['searxng_locale'].split('-')[0] - netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) + netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) # type: ignore base_url = 'https://' + netloc + '/index.php?' for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'): @@ -83,7 +82,7 @@ def response(resp): content = extract_text(result.xpath('.//div[@class="searchresult"]')) results.append( { - 'url': urljoin(base_url, link.get('href')), + 'url': urljoin(base_url, link.get('href')), # type: ignore 'title': extract_text(link), 'content': content, } @@ -114,6 +113,8 @@ def fetch_traits(engine_traits: EngineTraits): }, """ + # pylint: disable=import-outside-toplevel + from searx.network import get # see https://github.com/searxng/searxng/issues/762 engine_traits.custom['wiki_netloc'] = {} engine_traits.custom['title'] = {} @@ -125,11 +126,11 @@ def fetch_traits(engine_traits: EngineTraits): 'zh': 'Special:搜索', } - resp = network.get('https://wiki.archlinux.org/') - if not resp.ok: + resp = get('https://wiki.archlinux.org/') + if not resp.ok: # type: ignore print("ERROR: response from wiki.archlinix.org is not OK.") - dom = lxml.html.fromstring(resp.text) + dom = lxml.html.fromstring(resp.text) # type: ignore for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"): sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-')) @@ -143,9 +144,9 @@ def fetch_traits(engine_traits: EngineTraits): print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag)) continue engine_traits.custom['wiki_netloc'][sxng_tag] = netloc - engine_traits.custom['title'][sxng_tag] = title + engine_traits.custom['title'][sxng_tag] = title # type: ignore eng_tag = extract_text(eval_xpath_list(a, ".//span")) - engine_traits.languages[sxng_tag] = eng_tag + engine_traits.languages[sxng_tag] = eng_tag # type: ignore engine_traits.languages['en'] = 'English' diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 81a0cf6a5..3cd707870 100644 --- a/searx/engines/bing.py +++ 
b/searx/engines/bing.py @@ -38,7 +38,6 @@ import babel import babel.languages from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex -from searx import network from searx.locales import language_tag, region_tag from searx.enginelib.traits import EngineTraits @@ -180,6 +179,10 @@ def request(query, params): def response(resp): + # pylint: disable=too-many-locals,import-outside-toplevel + + from searx.network import Request, multi_requests # see https://github.com/searxng/searxng/issues/762 + results = [] result_len = 0 @@ -231,9 +234,9 @@ def response(resp): # resolve all Bing redirections in parallel request_list = [ - network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve + Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve ] - response_list = network.multi_requests(request_list) + response_list = multi_requests(request_list) for i, redirect_response in enumerate(response_list): if not isinstance(redirect_response, Exception): results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location'] @@ -272,16 +275,19 @@ def fetch_traits(engine_traits: EngineTraits): def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str): + # pylint: disable=too-many-locals,import-outside-toplevel + + from searx.network import get # see https://github.com/searxng/searxng/issues/762 # insert alias to map from a language (zh) to a language + script (zh_Hans) engine_traits.languages['zh'] = 'zh-hans' - resp = network.get(url) + resp = get(url) - if not resp.ok: + if not resp.ok: # type: ignore print("ERROR: response from peertube is not OK.") - dom = html.fromstring(resp.text) + dom = html.fromstring(resp.text) # type: ignore map_lang = {'jp': 'ja'} for td in eval_xpath(dom, xpath_language_codes): diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index d734ec3c8..99da9616c 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -18,9 +18,9 @@ from urllib.parse import urlencode import time import babel -from searx.exceptions import SearxEngineAPIException -from searx import network +from searx.network import get, raise_for_httperror # see https://github.com/searxng/searxng/issues/762 from searx.utils import html_to_text +from searx.exceptions import SearxEngineAPIException from searx.locales import region_tag, language_tag from searx.enginelib.traits import EngineTraits @@ -106,7 +106,7 @@ def request(query, params): if not query: return False - eng_region = traits.get_region(params['searxng_locale'], 'en_US') + eng_region: str = traits.get_region(params['searxng_locale'], 'en_US') # type: ignore eng_lang = traits.get_language(params['searxng_locale'], 'en') args = { @@ -156,7 +156,7 @@ def response(resp): if 'error' in search_res: raise SearxEngineAPIException(search_res['error'].get('message')) - network.raise_for_httperror(resp) + raise_for_httperror(resp) # parse results for res in search_res.get('list', []): @@ -218,11 +218,11 @@ def fetch_traits(engine_traits: EngineTraits): """ - resp = network.get('https://api.dailymotion.com/locales') - if not resp.ok: + resp = get('https://api.dailymotion.com/locales') + if not resp.ok: # type: ignore print("ERROR: response from dailymotion/locales is not OK.") - for item in resp.json()['list']: + for item in resp.json()['list']: # type: ignore eng_tag = item['locale'] if eng_tag in ('en_EN', 'ar_AA'): continue @@ -241,11 
+241,11 @@ def fetch_traits(engine_traits: EngineTraits): locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()] - resp = network.get('https://api.dailymotion.com/languages') - if not resp.ok: + resp = get('https://api.dailymotion.com/languages') + if not resp.ok: # type: ignore print("ERROR: response from dailymotion/languages is not OK.") - for item in resp.json()['list']: + for item in resp.json()['list']: # type: ignore eng_tag = item['code'] if eng_tag in locale_lang_list: sxng_tag = language_tag(babel.Locale.parse(eng_tag)) diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index d37e28c2d..8349ad8e3 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -13,17 +13,17 @@ import babel import lxml.html from searx import ( - network, locales, redislib, external_bang, ) -from searx import redisdb from searx.utils import ( eval_xpath, eval_xpath_getindex, extract_text, ) +from searx.network import get # see https://github.com/searxng/searxng/issues/762 +from searx import redisdb from searx.enginelib.traits import EngineTraits from searx.exceptions import SearxEngineAPIException @@ -95,8 +95,8 @@ def get_vqd(query, headers): return value query_url = 'https://duckduckgo.com/?q={query}&atb=v290-5'.format(query=urlencode({'q': query})) - res = network.get(query_url, headers=headers) - content = res.text + res = get(query_url, headers=headers) + content = res.text # type: ignore if content.find('vqd=\"') == -1: raise SearxEngineAPIException('Request failed') value = content[content.find('vqd=\"') + 5 :] @@ -139,7 +139,9 @@ def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'): params['cookies']['kl'] = eng_region # 'ar-es' """ - return eng_traits.custom['lang_region'].get(sxng_locale, eng_traits.get_language(sxng_locale, default)) + return eng_traits.custom['lang_region'].get( # type: ignore + sxng_locale, eng_traits.get_language(sxng_locale, default) + ) ddg_reg_map = { @@ -358,13 +360,13 @@ def fetch_traits(engine_traits: EngineTraits): engine_traits.all_locale = 'wt-wt' # updated from u588 to u661 / should be updated automatically? 
- resp = network.get('https://duckduckgo.com/util/u661.js') + resp = get('https://duckduckgo.com/util/u661.js') - if not resp.ok: + if not resp.ok: # type: ignore print("ERROR: response from DuckDuckGo is not OK.") - pos = resp.text.find('regions:{') + 8 - js_code = resp.text[pos:] + pos = resp.text.find('regions:{') + 8 # type: ignore + js_code = resp.text[pos:] # type: ignore pos = js_code.find('}') + 1 regions = json.loads(js_code[:pos]) @@ -399,8 +401,8 @@ def fetch_traits(engine_traits: EngineTraits): engine_traits.custom['lang_region'] = {} - pos = resp.text.find('languages:{') + 10 - js_code = resp.text[pos:] + pos = resp.text.find('languages:{') + 10 # type: ignore + js_code = resp.text[pos:] # type: ignore pos = js_code.find('}') + 1 js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"') languages = json.loads(js_code) diff --git a/searx/engines/google.py b/searx/engines/google.py index 708068f3a..6aaac2f22 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -23,7 +23,7 @@ import babel.languages from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex from searx.locales import language_tag, region_tag, get_offical_locales -from searx import network +from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx.exceptions import SearxEngineCaptchaException from searx.enginelib.traits import EngineTraits @@ -419,11 +419,11 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True): engine_traits.custom['supported_domains'] = {} - resp = network.get('https://www.google.com/preferences') - if not resp.ok: + resp = get('https://www.google.com/preferences') + if not resp.ok: # type: ignore raise RuntimeError("Response from Google's preferences is not OK.") - dom = html.fromstring(resp.text) + dom = html.fromstring(resp.text) # type: ignore # supported language codes @@ -474,18 +474,18 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True): # supported domains if add_domains: - resp = network.get('https://www.google.com/supported_domains') - if not resp.ok: + resp = get('https://www.google.com/supported_domains') + if not resp.ok: # type: ignore raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.") - for domain in resp.text.split(): + for domain in resp.text.split(): # type: ignore domain = domain.strip() if not domain or domain in [ '.google.com', ]: continue region = domain.split('.')[-1].upper() - engine_traits.custom['supported_domains'][region] = 'www' + domain + engine_traits.custom['supported_domains'][region] = 'www' + domain # type: ignore if region == 'HK': # There is no google.cn, we use .com.hk for zh-CN - engine_traits.custom['supported_domains']['CN'] = 'www' + domain + engine_traits.custom['supported_domains']['CN'] = 'www' + domain # type: ignore diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index 87b386d7a..d0eba6b88 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -13,7 +13,7 @@ from dateutil.relativedelta import relativedelta import babel -from searx import network +from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx.locales import language_tag from searx.utils import html_to_text from searx.enginelib.traits import EngineTraits @@ -147,32 +147,30 @@ def fetch_traits(engine_traits: EngineTraits): https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291 """ - resp 
= network.get( + resp = get( 'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue', # the response from search-index repository is very slow timeout=60, ) - if not resp.ok: + if not resp.ok: # type: ignore print("ERROR: response from peertube is not OK.") return - js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) + js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) # type: ignore if not js_lang: print("ERROR: can't determine languages from peertube") return for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)): + eng_tag = lang.group(1) + if eng_tag == 'oc': + # Occitanis not known by babel, its closest relative is Catalan + # but 'ca' is already in the list of engine_traits.languages --> + # 'oc' will be ignored. + continue try: - eng_tag = lang.group(1) - if eng_tag == 'oc': - # Occitanis not known by babel, its closest relative is Catalan - # but 'ca' is already in the list of engine_traits.languages --> - # 'oc' will be ignored. - continue - sxng_tag = language_tag(babel.Locale.parse(eng_tag)) - except babel.UnknownLocaleError: print("ERROR: %s is unknown by babel" % eng_tag) continue diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 2813d0bf3..92d69867a 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -91,8 +91,8 @@ import dateutil.parser import lxml.html import babel -from searx import network from searx.utils import extract_text, eval_xpath, gen_useragent +from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx.exceptions import SearxEngineCaptchaException from searx.locales import region_tag from searx.enginelib.traits import EngineTraits @@ -211,25 +211,25 @@ def get_sc_code(searxng_locale, params): get_sc_url = base_url + '/?sc=%s' % (sc_code) logger.debug("query new sc time-stamp ... %s", get_sc_url) logger.debug("headers: %s", headers) - resp = network.get(get_sc_url, headers=headers) + resp = get(get_sc_url, headers=headers) # ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers) # ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg # ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21 - if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): + if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): # type: ignore raise SearxEngineCaptchaException( message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha", ) - dom = lxml.html.fromstring(resp.text) + dom = lxml.html.fromstring(resp.text) # type: ignore try: sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0] except IndexError as exc: logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695") raise SearxEngineCaptchaException( - message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url, + message="get_sc_code: [PR-695] query new sc time-stamp failed! 
(%s)" % resp.url, # type: ignore ) from exc sc_code_ts = time() @@ -350,7 +350,7 @@ def _response_cat_web(dom): title = extract_text(link) if eval_xpath(result, content_xpath): - content = extract_text(eval_xpath(result, content_xpath)) + content: str = extract_text(eval_xpath(result, content_xpath)) # type: ignore else: content = '' @@ -374,7 +374,7 @@ def _response_cat_web(dom): date_string = content[0 : date_pos - 5] # calculate datetime - published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) + published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) # type: ignore # fix content string content = content[date_pos:] @@ -399,12 +399,12 @@ def fetch_traits(engine_traits: EngineTraits): 'User-Agent': gen_useragent(), 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language } - resp = network.get('https://www.startpage.com/do/settings', headers=headers) + resp = get('https://www.startpage.com/do/settings', headers=headers) - if not resp.ok: + if not resp.ok: # type: ignore print("ERROR: response from Startpage is not OK.") - dom = lxml.html.fromstring(resp.text) + dom = lxml.html.fromstring(resp.text) # type: ignore # regions @@ -443,8 +443,10 @@ def fetch_traits(engine_traits: EngineTraits): # get the native name of every language known by babel - for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()): - native_name = babel.Locale(lang_code).get_language_name().lower() + for lang_code in filter( + lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers() # type: ignore + ): + native_name = babel.Locale(lang_code).get_language_name().lower() # type: ignore # add native name exactly as it is catalog_engine2code[native_name] = lang_code @@ -478,7 +480,7 @@ def fetch_traits(engine_traits: EngineTraits): eng_tag = option.get('value') if eng_tag in skip_eng_tags: continue - name = extract_text(option).lower() + name = extract_text(option).lower() # type: ignore sxng_tag = catalog_engine2code.get(eng_tag) if sxng_tag is None: diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 98b3d6f9e..b4b70208d 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -61,7 +61,7 @@ import babel from lxml import html from searx import utils -from searx import network +from searx import network as _network from searx import locales from searx.enginelib.traits import EngineTraits @@ -180,7 +180,7 @@ def response(resp): ): return [] - network.raise_for_httperror(resp) + _network.raise_for_httperror(resp) api_result = resp.json() title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title')) @@ -267,7 +267,7 @@ def fetch_wikimedia_traits(engine_traits: EngineTraits): for sxng_tag in sxng_tag_list: engine_traits.regions[sxng_tag] = eng_tag - resp = network.get(list_of_wikipedias) + resp = _network.get(list_of_wikipedias) if not resp.ok: print("ERROR: response from Wikipedia is not OK.") diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index 7f657aa54..5d978d0e0 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -209,9 +209,7 @@ SCHEMA = { 'enable_http2': SettingsValue(bool, True), 'verify': SettingsValue((bool, str), True), 'max_request_timeout': SettingsValue((None, numbers.Real), None), - # Magic number kept from previous code 'pool_connections': SettingsValue(int, 100), - # Picked from constructor 'pool_maxsize': 
SettingsValue(int, 10), 'keepalive_expiry': SettingsValue(numbers.Real, 5.0), # default maximum redirect
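Each ``SettingsValue`` in the schema above pairs the accepted type(s) of a
setting with the default that is applied when ``settings.yml`` omits the key.
A simplified, hypothetical re-implementation of that idea (the real class in
``searx.settings_defaults`` covers more cases, e.g. ``None`` as an accepted
type)::

    import numbers

    class SettingsValue:
        """Type-check a user supplied value; fall back to a default if unset."""

        def __init__(self, type_def, default):
            self.type_def = type_def if isinstance(type_def, tuple) else (type_def,)
            self.default = default

        def __call__(self, value):
            if value is None:
                return self.default
            if not isinstance(value, self.type_def):
                raise ValueError(f"expected {self.type_def}, got {value!r}")
            return value

    pool_maxsize = SettingsValue(int, 10)
    keepalive_expiry = SettingsValue(numbers.Real, 5.0)

    assert pool_maxsize(None) == 10    # key omitted -> default applied
    assert pool_maxsize(25) == 25      # valid user value is kept
    assert keepalive_expiry(7) == 7.0  # an int is a numbers.Real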