From 4d4aa13e1f1d254e5d57c67973a7809d9c1e21f9 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 30 Dec 2022 18:28:02 +0100 Subject: [PATCH] [mod] remove obsolete EngineTraits.supported_languages All engines have been migrated from ``supported_languages`` to the ``fetch_traits`` concept. There is no longer a need for the obsolete code that implements the ``supported_languages`` concept. Signed-off-by: Markus Heiser --- searx/autocomplete.py | 11 --- searx/data/engine_traits.json | 72 +++++---------- searx/enginelib/__init__.py | 7 -- searx/enginelib/traits.py | 143 +----------------------------- searx/engines/__init__.py | 2 - searx/engines/gentoo.py | 6 +- searx/search/processors/online.py | 2 +- searx/webapp.py | 5 -- 8 files changed, 31 insertions(+), 217 deletions(-) diff --git a/searx/autocomplete.py b/searx/autocomplete.py index d659d110f..ad9903f36 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -19,9 +19,6 @@ from searx.engines import ( from searx.network import get as http_get from searx.exceptions import SearxEngineResponseException -# a fetch_supported_languages() for XPath engines isn't available right now -# _brave = ENGINES_LANGUAGES['brave'].keys() - def get(*args, **kwargs): if 'timeout' not in kwargs: @@ -225,14 +222,6 @@ def search_autocomplete(backend_name, query, sxng_locale): backend = backends.get(backend_name) if backend is None: return [] - - if engines[backend_name].traits.data_type != "traits_v1": - # vintage / deprecated - if not sxng_locale or sxng_locale == 'all': - sxng_locale = 'en' - else: - sxng_locale = sxng_locale.split('-')[0] - try: return backend(query, sxng_locale) except (HTTPError, SearxEngineResponseException): diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index 8f416a636..69156cc27 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -49,8 +49,7 @@ "uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430", "zh": 
"\u4e2d\u6587\uff08\u7e41\u9ad4\uff09" }, - "regions": {}, - "supported_languages": {} + "regions": {} }, "bing": { "all_locale": null, @@ -146,8 +145,7 @@ "zh-CN": "zh-CN", "zh-HK": "zh-HK", "zh-TW": "zh-TW" - }, - "supported_languages": {} + } }, "bing images": { "all_locale": null, @@ -243,8 +241,7 @@ "zh-CN": "zh-CN", "zh-HK": "zh-HK", "zh-TW": "zh-TW" - }, - "supported_languages": {} + } }, "bing news": { "all_locale": "en-WW", @@ -316,8 +313,7 @@ "it-IT": "it-IT", "pt-BR": "pt-BR", "zh-CN": "zh-CN" - }, - "supported_languages": {} + } }, "bing videos": { "all_locale": null, @@ -413,8 +409,7 @@ "zh-CN": "zh-CN", "zh-HK": "zh-HK", "zh-TW": "zh-TW" - }, - "supported_languages": {} + } }, "dailymotion": { "all_locale": null, @@ -491,8 +486,7 @@ "vi-VN": "vi_VN", "zh-CN": "zh_CN", "zh-TW": "zh_TW" - }, - "supported_languages": {} + } }, "duckduckgo": { "all_locale": "wt-wt", @@ -656,8 +650,7 @@ "zh-CN": "cn-zh", "zh-HK": "hk-tzh", "zh-TW": "tw-tzh" - }, - "supported_languages": {} + } }, "duckduckgo images": { "all_locale": "wt-wt", @@ -821,8 +814,7 @@ "zh-CN": "cn-zh", "zh-HK": "hk-tzh", "zh-TW": "tw-tzh" - }, - "supported_languages": {} + } }, "duckduckgo weather": { "all_locale": "wt-wt", @@ -986,8 +978,7 @@ "zh-CN": "cn-zh", "zh-HK": "hk-tzh", "zh-TW": "tw-tzh" - }, - "supported_languages": {} + } }, "google": { "all_locale": "ZZ", @@ -1439,8 +1430,7 @@ "zh-HK": "HK", "zh-SG": "SG", "zh-TW": "TW" - }, - "supported_languages": {} + } }, "google images": { "all_locale": "ZZ", @@ -1892,8 +1882,7 @@ "zh-HK": "HK", "zh-SG": "SG", "zh-TW": "TW" - }, - "supported_languages": {} + } }, "google news": { "all_locale": "ZZ", @@ -2238,8 +2227,7 @@ "zh-HK": "HK", "zh-SG": "SG", "zh-TW": "TW" - }, - "supported_languages": {} + } }, "google scholar": { "all_locale": "ZZ", @@ -2691,8 +2679,7 @@ "zh-HK": "HK", "zh-SG": "SG", "zh-TW": "TW" - }, - "supported_languages": {} + } }, "google videos": { "all_locale": "ZZ", @@ -3144,8 +3131,7 @@ "zh-HK": "HK", "zh-SG": "SG", "zh-TW": 
"TW" - }, - "supported_languages": {} + } }, "peertube": { "all_locale": null, @@ -3174,8 +3160,7 @@ "zh_Hans": "zh", "zh_Hant": "zh" }, - "regions": {}, - "supported_languages": {} + "regions": {} }, "qwant": { "all_locale": null, @@ -3222,8 +3207,7 @@ "th-TH": "th_TH", "zh-CN": "zh_CN", "zh-HK": "zh_HK" - }, - "supported_languages": {} + } }, "qwant images": { "all_locale": null, @@ -3270,8 +3254,7 @@ "th-TH": "th_TH", "zh-CN": "zh_CN", "zh-HK": "zh_HK" - }, - "supported_languages": {} + } }, "qwant news": { "all_locale": null, @@ -3303,8 +3286,7 @@ "nl-BE": "nl_BE", "nl-NL": "nl_NL", "pt-PT": "pt_PT" - }, - "supported_languages": {} + } }, "qwant videos": { "all_locale": null, @@ -3351,8 +3333,7 @@ "th-TH": "th_TH", "zh-CN": "zh_CN", "zh-HK": "zh_HK" - }, - "supported_languages": {} + } }, "sepiasearch": { "all_locale": null, @@ -3381,8 +3362,7 @@ "zh_Hans": "zh", "zh_Hant": "zh" }, - "regions": {}, - "supported_languages": {} + "regions": {} }, "startpage": { "all_locale": null, @@ -3521,8 +3501,7 @@ "zh-CN": "zh-CN_CN", "zh-HK": "zh-TW_HK", "zh-TW": "zh-TW_TW" - }, - "supported_languages": {} + } }, "wikidata": { "all_locale": null, @@ -3610,8 +3589,7 @@ "zh": "zh", "zh_Hant": "zh-classical" }, - "regions": {}, - "supported_languages": {} + "regions": {} }, "wikipedia": { "all_locale": null, @@ -3779,8 +3757,7 @@ "zh_Hans": "zh", "zh_Hant": "zh-classical" }, - "regions": {}, - "supported_languages": {} + "regions": {} }, "yahoo": { "all_locale": "any", @@ -3820,7 +3797,6 @@ "zh_Hans": "zh_chs", "zh_Hant": "zh_cht" }, - "regions": {}, - "supported_languages": {} + "regions": {} } } \ No newline at end of file diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py index 845d3a8e9..461791b18 100644 --- a/searx/enginelib/__init__.py +++ b/searx/enginelib/__init__.py @@ -134,10 +134,3 @@ class Engine: # pylint: disable=too-few-public-methods require_api_key: true results: HTML """ - - # deprecated properties - - _fetch_supported_languages: Callable 
# deprecated use fetch_traits - supported_languages: Union[List[str], Dict[str, str]] # deprecated use traits - language_aliases: Dict[str, str] # deprecated not needed when using triats - supported_languages_url: str # deprecated not needed when using triats diff --git a/searx/enginelib/traits.py b/searx/enginelib/traits.py index 1e3578df8..df7851594 100644 --- a/searx/enginelib/traits.py +++ b/searx/enginelib/traits.py @@ -13,11 +13,9 @@ used. from __future__ import annotations import json import dataclasses -from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING +from typing import Dict, Union, Callable, Optional, TYPE_CHECKING from typing_extensions import Literal, Self -from babel.localedata import locale_identifiers - from searx import locales from searx.data import data_dir, ENGINE_TRAITS @@ -79,18 +77,8 @@ class EngineTraits: language"). """ - data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1' - """Data type, default is 'traits_v1' for vintage use 'supported_languages'. - - .. hint:: - - For the transition period until the *fetch* functions of all the engines - are converted there will be the data_type 'supported_languages', which - maps the old logic unchanged 1:1. - - Instances of data_type 'supported_languages' do not implement methods - like ``self.get_language(..)`` and ``self.get_region(..)`` - + data_type: Literal['traits_v1'] = 'traits_v1' + """Data type, default is 'traits_v1'. 
""" custom: Dict[str, Dict] = dataclasses.field(default_factory=dict) @@ -139,16 +127,6 @@ class EngineTraits: if self.data_type == 'traits_v1': return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale)) - if self.data_type == 'supported_languages': # vintage / deprecated - # pylint: disable=import-outside-toplevel - from searx.utils import match_language - - if searxng_locale == 'all': - return True - x = match_language(searxng_locale, self.supported_languages, self.language_aliases, None) - return bool(x) - - # return bool(self.get_supported_language(searxng_locale)) raise TypeError('engine traits of type %s is unknown' % self.data_type) def copy(self): @@ -178,10 +156,6 @@ class EngineTraits: if self.data_type == 'traits_v1': self._set_traits_v1(engine) - - elif self.data_type == 'supported_languages': # vintage / deprecated - self._set_supported_languages(engine) - else: raise TypeError('engine traits of type %s is unknown' % self.data_type) @@ -215,106 +189,6 @@ class EngineTraits: # set the copied & modified traits in engine's namespace engine.traits = traits - # ------------------------------------------------------------------------- - # The code below is deprecated an can hopefully be deleted at one day - # ------------------------------------------------------------------------- - - supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict) - """depricated: does not work for engines that do support languages based on a - region. With this type it is not guaranteed that the key values can be - parsed by :py:obj:`babel.Locale.parse`! - """ - - # language_aliases: Dict[str, str] = dataclasses.field(default_factory=dict) - # """depricated: does not work for engines that do support languages based on a - # region. With this type it is not guaranteed that the key values can be - # parsed by :py:obj:`babel.Locale.parse`! 
- # """ - - BABEL_LANGS = [ - lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0] - for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers()) - ] - - # def get_supported_language(self, searxng_locale, default=None): # vintage / deprecated - # """Return engine's language string that *best fits* to SearXNG's locale.""" - # if searxng_locale == 'all' and self.all_locale is not None: - # return self.all_locale - # return locales.get_engine_locale(searxng_locale, self.supported_languages, default=default) - - @classmethod # vintage / deprecated - def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]: - """DEPRECATED: Calls a function ``_fetch_supported_languages`` from engine's - namespace to fetch languages from the origin engine. If function does - not exists, ``None`` is returned. - """ - - # pylint: disable=import-outside-toplevel - from searx import network - from searx.utils import gen_useragent - - fetch_languages = getattr(engine, '_fetch_supported_languages', None) - if fetch_languages is None: - return None - - # The headers has been moved here from commit 9b6ffed06: Some engines (at - # least bing and startpage) return a different result list of supported - # languages depending on the IP location where the HTTP request comes from. - # The IP based results (from bing) can be avoided by setting a - # 'Accept-Language' in the HTTP request. 
- - headers = { - 'User-Agent': gen_useragent(), - 'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language - } - resp = network.get(engine.supported_languages_url, headers=headers) - supported_languages = fetch_languages(resp) - if isinstance(supported_languages, list): - supported_languages.sort() - - engine_traits = cls() - engine_traits.data_type = 'supported_languages' - engine_traits.supported_languages = supported_languages - return engine_traits - - def _set_supported_languages(self, engine: Engine): # vintage / deprecated - traits = self.copy() - - # pylint: disable=import-outside-toplevel - from searx.utils import match_language - - _msg = "settings.yml - engine: '%s' / %s: '%s' not supported" - - if hasattr(engine, 'language'): - if engine.language not in self.supported_languages: - raise ValueError(_msg % (engine.name, 'language', engine.language)) - - if isinstance(self.supported_languages, dict): - traits.supported_languages = {engine.language: self.supported_languages[engine.language]} - else: - traits.supported_languages = [engine.language] - - engine.language_support = bool(traits.supported_languages) - engine.supported_languages = traits.supported_languages - - # find custom aliases for non standard language codes - traits.language_aliases = {} # pylint: disable=attribute-defined-outside-init - - for engine_lang in getattr(engine, 'language_aliases', {}): - iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None) - if ( - iso_lang - and iso_lang != engine_lang - and not engine_lang.startswith(iso_lang) - and iso_lang not in self.supported_languages - ): - traits.language_aliases[iso_lang] = engine_lang - - engine.language_aliases = traits.language_aliases - - # set the copied & modified traits in engine's namespace - engine.traits = traits - class EngineTraitsMap(Dict[str, EngineTraits]): """A python dictionary to map :class:`EngineTraits` by engine name.""" @@ -352,17 +226,6 @@ class EngineTraitsMap(Dict[str, 
EngineTraits]): log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions))) obj[engine_name] = traits - # vintage / deprecated - _traits = EngineTraits.fetch_supported_languages(engine) - if _traits is not None: - log("%-20s: %s supported_languages (deprecated)" % (engine_name, len(_traits.supported_languages))) - if traits is not None: - traits.supported_languages = _traits.supported_languages - obj[engine_name] = traits - else: - obj[engine_name] = _traits - continue - return obj def set_traits(self, engine: Engine): diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index d0dc8f4be..c8e8e7241 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -43,8 +43,6 @@ ENGINE_DEFAULT_ARGS = { "send_accept_language_header": False, "tokens": [], "about": {}, - "supported_languages": [], # deprecated use traits - "language_aliases": {}, # deprecated not needed when using traits } # set automatically when an engine does not have any tab category OTHER_CATEGORY = 'other' diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 856c93710..f0cb6a794 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org' # xpath queries xpath_results = '//ul[@class="mw-search-results"]/li' xpath_link = './/div[@class="mw-search-result-heading"]/a' +xpath_content = './/div[@class="searchresult"]' # cut 'en' from 'en-US', 'de' from 'de-CH', and so on @@ -77,8 +78,6 @@ main_langs = { 'uk': 'Українська', 'zh': '简体中文', } -supported_languages = dict(lang_urls, **main_langs) - # do search-request def request(query, params): @@ -118,7 +117,8 @@ def response(resp): link = result.xpath(xpath_link)[0] href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) + content = extract_text(result.xpath(xpath_content)) - results.append({'url': href, 'title': title}) + results.append({'url': href, 'title': title, 'content': content}) return results diff --git 
a/searx/search/processors/online.py b/searx/search/processors/online.py index 48e3a2e92..697533d8c 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -221,7 +221,7 @@ class OnlineProcessor(EngineProcessor): 'test': ['unique_results'], } - if getattr(self.engine, 'supported_languages', []): + if getattr(self.engine, 'traits', False): tests['lang_fr'] = { 'matrix': {'query': 'paris', 'lang': 'fr'}, 'result_container': ['not_empty', ('has_language', 'fr')], diff --git a/searx/webapp.py b/searx/webapp.py index fdd119128..bc2a50784 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -1317,11 +1317,6 @@ def config(): continue _languages = engine.traits.languages.keys() - if engine.traits.data_type == 'supported_languages': # vintage / deprecated - _languages = engine.traits.supported_languages - if isinstance(_languages, dict): - _languages = _languages.keys() - _engines.append( { 'name': name,