From c1ae2ef57c8a7da5df1f0fdacc0e6e745721b2ae Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 3 Oct 2022 22:42:58 +0200 Subject: [PATCH] [mod] qwant: fetch engine traits (data_type: traits_v1) Implements a fetch_traits function for the Qwant engines. .. note:: Includes migration of the request method from 'supported_languages' to 'traits' (EngineTraits) object! Signed-off-by: Markus Heiser --- searx/autocomplete.py | 14 +++++------ searx/data/engine_traits.json | 32 ++++++++++++------------- searx/engines/qwant.py | 45 +++++++++++++++++------------------ 3 files changed, 45 insertions(+), 46 deletions(-) diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 9b8755218..848600e57 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -126,16 +126,16 @@ def swisscows(query, _lang): return resp -def qwant(query, lang): - # qwant autocompleter (additional parameter : lang=en_en&count=xxx ) - url = 'https://api.qwant.com/api/suggest?{query}' - - resp = get(url.format(query=urlencode({'q': query, 'lang': lang}))) - +def qwant(query, sxng_locale): + """Autocomplete from Qwant. 
Supports Qwant's regions.""" results = [] + locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US') + url = 'https://api.qwant.com/v3/suggest?{query}' + resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'}))) + if resp.ok: - data = loads(resp.text) + data = resp.json() if data['status'] == 'success': for item in data['data']['items']: results.append(item['value']) diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index 945ed2644..4fc01771e 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -3107,10 +3107,9 @@ "qwant": { "all_locale": null, "custom": {}, - "data_type": "supported_languages", + "data_type": "traits_v1", "languages": {}, - "regions": {}, - "supported_languages": { + "regions": { "bg-BG": "bg_BG", "ca-ES": "ca_ES", "cs-CZ": "cs_CZ", @@ -3150,15 +3149,15 @@ "th-TH": "th_TH", "zh-CN": "zh_CN", "zh-HK": "zh_HK" - } + }, + "supported_languages": {} }, "qwant images": { "all_locale": null, "custom": {}, - "data_type": "supported_languages", + "data_type": "traits_v1", "languages": {}, - "regions": {}, - "supported_languages": { + "regions": { "bg-BG": "bg_BG", "ca-ES": "ca_ES", "cs-CZ": "cs_CZ", @@ -3198,15 +3197,15 @@ "th-TH": "th_TH", "zh-CN": "zh_CN", "zh-HK": "zh_HK" - } + }, + "supported_languages": {} }, "qwant news": { "all_locale": null, "custom": {}, - "data_type": "supported_languages", + "data_type": "traits_v1", "languages": {}, - "regions": {}, - "supported_languages": { + "regions": { "ca-ES": "ca_ES", "de-AT": "de_AT", "de-CH": "de_CH", @@ -3231,15 +3230,15 @@ "nl-BE": "nl_BE", "nl-NL": "nl_NL", "pt-PT": "pt_PT" - } + }, + "supported_languages": {} }, "qwant videos": { "all_locale": null, "custom": {}, - "data_type": "supported_languages", + "data_type": "traits_v1", "languages": {}, - "regions": {}, - "supported_languages": { + "regions": { "bg-BG": "bg_BG", "ca-ES": "ca_ES", "cs-CZ": "cs_CZ", @@ -3279,7 +3278,8 @@ "th-TH": "th_TH", "zh-CN": 
"zh_CN", "zh-HK": "zh_HK" - } + }, + "supported_languages": {} }, "startpage": { "all_locale": null, diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 6de2176d0..4a41676c5 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -34,7 +34,9 @@ import babel from searx.exceptions import SearxEngineAPIException from searx.network import raise_for_httperror -from searx.locales import get_engine_locale +from searx.enginelib.traits import EngineTraits + +traits: EngineTraits # about about = { @@ -49,7 +51,6 @@ about = { # engine dependent config categories = [] paging = True -supported_languages_url = about['website'] qwant_categ = None # web|news|inages|videos safesearch = True @@ -95,7 +96,7 @@ def request(query, params): ) # add quant's locale - q_locale = get_engine_locale(params['language'], supported_languages, default='en_US') + q_locale = traits.get_region(params["searxng_locale"], default='en_US') params['url'] += '&locale=' + q_locale # add safesearch option @@ -243,15 +244,20 @@ def response(resp): return results -def _fetch_supported_languages(resp): +def fetch_traits(engine_traits: EngineTraits): + # pylint: disable=import-outside-toplevel + from searx import network + from searx.locales import region_tag + + resp = network.get(about['website']) text = resp.text text = text[text.find('INITIAL_PROPS') :] text = text[text.find('{') : text.find('')] q_initial_props = loads(text) q_locales = q_initial_props.get('locales') - q_valid_locales = [] + eng_tag_list = set() for country, v in q_locales.items(): for lang in v['langs']: @@ -261,25 +267,18 @@ def _fetch_supported_languages(resp): # qwant-news does not support all locales from qwant-web: continue - q_valid_locales.append(_locale) + eng_tag_list.add(_locale) - supported_languages = {} - - for q_locale in q_valid_locales: + for eng_tag in eng_tag_list: try: - locale = babel.Locale.parse(q_locale, sep='_') - except babel.core.UnknownLocaleError: - print("ERROR: can't determine babel 
locale of quant's locale %s" % q_locale) + sxng_tag = region_tag(babel.Locale.parse(eng_tag, sep='_')) + except babel.UnknownLocaleError: + print("ERROR: can't determine babel locale of quant's locale %s" % eng_tag) continue - # note: supported_languages (dict) - # - # dict's key is a string build up from a babel.Locale object / the - # notation 'xx-XX' (and 'xx') conforms to SearXNG's locale (and - # language) notation and dict's values are the locale strings used by - # the engine. - - searxng_locale = locale.language + '-' + locale.territory # --> params['language'] - supported_languages[searxng_locale] = q_locale - - return supported_languages + conflict = engine_traits.regions.get(sxng_tag) + if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) + continue + engine_traits.regions[sxng_tag] = eng_tag