mirror of
https://github.com/searxng/searxng.git
synced 2024-11-26 04:41:00 +00:00
[mod] DuckDuckGo: fetch engine traits (data_type: supported_languages)
Implements a fetch_traits function for the DuckDuckGo engines. .. note:: Does not include migration of the request method from 'supported_languages' to the 'traits' (EngineTraits) object! Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
ef143729a0
commit
dba8977b09
4 changed files with 266 additions and 13 deletions
|
@ -2124,11 +2124,73 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"ddg definitions": {
|
"ddg definitions": {
|
||||||
"all_locale": null,
|
"all_locale": "wt-wt",
|
||||||
"custom": {},
|
"custom": {},
|
||||||
"data_type": "supported_languages",
|
"data_type": "supported_languages",
|
||||||
"languages": {},
|
"languages": {},
|
||||||
"regions": {},
|
"regions": {
|
||||||
|
"ar-SA": "xa-ar",
|
||||||
|
"bg-BG": "bg-bg",
|
||||||
|
"ca-ES": "es-ca",
|
||||||
|
"cs-CZ": "cz-cs",
|
||||||
|
"da-DK": "dk-da",
|
||||||
|
"de-AT": "at-de",
|
||||||
|
"de-CH": "ch-de",
|
||||||
|
"de-DE": "de-de",
|
||||||
|
"el-GR": "gr-el",
|
||||||
|
"en-AU": "au-en",
|
||||||
|
"en-CA": "ca-en",
|
||||||
|
"en-GB": "uk-en",
|
||||||
|
"en-IE": "ie-en",
|
||||||
|
"en-IL": "il-en",
|
||||||
|
"en-IN": "in-en",
|
||||||
|
"en-MY": "my-en",
|
||||||
|
"en-NZ": "nz-en",
|
||||||
|
"en-PH": "ph-en",
|
||||||
|
"en-PK": "pk-en",
|
||||||
|
"en-SG": "sg-en",
|
||||||
|
"en-US": "us-en",
|
||||||
|
"en-ZA": "za-en",
|
||||||
|
"es-AR": "ar-es",
|
||||||
|
"es-CL": "cl-es",
|
||||||
|
"es-CO": "co-es",
|
||||||
|
"es-ES": "es-es",
|
||||||
|
"es-MX": "mx-es",
|
||||||
|
"es-PE": "pe-es",
|
||||||
|
"es-US": "us-es",
|
||||||
|
"et-EE": "ee-et",
|
||||||
|
"fi-FI": "fi-fi",
|
||||||
|
"fr-BE": "be-fr",
|
||||||
|
"fr-CA": "ca-fr",
|
||||||
|
"fr-CH": "ch-fr",
|
||||||
|
"fr-FR": "fr-fr",
|
||||||
|
"hr-HR": "hr-hr",
|
||||||
|
"hu-HU": "hu-hu",
|
||||||
|
"id-ID": "id-en",
|
||||||
|
"it-IT": "it-it",
|
||||||
|
"ja-JP": "jp-jp",
|
||||||
|
"ko-KR": "kr-kr",
|
||||||
|
"lt-LT": "lt-lt",
|
||||||
|
"lv-LV": "lv-lv",
|
||||||
|
"nb-NO": "no-no",
|
||||||
|
"nl-BE": "be-nl",
|
||||||
|
"nl-NL": "nl-nl",
|
||||||
|
"pl-PL": "pl-pl",
|
||||||
|
"pt-BR": "br-pt",
|
||||||
|
"pt-PT": "pt-pt",
|
||||||
|
"ro-RO": "ro-ro",
|
||||||
|
"ru-RU": "ru-ru",
|
||||||
|
"sk-SK": "sk-sk",
|
||||||
|
"sl-SI": "sl-sl",
|
||||||
|
"sv-SE": "se-sv",
|
||||||
|
"th-TH": "th-en",
|
||||||
|
"tr-TR": "tr-tr",
|
||||||
|
"uk-UA": "ua-uk",
|
||||||
|
"vi-VN": "vn-en",
|
||||||
|
"zh-CN": "cn-zh",
|
||||||
|
"zh-HK": "hk-tzh",
|
||||||
|
"zh-TW": "tw-tzh"
|
||||||
|
},
|
||||||
"supported_languages": [
|
"supported_languages": [
|
||||||
"ar-XA",
|
"ar-XA",
|
||||||
"bg-BG",
|
"bg-BG",
|
||||||
|
@ -2196,11 +2258,73 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"duckduckgo": {
|
"duckduckgo": {
|
||||||
"all_locale": null,
|
"all_locale": "wt-wt",
|
||||||
"custom": {},
|
"custom": {},
|
||||||
"data_type": "supported_languages",
|
"data_type": "supported_languages",
|
||||||
"languages": {},
|
"languages": {},
|
||||||
"regions": {},
|
"regions": {
|
||||||
|
"ar-SA": "xa-ar",
|
||||||
|
"bg-BG": "bg-bg",
|
||||||
|
"ca-ES": "es-ca",
|
||||||
|
"cs-CZ": "cz-cs",
|
||||||
|
"da-DK": "dk-da",
|
||||||
|
"de-AT": "at-de",
|
||||||
|
"de-CH": "ch-de",
|
||||||
|
"de-DE": "de-de",
|
||||||
|
"el-GR": "gr-el",
|
||||||
|
"en-AU": "au-en",
|
||||||
|
"en-CA": "ca-en",
|
||||||
|
"en-GB": "uk-en",
|
||||||
|
"en-IE": "ie-en",
|
||||||
|
"en-IL": "il-en",
|
||||||
|
"en-IN": "in-en",
|
||||||
|
"en-MY": "my-en",
|
||||||
|
"en-NZ": "nz-en",
|
||||||
|
"en-PH": "ph-en",
|
||||||
|
"en-PK": "pk-en",
|
||||||
|
"en-SG": "sg-en",
|
||||||
|
"en-US": "us-en",
|
||||||
|
"en-ZA": "za-en",
|
||||||
|
"es-AR": "ar-es",
|
||||||
|
"es-CL": "cl-es",
|
||||||
|
"es-CO": "co-es",
|
||||||
|
"es-ES": "es-es",
|
||||||
|
"es-MX": "mx-es",
|
||||||
|
"es-PE": "pe-es",
|
||||||
|
"es-US": "us-es",
|
||||||
|
"et-EE": "ee-et",
|
||||||
|
"fi-FI": "fi-fi",
|
||||||
|
"fr-BE": "be-fr",
|
||||||
|
"fr-CA": "ca-fr",
|
||||||
|
"fr-CH": "ch-fr",
|
||||||
|
"fr-FR": "fr-fr",
|
||||||
|
"hr-HR": "hr-hr",
|
||||||
|
"hu-HU": "hu-hu",
|
||||||
|
"id-ID": "id-en",
|
||||||
|
"it-IT": "it-it",
|
||||||
|
"ja-JP": "jp-jp",
|
||||||
|
"ko-KR": "kr-kr",
|
||||||
|
"lt-LT": "lt-lt",
|
||||||
|
"lv-LV": "lv-lv",
|
||||||
|
"nb-NO": "no-no",
|
||||||
|
"nl-BE": "be-nl",
|
||||||
|
"nl-NL": "nl-nl",
|
||||||
|
"pl-PL": "pl-pl",
|
||||||
|
"pt-BR": "br-pt",
|
||||||
|
"pt-PT": "pt-pt",
|
||||||
|
"ro-RO": "ro-ro",
|
||||||
|
"ru-RU": "ru-ru",
|
||||||
|
"sk-SK": "sk-sk",
|
||||||
|
"sl-SI": "sl-sl",
|
||||||
|
"sv-SE": "se-sv",
|
||||||
|
"th-TH": "th-en",
|
||||||
|
"tr-TR": "tr-tr",
|
||||||
|
"uk-UA": "ua-uk",
|
||||||
|
"vi-VN": "vn-en",
|
||||||
|
"zh-CN": "cn-zh",
|
||||||
|
"zh-HK": "hk-tzh",
|
||||||
|
"zh-TW": "tw-tzh"
|
||||||
|
},
|
||||||
"supported_languages": [
|
"supported_languages": [
|
||||||
"ar-XA",
|
"ar-XA",
|
||||||
"bg-BG",
|
"bg-BG",
|
||||||
|
@ -2268,11 +2392,73 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"duckduckgo images": {
|
"duckduckgo images": {
|
||||||
"all_locale": null,
|
"all_locale": "wt-wt",
|
||||||
"custom": {},
|
"custom": {},
|
||||||
"data_type": "supported_languages",
|
"data_type": "supported_languages",
|
||||||
"languages": {},
|
"languages": {},
|
||||||
"regions": {},
|
"regions": {
|
||||||
|
"ar-SA": "xa-ar",
|
||||||
|
"bg-BG": "bg-bg",
|
||||||
|
"ca-ES": "es-ca",
|
||||||
|
"cs-CZ": "cz-cs",
|
||||||
|
"da-DK": "dk-da",
|
||||||
|
"de-AT": "at-de",
|
||||||
|
"de-CH": "ch-de",
|
||||||
|
"de-DE": "de-de",
|
||||||
|
"el-GR": "gr-el",
|
||||||
|
"en-AU": "au-en",
|
||||||
|
"en-CA": "ca-en",
|
||||||
|
"en-GB": "uk-en",
|
||||||
|
"en-IE": "ie-en",
|
||||||
|
"en-IL": "il-en",
|
||||||
|
"en-IN": "in-en",
|
||||||
|
"en-MY": "my-en",
|
||||||
|
"en-NZ": "nz-en",
|
||||||
|
"en-PH": "ph-en",
|
||||||
|
"en-PK": "pk-en",
|
||||||
|
"en-SG": "sg-en",
|
||||||
|
"en-US": "us-en",
|
||||||
|
"en-ZA": "za-en",
|
||||||
|
"es-AR": "ar-es",
|
||||||
|
"es-CL": "cl-es",
|
||||||
|
"es-CO": "co-es",
|
||||||
|
"es-ES": "es-es",
|
||||||
|
"es-MX": "mx-es",
|
||||||
|
"es-PE": "pe-es",
|
||||||
|
"es-US": "us-es",
|
||||||
|
"et-EE": "ee-et",
|
||||||
|
"fi-FI": "fi-fi",
|
||||||
|
"fr-BE": "be-fr",
|
||||||
|
"fr-CA": "ca-fr",
|
||||||
|
"fr-CH": "ch-fr",
|
||||||
|
"fr-FR": "fr-fr",
|
||||||
|
"hr-HR": "hr-hr",
|
||||||
|
"hu-HU": "hu-hu",
|
||||||
|
"id-ID": "id-en",
|
||||||
|
"it-IT": "it-it",
|
||||||
|
"ja-JP": "jp-jp",
|
||||||
|
"ko-KR": "kr-kr",
|
||||||
|
"lt-LT": "lt-lt",
|
||||||
|
"lv-LV": "lv-lv",
|
||||||
|
"nb-NO": "no-no",
|
||||||
|
"nl-BE": "be-nl",
|
||||||
|
"nl-NL": "nl-nl",
|
||||||
|
"pl-PL": "pl-pl",
|
||||||
|
"pt-BR": "br-pt",
|
||||||
|
"pt-PT": "pt-pt",
|
||||||
|
"ro-RO": "ro-ro",
|
||||||
|
"ru-RU": "ru-ru",
|
||||||
|
"sk-SK": "sk-sk",
|
||||||
|
"sl-SI": "sl-sl",
|
||||||
|
"sv-SE": "se-sv",
|
||||||
|
"th-TH": "th-en",
|
||||||
|
"tr-TR": "tr-tr",
|
||||||
|
"uk-UA": "ua-uk",
|
||||||
|
"vi-VN": "vn-en",
|
||||||
|
"zh-CN": "cn-zh",
|
||||||
|
"zh-HK": "hk-tzh",
|
||||||
|
"zh-TW": "tw-tzh"
|
||||||
|
},
|
||||||
"supported_languages": [
|
"supported_languages": [
|
||||||
"ar-XA",
|
"ar-XA",
|
||||||
"bg-BG",
|
"bg-BG",
|
||||||
|
|
|
@ -3,9 +3,8 @@
|
||||||
"""DuckDuckGo Lite
|
"""DuckDuckGo Lite
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
import json
|
||||||
|
from lxml import html
|
||||||
from lxml.html import fromstring
|
|
||||||
|
|
||||||
from searx.utils import (
|
from searx.utils import (
|
||||||
dict_subset,
|
dict_subset,
|
||||||
|
@ -14,7 +13,10 @@ from searx.utils import (
|
||||||
extract_text,
|
extract_text,
|
||||||
match_language,
|
match_language,
|
||||||
)
|
)
|
||||||
from searx.network import get
|
from searx import network
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
# about
|
# about
|
||||||
about = {
|
about = {
|
||||||
|
@ -120,13 +122,13 @@ def request(query, params):
|
||||||
def response(resp):
|
def response(resp):
|
||||||
|
|
||||||
headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
|
headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
|
||||||
get(url_ping, headers=headers_ping)
|
network.get(url_ping, headers=headers_ping)
|
||||||
|
|
||||||
if resp.status_code == 303:
|
if resp.status_code == 303:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
doc = fromstring(resp.text)
|
doc = html.fromstring(resp.text)
|
||||||
|
|
||||||
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
|
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
|
||||||
if not len(result_table) >= 3:
|
if not len(result_table) >= 3:
|
||||||
|
@ -180,7 +182,70 @@ def _fetch_supported_languages(resp):
|
||||||
response_page = response_page[response_page.find('regions:{') + 8 :]
|
response_page = response_page[response_page.find('regions:{') + 8 :]
|
||||||
response_page = response_page[: response_page.find('}') + 1]
|
response_page = response_page[: response_page.find('}') + 1]
|
||||||
|
|
||||||
regions_json = loads(response_page)
|
regions_json = json.loads(response_page)
|
||||||
supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
|
supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
|
||||||
|
|
||||||
return list(supported_languages)
|
return list(supported_languages)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
    """Fetch regions from DuckDuckGo.

    Downloads DuckDuckGo's ``u588.js`` script, extracts the JSON object that
    follows the ``regions:`` key and fills ``engine_traits.regions`` with a
    mapping from SearXNG region tags (e.g. ``en-US``) to DuckDuckGo region tags
    (e.g. ``us-en``).  ``engine_traits.all_locale`` is set to ``wt-wt``.

    :param engine_traits: traits object that is populated in place.
    :raises RuntimeError: if the HTTP response is not OK or the regions object
        cannot be located in the downloaded script.
    """
    # pylint: disable=import-outside-toplevel

    engine_traits.data_type = 'supported_languages'  # deprecated

    import babel
    from searx.locales import region_tag

    engine_traits.all_locale = 'wt-wt'

    resp = network.get('https://duckduckgo.com/util/u588.js')
    if not resp.ok:
        # Fail loudly: parsing an error page would either crash later with an
        # unrelated message or yield an incomplete region map.
        raise RuntimeError("response from DuckDuckGo is not OK.")

    # The regions are embedded in the script as ``regions:{...}``; slice out
    # the object including its opening and closing brace.
    marker = 'regions:{'
    start = resp.text.find(marker)
    if start == -1:
        raise RuntimeError("can't find 'regions:{' in the response from DuckDuckGo.")
    start += len(marker) - 1  # position of the opening '{'
    end = resp.text.find('}', start)
    if end == -1:
        raise RuntimeError("regions object in the response from DuckDuckGo is not terminated.")
    regions = json.loads(resp.text[start : end + 1])

    # DuckDuckGo tags that can't be converted by swapping territory and
    # language; the value is the babel locale to use instead ('skip' drops
    # the tag).
    reg_map = {
        'tw-tzh': 'zh_TW',
        'hk-tzh': 'zh_HK',
        'ct-ca': 'skip',  # ct-ca and es-ca both map to ca_ES
        'es-ca': 'ca_ES',
        'id-en': 'id_ID',
        'no-no': 'nb_NO',
        'jp-jp': 'ja_JP',
        'kr-kr': 'ko_KR',
        'xa-ar': 'ar_SA',
        'sl-sl': 'sl_SI',
        'th-en': 'th_TH',
        'vn-en': 'vi_VN',
    }

    for eng_tag, name in regions.items():

        if eng_tag == 'wt-wt':
            # already registered as all_locale above
            continue

        region = reg_map.get(eng_tag)
        if region == 'skip':
            continue

        if not region:
            # DuckDuckGo tags are '<territory>-<language>', babel wants
            # '<language>_<TERRITORY>'.
            try:
                eng_territory, eng_lang = eng_tag.split('-')
            except ValueError:
                print(f"ERROR: {name} ({eng_tag}) is not a <territory>-<language> tag")
                continue
            region = eng_lang + '_' + eng_territory.upper()

        try:
            sxng_tag = region_tag(babel.Locale.parse(region))
        except (babel.UnknownLocaleError, ValueError):
            # Locale.parse raises ValueError for malformed identifiers and
            # UnknownLocaleError when babel has no data for the locale.
            print(f"ERROR: {name} ({eng_tag}) -> {region} is unknown by babel")
            continue

        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            # first mapping wins; only report when the tags really differ
            if conflict != eng_tag:
                print(f"CONFLICT: babel {sxng_tag} --> {conflict}, {eng_tag}")
            continue
        engine_traits.regions[sxng_tag] = eng_tag
|
||||||
|
|
|
@ -11,6 +11,7 @@ from lxml import html
|
||||||
from searx.data import WIKIDATA_UNITS
|
from searx.data import WIKIDATA_UNITS
|
||||||
from searx.engines.duckduckgo import language_aliases
|
from searx.engines.duckduckgo import language_aliases
|
||||||
from searx.engines.duckduckgo import ( # pylint: disable=unused-import
|
from searx.engines.duckduckgo import ( # pylint: disable=unused-import
|
||||||
|
fetch_traits,
|
||||||
_fetch_supported_languages,
|
_fetch_supported_languages,
|
||||||
supported_languages_url,
|
supported_languages_url,
|
||||||
)
|
)
|
||||||
|
|
|
@ -8,6 +8,7 @@ from urllib.parse import urlencode
|
||||||
from searx.exceptions import SearxEngineAPIException
|
from searx.exceptions import SearxEngineAPIException
|
||||||
from searx.engines.duckduckgo import get_region_code
|
from searx.engines.duckduckgo import get_region_code
|
||||||
from searx.engines.duckduckgo import ( # pylint: disable=unused-import
|
from searx.engines.duckduckgo import ( # pylint: disable=unused-import
|
||||||
|
fetch_traits,
|
||||||
_fetch_supported_languages,
|
_fetch_supported_languages,
|
||||||
supported_languages_url,
|
supported_languages_url,
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue