mirror of
https://github.com/searxng/searxng.git
synced 2024-11-26 21:01:01 +00:00
[mod] yahoo: fetch engine traits (data_type: traits_v1)
Implements a fetch_traits function for the Yahoo engine. .. note:: Includes migration of the request method from 'supported_languages' to the 'traits' (EngineTraits) object! Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
c1ae2ef57c
commit
ef143729a0
2 changed files with 84 additions and 62 deletions
|
@ -6234,43 +6234,43 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"yahoo": {
|
"yahoo": {
|
||||||
"all_locale": null,
|
"all_locale": "any",
|
||||||
"data_type": "supported_languages",
|
"data_type": "traits_v1",
|
||||||
"languages": {},
|
"languages": {
|
||||||
|
"ar": "ar",
|
||||||
|
"bg": "bg",
|
||||||
|
"cs": "cs",
|
||||||
|
"da": "da",
|
||||||
|
"de": "de",
|
||||||
|
"el": "el",
|
||||||
|
"en": "en",
|
||||||
|
"es": "es",
|
||||||
|
"et": "et",
|
||||||
|
"fi": "fi",
|
||||||
|
"fr": "fr",
|
||||||
|
"he": "he",
|
||||||
|
"hr": "hr",
|
||||||
|
"hu": "hu",
|
||||||
|
"it": "it",
|
||||||
|
"ja": "ja",
|
||||||
|
"ko": "ko",
|
||||||
|
"lt": "lt",
|
||||||
|
"lv": "lv",
|
||||||
|
"nl": "nl",
|
||||||
|
"no": "no",
|
||||||
|
"pl": "pl",
|
||||||
|
"pt": "pt",
|
||||||
|
"ro": "ro",
|
||||||
|
"ru": "ru",
|
||||||
|
"sk": "sk",
|
||||||
|
"sl": "sl",
|
||||||
|
"sv": "sv",
|
||||||
|
"th": "th",
|
||||||
|
"tr": "tr",
|
||||||
|
"zh_Hans": "zh_chs",
|
||||||
|
"zh_Hant": "zh_cht"
|
||||||
|
},
|
||||||
"regions": {},
|
"regions": {},
|
||||||
"supported_languages": [
|
"supported_languages": {}
|
||||||
"ar",
|
|
||||||
"bg",
|
|
||||||
"cs",
|
|
||||||
"da",
|
|
||||||
"de",
|
|
||||||
"el",
|
|
||||||
"en",
|
|
||||||
"es",
|
|
||||||
"et",
|
|
||||||
"fi",
|
|
||||||
"fr",
|
|
||||||
"he",
|
|
||||||
"hr",
|
|
||||||
"hu",
|
|
||||||
"it",
|
|
||||||
"ja",
|
|
||||||
"ko",
|
|
||||||
"lt",
|
|
||||||
"lv",
|
|
||||||
"nl",
|
|
||||||
"no",
|
|
||||||
"pl",
|
|
||||||
"pt",
|
|
||||||
"ro",
|
|
||||||
"ru",
|
|
||||||
"sk",
|
|
||||||
"sl",
|
|
||||||
"sv",
|
|
||||||
"th",
|
|
||||||
"tr",
|
|
||||||
"zh_chs",
|
|
||||||
"zh_cht"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,8 +17,10 @@ from searx.utils import (
|
||||||
eval_xpath_getindex,
|
eval_xpath_getindex,
|
||||||
eval_xpath_list,
|
eval_xpath_list,
|
||||||
extract_text,
|
extract_text,
|
||||||
match_language,
|
|
||||||
)
|
)
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
# about
|
# about
|
||||||
about = {
|
about = {
|
||||||
|
@ -34,8 +36,7 @@ about = {
|
||||||
categories = ['general', 'web']
|
categories = ['general', 'web']
|
||||||
paging = True
|
paging = True
|
||||||
time_range_support = True
|
time_range_support = True
|
||||||
supported_languages_url = 'https://search.yahoo.com/preferences/languages'
|
# send_accept_language_header = True
|
||||||
"""Supported languages are read from Yahoo preference page."""
|
|
||||||
|
|
||||||
time_range_dict = {
|
time_range_dict = {
|
||||||
'day': ('1d', 'd'),
|
'day': ('1d', 'd'),
|
||||||
|
@ -43,15 +44,10 @@ time_range_dict = {
|
||||||
'month': ('1m', 'm'),
|
'month': ('1m', 'm'),
|
||||||
}
|
}
|
||||||
|
|
||||||
language_aliases = {
|
|
||||||
'zh-HK': 'zh_chs',
|
|
||||||
'zh-CN': 'zh_chs', # dead since 2015 / routed to hk.search.yahoo.com
|
|
||||||
'zh-TW': 'zh_cht',
|
|
||||||
}
|
|
||||||
|
|
||||||
lang2domain = {
|
lang2domain = {
|
||||||
'zh_chs': 'hk.search.yahoo.com',
|
'zh_chs': 'hk.search.yahoo.com',
|
||||||
'zh_cht': 'tw.search.yahoo.com',
|
'zh_cht': 'tw.search.yahoo.com',
|
||||||
|
'any': 'search.yahoo.com',
|
||||||
'en': 'search.yahoo.com',
|
'en': 'search.yahoo.com',
|
||||||
'bg': 'search.yahoo.com',
|
'bg': 'search.yahoo.com',
|
||||||
'cs': 'search.yahoo.com',
|
'cs': 'search.yahoo.com',
|
||||||
|
@ -67,21 +63,23 @@ lang2domain = {
|
||||||
}
|
}
|
||||||
"""Map language to domain"""
|
"""Map language to domain"""
|
||||||
|
|
||||||
|
locale_aliases = {
|
||||||
def _get_language(params):
|
'zh': 'zh_Hans',
|
||||||
|
'zh-HK': 'zh_Hans',
|
||||||
lang = language_aliases.get(params['language'])
|
'zh-CN': 'zh_Hans', # dead since 2015 / routed to hk.search.yahoo.com
|
||||||
if lang is None:
|
'zh-TW': 'zh_Hant',
|
||||||
lang = match_language(params['language'], supported_languages, language_aliases)
|
}
|
||||||
lang = lang.split('-')[0]
|
|
||||||
logger.debug("params['language']: %s --> %s", params['language'], lang)
|
|
||||||
return lang
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""build request"""
|
"""build request"""
|
||||||
|
|
||||||
|
lang = locale_aliases.get(params['language'], None)
|
||||||
|
if not lang:
|
||||||
|
lang = params['language'].split('-')[0]
|
||||||
|
lang = traits.get_language(lang, traits.all_locale)
|
||||||
|
|
||||||
offset = (params['pageno'] - 1) * 7 + 1
|
offset = (params['pageno'] - 1) * 7 + 1
|
||||||
lang = _get_language(params)
|
|
||||||
age, btf = time_range_dict.get(params['time_range'], ('', ''))
|
age, btf = time_range_dict.get(params['time_range'], ('', ''))
|
||||||
|
|
||||||
args = urlencode(
|
args = urlencode(
|
||||||
|
@ -154,13 +152,37 @@ def response(resp):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
# get supported languages from their site
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
def _fetch_supported_languages(resp):
|
"""Fetch languages from yahoo"""
|
||||||
supported_languages = []
|
|
||||||
|
# pylint: disable=import-outside-toplevel
|
||||||
|
import babel
|
||||||
|
from searx import network
|
||||||
|
from searx.locales import language_tag
|
||||||
|
|
||||||
|
engine_traits.all_locale = 'any'
|
||||||
|
|
||||||
|
resp = network.get('https://search.yahoo.com/preferences/languages')
|
||||||
|
if not resp.ok:
|
||||||
|
print("ERROR: response from peertube is not OK.")
|
||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
offset = len('lang_')
|
offset = len('lang_')
|
||||||
|
|
||||||
for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'):
|
eng2sxng = {'zh_chs': 'zh_Hans', 'zh_cht': 'zh_Hant'}
|
||||||
supported_languages.append(val[offset:])
|
|
||||||
|
|
||||||
return supported_languages
|
for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'):
|
||||||
|
eng_tag = val[offset:]
|
||||||
|
|
||||||
|
try:
|
||||||
|
sxng_tag = language_tag(babel.Locale.parse(eng2sxng.get(eng_tag, eng_tag)))
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print('ERROR: unknown language --> %s' % eng_tag)
|
||||||
|
continue
|
||||||
|
|
||||||
|
conflict = engine_traits.languages.get(sxng_tag)
|
||||||
|
if conflict:
|
||||||
|
if conflict != eng_tag:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
|
||||||
|
continue
|
||||||
|
engine_traits.languages[sxng_tag] = eng_tag
|
||||||
|
|
Loading…
Reference in a new issue