mirror of
https://github.com/searxng/searxng.git
synced 2025-01-23 16:48:06 +00:00
[fix] update_engine_traits.py: annas archive, bing-* and zlibrary engines
The GitHub action "Update data - update_engine_traits" [1] had issues in the annas archive, bing-* and zlibrary engines when running: ./manage pyenv.cmd python ./searxng_extra/update/update_engine_traits.py [1] https://github.com/searxng/searxng/actions/runs/12530827768/job/34953392587 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
05c82d3201
commit
af3f272b0b
3 changed files with 34 additions and 9 deletions
|
@ -169,7 +169,7 @@ def fetch_traits(engine_traits: EngineTraits):
|
||||||
lang_map = {}
|
lang_map = {}
|
||||||
for x in eval_xpath_list(dom, "//form//input[@name='lang']"):
|
for x in eval_xpath_list(dom, "//form//input[@name='lang']"):
|
||||||
eng_lang = x.get("value")
|
eng_lang = x.get("value")
|
||||||
if eng_lang in ('', '_empty', 'nl-BE', 'und'):
|
if eng_lang in ('', '_empty', 'nl-BE', 'und') or eng_lang.startswith('anti__'):
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
|
locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
|
||||||
|
@ -186,10 +186,12 @@ def fetch_traits(engine_traits: EngineTraits):
|
||||||
engine_traits.languages[sxng_lang] = eng_lang
|
engine_traits.languages[sxng_lang] = eng_lang
|
||||||
|
|
||||||
for x in eval_xpath_list(dom, "//form//input[@name='content']"):
|
for x in eval_xpath_list(dom, "//form//input[@name='content']"):
|
||||||
engine_traits.custom['content'].append(x.get("value"))
|
if not x.get("value").startswith("anti__"):
|
||||||
|
engine_traits.custom['content'].append(x.get("value"))
|
||||||
|
|
||||||
for x in eval_xpath_list(dom, "//form//input[@name='ext']"):
|
for x in eval_xpath_list(dom, "//form//input[@name='ext']"):
|
||||||
engine_traits.custom['ext'].append(x.get("value"))
|
if not x.get("value").startswith("anti__"):
|
||||||
|
engine_traits.custom['ext'].append(x.get("value"))
|
||||||
|
|
||||||
for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
|
for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
|
||||||
engine_traits.custom['sort'].append(x.get("value"))
|
engine_traits.custom['sort'].append(x.get("value"))
|
||||||
|
|
|
@ -192,8 +192,21 @@ def fetch_traits(engine_traits: EngineTraits):
|
||||||
# pylint: disable=import-outside-toplevel
|
# pylint: disable=import-outside-toplevel
|
||||||
|
|
||||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||||
|
from searx.utils import gen_useragent
|
||||||
|
|
||||||
resp = get("https://www.bing.com/account/general")
|
headers = {
|
||||||
|
"User-Agent": gen_useragent(),
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||||
|
"Accept-Language": "en-US;q=0.5,en;q=0.3",
|
||||||
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
|
"DNT": "1",
|
||||||
|
"Connection": "keep-alive",
|
||||||
|
"Upgrade-Insecure-Requests": "1",
|
||||||
|
"Sec-GPC": "1",
|
||||||
|
"Cache-Control": "max-age=0",
|
||||||
|
}
|
||||||
|
|
||||||
|
resp = get("https://www.bing.com/account/general", headers=headers)
|
||||||
if not resp.ok: # type: ignore
|
if not resp.ok: # type: ignore
|
||||||
print("ERROR: response from bing is not OK.")
|
print("ERROR: response from bing is not OK.")
|
||||||
|
|
||||||
|
|
|
@ -183,17 +183,27 @@ def fetch_traits(engine_traits: EngineTraits) -> None:
|
||||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||||
from searx.locales import language_tag
|
from searx.locales import language_tag
|
||||||
|
|
||||||
resp = get(base_url, verify=False)
|
def _use_old_values():
|
||||||
|
# don't change anything, re-use the existing values
|
||||||
|
engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
|
||||||
|
engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
|
||||||
|
engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = get(base_url, verify=False)
|
||||||
|
except SearxException as exc:
|
||||||
|
print(f"ERROR: zlibrary domain '{base_url}' is seized?")
|
||||||
|
print(f" --> {exc}")
|
||||||
|
_use_old_values()
|
||||||
|
return
|
||||||
|
|
||||||
if not resp.ok: # type: ignore
|
if not resp.ok: # type: ignore
|
||||||
raise RuntimeError("Response from zlibrary's search page is not OK.")
|
raise RuntimeError("Response from zlibrary's search page is not OK.")
|
||||||
dom = html.fromstring(resp.text) # type: ignore
|
dom = html.fromstring(resp.text) # type: ignore
|
||||||
|
|
||||||
if domain_is_seized(dom):
|
if domain_is_seized(dom):
|
||||||
print(f"ERROR: zlibrary domain is seized: {base_url}")
|
print(f"ERROR: zlibrary domain is seized: {base_url}")
|
||||||
# don't change anything, re-use the existing values
|
_use_old_values()
|
||||||
engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
|
|
||||||
engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
|
|
||||||
engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
|
|
||||||
return
|
return
|
||||||
|
|
||||||
engine_traits.all_locale = ""
|
engine_traits.all_locale = ""
|
||||||
|
|
Loading…
Reference in a new issue