mirror of
https://github.com/searxng/searxng.git
synced 2024-11-14 06:01:28 +00:00
[fix] engine zlibrary: handle seized domain
The domains of zlibrary instances are known to be seized from time to time. This leads to problems when, for example, the automated tasks try to update the engine traits (aka fetch_traits). The search function should also generate a suitable error message (currently either SSL errors or empty result lists are returned). [1] [1] https://github.com/searxng/searxng/issues/3610 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
b8fa4d6195
commit
39ffec87b7
1 changed files with 23 additions and 6 deletions
|
@ -43,6 +43,7 @@ from flask_babel import gettext
|
|||
from searx.utils import extract_text, eval_xpath, eval_xpath_list
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.data import ENGINE_TRAITS
|
||||
from searx.exceptions import SearxException
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import httpx
|
||||
|
@ -108,13 +109,21 @@ def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
|
|||
zlib_year_to=zlib_year_to,
|
||||
zlib_ext=zlib_ext,
|
||||
)
|
||||
params["verify"] = False
|
||||
return params
|
||||
|
||||
|
||||
def domain_is_seized(dom):
    """Tell whether the fetched page is a "domain seized" notice.

    The check looks at the first ``<title>`` element of the document and
    tests (case-insensitively) whether its text contains the word
    ``seized``.

    :param dom: parsed HTML document exposing an ``xpath()`` method (callers
        in this module pass the result of ``html.fromstring(resp.text)``).
    :return: ``True`` if a title exists and mentions "seized", else ``False``.
    """
    titles = dom.xpath('//title')
    if not titles:
        return False
    # ``.text`` is None for an empty element such as ``<title></title>``;
    # fall back to an empty string instead of failing on ``None.lower()``.
    title_text = titles[0].text or ""
    return "seized" in title_text.lower()
|
||||
|
||||
|
||||
def response(resp: httpx.Response) -> List[Dict[str, Any]]:
|
||||
results: List[Dict[str, Any]] = []
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
if domain_is_seized(dom):
|
||||
raise SearxException(f"zlibrary domain is seized: {base_url}")
|
||||
|
||||
for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'):
|
||||
results.append(_parse_result(item))
|
||||
|
||||
|
@ -168,22 +177,30 @@ def _parse_result(item) -> Dict[str, Any]:
|
|||
|
||||
def fetch_traits(engine_traits: EngineTraits) -> None:
|
||||
"""Fetch languages and other search arguments from zlibrary's search form."""
|
||||
# pylint: disable=import-outside-toplevel
|
||||
# pylint: disable=import-outside-toplevel, too-many-branches
|
||||
|
||||
import babel
|
||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||
from searx.locales import language_tag
|
||||
|
||||
resp = get(base_url, verify=False)
|
||||
if not resp.ok: # type: ignore
|
||||
raise RuntimeError("Response from zlibrary's search page is not OK.")
|
||||
dom = html.fromstring(resp.text) # type: ignore
|
||||
|
||||
if domain_is_seized(dom):
|
||||
print(f"ERROR: zlibrary domain is seized: {base_url}")
|
||||
# don't change anything, re-use the existing values
|
||||
engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
|
||||
engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
|
||||
engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
|
||||
return
|
||||
|
||||
engine_traits.all_locale = ""
|
||||
engine_traits.custom["ext"] = []
|
||||
engine_traits.custom["year_from"] = []
|
||||
engine_traits.custom["year_to"] = []
|
||||
|
||||
resp = get(base_url)
|
||||
if not resp.ok: # type: ignore
|
||||
raise RuntimeError("Response from zlibrary's search page is not OK.")
|
||||
dom = html.fromstring(resp.text) # type: ignore
|
||||
|
||||
for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"):
|
||||
engine_traits.custom["year_from"].append(year.get("value"))
|
||||
|
||||
|
|
Loading…
Reference in a new issue