[fix] engine zlibrary: handle seized domain

The domains of zlibrary instances are known to be seized from time to time. This leads to problems when, for example, the automated tasks try to update the engine traits (aka fetch_traits). The search function should also generate a suitable error message (currently either SSL errors or empty result lists are returned). [1]

[1] https://github.com/searxng/searxng/issues/3610

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-22 10:51:01 +00:00 · 2024-06-25 14:15:24 +02:00 · 2024-06-25 14:15:24 +02:00 · 39ffec87b7
commit 39ffec87b7
parent b8fa4d6195
1 changed files with 23 additions and 6 deletions
--- a/searx/engines/zlibrary.py
+++ b/searx/engines/zlibrary.py
@ -43,6 +43,7 @@ from flask_babel import gettext
 from searx.utils import extract_text, eval_xpath, eval_xpath_list
 from searx.enginelib.traits import EngineTraits
 from searx.data import ENGINE_TRAITS
 from searx.exceptions import SearxException
 if TYPE_CHECKING:
    import httpx
@ -108,13 +109,21 @@ def request(query: str, params: Dict[str, Any]) -> Dict[str, Any]:
        zlib_year_to=zlib_year_to,
        zlib_ext=zlib_ext,
    )
    params["verify"] = False
    return params
 def domain_is_seized(dom):
     """Return ``True`` if the page's first ``<title>`` mentions "seized".

     ``dom`` is the parsed HTML document; it only needs to provide an
     ``xpath()`` method returning a list of elements that expose a ``text``
     attribute (callers in this module pass the result of
     ``html.fromstring``).  The match is case-insensitive.

     A document without a ``<title>`` element, or whose first title has no
     leading text, is reported as *not* seized.
     """
     titles = dom.xpath('//title')
     if not titles:
         return False
     # ``.text`` is ``None`` for an empty ``<title/>``; fall back to an empty
     # string so ``.lower()`` cannot raise AttributeError.
     title_text = titles[0].text or ""
     return "seized" in title_text.lower()
 def response(resp: httpx.Response) -> List[Dict[str, Any]]:
    results: List[Dict[str, Any]] = []
    dom = html.fromstring(resp.text)
    if domain_is_seized(dom):
        raise SearxException(f"zlibrary domain is seized: {base_url}")
    for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'):
        results.append(_parse_result(item))
@ -168,22 +177,30 @@ def _parse_result(item) -> Dict[str, Any]:
 def fetch_traits(engine_traits: EngineTraits) -> None:
    """Fetch languages and other search arguments from zlibrary's search form."""
-    # pylint: disable=import-outside-toplevel
+    # pylint: disable=import-outside-toplevel, too-many-branches
    import babel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762
    from searx.locales import language_tag
    resp = get(base_url, verify=False)
    if not resp.ok:  # type: ignore
        raise RuntimeError("Response from zlibrary's search page is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore
    if domain_is_seized(dom):
        print(f"ERROR: zlibrary domain is seized: {base_url}")
        # don't change anything, re-use the existing values
        engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"]
        engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"]
        engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"]
        return
    engine_traits.all_locale = ""
    engine_traits.custom["ext"] = []
    engine_traits.custom["year_from"] = []
    engine_traits.custom["year_to"] = []
    resp = get(base_url)
    if not resp.ok:  # type: ignore
        raise RuntimeError("Response from zlibrary's search page is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore
    for year in eval_xpath_list(dom, "//div[@id='advSearch-noJS']//select[@id='sf_yearFrom']/option"):
        engine_traits.custom["year_from"].append(year.get("value"))