From 540323a4b069cfe1c2d5e49fb6b3320691a7e974 Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Fri, 29 Nov 2024 12:24:25 +0100
Subject: [PATCH] [mod] hardening xpath engine: ignore empty results

A SearXNG maintainer on Matrix reported a traceback::

    File "searxng-src/searx/engines/xpath.py", line 272, in response
      dom = html.fromstring(resp.text)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^
    File "searx-pyenv/lib/python3.11/site-packages/lxml/html/__init__.py", line 850, in fromstring
      doc = document_fromstring(html, parser=parser, base_url=base_url, **kw)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    File "searx-pyenv/lib/python3.11/site-packages/lxml/html/__init__.py", line 738, in document_fromstring
      raise etree.ParserError(
    lxml.etree.ParserError: Document is empty

I don't have an example to reproduce the issue, but the issue and this patch are clearly recognizable even without an example.

Signed-off-by: Markus Heiser
---
 searx/engines/xpath.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 97c84bd09..90b551a33 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -269,6 +269,10 @@ def response(resp): # pylint: disable=too-many-branches
     raise_for_httperror(resp)
 
     results = []
+
+    if not resp.text:
+        return results
+
     dom = html.fromstring(resp.text)
     is_onion = 'onions' in categories
 