From 6ed4616da99b25703489e7431d84d8749a7a167c Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 26 May 2021 19:43:27 +0200 Subject: [PATCH] [enh] add settings option to enable/disable search formats Access to formats can be denied by settings configuration:: search: formats: [html, csv, json, rss] Closes: https://github.com/searxng/searxng/issues/95 Signed-off-by: Markus Heiser --- searx/settings.yml | 1 + searx/templates/oscar/results.html | 6 +- searx/templates/simple/results.html | 4 +- searx/utils.py | 57 +++++++++++++++++++ searx/webapp.py | 15 ++++- .../etc/searx/use_default_settings.yml | 1 + 6 files changed, 81 insertions(+), 3 deletions(-) diff --git a/searx/settings.yml b/searx/settings.yml index b0c425e4f..57ed93b28 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -18,6 +18,7 @@ search: default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py' ban_time_on_fail : 5 # ban time in seconds after engine errors max_ban_time_on_fail : 120 # max ban time in seconds after engine errors + formats: [html, csv, json, rss] # remove format to deny access, use lower case. server: port : 8888 diff --git a/searx/templates/oscar/results.html b/searx/templates/oscar/results.html index ec84e489b..b5bedcd44 100644 --- a/searx/templates/oscar/results.html +++ b/searx/templates/oscar/results.html @@ -80,9 +80,10 @@ {{- "" -}} {{- "" -}} + {% if search_formats %}
- {% for output_type in ('csv', 'json', 'rss') %} + {% for output_type in search_formats %}
{{- search_form_attrs(pageno) -}} {{- "" -}} @@ -90,8 +91,11 @@
{% endfor %}
+ {% if 'rss' in search_formats %}
+ {% endif %}
+ {% endif %} diff --git a/searx/templates/simple/results.html b/searx/templates/simple/results.html index c53529edd..a68449736 100644 --- a/searx/templates/simple/results.html +++ b/searx/templates/simple/results.html @@ -85,8 +85,9 @@
{{ url_for('search', _external=True) }}?q={{ q|urlencode }}&language={{ current_language }}&time_range={{ time_range }}&safesearch={{ safesearch }}{% if pageno > 1 %}&pageno={{ pageno }}{% endif %}{% if selected_categories %}&categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if timeout_limit %}&timeout_limit={{ timeout_limit|urlencode }}{% endif %}
+ {% if search_formats %}

{{ _('Download results') }}

- {% for output_type in ('csv', 'json', 'rss') %} + {% for output_type in search_formats %}
@@ -103,6 +104,7 @@
{% endfor %} + {% endif %}
diff --git a/searx/utils.py b/searx/utils.py index 55a386bd5..8c5b3a9b3 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -8,6 +8,7 @@ from os.path import splitext, join from random import choice from html.parser import HTMLParser from urllib.parse import urljoin, urlparse +from collections.abc import Mapping from lxml import html from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult @@ -500,6 +501,62 @@ def get_engine_from_settings(name): return {} +NOT_EXISTS = object() +"""Singleton used by :py:obj:`get_value` if a key does not exist.""" + + +def get_value(dictionary, keyword, *keys, default=NOT_EXISTS): + """Return the value from a *deep* mapping type (e.g. the ``settings`` object + from yaml). If the path to the *key* does not exist, *default* is returned + (:py:obj:`NOT_EXISTS` by default), no ``KeyError`` is raised. A non-mapping *dictionary* raises ``TypeError``. + + .. code:: python + + >>> from searx.utils import get_value, NOT_EXISTS + >>> get_value(settings, 'checker', 'additional_tests', 'rosebud', 'result_container') + ['not_empty', ['one_title_contains', 'citizen kane']] + + >>> get_value(settings, 'search', 'xxx') is NOT_EXISTS + True + >>> get_value(settings, 'search', 'formats') + ['html', 'csv', 'json', 'rss'] + + The list returned from the ``search.formats`` key is not a mapping type, you + can't traverse along non-mapping types. If you try it, you will get a + :py:obj:`NOT_EXISTS`: + + .. code:: python + + >>> get_value(settings, 'search', 'formats', 'csv') is NOT_EXISTS + True + >>> get_value(settings, 'search', 'formats')[0] + 'html' + + For convenience you can replace :py:obj:`NOT_EXISTS` by a default value of + your choice: + + ..
code:: python + + if 'csv' not in get_value(settings, 'search', 'formats', default=[]): + print("csv format is denied") + + """ + if not isinstance(dictionary, Mapping): + raise TypeError("expected mapping type, got %s" % type(dictionary)) + + ret_val = dictionary.get(keyword, default) + + if ret_val is default: + return ret_val + + if len(keys): + if not isinstance(ret_val, Mapping): + ret_val = default + else: + ret_val = get_value(ret_val, *keys, default=default) + return ret_val + + def get_xpath(xpath_spec): """Return cached compiled XPath diff --git a/searx/webapp.py b/searx/webapp.py index ad6ed368b..47f77acc7 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -31,6 +31,8 @@ from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-modu from werkzeug.middleware.proxy_fix import ProxyFix from werkzeug.serving import WSGIRequestHandler +import flask + from flask import ( Flask, request, @@ -86,6 +88,7 @@ from searx.utils import ( gen_useragent, dict_subset, match_language, + get_value, ) from searx.version import VERSION_STRING from searx.query import RawTextQuery @@ -161,6 +164,8 @@ for indice, theme in enumerate(themes): for (dirpath, dirnames, filenames) in os.walk(theme_img_path): global_favicons[indice].extend(filenames) +OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss'] # formats search() recognizes; settings 'search.formats' may enable fewer + STATS_SORT_PARAMETERS = { 'name': (False, 'name', ''), 'score': (True, 'score', 0), @@ -511,6 +516,11 @@ def render(template_name, override_theme=None, **kwargs): kwargs['preferences'] = request.preferences + kwargs['search_formats'] = [ + x for x in get_value( + settings, 'search', 'formats', default=OUTPUT_FORMATS) + if x != 'html'] + kwargs['brand'] = brand kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) @@ -683,9 +693,12 @@ def search(): # output_format output_format = request.form.get('format', 'html') - if output_format not in ['html', 'csv', 'json', 'rss']: + if output_format not in OUTPUT_FORMATS: output_format = 'html' + if
output_format not in get_value(settings, 'search', 'formats', default=OUTPUT_FORMATS): + flask.abort(403) # known format, but not enabled in settings 'search.formats' + # check if there is query (not None and not an empty string) if not request.form.get('q'): if output_format == 'html': diff --git a/utils/templates/etc/searx/use_default_settings.yml b/utils/templates/etc/searx/use_default_settings.yml index e019a25bb..845e3139f 100644 --- a/utils/templates/etc/searx/use_default_settings.yml +++ b/utils/templates/etc/searx/use_default_settings.yml @@ -8,6 +8,7 @@ search: safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "swisscows", "qwant", "wikipedia" - leave blank to turn it off by default default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py' + formats: [html, csv, json, rss] # remove format to deny access, use lower case. server: port : 8888