[mod] revision of the settings_loader

The intention of this PR is to modernize the settings_loader implementations.
The concept is old (remember, this is partly from 2014): back then we only had
one config file, whereas for a very long time now we have had a folder with
config files.  Callers can now load a YAML configuration from this folder as
follows ::

    settings_loader.get_yaml_cfg('my-config.yml')

- BTW this is a fix of #3557.

- Furthermore, the `existing_filename_or_none` construct dates back to a time
  when `pathlib.Path` was not yet available in all the Python versions we
  supported.

- Type hints have been added wherever appropriate.

At the same time, this patch should also be backward compatible and not
introduce a new environment variable. The location of the folder with the
configurations is still derived from:

    SEARXNG_SETTINGS_PATH (which defaults to /etc/searxng/settings.yml)

This means the default config folder is `/etc/searxng/`.

ATTENTION: intended functional changes!

 If SEARXNG_SETTINGS_PATH was set and pointed to a non-existent file, the
 previous implementation silently loaded the default configuration.  This
 behavior has been changed: if the file or folder does not exist, an
 EnvironmentError exception is now raised.

Closes: https://github.com/searxng/searxng/issues/3557
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2024-06-12 18:01:18 +02:00 committed by Markus Heiser
parent e4da22ee51
commit 2039060b64
8 changed files with 163 additions and 99 deletions

View file

@ -1,3 +1,5 @@
.. _searxng settings.yml:
======== ========
Settings Settings
======== ========

View file

@ -0,0 +1,8 @@
.. _searx.settings_loader:
===============
Settings Loader
===============
.. automodule:: searx.settings_loader
:members:

4
manage
View file

@ -54,7 +54,9 @@ fi
YAMLLINT_FILES=() YAMLLINT_FILES=()
while IFS= read -r line; do while IFS= read -r line; do
YAMLLINT_FILES+=("$line") if [ "$line" != "tests/unit/settings/syntaxerror_settings.yml" ]; then
YAMLLINT_FILES+=("$line")
fi
done <<< "$(git ls-files './tests/*.yml' './searx/*.yml' './utils/templates/etc/searxng/*.yml')" done <<< "$(git ls-files './tests/*.yml' './searx/*.yml' './utils/templates/etc/searxng/*.yml')"
RST_FILES=( RST_FILES=(

View file

@ -96,7 +96,7 @@ from flask_babel import gettext
from searx import settings from searx import settings
from searx.plugins import logger from searx.plugins import logger
from searx.settings_loader import get_yaml_file from searx.settings_loader import get_yaml_cfg
name = gettext('Hostnames plugin') name = gettext('Hostnames plugin')
description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname') description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')
@ -118,7 +118,7 @@ def _load_regular_expressions(settings_key):
# load external file with configuration # load external file with configuration
if isinstance(setting_value, str): if isinstance(setting_value, str):
setting_value = get_yaml_file(setting_value) setting_value = get_yaml_cfg(setting_value)
if isinstance(setting_value, list): if isinstance(setting_value, list):
return {re.compile(r) for r in setting_value} return {re.compile(r) for r in setting_value}
@ -163,10 +163,10 @@ def _matches_parsed_url(result, pattern):
def on_result(_request, _search, result): def on_result(_request, _search, result):
for pattern, replacement in replacements.items(): for pattern, replacement in replacements.items():
if _matches_parsed_url(result, pattern): if _matches_parsed_url(result, pattern):
logger.debug(result['url']) # logger.debug(result['url'])
result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc)) result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
result['url'] = urlunparse(result[parsed]) result['url'] = urlunparse(result[parsed])
logger.debug(result['url']) # logger.debug(result['url'])
for url_field in _url_fields: for url_field in _url_fields:
if not result.get(url_field): if not result.get(url_field):

View file

@ -1,68 +1,116 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, too-many-branches """Implementations for loading configurations from YAML files. This essentially
includes the configuration of the (:ref:`SearXNG appl <searxng settings.yml>`)
server. The default configuration for the application server is loaded from the
:origin:`DEFAULT_SETTINGS_FILE <searx/settings.yml>`. This default
configuration can be completely replaced or :ref:`customized individually
<use_default_settings.yml>` and the ``SEARXNG_SETTINGS_PATH`` environment
variable can be used to set the location from which the local customizations are
to be loaded. The rules used for this can be found in the
:py:obj:`get_user_cfg_folder` function.
from typing import Optional - By default, local configurations are expected in folder ``/etc/searxng`` from
from os import environ where applications can load them with the :py:obj:`get_yaml_cfg` function.
from os.path import dirname, join, abspath, isfile
- By default, customized :ref:`SearXNG appl <searxng settings.yml>` settings are
expected in a file named ``settings.yml``.
"""
from __future__ import annotations
import os.path
from collections.abc import Mapping from collections.abc import Mapping
from itertools import filterfalse from itertools import filterfalse
from pathlib import Path
import yaml import yaml
from searx.exceptions import SearxSettingsException from searx.exceptions import SearxSettingsException
searx_dir = os.path.abspath(os.path.dirname(__file__))
searx_dir = abspath(dirname(__file__)) SETTINGS_YAML = Path("settings.yml")
DEFAULT_SETTINGS_FILE = Path(searx_dir) / SETTINGS_YAML
"""The :origin:`searx/settings.yml` file with all the default settings."""
def existing_filename_or_none(file_name: str) -> Optional[str]: def load_yaml(file_name: str | Path):
if isfile(file_name): """Load YAML config from a file."""
return file_name
return None
def load_yaml(file_name):
try: try:
with open(file_name, 'r', encoding='utf-8') as settings_yaml: with open(file_name, 'r', encoding='utf-8') as settings_yaml:
return yaml.safe_load(settings_yaml) return yaml.safe_load(settings_yaml) or {}
except IOError as e: except IOError as e:
raise SearxSettingsException(e, file_name) from e raise SearxSettingsException(e, str(file_name)) from e
except yaml.YAMLError as e: except yaml.YAMLError as e:
raise SearxSettingsException(e, file_name) from e raise SearxSettingsException(e, str(file_name)) from e
def get_yaml_file(file_name): def get_yaml_cfg(file_name: str | Path) -> dict:
path = existing_filename_or_none(join(searx_dir, file_name)) """Shortcut to load a YAML config from a file, located in the
if path is None:
raise FileNotFoundError(f"File {file_name} does not exist!")
return load_yaml(path) - :py:obj:`get_user_cfg_folder` or
- in the ``searx`` folder of the SearXNG installation
def get_default_settings_path():
return existing_filename_or_none(join(searx_dir, 'settings.yml'))
def get_user_settings_path() -> Optional[str]:
"""Get an user settings file.
By descending priority:
1. ``environ['SEARXNG_SETTINGS_PATH']``
2. ``/etc/searxng/settings.yml`` except if ``SEARXNG_DISABLE_ETC_SETTINGS`` is ``true`` or ``1``
3. ``None``
""" """
# check the environment variable SEARXNG_SETTINGS_PATH folder = get_user_cfg_folder() or Path(searx_dir)
# if the environment variable is defined, this is the last check fname = folder / file_name
if 'SEARXNG_SETTINGS_PATH' in environ: if not fname.is_file():
return existing_filename_or_none(environ['SEARXNG_SETTINGS_PATH']) raise FileNotFoundError(f"File {fname} does not exist!")
# if SEARXNG_DISABLE_ETC_SETTINGS don't look any further return load_yaml(fname)
if environ.get('SEARXNG_DISABLE_ETC_SETTINGS', '').lower() in ('1', 'true'):
return None
# check /etc/searxng/settings.yml
# (continue with other locations if the file is not found) def get_user_cfg_folder() -> Path | None:
return existing_filename_or_none('/etc/searxng/settings.yml') """Returns folder where the local configurations are located.
1. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a
folder (e.g. ``/etc/mysxng/``), all local configurations are expected in
this folder. The settings of the :ref:`SearXNG appl <searxng
settings.yml>` are then expected in ``settings.yml``
(e.g. ``/etc/mysxng/settings.yml``).
2. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a file
(e.g. ``/etc/mysxng/myinstance.yml``), this file contains the settings of
the :ref:`SearXNG appl <searxng settings.yml>` and the folder
(e.g. ``/etc/mysxng/``) is used for all other configurations.
This type (``SEARXNG_SETTINGS_PATH`` points to a file) is suitable for
use cases in which different profiles of the :ref:`SearXNG appl <searxng
settings.yml>` are to be managed, such as in test scenarios.
3. If folder ``/etc/searxng`` exists, it is used.
In case none of the above paths exist, ``None`` is returned. In case the
environment variable ``SEARXNG_SETTINGS_PATH`` is set, but the (folder or file)
does not exist, a :py:obj:`EnvironmentError` is raised.
"""
folder = None
settings_path = os.environ.get("SEARXNG_SETTINGS_PATH")
# Disable default /etc/searxng is intended exclusively for internal testing purposes
# and is therefore not documented!
disable_etc = os.environ.get('SEARXNG_DISABLE_ETC_SETTINGS', '').lower() in ('1', 'true')
if settings_path:
# rule 1. and 2.
settings_path = Path(settings_path)
if settings_path.is_dir():
folder = settings_path
elif settings_path.is_file():
folder = settings_path.parent
else:
raise EnvironmentError(1, f"{settings_path} not exists!", settings_path)
if not folder and not disable_etc:
# default: rule 3.
folder = Path("/etc/searxng")
if not folder.is_dir():
folder = None
return folder
def update_dict(default_dict, user_dict): def update_dict(default_dict, user_dict):
@ -74,7 +122,9 @@ def update_dict(default_dict, user_dict):
return default_dict return default_dict
def update_settings(default_settings, user_settings): def update_settings(default_settings: dict, user_settings: dict):
# pylint: disable=too-many-branches
# merge everything except the engines # merge everything except the engines
for k, v in user_settings.items(): for k, v in user_settings.items():
if k not in ('use_default_settings', 'engines'): if k not in ('use_default_settings', 'engines'):
@ -124,6 +174,7 @@ def update_settings(default_settings, user_settings):
def is_use_default_settings(user_settings): def is_use_default_settings(user_settings):
use_default_settings = user_settings.get('use_default_settings') use_default_settings = user_settings.get('use_default_settings')
if use_default_settings is True: if use_default_settings is True:
return True return True
@ -134,25 +185,37 @@ def is_use_default_settings(user_settings):
raise ValueError('Invalid value for use_default_settings') raise ValueError('Invalid value for use_default_settings')
def load_settings(load_user_settings=True): def load_settings(load_user_settings=True) -> tuple[dict, str]:
default_settings_path = get_default_settings_path() """Function for loading the settings of the SearXNG application
user_settings_path = get_user_settings_path() (:ref:`settings.yml <searxng settings.yml>`)."""
if user_settings_path is None or not load_user_settings:
# no user settings
return (load_yaml(default_settings_path), 'load the default settings from {}'.format(default_settings_path))
# user settings msg = f"load the default settings from {DEFAULT_SETTINGS_FILE}"
user_settings = load_yaml(user_settings_path) cfg = load_yaml(DEFAULT_SETTINGS_FILE)
if is_use_default_settings(user_settings): cfg_folder = get_user_cfg_folder()
if not load_user_settings or not cfg_folder:
return cfg, msg
settings_yml = os.environ.get("SEARXNG_SETTINGS_PATH")
if settings_yml and Path(settings_yml).is_file():
# see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a file
settings_yml = Path(settings_yml).name
else:
# see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a folder
settings_yml = SETTINGS_YAML
cfg_file = cfg_folder / settings_yml
if not cfg_file.exists():
return cfg, msg
msg = f"load the user settings from {cfg_file}"
user_cfg = load_yaml(cfg_file)
if is_use_default_settings(user_cfg):
# the user settings are merged with the default configuration # the user settings are merged with the default configuration
default_settings = load_yaml(default_settings_path) msg = f"merge the default settings ( {DEFAULT_SETTINGS_FILE} ) and the user settings ( {cfg_file} )"
update_settings(default_settings, user_settings) update_settings(cfg, user_cfg)
return ( else:
default_settings, cfg = user_cfg
'merge the default settings ( {} ) and the user settings ( {} )'.format(
default_settings_path, user_settings_path
),
)
# the user settings, fully replace the default configuration return cfg, msg
return (user_settings, 'load the user settings from {}'.format(user_settings_path))

View file

@ -61,7 +61,7 @@ from searx.botdetection import link_token
from searx.data import ENGINE_DESCRIPTIONS from searx.data import ENGINE_DESCRIPTIONS
from searx.results import Timing from searx.results import Timing
from searx.settings_defaults import OUTPUT_FORMATS from searx.settings_defaults import OUTPUT_FORMATS
from searx.settings_loader import get_default_settings_path from searx.settings_loader import DEFAULT_SETTINGS_FILE
from searx.exceptions import SearxParameterException from searx.exceptions import SearxParameterException
from searx.engines import ( from searx.engines import (
DEFAULT_CATEGORY, DEFAULT_CATEGORY,
@ -1347,7 +1347,7 @@ def run():
port=settings['server']['port'], port=settings['server']['port'],
host=settings['server']['bind_address'], host=settings['server']['bind_address'],
threaded=True, threaded=True,
extra_files=[get_default_settings_path()], extra_files=[DEFAULT_SETTINGS_FILE],
) )

View file

@ -1,2 +1,3 @@
Test: Test:
"**********" "**********"
xxx

View file

@ -1,7 +1,9 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
from os.path import dirname, join, abspath from pathlib import Path
import os
from unittest.mock import patch from unittest.mock import patch
from searx.exceptions import SearxSettingsException from searx.exceptions import SearxSettingsException
@ -9,7 +11,8 @@ from searx import settings_loader
from tests import SearxTestCase from tests import SearxTestCase
test_dir = abspath(dirname(__file__)) def _settings(f_name):
return str(Path(__file__).parent.absolute() / "settings" / f_name)
class TestLoad(SearxTestCase): # pylint: disable=missing-class-docstring class TestLoad(SearxTestCase): # pylint: disable=missing-class-docstring
@ -18,16 +21,9 @@ class TestLoad(SearxTestCase): # pylint: disable=missing-class-docstring
settings_loader.load_yaml('/dev/zero') settings_loader.load_yaml('/dev/zero')
with self.assertRaises(SearxSettingsException): with self.assertRaises(SearxSettingsException):
settings_loader.load_yaml(join(test_dir, '/settings/syntaxerror_settings.yml')) settings_loader.load_yaml(_settings("syntaxerror_settings.yml"))
with self.assertRaises(SearxSettingsException): self.assertEqual(settings_loader.load_yaml(_settings("empty_settings.yml")), {})
settings_loader.load_yaml(join(test_dir, '/settings/empty_settings.yml'))
def test_existing_filename_or_none(self):
self.assertIsNone(settings_loader.existing_filename_or_none('/dev/zero'))
bad_settings_path = join(test_dir, 'settings/syntaxerror_settings.yml')
self.assertEqual(settings_loader.existing_filename_or_none(bad_settings_path), bad_settings_path)
class TestDefaultSettings(SearxTestCase): # pylint: disable=missing-class-docstring class TestDefaultSettings(SearxTestCase): # pylint: disable=missing-class-docstring
@ -55,24 +51,22 @@ class TestUserSettings(SearxTestCase): # pylint: disable=missing-class-docstrin
self.assertFalse(settings_loader.is_use_default_settings({'use_default_settings': 0})) self.assertFalse(settings_loader.is_use_default_settings({'use_default_settings': 0}))
def test_user_settings_not_found(self): def test_user_settings_not_found(self):
with patch.dict(settings_loader.environ, {'SEARXNG_SETTINGS_PATH': '/dev/null'}): with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("not_exists.yml")}):
settings, msg = settings_loader.load_settings() with self.assertRaises(EnvironmentError):
self.assertTrue(msg.startswith('load the default settings from')) _s, _m = settings_loader.load_settings()
self.assertEqual(settings['server']['secret_key'], "ultrasecretkey") with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': "/folder/not/exists"}):
with self.assertRaises(EnvironmentError):
_s, _m = settings_loader.load_settings()
def test_user_settings(self): def test_user_settings(self):
with patch.dict( with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_simple.yml")}):
settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_simple.yml')}
):
settings, msg = settings_loader.load_settings() settings, msg = settings_loader.load_settings()
self.assertTrue(msg.startswith('merge the default settings')) self.assertTrue(msg.startswith('merge the default settings'))
self.assertEqual(settings['server']['secret_key'], "user_secret_key") self.assertEqual(settings['server']['secret_key'], "user_secret_key")
self.assertEqual(settings['server']['default_http_headers']['Custom-Header'], "Custom-Value") self.assertEqual(settings['server']['default_http_headers']['Custom-Header'], "Custom-Value")
def test_user_settings_remove(self): def test_user_settings_remove(self):
with patch.dict( with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_remove.yml")}):
settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove.yml')}
):
settings, msg = settings_loader.load_settings() settings, msg = settings_loader.load_settings()
self.assertTrue(msg.startswith('merge the default settings')) self.assertTrue(msg.startswith('merge the default settings'))
self.assertEqual(settings['server']['secret_key'], "user_secret_key") self.assertEqual(settings['server']['secret_key'], "user_secret_key")
@ -83,9 +77,7 @@ class TestUserSettings(SearxTestCase): # pylint: disable=missing-class-docstrin
self.assertIn('wikipedia', engine_names) self.assertIn('wikipedia', engine_names)
def test_user_settings_remove2(self): def test_user_settings_remove2(self):
with patch.dict( with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_remove2.yml")}):
settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove2.yml')}
):
settings, msg = settings_loader.load_settings() settings, msg = settings_loader.load_settings()
self.assertTrue(msg.startswith('merge the default settings')) self.assertTrue(msg.startswith('merge the default settings'))
self.assertEqual(settings['server']['secret_key'], "user_secret_key") self.assertEqual(settings['server']['secret_key'], "user_secret_key")
@ -101,9 +93,7 @@ class TestUserSettings(SearxTestCase): # pylint: disable=missing-class-docstrin
self.assertEqual(newengine[0]['engine'], 'dummy') self.assertEqual(newengine[0]['engine'], 'dummy')
def test_user_settings_keep_only(self): def test_user_settings_keep_only(self):
with patch.dict( with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_keep_only.yml")}):
settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_keep_only.yml')}
):
settings, msg = settings_loader.load_settings() settings, msg = settings_loader.load_settings()
self.assertTrue(msg.startswith('merge the default settings')) self.assertTrue(msg.startswith('merge the default settings'))
engine_names = [engine['name'] for engine in settings['engines']] engine_names = [engine['name'] for engine in settings['engines']]
@ -112,9 +102,7 @@ class TestUserSettings(SearxTestCase): # pylint: disable=missing-class-docstrin
self.assertEqual(len(settings['engines'][2]), 1) self.assertEqual(len(settings['engines'][2]), 1)
def test_custom_settings(self): def test_custom_settings(self):
with patch.dict( with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings.yml")}):
settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings.yml')}
):
settings, msg = settings_loader.load_settings() settings, msg = settings_loader.load_settings()
self.assertTrue(msg.startswith('load the user settings from')) self.assertTrue(msg.startswith('load the user settings from'))
self.assertEqual(settings['server']['port'], 9000) self.assertEqual(settings['server']['port'], 9000)