From aa59bfbf60d75508fc1f91220ed2598bf8cf97ec Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Sun, 5 May 2024 23:17:35 +0200
Subject: [PATCH] [feat] hostname replace plugin: support for external list file

---
 searx/plugins/hostnames.py | 36 +++++++++++++++++++++++++++---------
 searx/settings.yml         | 10 ++++++++++
 searx/settings_loader.py   |  8 ++++++++
 3 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/searx/plugins/hostnames.py b/searx/plugins/hostnames.py
index 515a45259..6ab6147dd 100644
--- a/searx/plugins/hostnames.py
+++ b/searx/plugins/hostnames.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-# pylint: disable=missing-module-docstring
+# pylint: disable=missing-module-docstring, too-many-branches
 
 import re
 from urllib.parse import urlunparse, urlparse
@@ -8,6 +8,7 @@ from flask_babel import gettext
 
 from searx import settings
 from searx.plugins import logger
+from searx.settings_loader import get_yaml_file
 
 name = gettext('Hostnames plugin')
 description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')
@@ -16,19 +17,36 @@ preference_section = 'general'
 
 plugin_id = 'hostnames'
 
-replacements = {
-    re.compile(p): r
-    for (p, r) in (settings.get(plugin_id, {}).get('replace', settings.get('hostname_replace', {})).items())
-}
-removables = {re.compile(p) for p in settings[plugin_id].get('remove', [])}
-high_priority = {re.compile(p) for p in settings[plugin_id].get('high_priority', [])}
-low_priority = {re.compile(p) for p in settings[plugin_id].get('low_priority', [])}
-
 logger = logger.getChild(plugin_id)
 parsed = 'parsed_url'
 _url_fields = ['iframe_src', 'audio_src']
 
 
+def _load_regular_expressions(settings_key):
+    setting_value = settings.get(plugin_id, {}).get(settings_key)
+
+    if not setting_value:
+        return {}
+
+    # load external file with configuration
+    if isinstance(setting_value, str):
+        setting_value = get_yaml_file(setting_value)
+
+    if isinstance(setting_value, list):
+        return {re.compile(r) for r in setting_value}
+
+    if isinstance(setting_value, dict):
+        return {re.compile(p): r for (p, r) in setting_value.items()}
+
+    return {}
+
+
+replacements = _load_regular_expressions('replace')
+removables = _load_regular_expressions('remove')
+high_priority = _load_regular_expressions('high_priority')
+low_priority = _load_regular_expressions('low_priority')
+
+
 def _matches_parsed_url(result, pattern):
     return parsed in result and pattern.search(result[parsed].netloc)
 
diff --git a/searx/settings.yml b/searx/settings.yml
index 8a1c00ba8..db749be77 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -243,6 +243,16 @@ outgoing:
 #     - '(.*\.)?google(\..*)?$'
 #   high_priority:
 #     - '(.*\.)?wikipedia.org$'
+#
+# Alternatively you can use external files for configuring the "Hostnames plugin":
+#
+# hostnames:
+#   replace: 'rewrite-hosts.yml'
+#
+# Content of 'rewrite-hosts.yml' (place the file in the same directory as 'settings.yml'):
+# '(.*\.)?youtube\.com$': 'invidious.example.com'
+# '(.*\.)?youtu\.be$': 'invidious.example.com'
+#
 
 checker:
   # disable checker when in debug mode
diff --git a/searx/settings_loader.py b/searx/settings_loader.py
index fe2696cce..6bf3465f0 100644
--- a/searx/settings_loader.py
+++ b/searx/settings_loader.py
@@ -31,6 +31,14 @@ def load_yaml(file_name):
         raise SearxSettingsException(e, file_name) from e
 
 
+def get_yaml_file(file_name):
+    path = existing_filename_or_none(join(searx_dir, file_name))
+    if path is None:
+        raise FileNotFoundError(f"File {file_name} does not exist!")
+
+    return load_yaml(path)
+
+
 def get_default_settings_path():
     return existing_filename_or_none(join(searx_dir, 'settings.yml'))
 