[fix] make data.traits - partial revert of commit 30a8204

The entries in the catalog of search languages are built up from the "Engine
Traits" [1], and which entries are included in the catalog is controlled by two
threshold values [2].

If possible, the values should ensure that no languages or regions disappear
from the catalog of search languages [3].

The threshold values should have been adjusted in commit 30a8204:

- ``min_eng_per_region = 18``
- ``min_eng_per_lang = 22``

Because the threshold values were not adjusted, many entries were missing in the
search language catalog.  This bug has been fixed with this patch: the threshold
values have been adjusted and the catalog of search languages has been completed
again.

[1] https://docs.searxng.org/dev/engines/enginelib.html#module-searx.enginelib.traits
[2] 96a6e3dcb2/searxng_extra/update/update_engine_traits.py (L104-L105)
[3] https://github.com/searxng/searxng/blob/master/searx/sxng_locales.py

Closes: https://github.com/searxng/searxng/issues/4519
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-03-21 10:34:15 +01:00 committed by Markus Heiser
parent 7e680d8e8e
commit 5986629c6b
4 changed files with 91 additions and 5 deletions

View file

@ -6680,6 +6680,7 @@
"custom": {},
"data_type": "traits_v1",
"languages": {
"ar": "ar",
"ca": "ca",
"cs": "cs",
"de": "de",
@ -6688,17 +6689,30 @@
"eo": "eo",
"es": "es",
"eu": "eu",
"fa": "fa",
"fi": "fi",
"fr": "fr",
"gd": "gd",
"gl": "gl",
"hr": "hr",
"hu": "hu",
"is": "is",
"it": "it",
"ja": "ja",
"kab": "kab",
"nl": "nl",
"no": "no",
"pl": "pl",
"pt": "pt",
"ru": "ru",
"sk": "sk",
"sq": "sq",
"sv": "sv",
"zh": "zh",
"th": "th",
"tok": "tok",
"tr": "tr",
"uk": "uk",
"vi": "vi",
"zh_Hans": "zh",
"zh_Hant": "zh"
},
@ -6915,6 +6929,7 @@
"BQ",
"BR",
"BS",
"BT",
"BW",
"BY",
"BZ",
@ -6937,6 +6952,7 @@
"CY",
"CZ",
"DE",
"DJ",
"DK",
"DM",
"DO",
@ -6950,8 +6966,10 @@
"FI",
"FJ",
"FK",
"FM",
"FO",
"FR",
"GA",
"GB",
"GD",
"GE",
@ -6961,6 +6979,7 @@
"GI",
"GL",
"GM",
"GN",
"GP",
"GQ",
"GR",
@ -6989,6 +7008,7 @@
"KE",
"KG",
"KH",
"KI",
"KM",
"KN",
"KP",
@ -7001,6 +7021,7 @@
"LC",
"LI",
"LK",
"LR",
"LS",
"LT",
"LU",
@ -7011,15 +7032,18 @@
"MD",
"ME",
"MG",
"MH",
"MK",
"ML",
"MM",
"MN",
"MO",
"MQ",
"MR",
"MS",
"MT",
"MU",
"MV",
"MW",
"MX",
"MY",
@ -7032,6 +7056,7 @@
"NL",
"NO",
"NP",
"NR",
"NU",
"NZ",
"OM",
@ -7055,6 +7080,7 @@
"RU",
"RW",
"SA",
"SB",
"SC",
"SD",
"SE",
@ -7082,8 +7108,10 @@
"TL",
"TM",
"TN",
"TO",
"TR",
"TT",
"TV",
"TW",
"TZ",
"UA",
@ -7239,6 +7267,7 @@
"custom": {},
"data_type": "traits_v1",
"languages": {
"ar": "ar",
"ca": "ca",
"cs": "cs",
"de": "de",
@ -7247,17 +7276,30 @@
"eo": "eo",
"es": "es",
"eu": "eu",
"fa": "fa",
"fi": "fi",
"fr": "fr",
"gd": "gd",
"gl": "gl",
"hr": "hr",
"hu": "hu",
"is": "is",
"it": "it",
"ja": "ja",
"kab": "kab",
"nl": "nl",
"no": "no",
"pl": "pl",
"pt": "pt",
"ru": "ru",
"sk": "sk",
"sq": "sq",
"sv": "sv",
"zh": "zh",
"th": "th",
"tok": "tok",
"tr": "tr",
"uk": "uk",
"vi": "vi",
"zh_Hans": "zh",
"zh_Hant": "zh"
},

View file

@ -10,6 +10,8 @@ used.
"""
from __future__ import annotations
import os
import json
import dataclasses
import types
@ -219,8 +221,20 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
for engine_name in names:
engine = engines.engines[engine_name]
traits = None
# pylint: disable=broad-exception-caught
try:
traits = EngineTraits.fetch_traits(engine)
except Exception as exc:
log("FATAL: while fetch_traits %s: %s" % (engine_name, exc))
if os.environ.get('FORCE', '').lower() not in ['on', 'true', '1']:
raise
v = ENGINE_TRAITS.get(engine_name)
if v:
log("FORCE: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name)
traits = EngineTraits(**v)
traits = EngineTraits.fetch_traits(engine)
if traits is not None:
log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))

View file

@ -11,9 +11,14 @@
sxng_locales = (
('af', 'Afrikaans', '', 'Afrikaans', '\U0001f310'),
('ar', 'العربية', '', 'Arabic', '\U0001f310'),
('ar-SA', 'العربية', 'المملكة العربية السعودية', 'Arabic', '\U0001f1f8\U0001f1e6'),
('be', 'Беларуская', '', 'Belarusian', '\U0001f310'),
('bg', 'Български', '', 'Bulgarian', '\U0001f310'),
('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
('ca', 'Català', '', 'Catalan', '\U0001f310'),
('cs', 'Čeština', '', 'Czech', '\U0001f310'),
('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
('cy', 'Cymraeg', '', 'Welsh', '\U0001f310'),
('da', 'Dansk', '', 'Danish', '\U0001f310'),
('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
('de', 'Deutsch', '', 'German', '\U0001f310'),
@ -21,6 +26,7 @@ sxng_locales = (
('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
('el', 'Ελληνικά', '', 'Greek', '\U0001f310'),
('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
('en', 'English', '', 'English', '\U0001f310'),
('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
@ -29,13 +35,21 @@ sxng_locales = (
('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'),
('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'),
('en-PK', 'English', 'Pakistan', 'English', '\U0001f1f5\U0001f1f0'),
('en-SG', 'English', 'Singapore', 'English', '\U0001f1f8\U0001f1ec'),
('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'),
('es', 'Español', '', 'Spanish', '\U0001f310'),
('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
('es-CO', 'Español', 'Colombia', 'Spanish', '\U0001f1e8\U0001f1f4'),
('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
('es-PE', 'Español', 'Perú', 'Spanish', '\U0001f1f5\U0001f1ea'),
('et', 'Eesti', '', 'Estonian', '\U0001f310'),
('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
('eu', 'Euskara', '', 'Basque', '\U0001f310'),
('fa', 'فارسی', '', 'Persian', '\U0001f310'),
('fi', 'Suomi', '', 'Finnish', '\U0001f310'),
('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
('fr', 'Français', '', 'French', '\U0001f310'),
@ -43,20 +57,29 @@ sxng_locales = (
('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
('ga', 'Gaeilge', '', 'Irish', '\U0001f310'),
('gd', 'Gàidhlig', '', 'Scottish Gaelic', '\U0001f310'),
('gl', 'Galego', '', 'Galician', '\U0001f310'),
('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'),
('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'),
('hu', 'Magyar', '', 'Hungarian', '\U0001f310'),
('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
('id', 'Indonesia', '', 'Indonesian', '\U0001f310'),
('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
('is', 'Íslenska', '', 'Icelandic', '\U0001f310'),
('it', 'Italiano', '', 'Italian', '\U0001f310'),
('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'),
('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
('ja', '日本語', '', 'Japanese', '\U0001f310'),
('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
('kn', 'ಕನ್ನಡ', '', 'Kannada', '\U0001f310'),
('ko', '한국어', '', 'Korean', '\U0001f310'),
('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'),
('lv', 'Latviešu', '', 'Latvian', '\U0001f310'),
('ml', 'മലയാളം', '', 'Malayalam', '\U0001f310'),
('mr', 'मराठी', '', 'Marathi', '\U0001f310'),
('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'),
('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'),
('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
@ -68,17 +91,24 @@ sxng_locales = (
('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
('ro', 'Română', '', 'Romanian', '\U0001f310'),
('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
('ru', 'Русский', '', 'Russian', '\U0001f310'),
('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'),
('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'),
('sq', 'Shqip', '', 'Albanian', '\U0001f310'),
('sv', 'Svenska', '', 'Swedish', '\U0001f310'),
('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
('ta', 'தமிழ்', '', 'Tamil', '\U0001f310'),
('te', 'తెలుగు', '', 'Telugu', '\U0001f310'),
('th', 'ไทย', '', 'Thai', '\U0001f310'),
('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
('tr', 'Türkçe', '', 'Turkish', '\U0001f310'),
('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
('uk', 'Українська', '', 'Ukrainian', '\U0001f310'),
('ur', 'اردو', '', 'Urdu', '\U0001f310'),
('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'),
('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
('zh', '中文', '', 'Chinese', '\U0001f310'),
('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),

View file

@ -101,8 +101,8 @@ def fetch_traits_map():
def filter_locales(traits_map: EngineTraitsMap):
"""Filter language & region tags by a threshold."""
min_eng_per_region = 22
min_eng_per_lang = 24
min_eng_per_region = 18
min_eng_per_lang = 22
_ = {}
for eng in traits_map.values():