mirror of
https://github.com/searxng/searxng.git
synced 2025-04-09 21:24:08 +00:00
[fix] make data.traits - partial revert of commit 30a8204
The entries in the catalog of search languages are built up from the "Engine Traits" [1], and which entries are included in the catalog is controlled by two threshold values [2]. If possible, the values should ensure that no languages or regions disappear from the catalog of search languages [3]. The threshold values should have been adjusted in commit 30a8204
: - ``min_eng_per_region = 18`` - ``min_eng_per_lang = 22`` Because the threshold values were not adjusted, many entries were missing from the search language catalog. This bug has been fixed with this patch: the threshold values have been adjusted and the catalog of search languages has been completed again. [1] https://docs.searxng.org/dev/engines/enginelib.html#module-searx.enginelib.traits [2] 96a6e3dcb2/searxng_extra/update/update_engine_traits.py (L104-L105)
[3] https://github.com/searxng/searxng/blob/master/searx/sxng_locales.py Closes: https://github.com/searxng/searxng/issues/4519 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
7e680d8e8e
commit
5986629c6b
4 changed files with 91 additions and 5 deletions
|
@ -6680,6 +6680,7 @@
|
|||
"custom": {},
|
||||
"data_type": "traits_v1",
|
||||
"languages": {
|
||||
"ar": "ar",
|
||||
"ca": "ca",
|
||||
"cs": "cs",
|
||||
"de": "de",
|
||||
|
@ -6688,17 +6689,30 @@
|
|||
"eo": "eo",
|
||||
"es": "es",
|
||||
"eu": "eu",
|
||||
"fa": "fa",
|
||||
"fi": "fi",
|
||||
"fr": "fr",
|
||||
"gd": "gd",
|
||||
"gl": "gl",
|
||||
"hr": "hr",
|
||||
"hu": "hu",
|
||||
"is": "is",
|
||||
"it": "it",
|
||||
"ja": "ja",
|
||||
"kab": "kab",
|
||||
"nl": "nl",
|
||||
"no": "no",
|
||||
"pl": "pl",
|
||||
"pt": "pt",
|
||||
"ru": "ru",
|
||||
"sk": "sk",
|
||||
"sq": "sq",
|
||||
"sv": "sv",
|
||||
"zh": "zh",
|
||||
"th": "th",
|
||||
"tok": "tok",
|
||||
"tr": "tr",
|
||||
"uk": "uk",
|
||||
"vi": "vi",
|
||||
"zh_Hans": "zh",
|
||||
"zh_Hant": "zh"
|
||||
},
|
||||
|
@ -6915,6 +6929,7 @@
|
|||
"BQ",
|
||||
"BR",
|
||||
"BS",
|
||||
"BT",
|
||||
"BW",
|
||||
"BY",
|
||||
"BZ",
|
||||
|
@ -6937,6 +6952,7 @@
|
|||
"CY",
|
||||
"CZ",
|
||||
"DE",
|
||||
"DJ",
|
||||
"DK",
|
||||
"DM",
|
||||
"DO",
|
||||
|
@ -6950,8 +6966,10 @@
|
|||
"FI",
|
||||
"FJ",
|
||||
"FK",
|
||||
"FM",
|
||||
"FO",
|
||||
"FR",
|
||||
"GA",
|
||||
"GB",
|
||||
"GD",
|
||||
"GE",
|
||||
|
@ -6961,6 +6979,7 @@
|
|||
"GI",
|
||||
"GL",
|
||||
"GM",
|
||||
"GN",
|
||||
"GP",
|
||||
"GQ",
|
||||
"GR",
|
||||
|
@ -6989,6 +7008,7 @@
|
|||
"KE",
|
||||
"KG",
|
||||
"KH",
|
||||
"KI",
|
||||
"KM",
|
||||
"KN",
|
||||
"KP",
|
||||
|
@ -7001,6 +7021,7 @@
|
|||
"LC",
|
||||
"LI",
|
||||
"LK",
|
||||
"LR",
|
||||
"LS",
|
||||
"LT",
|
||||
"LU",
|
||||
|
@ -7011,15 +7032,18 @@
|
|||
"MD",
|
||||
"ME",
|
||||
"MG",
|
||||
"MH",
|
||||
"MK",
|
||||
"ML",
|
||||
"MM",
|
||||
"MN",
|
||||
"MO",
|
||||
"MQ",
|
||||
"MR",
|
||||
"MS",
|
||||
"MT",
|
||||
"MU",
|
||||
"MV",
|
||||
"MW",
|
||||
"MX",
|
||||
"MY",
|
||||
|
@ -7032,6 +7056,7 @@
|
|||
"NL",
|
||||
"NO",
|
||||
"NP",
|
||||
"NR",
|
||||
"NU",
|
||||
"NZ",
|
||||
"OM",
|
||||
|
@ -7055,6 +7080,7 @@
|
|||
"RU",
|
||||
"RW",
|
||||
"SA",
|
||||
"SB",
|
||||
"SC",
|
||||
"SD",
|
||||
"SE",
|
||||
|
@ -7082,8 +7108,10 @@
|
|||
"TL",
|
||||
"TM",
|
||||
"TN",
|
||||
"TO",
|
||||
"TR",
|
||||
"TT",
|
||||
"TV",
|
||||
"TW",
|
||||
"TZ",
|
||||
"UA",
|
||||
|
@ -7239,6 +7267,7 @@
|
|||
"custom": {},
|
||||
"data_type": "traits_v1",
|
||||
"languages": {
|
||||
"ar": "ar",
|
||||
"ca": "ca",
|
||||
"cs": "cs",
|
||||
"de": "de",
|
||||
|
@ -7247,17 +7276,30 @@
|
|||
"eo": "eo",
|
||||
"es": "es",
|
||||
"eu": "eu",
|
||||
"fa": "fa",
|
||||
"fi": "fi",
|
||||
"fr": "fr",
|
||||
"gd": "gd",
|
||||
"gl": "gl",
|
||||
"hr": "hr",
|
||||
"hu": "hu",
|
||||
"is": "is",
|
||||
"it": "it",
|
||||
"ja": "ja",
|
||||
"kab": "kab",
|
||||
"nl": "nl",
|
||||
"no": "no",
|
||||
"pl": "pl",
|
||||
"pt": "pt",
|
||||
"ru": "ru",
|
||||
"sk": "sk",
|
||||
"sq": "sq",
|
||||
"sv": "sv",
|
||||
"zh": "zh",
|
||||
"th": "th",
|
||||
"tok": "tok",
|
||||
"tr": "tr",
|
||||
"uk": "uk",
|
||||
"vi": "vi",
|
||||
"zh_Hans": "zh",
|
||||
"zh_Hant": "zh"
|
||||
},
|
||||
|
|
|
@ -10,6 +10,8 @@ used.
|
|||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import json
|
||||
import dataclasses
|
||||
import types
|
||||
|
@ -219,8 +221,20 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
|
|||
|
||||
for engine_name in names:
|
||||
engine = engines.engines[engine_name]
|
||||
traits = None
|
||||
|
||||
# pylint: disable=broad-exception-caught
|
||||
try:
|
||||
traits = EngineTraits.fetch_traits(engine)
|
||||
except Exception as exc:
|
||||
log("FATAL: while fetch_traits %s: %s" % (engine_name, exc))
|
||||
if os.environ.get('FORCE', '').lower() not in ['on', 'true', '1']:
|
||||
raise
|
||||
v = ENGINE_TRAITS.get(engine_name)
|
||||
if v:
|
||||
log("FORCE: re-use old values from fetch_traits - ENGINE_TRAITS[%s]" % engine_name)
|
||||
traits = EngineTraits(**v)
|
||||
|
||||
traits = EngineTraits.fetch_traits(engine)
|
||||
if traits is not None:
|
||||
log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
|
||||
log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
|
||||
|
|
|
@ -11,9 +11,14 @@
|
|||
sxng_locales = (
|
||||
('af', 'Afrikaans', '', 'Afrikaans', '\U0001f310'),
|
||||
('ar', 'العربية', '', 'Arabic', '\U0001f310'),
|
||||
('ar-SA', 'العربية', 'المملكة العربية السعودية', 'Arabic', '\U0001f1f8\U0001f1e6'),
|
||||
('be', 'Беларуская', '', 'Belarusian', '\U0001f310'),
|
||||
('bg', 'Български', '', 'Bulgarian', '\U0001f310'),
|
||||
('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
|
||||
('ca', 'Català', '', 'Catalan', '\U0001f310'),
|
||||
('cs', 'Čeština', '', 'Czech', '\U0001f310'),
|
||||
('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
|
||||
('cy', 'Cymraeg', '', 'Welsh', '\U0001f310'),
|
||||
('da', 'Dansk', '', 'Danish', '\U0001f310'),
|
||||
('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
|
||||
('de', 'Deutsch', '', 'German', '\U0001f310'),
|
||||
|
@ -21,6 +26,7 @@ sxng_locales = (
|
|||
('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
|
||||
('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
|
||||
('el', 'Ελληνικά', '', 'Greek', '\U0001f310'),
|
||||
('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
|
||||
('en', 'English', '', 'English', '\U0001f310'),
|
||||
('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
|
||||
('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
|
||||
|
@ -29,13 +35,21 @@ sxng_locales = (
|
|||
('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'),
|
||||
('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
|
||||
('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'),
|
||||
('en-PK', 'English', 'Pakistan', 'English', '\U0001f1f5\U0001f1f0'),
|
||||
('en-SG', 'English', 'Singapore', 'English', '\U0001f1f8\U0001f1ec'),
|
||||
('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
|
||||
('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'),
|
||||
('es', 'Español', '', 'Spanish', '\U0001f310'),
|
||||
('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
|
||||
('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
|
||||
('es-CO', 'Español', 'Colombia', 'Spanish', '\U0001f1e8\U0001f1f4'),
|
||||
('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
|
||||
('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
|
||||
('es-PE', 'Español', 'Perú', 'Spanish', '\U0001f1f5\U0001f1ea'),
|
||||
('et', 'Eesti', '', 'Estonian', '\U0001f310'),
|
||||
('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
|
||||
('eu', 'Euskara', '', 'Basque', '\U0001f310'),
|
||||
('fa', 'فارسی', '', 'Persian', '\U0001f310'),
|
||||
('fi', 'Suomi', '', 'Finnish', '\U0001f310'),
|
||||
('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
|
||||
('fr', 'Français', '', 'French', '\U0001f310'),
|
||||
|
@ -43,20 +57,29 @@ sxng_locales = (
|
|||
('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
|
||||
('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
|
||||
('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
|
||||
('ga', 'Gaeilge', '', 'Irish', '\U0001f310'),
|
||||
('gd', 'Gàidhlig', '', 'Scottish Gaelic', '\U0001f310'),
|
||||
('gl', 'Galego', '', 'Galician', '\U0001f310'),
|
||||
('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f1'),
|
||||
('hi', 'हिन्दी', '', 'Hindi', '\U0001f310'),
|
||||
('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'),
|
||||
('hu', 'Magyar', '', 'Hungarian', '\U0001f310'),
|
||||
('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
|
||||
('id', 'Indonesia', '', 'Indonesian', '\U0001f310'),
|
||||
('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
|
||||
('is', 'Íslenska', '', 'Icelandic', '\U0001f310'),
|
||||
('it', 'Italiano', '', 'Italian', '\U0001f310'),
|
||||
('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'),
|
||||
('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
|
||||
('ja', '日本語', '', 'Japanese', '\U0001f310'),
|
||||
('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
|
||||
('kn', 'ಕನ್ನಡ', '', 'Kannada', '\U0001f310'),
|
||||
('ko', '한국어', '', 'Korean', '\U0001f310'),
|
||||
('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
|
||||
('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'),
|
||||
('lv', 'Latviešu', '', 'Latvian', '\U0001f310'),
|
||||
('ml', 'മലയാളം', '', 'Malayalam', '\U0001f310'),
|
||||
('mr', 'मराठी', '', 'Marathi', '\U0001f310'),
|
||||
('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'),
|
||||
('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'),
|
||||
('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
|
||||
|
@ -68,17 +91,24 @@ sxng_locales = (
|
|||
('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
|
||||
('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
|
||||
('ro', 'Română', '', 'Romanian', '\U0001f310'),
|
||||
('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
|
||||
('ru', 'Русский', '', 'Russian', '\U0001f310'),
|
||||
('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
|
||||
('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'),
|
||||
('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'),
|
||||
('sq', 'Shqip', '', 'Albanian', '\U0001f310'),
|
||||
('sv', 'Svenska', '', 'Swedish', '\U0001f310'),
|
||||
('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
|
||||
('ta', 'தமிழ்', '', 'Tamil', '\U0001f310'),
|
||||
('te', 'తెలుగు', '', 'Telugu', '\U0001f310'),
|
||||
('th', 'ไทย', '', 'Thai', '\U0001f310'),
|
||||
('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
|
||||
('tr', 'Türkçe', '', 'Turkish', '\U0001f310'),
|
||||
('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
|
||||
('uk', 'Українська', '', 'Ukrainian', '\U0001f310'),
|
||||
('ur', 'اردو', '', 'Urdu', '\U0001f310'),
|
||||
('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'),
|
||||
('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
|
||||
('zh', '中文', '', 'Chinese', '\U0001f310'),
|
||||
('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
|
||||
('zh-HK', '中文', '中國香港特別行政區', 'Chinese', '\U0001f1ed\U0001f1f0'),
|
||||
|
|
|
@ -101,8 +101,8 @@ def fetch_traits_map():
|
|||
def filter_locales(traits_map: EngineTraitsMap):
|
||||
"""Filter language & region tags by a threshold."""
|
||||
|
||||
min_eng_per_region = 22
|
||||
min_eng_per_lang = 24
|
||||
min_eng_per_region = 18
|
||||
min_eng_per_lang = 22
|
||||
|
||||
_ = {}
|
||||
for eng in traits_map.values():
|
||||
|
|
Loading…
Reference in a new issue