mirror of
https://github.com/searxng/searxng.git
synced 2024-11-25 12:21:00 +00:00
[mod] reduce memory footprint by not calling babel.Locale.parse at runtime
babel.Locale.parse loads more than 60MB in RAM. The only purpose is to get: LOCALE_NAMES - searx.data.LOCALES["LOCALE_NAMES"] RTL_LOCALES - searx.data.LOCALES["RTL_LOCALES"] This commit calls babel.Locale.parse when the translations are updated from Weblate and stores the results in:: searx/data/locales.json This file can be built by:: ./manage data.locales By storing these variables in searx.data when the translations are updated we save roughly 65MB per worker (usually 4 workers = 260MB of RAM saved). Suggested-by: https://github.com/searxng/searxng/discussions/2633#discussioncomment-8490494 Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
76845ea42c
commit
ed66ed758d
10 changed files with 269 additions and 79 deletions
|
@ -78,6 +78,16 @@ Scripts to update static data in :origin:`searx/data/`
|
|||
.. automodule:: searxng_extra.update.update_pygments
|
||||
:members:
|
||||
|
||||
.. _update_locales.py:
|
||||
|
||||
``update_locales.py``
|
||||
=====================
|
||||
|
||||
:origin:`[source] <searxng_extra/update/update_locales.py>`
|
||||
|
||||
.. automodule:: searxng_extra.update.update_locales
|
||||
:members:
|
||||
|
||||
|
||||
``update_wikidata_units.py``
|
||||
============================
|
||||
|
|
|
@ -13,8 +13,3 @@ Locales
|
|||
:members:
|
||||
|
||||
|
||||
SearXNG's locale codes
|
||||
======================
|
||||
|
||||
.. automodule:: searx.sxng_locales
|
||||
:members:
|
||||
|
|
|
@ -15,6 +15,7 @@ __all__ = [
|
|||
'EXTERNAL_BANGS',
|
||||
'OSM_KEYS_TAGS',
|
||||
'ENGINE_DESCRIPTIONS',
|
||||
'LOCALES',
|
||||
'ahmia_blacklist_loader',
|
||||
]
|
||||
|
||||
|
@ -50,3 +51,4 @@ EXTERNAL_BANGS = _load('external_bangs.json')
|
|||
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
|
||||
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
|
||||
ENGINE_TRAITS = _load('engine_traits.json')
|
||||
LOCALES = _load('locales.json')
|
||||
|
|
69
searx/data/locales.json
Normal file
69
searx/data/locales.json
Normal file
|
@ -0,0 +1,69 @@
|
|||
{
|
||||
"LOCALE_NAMES": {
|
||||
"af": "Afrikaans",
|
||||
"ar": "العربية (Arabic)",
|
||||
"bg": "Български (Bulgarian)",
|
||||
"bn": "বাংলা (Bangla)",
|
||||
"bo": "བོད་སྐད་ (Tibetan)",
|
||||
"ca": "Català (Catalan)",
|
||||
"cs": "Čeština (Czech)",
|
||||
"cy": "Cymraeg (Welsh)",
|
||||
"da": "Dansk (Danish)",
|
||||
"de": "Deutsch (German)",
|
||||
"dv": "ދިވެހި (Dhivehi)",
|
||||
"el-GR": "Ελληνικά, Ελλάδα (Greek, Greece)",
|
||||
"en": "English",
|
||||
"eo": "Esperanto",
|
||||
"es": "Español (Spanish)",
|
||||
"et": "Eesti (Estonian)",
|
||||
"eu": "Euskara (Basque)",
|
||||
"fa-IR": "فارسی, ایران (Persian, Iran)",
|
||||
"fi": "Suomi (Finnish)",
|
||||
"fil": "Filipino",
|
||||
"fr": "Français (French)",
|
||||
"gl": "Galego (Galician)",
|
||||
"he": "עברית (Hebrew)",
|
||||
"hr": "Hrvatski (Croatian)",
|
||||
"hu": "Magyar (Hungarian)",
|
||||
"ia": "Interlingua",
|
||||
"id": "Indonesia (Indonesian)",
|
||||
"it": "Italiano (Italian)",
|
||||
"ja": "日本語 (Japanese)",
|
||||
"ko": "한국어 (Korean)",
|
||||
"lt": "Lietuvių (Lithuanian)",
|
||||
"lv": "Latviešu (Latvian)",
|
||||
"ml": "മലയാളം (Malayalam)",
|
||||
"ms": "Melayu (Malay)",
|
||||
"nb-NO": "Norsk bokmål, Norge (Norwegian bokmål, Norway)",
|
||||
"nl": "Nederlands (Dutch)",
|
||||
"nl-BE": "Nederlands, België (Dutch, Belgium)",
|
||||
"oc": "Occitan",
|
||||
"pa": "ਪੰਜਾਬੀ (Punjabi)",
|
||||
"pap": "Papiamento",
|
||||
"pl": "Polski (Polish)",
|
||||
"pt": "Português (Portuguese)",
|
||||
"pt-BR": "Português, Brasil (Portuguese, Brazil)",
|
||||
"ro": "Română (Romanian)",
|
||||
"ru": "Русский (Russian)",
|
||||
"si": "සිංහල (Sinhala)",
|
||||
"sk": "Slovenčina (Slovak)",
|
||||
"sl": "Slovenščina (Slovenian)",
|
||||
"sr": "Српски (Serbian)",
|
||||
"sv": "Svenska (Swedish)",
|
||||
"szl": "Ślōnski (Silesian)",
|
||||
"ta": "தமிழ் (Tamil)",
|
||||
"te": "తెలుగు (Telugu)",
|
||||
"th": "ไทย (Thai)",
|
||||
"tr": "Türkçe (Turkish)",
|
||||
"uk": "Українська (Ukrainian)",
|
||||
"vi": "Tiếng việt (Vietnamese)",
|
||||
"zh-HK": "中文, 中國香港特別行政區 (Chinese, Hong Kong SAR China)",
|
||||
"zh-Hans-CN": "中文, 中国 (Chinese, China)",
|
||||
"zh-Hant-TW": "中文, 台灣 (Chinese, Taiwan)"
|
||||
},
|
||||
"RTL_LOCALES": [
|
||||
"fa-IR",
|
||||
"ar",
|
||||
"he"
|
||||
]
|
||||
}
|
127
searx/locales.py
127
searx/locales.py
|
@ -1,12 +1,36 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# lint: pylint
|
||||
"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
|
||||
"""
|
||||
SearXNG’s locale data
|
||||
=====================
|
||||
|
||||
The variables :py:obj:`RTL_LOCALES` and :py:obj:`LOCALE_NAMES` are loaded from
|
||||
:origin:`searx/data/locales.json` / see :py:obj:`locales_initialize` and
|
||||
:ref:`update_locales.py`.
|
||||
|
||||
.. hint::
|
||||
|
||||
Whenever the value of :py:obj:`ADDITIONAL_TRANSLATIONS` or
|
||||
:py:obj:`LOCALE_BEST_MATCH` is modified, the
|
||||
:origin:`searx/data/locales.json` needs to be rebuilt::
|
||||
|
||||
./manage data.locales
|
||||
|
||||
SearXNG's locale codes
|
||||
======================
|
||||
|
||||
.. automodule:: searx.sxng_locales
|
||||
:members:
|
||||
|
||||
|
||||
SearXNG’s locale implementations
|
||||
================================
|
||||
"""
|
||||
|
||||
from typing import Set, Optional, List
|
||||
import os
|
||||
import pathlib
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import babel
|
||||
from babel.support import Translations
|
||||
|
@ -15,7 +39,11 @@ import babel.core
|
|||
import flask_babel
|
||||
import flask
|
||||
from flask.ctx import has_request_context
|
||||
from searx import logger
|
||||
from searx import (
|
||||
data,
|
||||
logger,
|
||||
searx_dir,
|
||||
)
|
||||
|
||||
logger = logger.getChild('locales')
|
||||
|
||||
|
@ -30,7 +58,7 @@ LOCALE_NAMES = {}
|
|||
:meta hide-value:
|
||||
"""
|
||||
|
||||
RTL_LOCALES: Set[str] = set()
|
||||
RTL_LOCALES: set[str] = set()
|
||||
"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
|
||||
:py:obj:`locales_initialize`)."""
|
||||
|
||||
|
@ -90,74 +118,37 @@ def get_translations():
|
|||
return _flask_babel_get_translations()
|
||||
|
||||
|
||||
def get_locale_descr(locale, locale_name):
|
||||
"""Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
|
||||
|
||||
:param locale: instance of :py:class:`Locale`
|
||||
:param locale_name: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
|
||||
"""
|
||||
|
||||
native_language, native_territory = _get_locale_descr(locale, locale_name)
|
||||
english_language, english_territory = _get_locale_descr(locale, 'en')
|
||||
|
||||
if native_territory == english_territory:
|
||||
english_territory = None
|
||||
|
||||
if not native_territory and not english_territory:
|
||||
if native_language == english_language:
|
||||
return native_language
|
||||
return native_language + ' (' + english_language + ')'
|
||||
|
||||
result = native_language + ', ' + native_territory + ' (' + english_language
|
||||
if english_territory:
|
||||
return result + ', ' + english_territory + ')'
|
||||
return result + ')'
|
||||
_TR_LOCALES: list[str] = []
|
||||
|
||||
|
||||
def _get_locale_descr(locale, language_code):
|
||||
language_name = locale.get_language_name(language_code).capitalize()
|
||||
if language_name and ('a' <= language_name[0] <= 'z'):
|
||||
language_name = language_name.capitalize()
|
||||
territory_name = locale.get_territory_name(language_code)
|
||||
return language_name, territory_name
|
||||
def get_translation_locales() -> list[str]:
    """Return the sorted list of translation locales (delimiter is
    *underscore*, e.g. ``pt_BR``).

    The names are the sub-folders of :origin:`searx/translations` that contain
    a ``LC_MESSAGES`` directory.  The folder is scanned on the first call only,
    the result is kept in the module global ``_TR_LOCALES`` and returned as-is
    on subsequent calls.
    """

    global _TR_LOCALES  # pylint:disable=global-statement

    if not _TR_LOCALES:
        translations_dir = Path(searx_dir) / 'translations'
        _TR_LOCALES = sorted(
            entry.name
            for entry in translations_dir.iterdir()
            if entry.is_dir() and (entry / 'LC_MESSAGES').is_dir()
        )
    return _TR_LOCALES
|
||||
|
||||
|
||||
def locales_initialize(directory=None):
|
||||
def locales_initialize():
|
||||
"""Initialize locales environment of the SearXNG session.
|
||||
|
||||
- monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations`
|
||||
- init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`
|
||||
"""
|
||||
|
||||
directory = directory or pathlib.Path(__file__).parent / 'translations'
|
||||
logger.debug("locales_initialize: %s", directory)
|
||||
flask_babel.get_translations = get_translations
|
||||
|
||||
for tag, descr in ADDITIONAL_TRANSLATIONS.items():
|
||||
locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
|
||||
LOCALE_NAMES[tag] = descr
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(tag)
|
||||
|
||||
for tag in LOCALE_BEST_MATCH:
|
||||
descr = LOCALE_NAMES.get(tag)
|
||||
if not descr:
|
||||
locale = babel.Locale.parse(tag, sep='-')
|
||||
LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(tag)
|
||||
|
||||
for dirname in sorted(os.listdir(directory)):
|
||||
# Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations
|
||||
if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')):
|
||||
continue
|
||||
tag = dirname.replace('_', '-')
|
||||
descr = LOCALE_NAMES.get(tag)
|
||||
if not descr:
|
||||
locale = babel.Locale.parse(dirname)
|
||||
LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
|
||||
if locale.text_direction == 'rtl':
|
||||
RTL_LOCALES.add(tag)
|
||||
LOCALE_NAMES.update(data.LOCALES["LOCALE_NAMES"])
|
||||
RTL_LOCALES.update(data.LOCALES["RTL_LOCALES"])
|
||||
|
||||
|
||||
def region_tag(locale: babel.Locale) -> str:
|
||||
|
@ -177,7 +168,7 @@ def language_tag(locale: babel.Locale) -> str:
|
|||
return sxng_lang
|
||||
|
||||
|
||||
def get_locale(locale_tag: str) -> Optional[babel.Locale]:
|
||||
def get_locale(locale_tag: str) -> babel.Locale | None:
|
||||
"""Returns a :py:obj:`babel.Locale` object parsed from argument
|
||||
``locale_tag``"""
|
||||
try:
|
||||
|
@ -190,7 +181,7 @@ def get_locale(locale_tag: str) -> Optional[babel.Locale]:
|
|||
|
||||
def get_official_locales(
|
||||
territory: str, languages=None, regional: bool = False, de_facto: bool = True
|
||||
) -> Set[babel.Locale]:
|
||||
) -> set[babel.Locale]:
|
||||
"""Returns a list of :py:obj:`babel.Locale` with languages from
|
||||
:py:obj:`babel.languages.get_official_languages`.
|
||||
|
||||
|
@ -376,7 +367,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
|
|||
return default
|
||||
|
||||
|
||||
def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
|
||||
def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str | None = None) -> str | None:
|
||||
"""Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.
|
||||
|
||||
:param str searxng_locale: SearXNG's internal representation of locale (de,
|
||||
|
@ -425,7 +416,7 @@ def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Opti
|
|||
return get_engine_locale(searxng_locale, engine_locales, default=fallback)
|
||||
|
||||
|
||||
def build_engine_locales(tag_list: List[str]):
|
||||
def build_engine_locales(tag_list: list[str]):
|
||||
"""From a list of locale tags a dictionary is built that can be passed by
|
||||
argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function
|
||||
is mainly used by :py:obj:`match_locale` and is similar to what the
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
'''List of SearXNG's locale codes.
|
||||
|
||||
This file is generated automatically by::
|
||||
.. hint::
|
||||
|
||||
./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
|
||||
Don't modify this file, this file is generated by::
|
||||
|
||||
./manage data.traits
|
||||
'''
|
||||
|
||||
sxng_locales = (
|
||||
|
|
|
@ -31,9 +31,11 @@ languages_file_header = """\
|
|||
# -*- coding: utf-8 -*-
|
||||
'''List of SearXNG's locale codes.
|
||||
|
||||
This file is generated automatically by::
|
||||
.. hint::
|
||||
|
||||
./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
|
||||
Don't modify this file, this file is generated by::
|
||||
|
||||
./manage data.traits
|
||||
'''
|
||||
|
||||
sxng_locales = (
|
||||
|
|
103
searxng_extra/update/update_locales.py
Normal file
103
searxng_extra/update/update_locales.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Update locale names in :origin:`searx/data/locales.json` used by
|
||||
:ref:`searx.locales`
|
||||
|
||||
- :py:obj:`searx.locales.RTL_LOCALES`
|
||||
- :py:obj:`searx.locales.LOCALE_NAMES`
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Set
|
||||
import json
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
import babel
|
||||
import babel.languages
|
||||
import babel.core
|
||||
|
||||
from searx import searx_dir
|
||||
from searx.locales import (
|
||||
ADDITIONAL_TRANSLATIONS,
|
||||
LOCALE_BEST_MATCH,
|
||||
get_translation_locales,
|
||||
)
|
||||
|
||||
LOCALE_DATA_FILE = Path(searx_dir) / 'data' / 'locales.json'
|
||||
TRANSLATOINS_FOLDER = Path(searx_dir) / 'translations'
|
||||
|
||||
|
||||
def main():
    """Rebuild :origin:`searx/data/locales.json`.

    The display names (``LOCALE_NAMES``) and the right-to-left tags
    (``RTL_LOCALES``) are collected from three sources; a tag that already got
    a description from an earlier source is not touched by a later one:

    1. :py:obj:`ADDITIONAL_TRANSLATIONS` -- the description is taken verbatim,
       the text direction comes from the locale the tag is mapped to in
       :py:obj:`LOCALE_BEST_MATCH`.
    2. the tags of :py:obj:`LOCALE_BEST_MATCH`
    3. the translation locales from :py:obj:`get_translation_locales`

    The result is written as JSON to ``LOCALE_DATA_FILE``.
    """

    locale_names = {}
    rtl_locales: Set[str] = set()

    for tag, descr in ADDITIONAL_TRANSLATIONS.items():
        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
        locale_names[tag] = descr
        if locale.text_direction == 'rtl':
            rtl_locales.add(tag)

    for tag in LOCALE_BEST_MATCH:
        if not locale_names.get(tag):
            locale = babel.Locale.parse(tag, sep='-')
            # get_locale_descr() expects the *underscore* notation
            locale_names[tag] = get_locale_descr(locale, tag.replace('-', '_'))
            if locale.text_direction == 'rtl':
                rtl_locales.add(tag)

    for tr_locale in get_translation_locales():
        # translation folders use '_' / SearXNG tags use '-'
        sxng_tag = tr_locale.replace('_', '-')
        if not locale_names.get(sxng_tag):
            locale = babel.Locale.parse(tr_locale)
            locale_names[sxng_tag] = get_locale_descr(locale, tr_locale)
            if locale.text_direction == 'rtl':
                rtl_locales.add(sxng_tag)

    content = {
        "LOCALE_NAMES": locale_names,
        # A set of strings has no stable iteration order across interpreter
        # runs and ``sort_keys`` only sorts dict keys, not list items: sort
        # explicitly so that the generated file is reproducible and does not
        # produce spurious diffs in the translation commits.
        "RTL_LOCALES": sorted(rtl_locales),
    }

    with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
        json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
|
||||
|
||||
|
||||
def get_locale_descr(locale: babel.Locale, tr_locale):
    """Build the display name of a locale, e.g. ``Français (French)`` or
    ``Português, Brasil (Portuguese, Brazil)``.

    The native name comes first, followed by the English name in parentheses.
    A territory is appended to the language name (separated by a comma) when
    one is available; the English territory is left out when it is identical
    to the native one.  If there is no territory at all and the native and
    English language names are equal, only the language name is returned
    (e.g. ``Esperanto``).

    :param locale: instance of :py:class:`Locale`
    :param tr_locale: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
    """

    native_language, native_territory = _get_locale_descr(locale, tr_locale)
    english_language, english_territory = _get_locale_descr(locale, 'en')

    if native_territory == english_territory:
        # don't repeat the same territory name in the English part
        english_territory = None

    if not native_territory and not english_territory:
        # no territory name at all
        if native_language == english_language:
            return native_language
        return native_language + ' (' + english_language + ')'

    # A territory name may be missing on one side only: build both parts
    # independently instead of concatenating a possible ``None``.
    native_part = native_language
    if native_territory:
        native_part += ', ' + native_territory

    english_part = english_language
    if english_territory:
        english_part += ', ' + english_territory

    return native_part + ' (' + english_part + ')'


def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str | None]:
    """Return the tuple ``(language name, territory name)`` of ``locale``,
    both expressed in the language of ``tr_locale``.

    The language name is normalized by :py:obj:`str.capitalize` (first
    character upper case, remainder lower case).  The territory name is passed
    through unchanged and can be ``None``.
    """
    # NOTE(review): presumably Babel can return None for the language name
    # (hence the type-ignore); in this case the AttributeError is propagated.
    language_name = locale.get_language_name(tr_locale).capitalize()  # type: ignore
    territory_name = locale.get_territory_name(tr_locale)
    return language_name, territory_name
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -7,6 +7,7 @@ data.:
|
|||
all : update searx/sxng_locales.py and searx/data/*
|
||||
traits : update searx/data/engine_traits.json & searx/sxng_locales.py
|
||||
useragents: update searx/data/useragents.json with the most recent versions of Firefox
|
||||
locales : update searx/data/locales.json from babel
|
||||
EOF
|
||||
}
|
||||
|
||||
|
@ -16,6 +17,7 @@ data.all() {
|
|||
pyenv.activate
|
||||
data.traits
|
||||
data.useragents
|
||||
data.locales
|
||||
|
||||
build_msg DATA "update searx/data/osm_keys_tags.json"
|
||||
pyenv.cmd python searxng_extra/update/update_osm_keys_tags.py
|
||||
|
@ -49,6 +51,15 @@ data.useragents() {
|
|||
dump_return $?
|
||||
}
|
||||
|
||||
data.locales() {
    # Regenerate searx/data/locales.json by running the update script.
    # The work is done in a subshell with 'set -e', so the first failing
    # command ends the subshell; its exit status is handed to dump_return.
    (
        set -e
        pyenv.activate
        build_msg DATA "update searx/data/locales.json"
        python searxng_extra/update/update_locales.py
    )
    dump_return $?
}
|
||||
|
||||
docs.prebuild() {
|
||||
build_msg DOCS "build ${DOCS_BUILD}/includes"
|
||||
(
|
||||
|
|
|
@ -96,10 +96,15 @@ weblate.translations.commit() {
|
|||
build_msg BABEL 'compile translation catalogs into binary MO files'
|
||||
pybabel compile --statistics \
|
||||
-d "searx/translations"
|
||||
|
||||
# update searx/data/locales.json
|
||||
data.locales
|
||||
|
||||
# git add/commit (no push)
|
||||
commit_body=$(cd "${TRANSLATIONS_WORKTREE}"; git log --pretty=format:'%h - %as - %aN <%ae>' "${existing_commit_hash}..HEAD")
|
||||
commit_message=$(echo -e "[translations] update from Weblate\n\n${commit_body}")
|
||||
git add searx/translations
|
||||
git add searx/data/locales.json
|
||||
git commit -m "${commit_message}"
|
||||
)
|
||||
exitcode=$?
|
||||
|
|
Loading…
Reference in a new issue