LibreTranslate/scripts/update_locales.py

154 lines
5.1 KiB
Python
Raw Normal View History

2023-01-04 20:36:26 +00:00
#!/usr/bin/env python
import os
2023-07-09 10:29:11 +00:00
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
2023-07-09 10:29:11 +00:00
import json
2023-01-04 20:36:26 +00:00
import re
2023-07-09 10:29:11 +00:00
2023-01-04 20:36:26 +00:00
import polib
from babel.messages.frontend import main as pybabel
2023-07-09 10:29:11 +00:00
from flask_swagger import swagger
2024-08-08 17:28:40 +00:00
from translatehtml import translate_html
2023-07-09 10:29:11 +00:00
from libretranslate.app import create_app, get_version
from libretranslate.language import improve_translation_formatting, load_languages
2023-01-06 15:27:39 +00:00
from libretranslate.locales import get_available_locale_codes, swag_eval
2023-01-05 19:07:39 +00:00
from libretranslate.main import get_args
2023-01-04 20:36:26 +00:00
# Update strings

# Matches the two gettext placeholder forms handled by this script:
# positional "%s" and named "%(name)s". The parenthesised name is matched
# as a unit so that a bare "%" or a "%s" followed by ordinary words is not
# swallowed into one oversized "placeholder".
PLACEHOLDER_RE = re.compile(r'%(?:\([^)]*\))?s')


def mask_placeholders(text):
    """Replace each placeholder in *text* with a numbered ``<x>N</x>`` tag.

    Every occurrence gets its own index (even when the same placeholder
    appears more than once), so the tags can be mapped back one-to-one
    after translation.

    Returns a ``(masked_text, placeholders)`` tuple, where ``placeholders[N]``
    is the original text that tag ``<x>N</x>`` stands for.
    """
    placeholders = []

    def _mask(match):
        placeholders.append(match.group(0))
        return "<x>%s</x>" % (len(placeholders) - 1)

    return PLACEHOLDER_RE.sub(_mask, text), placeholders


def restore_placeholders(translated, placeholders):
    """Put the original placeholders back into a translated string.

    Each ``<x>N</x>`` tag is replaced by ``placeholders[N]``. If a tag did
    not survive translation, the placeholder is appended at the end so the
    resulting format string still consumes every argument.
    """
    for index, placeholder in enumerate(placeholders):
        tag = "<x>%s</x>" % index
        if tag in translated:
            translated = translated.replace(tag, placeholder)
        else:
            # Meh, append
            translated += " " + placeholder
    return translated


def main():
    """Regenerate the message catalogs and machine-translate empty entries.

    Steps, in order:
      1. Load the language models (the English one is required).
      2. Dump language names and swagger strings into helper ``.py`` files
         inside the locales directory so pybabel extracts them.
      3. Run ``pybabel extract`` and then ``init``/``update`` per language,
         creating ``meta.json`` where it is missing.
      4. For every locale not marked as reviewed, fill untranslated ``.po``
         entries using the English -> target translator.
    """
    print("Loading languages")
    languages = load_languages()
    en_lang = next((lang for lang in languages if lang.code == 'en'), None)
    if en_lang is None:
        print("Error: English model not found. You need it to run this script.")
        # sys.exit instead of the site-provided exit() builtin, which is
        # not guaranteed to exist (e.g. when running with -S).
        sys.exit(1)

    locales_dir = os.path.join("libretranslate", "locales")
    os.makedirs(locales_dir, exist_ok=True)

    # Dump language list so it gets picked up by pybabel
    langs_file = os.path.join(locales_dir, ".langs.py")
    with open(langs_file, 'w', encoding="utf-8") as f:
        for lang in languages:
            f.write("_(%s)\n" % json.dumps(lang.name))
    print("Wrote %s" % langs_file)

    # Dump swagger strings
    args = get_args()
    app = create_app(args)
    swag = swagger(app)

    swag_strings = []

    def add_swag_string(s):
        # Keep first-seen order while skipping duplicates.
        if s not in swag_strings:
            swag_strings.append(s)

    swag_eval(swag, add_swag_string)

    swag_file = os.path.join(locales_dir, ".swag.py")
    with open(swag_file, 'w', encoding="utf-8") as f:
        for ss in swag_strings:
            f.write("_(%s)\n" % json.dumps(ss))
    print("Wrote %s" % swag_file)

    messagespot = os.path.join(locales_dir, "messages.pot")
    print("Updating %s" % messagespot)
    # pybabel's entry point reads its arguments from sys.argv.
    sys.argv = ["", "extract", "-F", "babel.cfg", "-k", "_e _h",
                "--copyright-holder", "LibreTranslate Authors",
                "--project", "LibreTranslate",
                "--version", get_version(),
                "-o", messagespot, "libretranslate"]
    pybabel()

    # Model language codes whose locale directory uses a different name.
    lang_map = {
        'zt': 'zh_Hant'
    }
    lang_codes = [lang_map.get(lang.code, lang.code) for lang in languages if lang.code != "en"]
    review_map = {}

    # Init/update
    for code in lang_codes:
        cmd = "init"
        if os.path.isdir(os.path.join(locales_dir, code, "LC_MESSAGES")):
            cmd = "update"

        sys.argv = ["", cmd, "-i", messagespot, "-d", locales_dir, "-l", code] + (["--no-fuzzy-matching"] if cmd == "update" else [])
        pybabel()

        meta_file = os.path.join(locales_dir, code, "meta.json")
        reviewed = False
        if not os.path.isfile(meta_file):
            with open(meta_file, 'w', encoding="utf-8") as f:
                f.write(json.dumps({
                    'name': next(lang.name for lang in languages if lang_map.get(lang.code, lang.code) == code),
                    'reviewed': False
                }, indent=4))
            print("Wrote %s" % meta_file)
        else:
            with open(meta_file, encoding="utf-8") as f:
                reviewed = json.load(f).get('reviewed', False)

        review_map[code] = reviewed

    # Automatically translate strings with libretranslate
    # when a language model is available and a string is empty
    locales = get_available_locale_codes(only_reviewed=False)
    print(locales)
    for locale in locales:
        if locale == 'en':
            continue

        if review_map.get(locale):
            # Don't automatically translate reviewed languages
            continue

        # Compare against the mapped locale code (same mapping used when the
        # catalogs were created above); otherwise locales such as zh_Hant,
        # whose model code is "zt", would never find their model.
        tgt_lang = next((lang for lang in languages if lang_map.get(lang.code, lang.code) == locale), None)
        if tgt_lang is None:
            # We cannot translate
            continue

        translator = en_lang.get_translation(tgt_lang)

        messages_file = os.path.join(locales_dir, locale, "LC_MESSAGES", 'messages.po')
        if not os.path.isfile(messages_file):
            continue

        print("Translating '%s'" % locale)
        pofile = polib.pofile(messages_file)
        translated_count = 0

        for entry in pofile.untranslated_entries():
            # Extract placeholders
            text, placeholders = mask_placeholders(entry.msgid)

            if placeholders:
                # The masked text contains <x> tags, so translate it as HTML
                # to let the tags travel through the translation.
                translated = str(translate_html(translator, text))
            else:
                translated = improve_translation_formatting(text, translator.translate(text))

            # Restore placeholders
            translated = restore_placeholders(translated, placeholders)

            print(entry.msgid, " --> ", translated)
            entry.msgstr = translated
            translated_count += 1

        if translated_count > 0:
            pofile.save(messages_file)
            print("Saved %s" % messages_file)


if __name__ == "__main__":
    main()