mirror of
https://github.com/LibreTranslate/LibreTranslate.git
synced 2024-11-27 02:11:00 +00:00
161 lines
5.5 KiB
Python
Executable file
161 lines
5.5 KiB
Python
Executable file
#!/usr/bin/env python
|
|
import os
|
|
import sys
|
|
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
import json
|
|
import re
|
|
|
|
import polib
|
|
from babel.messages.frontend import main as pybabel
|
|
from flask_swagger import swagger
|
|
from translatehtml import translate_html
|
|
|
|
from libretranslate.app import create_app, get_version
|
|
from libretranslate.language import improve_translation_formatting, load_languages
|
|
from libretranslate.locales import get_available_locale_codes, swag_eval
|
|
from libretranslate.main import get_args
|
|
|
|
# Update strings
|
|
if __name__ == "__main__":
|
|
print("Loading languages")
|
|
languages = load_languages()
|
|
en_lang = next((l for l in languages if l.code == 'en'), None)
|
|
if en_lang is None:
|
|
print("Error: English model not found. You need it to run this script.")
|
|
exit(1)
|
|
|
|
locales_dir = os.path.join("libretranslate", "locales")
|
|
if not os.path.isdir(locales_dir):
|
|
os.makedirs(locales_dir)
|
|
|
|
# Dump language list so it gets picked up by pybabel
|
|
langs_file = os.path.join(locales_dir, ".langs.py")
|
|
lang_names = {}
|
|
with open(langs_file, 'w') as f:
|
|
for l in languages:
|
|
f.write("_(%s)\n" % json.dumps(l.name))
|
|
lang_names[l.name] = True
|
|
|
|
# Additional names that might be missing
|
|
for l in ["Serbian", "Ukrainian", "Vietnamese"]:
|
|
if not l in lang_names:
|
|
f.write("_(%s)\n" % json.dumps(l))
|
|
lang_names[l] = True
|
|
print("Wrote %s" % langs_file)
|
|
|
|
# Dump swagger strings
|
|
args = get_args()
|
|
app = create_app(args)
|
|
swag = swagger(app)
|
|
|
|
swag_strings = []
|
|
def add_swag_string(s):
|
|
if not s in swag_strings:
|
|
swag_strings.append(s)
|
|
swag_eval(swag, add_swag_string)
|
|
|
|
swag_file = os.path.join(locales_dir, ".swag.py")
|
|
with open(swag_file, 'w') as f:
|
|
for ss in swag_strings:
|
|
f.write("_(%s)\n" % json.dumps(ss))
|
|
print("Wrote %s" % swag_file)
|
|
|
|
messagespot = os.path.join(locales_dir, "messages.pot")
|
|
print("Updating %s" % messagespot)
|
|
sys.argv = ["", "extract", "-F", "babel.cfg", "-k", "_e _h",
|
|
"--copyright-holder", "LibreTranslate Authors",
|
|
"--project", "LibreTranslate",
|
|
"--version", get_version(),
|
|
"-o", messagespot, "libretranslate"]
|
|
pybabel()
|
|
|
|
lang_map = {
|
|
'zt': 'zh_Hant'
|
|
}
|
|
lang_codes = [lang_map.get(l.code, l.code) for l in languages if l.code != "en"]
|
|
all_folders = [d for d in os.listdir(locales_dir) if os.path.isdir(os.path.join(locales_dir, d))]
|
|
review_map = {}
|
|
|
|
# Init/update
|
|
for l in all_folders:
|
|
cmd = "init"
|
|
if os.path.isdir(os.path.join(locales_dir, l, "LC_MESSAGES")):
|
|
cmd = "update"
|
|
|
|
sys.argv = ["", cmd, "-i", messagespot, "-d", locales_dir, "-l", l] + (["--no-fuzzy-matching"] if cmd == "update" else [])
|
|
pybabel()
|
|
|
|
meta_file = os.path.join(locales_dir, l, "meta.json")
|
|
reviewed = False
|
|
if not os.path.isfile(meta_file):
|
|
with open(meta_file, 'w') as f:
|
|
f.write(json.dumps({
|
|
'name': l if l not in lang_codes else next(lang.name for lang in languages if lang_map.get(lang.code, lang.code) == l),
|
|
'reviewed': False
|
|
}, indent=4))
|
|
print("Wrote %s" % meta_file)
|
|
else:
|
|
with open(meta_file) as f:
|
|
reviewed = json.loads(f.read()).get('reviewed', False)
|
|
|
|
review_map[l] = reviewed
|
|
|
|
# Automatically translate strings with libretranslate
|
|
# when a language model is available and a string is empty
|
|
|
|
locales = get_available_locale_codes(only_reviewed=False)
|
|
for locale in locales:
|
|
if locale == 'en':
|
|
continue
|
|
if review_map.get(locale):
|
|
# Don't automatically translate reviewed languages
|
|
continue
|
|
|
|
tgt_lang = next((l for l in languages if l.code == locale), None)
|
|
|
|
if tgt_lang is None:
|
|
# We cannot translate
|
|
continue
|
|
|
|
translator = en_lang.get_translation(tgt_lang)
|
|
|
|
messages_file = os.path.join(locales_dir, locale, "LC_MESSAGES", 'messages.po')
|
|
if os.path.isfile(messages_file):
|
|
print("Translating '%s'" % locale)
|
|
pofile = polib.pofile(messages_file)
|
|
c = 0
|
|
|
|
for entry in pofile.untranslated_entries():
|
|
text = entry.msgid
|
|
|
|
# Extract placeholders
|
|
placeholders = re.findall(r'%\(?[^\)]*\)?s', text)
|
|
|
|
for p in range(0, len(placeholders)):
|
|
text = text.replace(placeholders[p], "<x>%s</x>" % p)
|
|
|
|
if len(placeholders) > 0:
|
|
translated = str(translate_html(translator, text))
|
|
else:
|
|
translated = improve_translation_formatting(text, translator.translate(text))
|
|
|
|
# Restore placeholders
|
|
for p in range(0, len(placeholders)):
|
|
tag = "<x>%s</x>" % p
|
|
if tag in translated:
|
|
translated = translated.replace(tag, placeholders[p])
|
|
else:
|
|
# Meh, append
|
|
translated += " " + placeholders[p]
|
|
|
|
print(entry.msgid, " --> ", translated)
|
|
entry.msgstr = translated
|
|
c += 1
|
|
|
|
if c > 0:
|
|
pofile.save(messages_file)
|
|
print("Saved %s" % messages_file)
|
|
|
|
|
|
|