Auto self-translation

This commit is contained in:
Piero Toffanin 2023-01-04 15:36:26 -05:00
parent 50c9b62595
commit 3cbbd8ae16
10 changed files with 135 additions and 60 deletions

16
compile_locales.py Executable file
View file

@ -0,0 +1,16 @@
#!/usr/bin/env python
import sys
import os
from babel.messages.frontend import main as pybabel
if __name__ == "__main__":
    # Directory, relative to the current working directory, that holds
    # the message catalogs.
    catalog_root = os.path.join("libretranslate", "locales")
    os.makedirs(catalog_root, exist_ok=True)

    print("Compiling locales")

    # pybabel() is invoked without arguments, so the command line is
    # handed over through sys.argv (slot 0 is the unused program name).
    pybabel_args = ["", "compile", "-d", catalog_root]
    sys.argv = pybabel_args
    pybabel()

View file

@ -1,16 +0,0 @@
#!/usr/bin/env python
import sys
import os
from babel.messages.frontend import main as pybabel
if __name__ == "__main__":
    # Directory, relative to the current working directory, that holds
    # the message catalogs.
    catalog_root = os.path.join("libretranslate", "translations")
    os.makedirs(catalog_root, exist_ok=True)

    print("Compiling translations")

    # pybabel() is invoked without arguments, so the command line is
    # handed over through sys.argv (slot 0 is the unused program name).
    pybabel_args = ["", "compile", "-d", catalog_root]
    sys.argv = pybabel_args
    pybabel()

View file

@ -19,6 +19,7 @@ from flask_babel import Babel, gettext as _
from libretranslate import flood, remove_translated_files, security
from libretranslate.language import detect_languages, improve_translation_formatting
from libretranslate.locales import get_available_locales
from .api_keys import Database, RemoteDatabase
from .suggestions import Database as SuggestionsDatabase
@ -1020,11 +1021,11 @@ def create_app(args):
@babel.localeselector
def get_locale():
# TODO: populate from available locales
return request.accept_languages.best_match(['en', 'it'])
return request.accept_languages.best_match(get_available_locales())
def gettext_escaped(*args, **kwargs):
return _(*args, **kwargs).replace("'", "\\'")
app.jinja_env.globals.update(_e=gettext_escaped)
app.jinja_env.globals.update(N_=gettext_escaped)
# Call factory function to create our blueprint
swaggerui_blueprint = get_swaggerui_blueprint(SWAGGER_URL, API_URL)

View file

@ -0,0 +1,9 @@
import os
from functools import cache
@cache
def get_available_locales():
    """Return the locale codes that have a message catalog, 'en' first.

    A locale counts as available when the ``locales`` directory next to
    this module contains ``<code>/LC_MESSAGES``. ``'en'`` is always
    included and always first, because English is the source language
    and needs no catalog.

    The remaining codes are sorted so the result does not depend on the
    order in which the filesystem lists directory entries. A missing
    ``locales`` directory yields just ``['en']`` instead of raising.

    The result is cached for the lifetime of the process; callers should
    treat the returned list as read-only.
    """
    locales_dir = os.path.join(os.path.dirname(__file__), 'locales')
    try:
        entries = sorted(os.listdir(locales_dir))
    except (FileNotFoundError, NotADirectoryError):
        # No catalogs have been generated yet: only the source language.
        entries = []

    return ['en'] + [
        name for name in entries
        # Skip a stray 'en' catalog so the code is not listed twice.
        if name != 'en'
        and os.path.isdir(os.path.join(locales_dir, name, 'LC_MESSAGES'))
    ]

View file

@ -7,24 +7,24 @@ msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2023-01-04 12:27-0500\n"
"POT-Creation-Date: 2023-01-04 15:34-0500\n"
"PO-Revision-Date: 2023-01-04 12:27-0500\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: it\n"
"Language-Team: it <LL@li.org>\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"Language: it\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"Generated-By: Babel 2.11.0\n"
#: libretranslate/app.py:57
#: libretranslate/app.py:58
msgid "Invalid JSON format"
msgstr ""
msgstr "Formato JSON non valido"
#: libretranslate/app.py:125
#: libretranslate/app.py:126
msgid "Auto Detect"
msgstr ""
msgstr "Rilevamento automatico"
#: libretranslate/templates/app.js.template:31
msgid "Copy text"
@ -33,9 +33,8 @@ msgstr "Copia testo"
#: libretranslate/templates/app.js.template:72
#, python-format
msgid "Cannot load %(url)s"
msgstr "Impossibile caricare' %(url)s"
msgstr "Non riesco a caricare %(url)s"
#: libretranslate/templates/index.html:6
msgid "Free and Open Source Machine Translation API"
msgstr "API di traduzione automatica open source"

View file

@ -28,7 +28,7 @@ document.addEventListener('DOMContentLoaded', function(){
detectedLangText: "",
copyTextLabel: '{{ _e("Copy text") }}',
copyTextLabel: '{{ N_("Copy text") }}',
suggestions: false,
isSuggesting: false,
@ -69,7 +69,7 @@ document.addEventListener('DOMContentLoaded', function(){
}
}
} else {
self.error = '{{ _e("Cannot load %(url)s", url="/frontend/settings") }}';
self.error = '{{ N_("Cannot load %(url)s", url="/frontend/settings") }}';
self.loading = false;
}
};

View file

@ -17,3 +17,4 @@ Werkzeug==2.2.2
requests==2.28.1
redis==4.3.4
prometheus-client==0.15.0
polib==1.1.1

96
update_locales.py Executable file
View file

@ -0,0 +1,96 @@
#!/usr/bin/env python
import sys
import os
import re
import polib
from babel.messages.frontend import main as pybabel
from libretranslate.language import load_languages, improve_translation_formatting
from libretranslate.locales import get_available_locales
from translatehtml import translate_html
# Update strings

# Matches Python %-format string placeholders: "%(name)s" and "%s".
# The parenthesised name is optional but, when present, must be closed,
# so ordinary text after a percent sign ("100% of users") is not
# mistaken for a placeholder.
PLACEHOLDER_RE = re.compile(r"%(?:\([^)]*\))?s")


def mask_placeholders(text):
    """Replace each placeholder in *text* with a numbered ``<x>N</x>`` tag.

    Returns a ``(masked_text, placeholders)`` tuple where
    ``placeholders[N]`` is the original text hidden behind ``<x>N</x>``.
    Every occurrence gets its own index, even when the same placeholder
    appears more than once.
    """
    placeholders = []

    def _tag(match):
        placeholders.append(match.group(0))
        return "<x>%s</x>" % (len(placeholders) - 1)

    return PLACEHOLDER_RE.sub(_tag, text), placeholders


def restore_placeholders(translated, placeholders):
    """Put the original placeholders back in place of their ``<x>N</x>`` tags.

    A placeholder whose tag did not survive translation is appended to
    the end of the string so that it is never silently lost.
    """
    for index, placeholder in enumerate(placeholders):
        tag = "<x>%s</x>" % index
        if tag in translated:
            translated = translated.replace(tag, placeholder)
        else:
            # Meh, append
            translated += " " + placeholder
    return translated


def translate_message(translator, text):
    """Translate one msgid with *translator*, keeping its placeholders intact."""
    masked, placeholders = mask_placeholders(text)
    if placeholders:
        # The masked text contains markup tags, so it goes through the
        # HTML-aware translation helper.
        translated = str(translate_html(translator, masked))
    else:
        translated = improve_translation_formatting(text, translator.translate(text))
    return restore_placeholders(translated, placeholders)


def main():
    """Extract messages, init/update the catalogs and auto-translate empty entries."""
    locales_dir = os.path.join("libretranslate", "locales")
    os.makedirs(locales_dir, exist_ok=True)

    messagespot = os.path.join(locales_dir, "messages.pot")
    print("Updating %s" % messagespot)
    # pybabel() is called without arguments, so the command line is
    # passed through sys.argv.
    sys.argv = ["", "extract", "-F", "babel.cfg", "-o", messagespot, "libretranslate"]
    pybabel()

    # Load list of languages
    print("Loading languages")
    languages = load_languages()
    en_lang = next((lang for lang in languages if lang.code == 'en'), None)
    if en_lang is None:
        print("Error: English model not found. You need it to run this script.")
        sys.exit(1)

    # Compare on the language code: comparing the language object itself
    # with the string "en" never excluded English.
    lang_codes = [lang.code for lang in languages if lang.code != "en"]
    lang_codes = ["it"]  # TODO REMOVE

    # Init/update
    for code in lang_codes:
        cmd = "update" if os.path.isdir(os.path.join(locales_dir, code)) else "init"
        sys.argv = ["", cmd, "-i", messagespot, "-d", locales_dir, "-l", code]
        pybabel()

    # Automatically translate strings with libretranslate
    # when a language model is available and a string is empty
    for locale in get_available_locales():
        if locale == 'en':
            continue

        tgt_lang = next((lang for lang in languages if lang.code == locale), None)
        if tgt_lang is None:
            # We cannot translate
            continue

        translator = en_lang.get_translation(tgt_lang)
        if translator is None:
            # NOTE(review): assumes get_translation() returns None when no
            # en -> target model is installed - confirm against argostranslate.
            continue

        messages_file = os.path.join(locales_dir, locale, "LC_MESSAGES", 'messages.po')
        if not os.path.isfile(messages_file):
            continue

        print("Translating '%s'" % locale)
        pofile = polib.pofile(messages_file)
        translated_count = 0

        for entry in pofile.untranslated_entries():
            translated = translate_message(translator, entry.msgid)
            print(entry.msgid, " --> ", translated)
            entry.msgstr = translated
            translated_count += 1

        # Only rewrite the catalog when something actually changed.
        if translated_count > 0:
            pofile.save(messages_file)
            print("Saved %s" % messages_file)


if __name__ == "__main__":
    main()

View file

@ -1,31 +0,0 @@
#!/usr/bin/env python
import sys
import os
from babel.messages.frontend import main as pybabel
from libretranslate.language import load_languages
# Update strings
if __name__ == "__main__":
    # Directory, relative to the current working directory, that holds
    # the message template and the per-language catalogs.
    translations_dir = os.path.join("libretranslate", "translations")
    os.makedirs(translations_dir, exist_ok=True)

    messagespot = os.path.join(translations_dir, "messages.pot")
    print("Updating %s" % messagespot)
    # pybabel() is called without arguments, so the command line is
    # passed through sys.argv.
    sys.argv = ["", "extract", "-F", "babel.cfg", "-o", messagespot, "libretranslate"]
    pybabel()

    # Load list of languages
    print("Loading languages")
    # Compare on the language code: comparing the language object itself
    # with the string "en" never excluded English.
    languages = [lang.code for lang in load_languages() if lang.code != "en"]
    print(languages)
    # The computed list is overridden: only Italian is processed for now.
    languages = ["it"]

    for code in languages:
        # Create the catalog on first use, refresh it afterwards.
        cmd = "update" if os.path.isdir(os.path.join(translations_dir, code)) else "init"
        sys.argv = ["", cmd, "-i", messagespot, "-d", translations_dir, "-l", code]
        pybabel()