Auto self-translation

This commit is contained in:
Piero Toffanin 2023-01-04 15:36:26 -05:00
parent 50c9b62595
commit 3cbbd8ae16
10 changed files with 135 additions and 60 deletions

16
compile_locales.py Executable file
View file

@ -0,0 +1,16 @@
#!/usr/bin/env python
import sys
import os
from babel.messages.frontend import main as pybabel
if __name__ == "__main__":
    # The catalogs are kept under the package directory; create it when absent
    # so the compile step always has a directory to work on.
    target_dir = os.path.join("libretranslate", "locales")
    os.makedirs(target_dir, exist_ok=True)

    print("Compiling locales")

    # pybabel() is invoked without arguments, so its command line
    # ("compile -d <dir>") is handed over through sys.argv.
    sys.argv = ["", "compile", "-d", target_dir]
    pybabel()

View file

@ -1,16 +0,0 @@
#!/usr/bin/env python
import sys
import os
from babel.messages.frontend import main as pybabel
if __name__ == "__main__":
    # Make sure the catalog directory exists before asking Babel to compile it.
    out_dir = os.path.join("libretranslate", "translations")
    os.makedirs(out_dir, exist_ok=True)

    print("Compiling translations")

    # The command line for pybabel ("compile -d <dir>") is passed via sys.argv,
    # since pybabel() itself is called with no arguments.
    sys.argv = ["", "compile", "-d", out_dir]
    pybabel()

View file

@ -19,6 +19,7 @@ from flask_babel import Babel, gettext as _
from libretranslate import flood, remove_translated_files, security from libretranslate import flood, remove_translated_files, security
from libretranslate.language import detect_languages, improve_translation_formatting from libretranslate.language import detect_languages, improve_translation_formatting
from libretranslate.locales import get_available_locales
from .api_keys import Database, RemoteDatabase from .api_keys import Database, RemoteDatabase
from .suggestions import Database as SuggestionsDatabase from .suggestions import Database as SuggestionsDatabase
@ -1020,11 +1021,11 @@ def create_app(args):
@babel.localeselector @babel.localeselector
def get_locale(): def get_locale():
# TODO: populate from available locales # TODO: populate from available locales
return request.accept_languages.best_match(['en', 'it']) return request.accept_languages.best_match(get_available_locales())
def gettext_escaped(*args, **kwargs): def gettext_escaped(*args, **kwargs):
return _(*args, **kwargs).replace("'", "\\'") return _(*args, **kwargs).replace("'", "\\'")
app.jinja_env.globals.update(_e=gettext_escaped) app.jinja_env.globals.update(N_=gettext_escaped)
# Call factory function to create our blueprint # Call factory function to create our blueprint
swaggerui_blueprint = get_swaggerui_blueprint(SWAGGER_URL, API_URL) swaggerui_blueprint = get_swaggerui_blueprint(SWAGGER_URL, API_URL)

View file

@ -0,0 +1,9 @@
import os
from functools import cache
@cache
def get_available_locales():
    """Return the locale codes that have a message catalog, English first.

    A locale is considered available when the ``locales`` directory next to
    this module contains ``<code>/LC_MESSAGES``. ``'en'`` is always the first
    element (it needs no catalog); the remaining codes are sorted so the
    result does not depend on directory listing order.

    The result is cached for the lifetime of the process, and every call
    returns the same list object, so callers should not mutate it.

    Returns:
        list[str]: ``['en']`` followed by the other available locale codes.
        Just ``['en']`` when the ``locales`` directory does not exist.
    """
    locales_dir = os.path.join(os.path.dirname(__file__), 'locales')
    try:
        entries = os.listdir(locales_dir)
    except (FileNotFoundError, NotADirectoryError):
        # No catalogs have been generated yet: only the source language.
        entries = []

    translated = sorted(
        name for name in entries
        # 'en' is prepended below; skip it here so it is never listed twice.
        if name != 'en'
        and os.path.isdir(os.path.join(locales_dir, name, 'LC_MESSAGES'))
    )
    return ['en'] + translated

View file

@ -7,24 +7,24 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: PROJECT VERSION\n" "Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2023-01-04 12:27-0500\n" "POT-Creation-Date: 2023-01-04 15:34-0500\n"
"PO-Revision-Date: 2023-01-04 12:27-0500\n" "PO-Revision-Date: 2023-01-04 12:27-0500\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: it\n"
"Language-Team: it <LL@li.org>\n" "Language-Team: it <LL@li.org>\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n" "Language: it\n"
"MIME-Version: 1.0\n" "MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n" "Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n" "Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"Generated-By: Babel 2.11.0\n" "Generated-By: Babel 2.11.0\n"
#: libretranslate/app.py:57 #: libretranslate/app.py:58
msgid "Invalid JSON format" msgid "Invalid JSON format"
msgstr "" msgstr "Formato JSON non valido"
#: libretranslate/app.py:125 #: libretranslate/app.py:126
msgid "Auto Detect" msgid "Auto Detect"
msgstr "" msgstr "Rilevamento automatico"
#: libretranslate/templates/app.js.template:31 #: libretranslate/templates/app.js.template:31
msgid "Copy text" msgid "Copy text"
@ -33,9 +33,8 @@ msgstr "Copia testo"
#: libretranslate/templates/app.js.template:72 #: libretranslate/templates/app.js.template:72
#, python-format #, python-format
msgid "Cannot load %(url)s" msgid "Cannot load %(url)s"
msgstr "Impossibile caricare' %(url)s" msgstr "Non riesco a caricare %(url)s"
#: libretranslate/templates/index.html:6 #: libretranslate/templates/index.html:6
msgid "Free and Open Source Machine Translation API" msgid "Free and Open Source Machine Translation API"
msgstr "API di traduzione automatica open source" msgstr "API di traduzione automatica open source"

View file

@ -28,7 +28,7 @@ document.addEventListener('DOMContentLoaded', function(){
detectedLangText: "", detectedLangText: "",
copyTextLabel: '{{ _e("Copy text") }}', copyTextLabel: '{{ N_("Copy text") }}',
suggestions: false, suggestions: false,
isSuggesting: false, isSuggesting: false,
@ -69,7 +69,7 @@ document.addEventListener('DOMContentLoaded', function(){
} }
} }
} else { } else {
self.error = '{{ _e("Cannot load %(url)s", url="/frontend/settings") }}'; self.error = '{{ N_("Cannot load %(url)s", url="/frontend/settings") }}';
self.loading = false; self.loading = false;
} }
}; };

View file

@ -17,3 +17,4 @@ Werkzeug==2.2.2
requests==2.28.1 requests==2.28.1
redis==4.3.4 redis==4.3.4
prometheus-client==0.15.0 prometheus-client==0.15.0
polib==1.1.1

96
update_locales.py Executable file
View file

@ -0,0 +1,96 @@
#!/usr/bin/env python
import sys
import os
import re
import polib
from babel.messages.frontend import main as pybabel
from libretranslate.language import load_languages, improve_translation_formatting
from libretranslate.locales import get_available_locales
from translatehtml import translate_html
# Matches the python-format string placeholders used in the messages:
# "%s" and "%(name)s". Anything else following a "%" is left alone.
_PLACEHOLDER_RE = re.compile(r'%(?:\([^)]+\))?s')


def mask_placeholders(text):
    """Replace every placeholder in *text* with a numbered ``<x>N</x>`` tag.

    Each occurrence gets its own number, even when the same placeholder is
    repeated, so that every one of them can be restored after translation.

    Returns:
        tuple: ``(masked_text, placeholders)`` where ``placeholders[N]`` is
        the original text that tag ``<x>N</x>`` stands for.
    """
    placeholders = []

    def _tag(match):
        placeholders.append(match.group(0))
        return "<x>%s</x>" % (len(placeholders) - 1)

    return _PLACEHOLDER_RE.sub(_tag, text), placeholders


def restore_placeholders(translated, placeholders):
    """Put the original placeholders back in place of their ``<x>N</x>`` tags.

    A placeholder whose tag is no longer present in *translated* is appended
    at the end, separated by a space, so it is never silently dropped.
    """
    for index, placeholder in enumerate(placeholders):
        tag = "<x>%s</x>" % index
        if tag in translated:
            translated = translated.replace(tag, placeholder)
        else:
            # Meh, append
            translated += " " + placeholder
    return translated


def main():
    """Refresh messages.pot, init/update the catalogs and auto-translate them."""
    # Update strings
    locales_dir = os.path.join("libretranslate", "locales")
    os.makedirs(locales_dir, exist_ok=True)

    messagespot = os.path.join(locales_dir, "messages.pot")
    print("Updating %s" % messagespot)
    # pybabel() takes no arguments; its command line is passed via sys.argv.
    sys.argv = ["", "extract", "-F", "babel.cfg", "-o", messagespot, "libretranslate"]
    pybabel()

    # Load list of languages
    print("Loading languages")
    languages = load_languages()
    en_lang = next((lang for lang in languages if lang.code == 'en'), None)
    if en_lang is None:
        print("Error: English model not found. You need it to run this script.")
        sys.exit(1)

    # Compare language codes (not the language objects) against "en".
    lang_codes = [lang.code for lang in languages if lang.code != "en"]
    lang_codes = ["it"]  # TODO REMOVE

    # Init/update
    for code in lang_codes:
        # A catalog directory that already exists is updated, not re-created.
        cmd = "update" if os.path.isdir(os.path.join(locales_dir, code)) else "init"
        sys.argv = ["", cmd, "-i", messagespot, "-d", locales_dir, "-l", code]
        pybabel()

    # Automatically translate strings with libretranslate
    # when a language model is available and a string is empty
    for locale in get_available_locales():
        if locale == 'en':
            continue

        tgt_lang = next((lang for lang in languages if lang.code == locale), None)
        if tgt_lang is None:
            # We cannot translate
            continue

        translator = en_lang.get_translation(tgt_lang)
        messages_file = os.path.join(locales_dir, locale, "LC_MESSAGES", 'messages.po')
        if not os.path.isfile(messages_file):
            continue

        print("Translating '%s'" % locale)
        pofile = polib.pofile(messages_file)
        translated_count = 0

        for entry in pofile.untranslated_entries():
            # Extract placeholders
            text, placeholders = mask_placeholders(entry.msgid)

            if placeholders:
                # The masked text now contains <x> tags, so it goes through
                # the HTML-aware translation call.
                translated = str(translate_html(translator, text))
            else:
                translated = improve_translation_formatting(text, translator.translate(text))

            # Restore placeholders
            translated = restore_placeholders(translated, placeholders)

            print(entry.msgid, " --> ", translated)
            entry.msgstr = translated
            translated_count += 1

        # Only rewrite the catalog when something was actually translated.
        if translated_count > 0:
            pofile.save(messages_file)
            print("Saved %s" % messages_file)


if __name__ == "__main__":
    main()

View file

@ -1,31 +0,0 @@
#!/usr/bin/env python
import sys
import os
from babel.messages.frontend import main as pybabel
from libretranslate.language import load_languages
# Update strings
if __name__ == "__main__":
    translations_dir = os.path.join("libretranslate", "translations")
    os.makedirs(translations_dir, exist_ok=True)

    messagespot = os.path.join(translations_dir, "messages.pot")
    print("Updating %s" % messagespot)
    # pybabel() takes no arguments; its command line is passed via sys.argv.
    sys.argv = ["", "extract", "-F", "babel.cfg", "-o", messagespot, "libretranslate"]
    pybabel()

    # Load list of languages
    print("Loading languages")
    # Filter on the language code: comparing the language object itself to
    # the string "en" would not exclude English.
    languages = [lang.code for lang in load_languages() if lang.code != "en"]
    print(languages)
    languages = ["it"]

    for code in languages:
        # A catalog directory that already exists is updated, not re-created.
        cmd = "update" if os.path.isdir(os.path.join(translations_dir, code)) else "init"
        sys.argv = ["", cmd, "-i", messagespot, "-d", translations_dir, "-l", code]
        pybabel()