forked from mirrors/LibreTranslate
Remove Polyglot transliteration library
The model server for the Polyglot transliteration library is currently down and the project appears to be unmaintained; this is breaking LibreTranslate installs. This commit removes Polyglot for transliteration but keeps using Polyglot for language detection. See: - https://github.com/LibreTranslate/LibreTranslate/issues/344 - https://community.libretranslate.com/t/improving-transliteration-in-libretranslate/400
This commit is contained in:
parent
1c5b006c87
commit
ee638b407d
4 changed files with 5 additions and 92 deletions
|
@ -15,7 +15,7 @@ from translatehtml import translate_html
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
|
||||||
from app import flood, remove_translated_files, security
|
from app import flood, remove_translated_files, security
|
||||||
from app.language import detect_languages, transliterate, improve_translation_formatting
|
from app.language import detect_languages, improve_translation_formatting
|
||||||
|
|
||||||
from .api_keys import Database, RemoteDatabase
|
from .api_keys import Database, RemoteDatabase
|
||||||
from .suggestions import Database as SuggestionsDatabase
|
from .suggestions import Database as SuggestionsDatabase
|
||||||
|
@ -489,8 +489,7 @@ def create_app(args):
|
||||||
if text_format == "html":
|
if text_format == "html":
|
||||||
translated_text = str(translate_html(translator, text))
|
translated_text = str(translate_html(translator, text))
|
||||||
else:
|
else:
|
||||||
translated_text = improve_translation_formatting(text, translator.translate(
|
translated_text = improve_translation_formatting(text, translator.translate(text))
|
||||||
transliterate(text, target_lang=source_langs[idx]["language"])))
|
|
||||||
|
|
||||||
results.append(unescape(translated_text))
|
results.append(unescape(translated_text))
|
||||||
if source_lang == "auto":
|
if source_lang == "auto":
|
||||||
|
@ -512,8 +511,7 @@ def create_app(args):
|
||||||
if text_format == "html":
|
if text_format == "html":
|
||||||
translated_text = str(translate_html(translator, q))
|
translated_text = str(translate_html(translator, q))
|
||||||
else:
|
else:
|
||||||
translated_text = improve_translation_formatting(q, translator.translate(
|
translated_text = improve_translation_formatting(q, translator.translate(q))
|
||||||
transliterate(q, target_lang=source_langs[0]["language"])))
|
|
||||||
|
|
||||||
if source_lang == "auto":
|
if source_lang == "auto":
|
||||||
return jsonify(
|
return jsonify(
|
||||||
|
|
36
app/init.py
36
app/init.py
|
@ -1,6 +1,5 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import polyglot
|
|
||||||
from argostranslate import package, translate
|
from argostranslate import package, translate
|
||||||
|
|
||||||
import app.language
|
import app.language
|
||||||
|
@ -9,7 +8,6 @@ import app.language
|
||||||
def boot(load_only=None):
|
def boot(load_only=None):
|
||||||
try:
|
try:
|
||||||
check_and_install_models(load_only_lang_codes=load_only)
|
check_and_install_models(load_only_lang_codes=load_only)
|
||||||
check_and_install_transliteration()
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Cannot update models (normal if you're offline): %s" % str(e))
|
print("Cannot update models (normal if you're offline): %s" % str(e))
|
||||||
|
|
||||||
|
@ -59,36 +57,4 @@ def check_and_install_models(force=False, load_only_lang_codes=None):
|
||||||
print(
|
print(
|
||||||
"Loaded support for %s languages (%s models total)!"
|
"Loaded support for %s languages (%s models total)!"
|
||||||
% (len(translate.get_installed_languages()), len(available_packages))
|
% (len(translate.get_installed_languages()), len(available_packages))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def check_and_install_transliteration(force=False):
|
|
||||||
# 'en' is not a supported transliteration language
|
|
||||||
transliteration_languages = [
|
|
||||||
l.code for l in app.language.load_languages() if l.code != "en"
|
|
||||||
]
|
|
||||||
|
|
||||||
# check installed
|
|
||||||
install_needed = []
|
|
||||||
if not force:
|
|
||||||
t_packages_path = Path(polyglot.polyglot_path) / "transliteration2"
|
|
||||||
for lang in transliteration_languages:
|
|
||||||
if not (
|
|
||||||
t_packages_path / lang / f"transliteration.{lang}.tar.bz2"
|
|
||||||
).exists():
|
|
||||||
install_needed.append(lang)
|
|
||||||
else:
|
|
||||||
install_needed = transliteration_languages
|
|
||||||
|
|
||||||
# install the needed transliteration packages
|
|
||||||
if install_needed:
|
|
||||||
print(
|
|
||||||
f"Installing transliteration models for the following languages: {', '.join(install_needed)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
from polyglot.downloader import Downloader
|
|
||||||
|
|
||||||
downloader = Downloader()
|
|
||||||
|
|
||||||
for lang in install_needed:
|
|
||||||
downloader.download(f"transliteration2.{lang}")
|
|
|
@ -2,7 +2,6 @@ import string
|
||||||
|
|
||||||
from argostranslate import translate
|
from argostranslate import translate
|
||||||
from polyglot.detect.base import Detector, UnknownLanguage
|
from polyglot.detect.base import Detector, UnknownLanguage
|
||||||
from polyglot.transliteration.base import Transliterator
|
|
||||||
|
|
||||||
__languages = None
|
__languages = None
|
||||||
|
|
||||||
|
@ -113,52 +112,3 @@ def improve_translation_formatting(source, translation, improve_punctuation=True
|
||||||
|
|
||||||
return translation
|
return translation
|
||||||
|
|
||||||
|
|
||||||
def __transliterate_line(transliterator, line_text):
|
|
||||||
new_text = []
|
|
||||||
|
|
||||||
# transliteration is done word by word
|
|
||||||
for orig_word in line_text.split(" "):
|
|
||||||
# remove any punctuation on the right side
|
|
||||||
r_word = orig_word.rstrip(string.punctuation)
|
|
||||||
r_diff = set(char for char in orig_word) - set(char for char in r_word)
|
|
||||||
# and on the left side
|
|
||||||
l_word = orig_word.lstrip(string.punctuation)
|
|
||||||
l_diff = set(char for char in orig_word) - set(char for char in l_word)
|
|
||||||
|
|
||||||
# the actual transliteration of the word
|
|
||||||
t_word = transliterator.transliterate(orig_word.strip(string.punctuation))
|
|
||||||
|
|
||||||
# if transliteration fails, default back to the original word
|
|
||||||
if not t_word:
|
|
||||||
t_word = orig_word
|
|
||||||
else:
|
|
||||||
t_word = improve_translation_formatting(orig_word.strip(string.punctuation), t_word, improve_punctuation=False)
|
|
||||||
|
|
||||||
# add back any stripped punctuation
|
|
||||||
if r_diff:
|
|
||||||
t_word = t_word + "".join(r_diff)
|
|
||||||
if l_diff:
|
|
||||||
t_word = "".join(l_diff) + t_word
|
|
||||||
|
|
||||||
new_text.append(t_word)
|
|
||||||
|
|
||||||
# rebuild the text
|
|
||||||
return " ".join(new_text)
|
|
||||||
|
|
||||||
|
|
||||||
def transliterate(text, target_lang="en"):
|
|
||||||
# initialize the transliterator from polyglot
|
|
||||||
transliterator = Transliterator(target_lang=target_lang)
|
|
||||||
|
|
||||||
# check for multiline string
|
|
||||||
if "\n" in text:
|
|
||||||
lines = []
|
|
||||||
# process each line separate
|
|
||||||
for line in text.split("\n"):
|
|
||||||
lines.append(__transliterate_line(transliterator, line))
|
|
||||||
|
|
||||||
# rejoin multiline string
|
|
||||||
return "\n".join(lines)
|
|
||||||
else:
|
|
||||||
return __transliterate_line(transliterator, text)
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import argparse
|
import argparse
|
||||||
from app.init import check_and_install_models, check_and_install_transliteration
|
from app.init import check_and_install_models
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
@ -10,4 +10,3 @@ if __name__ == "__main__":
|
||||||
if len(lang_codes) == 0 or lang_codes[0] == '':
|
if len(lang_codes) == 0 or lang_codes[0] == '':
|
||||||
lang_codes = None
|
lang_codes = None
|
||||||
check_and_install_models(force=True, load_only_lang_codes=lang_codes)
|
check_and_install_models(force=True, load_only_lang_codes=lang_codes)
|
||||||
check_and_install_transliteration(force=True)
|
|
||||||
|
|
Loading…
Reference in a new issue