diff --git a/app/app.py b/app/app.py index 06a294e..36e1ad3 100644 --- a/app/app.py +++ b/app/app.py @@ -15,7 +15,7 @@ from translatehtml import translate_html from werkzeug.utils import secure_filename from app import flood, remove_translated_files, security -from app.language import detect_languages, transliterate, improve_translation_formatting +from app.language import detect_languages, improve_translation_formatting from .api_keys import Database, RemoteDatabase from .suggestions import Database as SuggestionsDatabase @@ -489,8 +489,7 @@ def create_app(args): if text_format == "html": translated_text = str(translate_html(translator, text)) else: - translated_text = improve_translation_formatting(text, translator.translate( - transliterate(text, target_lang=source_langs[idx]["language"]))) + translated_text = improve_translation_formatting(text, translator.translate(text)) results.append(unescape(translated_text)) if source_lang == "auto": @@ -512,8 +511,7 @@ def create_app(args): if text_format == "html": translated_text = str(translate_html(translator, q)) else: - translated_text = improve_translation_formatting(q, translator.translate( - transliterate(q, target_lang=source_langs[0]["language"]))) + translated_text = improve_translation_formatting(q, translator.translate(q)) if source_lang == "auto": return jsonify( diff --git a/app/init.py b/app/init.py index 9ada3f7..4b081ac 100644 --- a/app/init.py +++ b/app/init.py @@ -1,6 +1,5 @@ from pathlib import Path -import polyglot from argostranslate import package, translate import app.language @@ -9,7 +8,6 @@ import app.language def boot(load_only=None): try: check_and_install_models(load_only_lang_codes=load_only) - check_and_install_transliteration() except Exception as e: print("Cannot update models (normal if you're offline): %s" % str(e)) @@ -59,36 +57,4 @@ def check_and_install_models(force=False, load_only_lang_codes=None): print( "Loaded support for %s languages (%s models total)!" % (len(translate.get_installed_languages()), len(available_packages)) - ) - - -def check_and_install_transliteration(force=False): - # 'en' is not a supported transliteration language - transliteration_languages = [ - l.code for l in app.language.load_languages() if l.code != "en" - ] - - # check installed - install_needed = [] - if not force: - t_packages_path = Path(polyglot.polyglot_path) / "transliteration2" - for lang in transliteration_languages: - if not ( - t_packages_path / lang / f"transliteration.{lang}.tar.bz2" - ).exists(): - install_needed.append(lang) - else: - install_needed = transliteration_languages - - # install the needed transliteration packages - if install_needed: - print( - f"Installing transliteration models for the following languages: {', '.join(install_needed)}" - ) - - from polyglot.downloader import Downloader - - downloader = Downloader() - - for lang in install_needed: - downloader.download(f"transliteration2.{lang}") + ) \ No newline at end of file diff --git a/app/language.py b/app/language.py index 2e2e65f..a19ad2f 100644 --- a/app/language.py +++ b/app/language.py @@ -2,7 +2,6 @@ import string from argostranslate import translate from polyglot.detect.base import Detector, UnknownLanguage -from polyglot.transliteration.base import Transliterator __languages = None @@ -113,52 +112,3 @@ def improve_translation_formatting(source, translation, improve_punctuation=True return translation - -def __transliterate_line(transliterator, line_text): - new_text = [] - - # transliteration is done word by word - for orig_word in line_text.split(" "): - # remove any punctuation on the right side - r_word = orig_word.rstrip(string.punctuation) - r_diff = set(char for char in orig_word) - set(char for char in r_word) - # and on the left side - l_word = orig_word.lstrip(string.punctuation) - l_diff = set(char for char in orig_word) - set(char for char in l_word) - - # the actual transliteration of the word - t_word = transliterator.transliterate(orig_word.strip(string.punctuation)) - - # if transliteration fails, default back to the original word - if not t_word: - t_word = orig_word - else: - t_word = improve_translation_formatting(orig_word.strip(string.punctuation), t_word, improve_punctuation=False) - - # add back any stripped punctuation - if r_diff: - t_word = t_word + "".join(r_diff) - if l_diff: - t_word = "".join(l_diff) + t_word - - new_text.append(t_word) - - # rebuild the text - return " ".join(new_text) - - -def transliterate(text, target_lang="en"): - # initialize the transliterator from polyglot - transliterator = Transliterator(target_lang=target_lang) - - # check for multiline string - if "\n" in text: - lines = [] - # process each line separate - for line in text.split("\n"): - lines.append(__transliterate_line(transliterator, line)) - - # rejoin multiline string - return "\n".join(lines) - else: - return __transliterate_line(transliterator, text) diff --git a/install_models.py b/install_models.py index 0976ef2..7aa3c68 100755 --- a/install_models.py +++ b/install_models.py @@ -1,6 +1,6 @@ #!/usr/bin/env python import argparse -from app.init import check_and_install_models, check_and_install_transliteration +from app.init import check_and_install_models if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -10,4 +10,3 @@ if __name__ == "__main__": if len(lang_codes) == 0 or lang_codes[0] == '': lang_codes = None check_and_install_models(force=True, load_only_lang_codes=lang_codes) - check_and_install_transliteration(force=True)