From 077d6e9efaf3b34e390d8387925117e2260779cb Mon Sep 17 00:00:00 2001 From: dingedi Date: Fri, 23 Sep 2022 13:59:13 +0200 Subject: [PATCH 1/2] move and improve_translation in language.py, use it for transliteration --- app/app.py | 37 +++---------------------------------- app/language.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/app/app.py b/app/app.py index 6986344..1221152 100644 --- a/app/app.py +++ b/app/app.py @@ -15,7 +15,7 @@ from translatehtml import translate_html from werkzeug.utils import secure_filename from app import flood, remove_translated_files, security -from app.language import detect_languages, transliterate +from app.language import detect_languages, transliterate, improve_translation_formatting from .api_keys import Database, RemoteDatabase from .suggestions import Database as SuggestionsDatabase @@ -483,46 +483,15 @@ def create_app(args): if text_format not in ["text", "html"]: abort(400, description="%s format is not supported" % text_format) - def improve_translation(source, translation): - source = source.strip() - - source_last_char = source[len(source) - 1] - translation_last_char = translation[len(translation) - 1] - - punctuation_chars = ['!', '?', '.', ',', ';'] - if source_last_char in punctuation_chars: - if translation_last_char != source_last_char: - if translation_last_char in punctuation_chars: - translation = translation[:-1] - - translation += source_last_char - elif translation_last_char in punctuation_chars: - translation = translation[:-1] - - if source.islower(): - return translation.lower() - - if source.isupper(): - return translation.upper() - - if source[0].islower(): - return translation[0].lower() + translation[1:] - - if source[0].isupper(): - return translation[0].upper() + translation[1:] - - return translation - try: if batch: results = [] for idx, text in enumerate(q): translator = src_langs[idx].get_translation(tgt_lang) - if text_format == "html": translated_text = str(translate_html(translator, text)) else: - translated_text = improve_translation(text, translator.translate( + translated_text = improve_translation_formatting(text, translator.translate( transliterate(text, target_lang=source_langs[idx]["language"]))) results.append(unescape(translated_text)) @@ -545,7 +514,7 @@ def create_app(args): if text_format == "html": translated_text = str(translate_html(translator, q)) else: - translated_text = improve_translation(q, translator.translate( + translated_text = improve_translation_formatting(q, translator.translate( transliterate(q, target_lang=source_langs[0]["language"]))) if source_lang == "auto": diff --git a/app/language.py b/app/language.py index 42bdbf2..6cee0cd 100644 --- a/app/language.py +++ b/app/language.py @@ -79,6 +79,41 @@ def detect_languages(text): return [{"confidence": l.confidence, "language": l.code} for l in candidate_langs] +def improve_translation_formatting(source, translation, improve_punctuation=True): + source = source.strip() + + if not len(source): + return "" + + if improve_punctuation: + source_last_char = source[len(source) - 1] + translation_last_char = translation[len(translation) - 1] + + punctuation_chars = ['!', '?', '.', ',', ';'] + if source_last_char in punctuation_chars: + if translation_last_char != source_last_char: + if translation_last_char in punctuation_chars: + translation = translation[:-1] + + translation += source_last_char + elif translation_last_char in punctuation_chars: + translation = translation[:-1] + + if source.islower(): + return translation.lower() + + if source.isupper(): + return translation.upper() + + if source[0].islower(): + return translation[0].lower() + translation[1:] + + if source[0].isupper(): + return translation[0].upper() + translation[1:] + + return translation + + def __transliterate_line(transliterator, line_text): new_text = [] @@ -94,6 +129,8 @@ def __transliterate_line(transliterator, line_text): # the actual transliteration of the word t_word = transliterator.transliterate(orig_word.strip(string.punctuation)) + t_word = improve_translation_formatting(orig_word.strip(string.punctuation), t_word, improve_punctuation=False) + # if transliteration fails, default back to the original word if not t_word: t_word = orig_word From c66a519751d5e01c3b948bb1dd34e19457f3ae71 Mon Sep 17 00:00:00 2001 From: dingedi Date: Fri, 23 Sep 2022 14:20:22 +0200 Subject: [PATCH 2/2] restore formatting only if transliteration dont fail --- app/language.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/language.py b/app/language.py index 6cee0cd..2e2e65f 100644 --- a/app/language.py +++ b/app/language.py @@ -129,12 +129,12 @@ def __transliterate_line(transliterator, line_text): # the actual transliteration of the word t_word = transliterator.transliterate(orig_word.strip(string.punctuation)) - t_word = improve_translation_formatting(orig_word.strip(string.punctuation), t_word, improve_punctuation=False) - # if transliteration fails, default back to the original word if not t_word: t_word = orig_word else: + t_word = improve_translation_formatting(orig_word.strip(string.punctuation), t_word, improve_punctuation=False) + # add back any stripped punctuation if r_diff: t_word = t_word + "".join(r_diff)