diff --git a/libretranslate/app.py b/libretranslate/app.py index 397951c..0952290 100644 --- a/libretranslate/app.py +++ b/libretranslate/app.py @@ -37,6 +37,19 @@ from libretranslate.locales import ( from .api_keys import Database, RemoteDatabase from .suggestions import Database as SuggestionsDatabase +# Rough map of emoji characters +emojis = {e: True for e in \ + [ord(' ')] + # Spaces + list(range(0x1F600,0x1F64F)) + # Emoticons + list(range(0x1F300,0x1F5FF)) + # Misc Symbols and Pictographs + list(range(0x1F680,0x1F6FF)) + # Transport and Map + list(range(0x2600,0x26FF)) + # Misc symbols + list(range(0x2700,0x27BF)) + # Dingbats + list(range(0xFE00,0xFE0F)) + # Variation Selectors + list(range(0x1F900,0x1F9FF)) + # Supplemental Symbols and Pictographs + list(range(0x1F1E6,0x1F1FF)) + # Flags + list(range(0x20D0,0x20FF)) # Combining Diacritical Marks for Symbols +} def get_version(): try: @@ -153,6 +166,19 @@ def filter_unique(seq, extra): seen_add = seen.add return [x for x in seq if not (x in seen or seen_add(x))] + +def detect_translatable(src_texts): + if isinstance(src_texts, list): + return any(detect_translatable(t) for t in src_texts) + + for ch in src_texts: + if not (ord(ch) in emojis): + return True + + # All emojis + return False + + def create_app(args): from libretranslate.init import boot @@ -647,13 +673,17 @@ def create_app(args): if batch: request.req_cost = max(1, len(q)) - - if source_lang == "auto": - candidate_langs = detect_languages(src_texts) - detected_src_lang = candidate_langs[0] + + translatable = detect_translatable(src_texts) + if translatable: + if source_lang == "auto": + candidate_langs = detect_languages(src_texts) + detected_src_lang = candidate_langs[0] + else: + detected_src_lang = {"confidence": 100.0, "language": source_lang} else: - detected_src_lang = {"confidence": 100.0, "language": source_lang} - + detected_src_lang = {"confidence": 0.0, "language": "en"} + src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None) if src_lang is None: @@ -679,14 +709,18 @@ def create_app(args): if translator is None: abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code)) - if text_format == "html": - translated_text = unescape(str(translate_html(translator, text))) - alternatives = [] # Not supported for html yet + if translatable: + if text_format == "html": + translated_text = unescape(str(translate_html(translator, text))) + alternatives = [] # Not supported for html yet + else: + hypotheses = translator.hypotheses(text, num_alternatives + 1) + translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value)) + alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text) else: - hypotheses = translator.hypotheses(text, num_alternatives + 1) - translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value)) - alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text) - + translated_text = text # Cannot translate, send the original text back + alternatives = [] + batch_results.append(translated_text) batch_alternatives.append(alternatives) @@ -703,14 +737,18 @@ def create_app(args): if translator is None: abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code)) - if text_format == "html": - translated_text = unescape(str(translate_html(translator, q))) - alternatives = [] # Not supported for html yet + if translatable: + if text_format == "html": + translated_text = unescape(str(translate_html(translator, q))) + alternatives = [] # Not supported for html yet + else: + hypotheses = translator.hypotheses(q, num_alternatives + 1) + translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value)) + alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text) else: - hypotheses = translator.hypotheses(q, num_alternatives + 1) - translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value)) - alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text) - + translated_text = q # Cannot translate, send the original text back + alternatives = [] + result = {"translatedText": translated_text} if source_lang == "auto":