Merge pull request #764 from pierotofy/emoji

Detect emoji translation requests
This commit is contained in:
Piero Toffanin 2025-03-20 11:24:50 -04:00 committed by GitHub
commit cfb1a92170
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -37,6 +37,19 @@ from libretranslate.locales import (
from .api_keys import Database, RemoteDatabase
from .suggestions import Database as SuggestionsDatabase
# Rough map of emoji characters
emojis = {e: True for e in \
[ord(' ')] + # Spaces
list(range(0x1F600,0x1F64F)) + # Emoticons
list(range(0x1F300,0x1F5FF)) + # Misc Symbols and Pictographs
list(range(0x1F680,0x1F6FF)) + # Transport and Map
list(range(0x2600,0x26FF)) + # Misc symbols
list(range(0x2700,0x27BF)) + # Dingbats
list(range(0xFE00,0xFE0F)) + # Variation Selectors
list(range(0x1F900,0x1F9FF)) + # Supplemental Symbols and Pictographs
list(range(0x1F1E6,0x1F1FF)) + # Flags
list(range(0x20D0,0x20FF)) # Combining Diacritical Marks for Symbols
}
def get_version():
try:
@ -153,6 +166,19 @@ def filter_unique(seq, extra):
seen_add = seen.add
return [x for x in seq if not (x in seen or seen_add(x))]
def detect_translatable(src_texts):
if isinstance(src_texts, list):
return any(detect_translatable(t) for t in src_texts)
for ch in src_texts:
if not (ord(ch) in emojis):
return True
# All emojis
return False
def create_app(args):
from libretranslate.init import boot
@ -647,13 +673,17 @@ def create_app(args):
if batch:
request.req_cost = max(1, len(q))
if source_lang == "auto":
candidate_langs = detect_languages(src_texts)
detected_src_lang = candidate_langs[0]
translatable = detect_translatable(src_texts)
if translatable:
if source_lang == "auto":
candidate_langs = detect_languages(src_texts)
detected_src_lang = candidate_langs[0]
else:
detected_src_lang = {"confidence": 100.0, "language": source_lang}
else:
detected_src_lang = {"confidence": 100.0, "language": source_lang}
detected_src_lang = {"confidence": 0.0, "language": "en"}
src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None)
if src_lang is None:
@ -679,14 +709,18 @@ def create_app(args):
if translator is None:
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
if text_format == "html":
translated_text = unescape(str(translate_html(translator, text)))
alternatives = [] # Not supported for html yet
if translatable:
if text_format == "html":
translated_text = unescape(str(translate_html(translator, text)))
alternatives = [] # Not supported for html yet
else:
hypotheses = translator.hypotheses(text, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
else:
hypotheses = translator.hypotheses(text, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
translated_text = text # Cannot translate, send the original text back
alternatives = []
batch_results.append(translated_text)
batch_alternatives.append(alternatives)
@ -703,14 +737,18 @@ def create_app(args):
if translator is None:
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
if text_format == "html":
translated_text = unescape(str(translate_html(translator, q)))
alternatives = [] # Not supported for html yet
if translatable:
if text_format == "html":
translated_text = unescape(str(translate_html(translator, q)))
alternatives = [] # Not supported for html yet
else:
hypotheses = translator.hypotheses(q, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
else:
hypotheses = translator.hypotheses(q, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
translated_text = q # Cannot translate, send the original text back
alternatives = []
result = {"translatedText": translated_text}
if source_lang == "auto":