mirror of
https://github.com/LibreTranslate/LibreTranslate.git
synced 2025-03-31 05:09:45 +00:00
Merge pull request #764 from pierotofy/emoji
Detect emoji translation requests
This commit is contained in:
commit
cfb1a92170
1 changed files with 58 additions and 20 deletions
|
@ -37,6 +37,19 @@ from libretranslate.locales import (
|
|||
from .api_keys import Database, RemoteDatabase
|
||||
from .suggestions import Database as SuggestionsDatabase
|
||||
|
||||
# Rough map of emoji characters
|
||||
emojis = {e: True for e in \
|
||||
[ord(' ')] + # Spaces
|
||||
list(range(0x1F600,0x1F64F)) + # Emoticons
|
||||
list(range(0x1F300,0x1F5FF)) + # Misc Symbols and Pictographs
|
||||
list(range(0x1F680,0x1F6FF)) + # Transport and Map
|
||||
list(range(0x2600,0x26FF)) + # Misc symbols
|
||||
list(range(0x2700,0x27BF)) + # Dingbats
|
||||
list(range(0xFE00,0xFE0F)) + # Variation Selectors
|
||||
list(range(0x1F900,0x1F9FF)) + # Supplemental Symbols and Pictographs
|
||||
list(range(0x1F1E6,0x1F1FF)) + # Flags
|
||||
list(range(0x20D0,0x20FF)) # Combining Diacritical Marks for Symbols
|
||||
}
|
||||
|
||||
def get_version():
|
||||
try:
|
||||
|
@ -153,6 +166,19 @@ def filter_unique(seq, extra):
|
|||
seen_add = seen.add
|
||||
return [x for x in seq if not (x in seen or seen_add(x))]
|
||||
|
||||
|
||||
def detect_translatable(src_texts):
|
||||
if isinstance(src_texts, list):
|
||||
return any(detect_translatable(t) for t in src_texts)
|
||||
|
||||
for ch in src_texts:
|
||||
if not (ord(ch) in emojis):
|
||||
return True
|
||||
|
||||
# All emojis
|
||||
return False
|
||||
|
||||
|
||||
def create_app(args):
|
||||
from libretranslate.init import boot
|
||||
|
||||
|
@ -647,13 +673,17 @@ def create_app(args):
|
|||
|
||||
if batch:
|
||||
request.req_cost = max(1, len(q))
|
||||
|
||||
if source_lang == "auto":
|
||||
candidate_langs = detect_languages(src_texts)
|
||||
detected_src_lang = candidate_langs[0]
|
||||
|
||||
translatable = detect_translatable(src_texts)
|
||||
if translatable:
|
||||
if source_lang == "auto":
|
||||
candidate_langs = detect_languages(src_texts)
|
||||
detected_src_lang = candidate_langs[0]
|
||||
else:
|
||||
detected_src_lang = {"confidence": 100.0, "language": source_lang}
|
||||
else:
|
||||
detected_src_lang = {"confidence": 100.0, "language": source_lang}
|
||||
|
||||
detected_src_lang = {"confidence": 0.0, "language": "en"}
|
||||
|
||||
src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None)
|
||||
|
||||
if src_lang is None:
|
||||
|
@ -679,14 +709,18 @@ def create_app(args):
|
|||
if translator is None:
|
||||
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
|
||||
|
||||
if text_format == "html":
|
||||
translated_text = unescape(str(translate_html(translator, text)))
|
||||
alternatives = [] # Not supported for html yet
|
||||
if translatable:
|
||||
if text_format == "html":
|
||||
translated_text = unescape(str(translate_html(translator, text)))
|
||||
alternatives = [] # Not supported for html yet
|
||||
else:
|
||||
hypotheses = translator.hypotheses(text, num_alternatives + 1)
|
||||
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
|
||||
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||
else:
|
||||
hypotheses = translator.hypotheses(text, num_alternatives + 1)
|
||||
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
|
||||
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||
|
||||
translated_text = text # Cannot translate, send the original text back
|
||||
alternatives = []
|
||||
|
||||
batch_results.append(translated_text)
|
||||
batch_alternatives.append(alternatives)
|
||||
|
||||
|
@ -703,14 +737,18 @@ def create_app(args):
|
|||
if translator is None:
|
||||
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
|
||||
|
||||
if text_format == "html":
|
||||
translated_text = unescape(str(translate_html(translator, q)))
|
||||
alternatives = [] # Not supported for html yet
|
||||
if translatable:
|
||||
if text_format == "html":
|
||||
translated_text = unescape(str(translate_html(translator, q)))
|
||||
alternatives = [] # Not supported for html yet
|
||||
else:
|
||||
hypotheses = translator.hypotheses(q, num_alternatives + 1)
|
||||
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
|
||||
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||
else:
|
||||
hypotheses = translator.hypotheses(q, num_alternatives + 1)
|
||||
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
|
||||
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||
|
||||
translated_text = q # Cannot translate, send the original text back
|
||||
alternatives = []
|
||||
|
||||
result = {"translatedText": translated_text}
|
||||
|
||||
if source_lang == "auto":
|
||||
|
|
Loading…
Reference in a new issue