Merge pull request #630 from pierotofy/alternatives

Support for alternative translations
This commit is contained in:
Piero Toffanin 2024-06-03 13:03:19 -04:00 committed by GitHub
commit 76721891b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 66 additions and 31 deletions

View file

@ -1 +1 @@
1.5.7 1.6.0

View file

@ -148,6 +148,10 @@ def get_routes_limits(args, api_keys_db):
return res return res
def unique_list(seq):
    """Return the items of *seq* as a list with duplicates removed.

    The first occurrence of each item is kept and the relative order of
    the remaining items is unchanged.

    Args:
        seq: Iterable of hashable items.

    Returns:
        A new list containing each distinct item of *seq* once, in
        first-seen order.

    Raises:
        TypeError: If an item of *seq* is not hashable.
    """
    # dict keys are unique and keep insertion order, so this performs the
    # order-preserving de-duplication without a side-effecting filter.
    return list(dict.fromkeys(seq))
def create_app(args): def create_app(args):
from libretranslate.init import boot from libretranslate.init import boot
@ -496,6 +500,14 @@ def create_app(args):
Format of source text: Format of source text:
* `text` - Plain text * `text` - Plain text
* `html` - HTML markup * `html` - HTML markup
- in: formData
name: alternatives
schema:
type: integer
default: 0
example: 3
required: false
description: Preferred number of alternative translations
- in: formData - in: formData
name: api_key name: api_key
schema: schema:
@ -558,11 +570,13 @@ def create_app(args):
source_lang = json.get("source") source_lang = json.get("source")
target_lang = json.get("target") target_lang = json.get("target")
text_format = json.get("format") text_format = json.get("format")
num_alternatives = int(json.get("alternatives", 0))
else: else:
q = request.values.get("q") q = request.values.get("q")
source_lang = request.values.get("source") source_lang = request.values.get("source")
target_lang = request.values.get("target") target_lang = request.values.get("target")
text_format = request.values.get("format") text_format = request.values.get("format")
num_alternatives = request.values.get("alternatives", 0)
if not q: if not q:
abort(400, description=_("Invalid request: missing %(name)s parameter", name='q')) abort(400, description=_("Invalid request: missing %(name)s parameter", name='q'))
@ -571,6 +585,14 @@ def create_app(args):
if not target_lang: if not target_lang:
abort(400, description=_("Invalid request: missing %(name)s parameter", name='target')) abort(400, description=_("Invalid request: missing %(name)s parameter", name='target'))
try:
num_alternatives = max(0, int(num_alternatives))
except ValueError:
abort(400, description=_("Invalid request: %(name)s parameter is not a number", name='alternatives'))
if args.alternatives_limit != -1 and num_alternatives > args.alternatives_limit:
abort(400, description=_("Invalid request: %(name)s parameter must be <= %(value)s", name='alternatives', value=args.alternatives_limit))
if not request.is_json: if not request.is_json:
# Normalize line endings to UNIX style (LF) only so we can consistently # Normalize line endings to UNIX style (LF) only so we can consistently
# enforce character limits. # enforce character limits.
@ -626,54 +648,53 @@ def create_app(args):
try: try:
if batch: if batch:
results = [] batch_results = []
batch_alternatives = []
for text in q: for text in q:
translator = src_lang.get_translation(tgt_lang) translator = src_lang.get_translation(tgt_lang)
if translator is None: if translator is None:
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code)) abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
if text_format == "html": if text_format == "html":
translated_text = str(translate_html(translator, text)) translated_text = unescape(str(translate_html(translator, text)))
alternatives = [] # Not supported for html yet
else: else:
translated_text = improve_translation_formatting(text, translator.translate(text)) hypotheses = translator.hypotheses(text, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
alternatives = unique_list([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))])
batch_results.append(translated_text)
batch_alternatives.append(alternatives)
result = {"translatedText": batch_results}
results.append(unescape(translated_text))
if source_lang == "auto": if source_lang == "auto":
return jsonify( result["detectedLanguage"] = [detected_src_lang] * len(q)
{ if num_alternatives > 0:
"translatedText": results, result["alternatives"] = batch_alternatives
"detectedLanguage": [detected_src_lang] * len(q)
} return jsonify(result)
)
else:
return jsonify(
{
"translatedText": results
}
)
else: else:
translator = src_lang.get_translation(tgt_lang) translator = src_lang.get_translation(tgt_lang)
if translator is None: if translator is None:
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code)) abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
if text_format == "html": if text_format == "html":
translated_text = str(translate_html(translator, q)) translated_text = unescape(str(translate_html(translator, q)))
alternatives = [] # Not supported for html yet
else: else:
translated_text = improve_translation_formatting(q, translator.translate(q)) hypotheses = translator.hypotheses(q, num_alternatives + 1)
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
alternatives = unique_list([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))])
result = {"translatedText": translated_text}
if source_lang == "auto": if source_lang == "auto":
return jsonify( result["detectedLanguage"] = detected_src_lang
{ if num_alternatives > 0:
"translatedText": unescape(translated_text), result["alternatives"] = alternatives
"detectedLanguage": detected_src_lang
} return jsonify(result)
)
else:
return jsonify(
{
"translatedText": unescape(translated_text)
}
)
except Exception as e: except Exception as e:
raise e raise e
abort(500, description=_("Cannot translate text: %(text)s", text=str(e))) abort(500, description=_("Cannot translate text: %(text)s", text=str(e)))

View file

@ -161,6 +161,11 @@ _default_options_objects = [
'default_value': None, 'default_value': None,
'value_type': 'str' 'value_type': 'str'
}, },
{
'name': 'ALTERNATIVES_LIMIT',
'default_value': -1,
'value_type': 'int'
},
{ {
'name': 'THREADS', 'name': 'THREADS',
'default_value': 4, 'default_value': 4,

View file

@ -161,6 +161,13 @@ def get_args():
metavar="<comma-separated language codes>", metavar="<comma-separated language codes>",
help="Set available languages (ar,de,en,es,fr,ga,hi,it,ja,ko,pt,ru,zh)", help="Set available languages (ar,de,en,es,fr,ga,hi,it,ja,ko,pt,ru,zh)",
) )
parser.add_argument(
"--alternatives-limit",
default=DEFARGS['ALTERNATIVES_LIMIT'],
type=int,
metavar="<maximum number of alternatives translations>",
help="Set the maximum number of supported alternative translations (%(default)s)",
)
parser.add_argument( parser.add_argument(
"--threads", "--threads",
default=DEFARGS['THREADS'], default=DEFARGS['THREADS'],

View file

@ -134,6 +134,7 @@ document.addEventListener('DOMContentLoaded', function(){
' source: ' + this.$options.filters.escape(this.sourceLang) + ',', ' source: ' + this.$options.filters.escape(this.sourceLang) + ',',
' target: ' + this.$options.filters.escape(this.targetLang) + ',', ' target: ' + this.$options.filters.escape(this.targetLang) + ',',
' format: "' + (this.isHtml ? "html" : "text") + '",', ' format: "' + (this.isHtml ? "html" : "text") + '",',
' alternatives: 3,',
' api_key: "' + (localStorage.getItem("api_key") || "") + '"', ' api_key: "' + (localStorage.getItem("api_key") || "") + '"',
' }),', ' }),',
' headers: { "Content-Type": "application/json" }', ' headers: { "Content-Type": "application/json" }',
@ -241,6 +242,7 @@ document.addEventListener('DOMContentLoaded', function(){
data.append("source", self.sourceLang); data.append("source", self.sourceLang);
data.append("target", self.targetLang); data.append("target", self.targetLang);
data.append("format", self.isHtml ? "html" : "text"); data.append("format", self.isHtml ? "html" : "text");
data.append("alternatives", 3);
data.append("api_key", localStorage.getItem("api_key") || ""); data.append("api_key", localStorage.getItem("api_key") || "");
if (self.apiSecret) data.append("secret", self.apiSecret); if (self.apiSecret) data.append("secret", self.apiSecret);