From ba8b8d97a118a27d3556af521003edb1f5b96540 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Mon, 3 Jun 2024 12:42:32 -0400 Subject: [PATCH] Feat: support for alternative translations --- VERSION | 2 +- libretranslate/app.py | 81 +++++++++++++++--------- libretranslate/default_values.py | 5 ++ libretranslate/main.py | 7 ++ libretranslate/templates/app.js.template | 2 + 5 files changed, 66 insertions(+), 31 deletions(-) diff --git a/VERSION b/VERSION index f01291b..dc1e644 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.5.7 +1.6.0 diff --git a/libretranslate/app.py b/libretranslate/app.py index 74d53c8..72077f8 100644 --- a/libretranslate/app.py +++ b/libretranslate/app.py @@ -148,6 +148,10 @@ def get_routes_limits(args, api_keys_db): return res +def unique_list(seq): + seen = set() + seen_add = seen.add + return [x for x in seq if not (x in seen or seen_add(x))] def create_app(args): from libretranslate.init import boot @@ -496,6 +500,14 @@ def create_app(args): Format of source text: * `text` - Plain text * `html` - HTML markup + - in: formData + name: alternatives + schema: + type: integer + default: 0 + example: 3 + required: false + description: Preferred number of alternative translations - in: formData name: api_key schema: @@ -558,11 +570,13 @@ def create_app(args): source_lang = json.get("source") target_lang = json.get("target") text_format = json.get("format") + num_alternatives = int(json.get("alternatives", 0)) else: q = request.values.get("q") source_lang = request.values.get("source") target_lang = request.values.get("target") text_format = request.values.get("format") + num_alternatives = request.values.get("alternatives", 0) if not q: abort(400, description=_("Invalid request: missing %(name)s parameter", name='q')) @@ -570,6 +584,14 @@ def create_app(args): abort(400, description=_("Invalid request: missing %(name)s parameter", name='source')) if not target_lang: abort(400, description=_("Invalid request: missing %(name)s parameter", name='target')) + + try: + num_alternatives = max(0, int(num_alternatives)) + except ValueError: + abort(400, description=_("Invalid request: %(name)s parameter is not a number", name='alternatives')) + + if args.alternatives_limit != -1 and num_alternatives > args.alternatives_limit: + abort(400, description=_("Invalid request: %(name)s parameter must be <= %(value)s", name='alternatives', value=args.alternatives_limit)) if not request.is_json: # Normalize line endings to UNIX style (LF) only so we can consistently @@ -626,54 +648,53 @@ def create_app(args): try: if batch: - results = [] + batch_results = [] + batch_alternatives = [] for text in q: translator = src_lang.get_translation(tgt_lang) if translator is None: abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code)) if text_format == "html": - translated_text = str(translate_html(translator, text)) + translated_text = unescape(str(translate_html(translator, text))) + alternatives = [] # Not supported for html yet else: - translated_text = improve_translation_formatting(text, translator.translate(text)) + hypotheses = translator.hypotheses(text, num_alternatives + 1) + translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value)) + alternatives = unique_list([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))]) + + batch_results.append(translated_text) + batch_alternatives.append(alternatives) + + result = {"translatedText": batch_results} - results.append(unescape(translated_text)) if source_lang == "auto": - return jsonify( - { - "translatedText": results, - "detectedLanguage": [detected_src_lang] * len(q) - } - ) - else: - return jsonify( - { - "translatedText": results - } - ) + result["detectedLanguage"] = [detected_src_lang] * len(q) + if num_alternatives > 0: + result["alternatives"] = batch_alternatives + + return jsonify(result) else: translator = src_lang.get_translation(tgt_lang) if translator is None: abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code)) if text_format == "html": - translated_text = str(translate_html(translator, q)) + translated_text = unescape(str(translate_html(translator, q))) + alternatives = [] # Not supported for html yet else: - translated_text = improve_translation_formatting(q, translator.translate(q)) + hypotheses = translator.hypotheses(q, num_alternatives + 1) + translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value)) + alternatives = unique_list([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))]) + + result = {"translatedText": translated_text} if source_lang == "auto": - return jsonify( - { - "translatedText": unescape(translated_text), - "detectedLanguage": detected_src_lang - } - ) - else: - return jsonify( - { - "translatedText": unescape(translated_text) - } - ) + result["detectedLanguage"] = detected_src_lang + if num_alternatives > 0: + result["alternatives"] = alternatives + + return jsonify(result) except Exception as e: raise e abort(500, description=_("Cannot translate text: %(text)s", text=str(e))) diff --git a/libretranslate/default_values.py b/libretranslate/default_values.py index b93cd76..2231a70 100644 --- a/libretranslate/default_values.py +++ b/libretranslate/default_values.py @@ -161,6 +161,11 @@ _default_options_objects = [ 'default_value': None, 'value_type': 'str' }, + { + 'name': 'ALTERNATIVES_LIMIT', + 'default_value': -1, + 'value_type': 'int' + }, { 'name': 'THREADS', 'default_value': 4, diff --git a/libretranslate/main.py b/libretranslate/main.py index 6866a55..a342710 100644 --- a/libretranslate/main.py +++ b/libretranslate/main.py @@ -161,6 +161,13 @@ def get_args(): metavar="", help="Set available languages (ar,de,en,es,fr,ga,hi,it,ja,ko,pt,ru,zh)", ) + parser.add_argument( + "--alternatives-limit", + default=DEFARGS['ALTERNATIVES_LIMIT'], + type=int, + metavar="", + help="Set the maximum number of supported alternative translations (%(default)s)", + ) parser.add_argument( "--threads", default=DEFARGS['THREADS'], diff --git a/libretranslate/templates/app.js.template b/libretranslate/templates/app.js.template index 6377724..7727451 100644 --- a/libretranslate/templates/app.js.template +++ b/libretranslate/templates/app.js.template @@ -134,6 +134,7 @@ document.addEventListener('DOMContentLoaded', function(){ ' source: ' + this.$options.filters.escape(this.sourceLang) + ',', ' target: ' + this.$options.filters.escape(this.targetLang) + ',', ' format: "' + (this.isHtml ? "html" : "text") + '",', + ' alternatives: 3,', ' api_key: "' + (localStorage.getItem("api_key") || "") + '"', ' }),', ' headers: { "Content-Type": "application/json" }', @@ -241,6 +242,7 @@ document.addEventListener('DOMContentLoaded', function(){ data.append("source", self.sourceLang); data.append("target", self.targetLang); data.append("format", self.isHtml ? "html" : "text"); + data.append("alternatives", 3); data.append("api_key", localStorage.getItem("api_key") || ""); if (self.apiSecret) data.append("secret", self.apiSecret);