Merge pull request #157 from dingedi/main

[WIP] Add files translation
This commit is contained in:
Piero Toffanin 2021-10-26 16:06:59 -04:00 committed by GitHub
commit 7727d8ddc3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 446 additions and 49 deletions

View file

@ -150,6 +150,7 @@ docker-compose up -d --build
| --require-api-key-origin | Require use of an API key for programmatic access to the API, unless the request origin matches this domain | `No restrictions on domain origin` | LT_REQUIRE_API_KEY_ORIGIN |
| --load-only | Set available languages | `all from argostranslate` | LT_LOAD_ONLY |
| --suggestions | Allow user suggestions | `false` | LT_SUGGESTIONS |
| --disable-files-translation | Disable files translation | `false` | LT_DISABLE_FILES_TRANSLATION |
Note that each argument has an equivalent env. variable that can be used instead. The env. variables overwrite the default values but have lower priority than the command arguments. They are particularly useful if used with Docker. Their names are the upper-snake-case form of the corresponding command arguments, with an `LT` prefix.

1
VERSION Normal file
View file

@ -0,0 +1 @@
1.2.6

View file

@ -1,18 +1,38 @@
import io
import os
import tempfile
import uuid
from functools import wraps
import pkg_resources
from flask import Flask, abort, jsonify, render_template, request
import argostranslatefiles
from argostranslatefiles import get_supported_formats
from flask import Flask, abort, jsonify, render_template, request, url_for, send_file
from flask_swagger import swagger
from flask_swagger_ui import get_swaggerui_blueprint
from translatehtml import translate_html
from werkzeug.utils import secure_filename
from app import flood
from app import flood, remove_translated_files, security
from app.language import detect_languages, transliterate
from .api_keys import Database
from .suggestions import Database as SuggestionsDatabase
from translatehtml import translate_html
def get_version():
    """
    Return the application version stored in the ``VERSION`` file.

    The file is opened by the relative name ``VERSION``, i.e. it is resolved
    against the current working directory of the process.

    :return: the stripped file content, or ``"?"`` when the file cannot be
        read or decoded.
    """
    try:
        with open("VERSION") as f:
            return f.read().strip()
    except (OSError, UnicodeDecodeError):
        # Only swallow "cannot read the file" failures; a bare ``except``
        # would also hide KeyboardInterrupt / SystemExit.
        return "?"
def get_upload_dir():
    """
    Return the directory used to store uploaded and translated files.

    The directory lives under the system temporary directory and is created
    on first use.

    :return: path of the ``libretranslate-files-translate`` directory.
    """
    upload_dir = os.path.join(tempfile.gettempdir(), "libretranslate-files-translate")

    # exist_ok avoids the check-then-create race when several workers or
    # requests reach this point at the same time.
    os.makedirs(upload_dir, exist_ok=True)

    return upload_dir
def get_json_dict(request):
d = request.get_json()
@ -30,7 +50,7 @@ def get_remote_address():
return ip
def get_req_limits(default_limit, api_keys_db, multiplier = 1):
def get_req_limits(default_limit, api_keys_db, multiplier=1):
req_limit = default_limit
if api_keys_db:
@ -44,7 +64,7 @@ def get_req_limits(default_limit, api_keys_db, multiplier = 1):
db_req_limit = api_keys_db.lookup(api_key)
if db_req_limit is not None:
req_limit = db_req_limit * multiplier
return req_limit
@ -79,6 +99,9 @@ def create_app(args):
if args.debug:
app.config["TEMPLATES_AUTO_RELOAD"] = True
if not args.disable_files_translation:
remove_translated_files.setup(get_upload_dir())
# Map userdefined frontend languages to argos language object.
if args.frontend_language_source == "auto":
frontend_argos_language_source = type(
@ -94,6 +117,12 @@ def create_app(args):
iter([l for l in languages if l.code == args.frontend_language_target]), None
)
frontend_argos_supported_files_format = []
for file_format in get_supported_formats():
for ff in file_format.supported_file_extensions:
frontend_argos_supported_files_format.append(ff)
# Raise AttributeError to prevent app startup if user input is not valid.
if frontend_argos_language_source is None:
raise AttributeError(
@ -140,7 +169,7 @@ def create_app(args):
ak = request.values.get("api_key")
if (
api_keys_db.lookup(ak) is None and request.headers.get("Origin") != args.require_api_key_origin
api_keys_db.lookup(ak) is None and request.headers.get("Origin") != args.require_api_key_origin
):
abort(
403,
@ -177,7 +206,7 @@ def create_app(args):
frontendTimeout=args.frontend_timeout,
api_keys=args.api_keys,
web_version=os.environ.get("LT_WEB") is not None,
version=pkg_resources.require("LibreTranslate")[0].version
version=get_version()
)
@app.route("/javascript-licenses", methods=["GET"])
@ -361,7 +390,7 @@ def create_app(args):
abort(
400,
description="Invalid request: Request (%d) exceeds text limit (%d)"
% (batch_size, args.batch_limit),
% (batch_size, args.batch_limit),
)
if args.char_limit != -1:
@ -374,40 +403,40 @@ def create_app(args):
abort(
400,
description="Invalid request: Request (%d) exceeds character limit (%d)"
% (chars, args.char_limit),
% (chars, args.char_limit),
)
if source_lang == "auto":
source_langs = []
if batch:
auto_detect_texts = q
auto_detect_texts = q
else:
auto_detect_texts = [q]
auto_detect_texts = [q]
overall_candidates = detect_languages(q)
for text_to_check in auto_detect_texts:
if len(text_to_check) > 40:
candidate_langs = detect_languages(text_to_check)
else:
# Unable to accurately detect languages for short texts
candidate_langs = overall_candidates
source_langs.append(candidate_langs[0]["language"])
if args.debug:
print(text_to_check, candidate_langs)
print("Auto detected: %s" % candidate_langs[0]["language"])
for text_to_check in auto_detect_texts:
if len(text_to_check) > 40:
candidate_langs = detect_languages(text_to_check)
else:
# Unable to accurately detect languages for short texts
candidate_langs = overall_candidates
source_langs.append(candidate_langs[0]["language"])
if args.debug:
print(text_to_check, candidate_langs)
print("Auto detected: %s" % candidate_langs[0]["language"])
else:
if batch:
source_langs = [source_lang for text in q]
else:
source_langs = [source_lang]
if batch:
source_langs = [source_lang for text in q]
else:
source_langs = [source_lang]
src_langs = [next(iter([l for l in languages if l.code == source_lang]), None) for source_lang in source_langs]
for idx, lang in enumerate(src_langs):
if lang is None:
abort(400, description="%s is not supported" % source_langs[idx])
if lang is None:
abort(400, description="%s is not supported" % source_langs[idx])
tgt_lang = next(iter([l for l in languages if l.code == target_lang]), None)
@ -420,19 +449,18 @@ def create_app(args):
if text_format not in ["text", "html"]:
abort(400, description="%s format is not supported" % text_format)
try:
if batch:
results = []
for idx, text in enumerate(q):
translator = src_langs[idx].get_translation(tgt_lang)
translator = src_langs[idx].get_translation(tgt_lang)
if text_format == "html":
translated_text = str(translate_html(translator, text))
else:
translated_text = translator.translate(transliterate(text, target_lang=source_langs[idx]))
if text_format == "html":
translated_text = str(translate_html(translator, text))
else:
translated_text = translator.translate(transliterate(text, target_lang=source_langs[idx]))
results.append(translated_text)
results.append(translated_text)
return jsonify(
{
"translatedText": results
@ -453,6 +481,167 @@ def create_app(args):
except Exception as e:
abort(500, description="Cannot translate text: %s" % str(e))
@app.route("/translate_file", methods=["POST"])
@access_check
def translate_file():
    """
    Translate file from a language to another
    ---
    tags:
      - translate
    consumes:
      - multipart/form-data
    parameters:
      - in: formData
        name: file
        type: file
        required: true
        description: File to translate
      - in: formData
        name: source
        schema:
          type: string
          example: en
        required: true
        description: Source language code
      - in: formData
        name: target
        schema:
          type: string
          example: es
        required: true
        description: Target language code
      - in: formData
        name: api_key
        schema:
          type: string
          example: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
        required: false
        description: API key
    responses:
      200:
        description: Translated file
        schema:
          id: translate
          type: object
          properties:
            translatedFileUrl:
              type: string
              description: Translated file url
      400:
        description: Invalid request
        schema:
          id: error-response
          type: object
          properties:
            error:
              type: string
              description: Error message
      500:
        description: Translation error
        schema:
          id: error-response
          type: object
          properties:
            error:
              type: string
              description: Error message
      429:
        description: Slow down
        schema:
          id: error-slow-down
          type: object
          properties:
            error:
              type: string
              description: Reason for slow down
      403:
        description: Banned
        schema:
          id: error-response
          type: object
          properties:
            error:
              type: string
              description: Error message
    """
    if args.disable_files_translation:
        abort(403, description="Files translation are disabled on this server.")

    source_lang = request.form.get("source")
    target_lang = request.form.get("target")
    # Use .get(): indexing request.files raises a generic bad-request error
    # when the part is absent, which would bypass the explicit message below.
    file = request.files.get("file")

    if not file:
        abort(400, description="Invalid request: missing file parameter")
    if not source_lang:
        abort(400, description="Invalid request: missing source parameter")
    if not target_lang:
        abort(400, description="Invalid request: missing target parameter")

    if file.filename == '':
        abort(400, description="Invalid request: empty file")

    # Only extensions reported by argostranslatefiles are accepted.
    if os.path.splitext(file.filename)[1] not in frontend_argos_supported_files_format:
        abort(400, description="Invalid request: file format not supported")

    src_lang = next((l for l in languages if l.code == source_lang), None)
    if src_lang is None:
        abort(400, description="%s is not supported" % source_lang)

    tgt_lang = next((l for l in languages if l.code == target_lang), None)
    if tgt_lang is None:
        abort(400, description="%s is not supported" % target_lang)

    try:
        # A random UUID prefix keeps uploads with the same name apart;
        # download_file strips everything up to the first dot again.
        filename = str(uuid.uuid4()) + '.' + secure_filename(file.filename)
        filepath = os.path.join(get_upload_dir(), filename)

        file.save(filepath)

        translated_file_path = argostranslatefiles.translate_file(src_lang.get_translation(tgt_lang), filepath)
        translated_filename = os.path.basename(translated_file_path)

        return jsonify(
            {
                "translatedFileUrl": url_for('download_file', filename=translated_filename, _external=True)
            }
        )
    except Exception as e:
        # Pass a string, not the exception object, as the error description.
        abort(500, description=str(e))
@app.route("/download_file/<string:filename>", methods=["GET"])
@access_check
def download_file(filename: str):
    """
    Download a translated file
    """
    if args.disable_files_translation:
        abort(400, description="Files translation are disabled on this server.")

    filepath = os.path.join(get_upload_dir(), filename)
    try:
        checked_filepath = security.path_traversal_check(filepath, get_upload_dir())
    except security.SuspiciousFileOperation:
        abort(400, description="Invalid filename")

    # The file may never have existed or may already have been cleaned up;
    # answer 404 instead of letting open() fail with an unhandled error.
    if not os.path.isfile(checked_filepath):
        abort(404, description="File not found")

    try:
        # The content is copied into memory and served from there.
        with open(checked_filepath, 'rb') as fo:
            return_data = io.BytesIO(fo.read())
    except FileNotFoundError:
        # Removed between the isfile() check and open().
        abort(404, description="File not found")

    # Stored names look like "<uuid>.<original name>": drop the UUID prefix.
    download_filename = filename.partition('.')[2]

    # NOTE(review): newer Flask releases rename `attachment_filename` to
    # `download_name` — confirm against the pinned Flask version.
    return send_file(return_data, as_attachment=True, attachment_filename=download_filename)
@app.route("/detect", methods=["POST"])
@access_check
def detect():
@ -571,6 +760,11 @@ def create_app(args):
suggestions:
type: boolean
description: Whether submitting suggestions is enabled.
supportedFilesFormat:
type: array
items:
type: string
description: Supported files format
language:
type: object
properties:
@ -598,6 +792,8 @@ def create_app(args):
"charLimit": args.char_limit,
"frontendTimeout": args.frontend_timeout,
"suggestions": args.suggestions,
"filesTranslation": not args.disable_files_translation,
"supportedFilesFormat": [] if args.disable_files_translation else frontend_argos_supported_files_format,
"language": {
"source": {
"code": frontend_argos_language_source.code,
@ -680,7 +876,7 @@ def create_app(args):
return jsonify({"success": True})
swag = swagger(app)
swag["info"]["version"] = "1.2.1"
swag["info"]["version"] = "1.3.0"
swag["info"]["title"] = "LibreTranslate"
@app.route("/spec")

View file

@ -115,6 +115,11 @@ _default_options_objects = [
'name': 'SUGGESTIONS',
'default_value': False,
'value_type': 'bool'
},
{
'name': 'DISABLE_FILES_TRANSLATION',
'default_value': False,
'value_type': 'bool'
}
]

View file

@ -105,6 +105,9 @@ def main():
parser.add_argument(
"--suggestions", default=DEFARGS['SUGGESTIONS'], action="store_true", help="Allow user suggestions"
)
parser.add_argument(
"--disable-files-translation", default=DEFARGS['DISABLE_FILES_TRANSLATION'], action="store_true", help="Disable files translation"
)
args = parser.parse_args()
app = create_app(args)

View file

@ -0,0 +1,26 @@
import atexit
import os
import time
from datetime import datetime
from apscheduler.schedulers.background import BackgroundScheduler
def remove_translated_files(upload_dir: str, max_age_seconds: float = 1800):
    """
    Delete regular files in ``upload_dir`` that have not been modified recently.

    Sub-directories are left untouched.

    :param upload_dir: directory to clean up.
    :param max_age_seconds: files whose modification time is older than this
        many seconds are removed (default: 1800, i.e. 30 minutes).
    """
    now = time.time()

    for name in os.listdir(upload_dir):
        path = os.path.join(upload_dir, name)
        try:
            if os.path.isfile(path) and (now - os.path.getmtime(path)) > max_age_seconds:
                os.remove(path)
        except FileNotFoundError:
            # The file disappeared between listing and stat/remove;
            # keep going so the rest of the directory is still cleaned up.
            continue
def setup(upload_dir):
    """
    Start the background job that periodically cleans ``upload_dir``.

    A daemon ``BackgroundScheduler`` runs ``remove_translated_files`` every
    30 minutes, and its shutdown is registered to run at interpreter exit.

    :param upload_dir: directory passed to ``remove_translated_files``.
    """
    cleanup_scheduler = BackgroundScheduler(daemon=True)
    cleanup_scheduler.add_job(
        remove_translated_files,
        "interval",
        minutes=30,
        kwargs={'upload_dir': upload_dir},
    )
    cleanup_scheduler.start()

    # Shut down the scheduler when exiting the app
    atexit.register(cleanup_scheduler.shutdown)

14
app/security.py Normal file
View file

@ -0,0 +1,14 @@
import os
class SuspiciousFileOperation(Exception):
    """Raised when a path points outside the directory it must stay within."""


def path_traversal_check(unsafe_path, known_safe_path):
    """
    Verify that ``unsafe_path`` is located inside ``known_safe_path``.

    Both paths are made absolute (which also collapses ``..`` segments)
    before being compared.

    :param unsafe_path: path built from untrusted input.
    :param known_safe_path: directory the path is required to stay within.
    :return: the absolute form of ``unsafe_path``.
    :raises SuspiciousFileOperation: if the path escapes ``known_safe_path``.
    """
    known_safe_path = os.path.abspath(known_safe_path)
    unsafe_path = os.path.abspath(unsafe_path)

    try:
        # commonpath compares whole path components. commonprefix compares
        # character by character, so "/tmp/uploads-evil/x" would wrongly be
        # accepted for the safe directory "/tmp/uploads".
        shared_root = os.path.commonpath([known_safe_path, unsafe_path])
    except ValueError:
        # Paths with nothing in common (e.g. different drives on Windows).
        shared_root = None

    if shared_root != known_safe_path:
        raise SuspiciousFileOperation("{} is not safe".format(unsafe_path))

    # Passes the check
    return unsafe_path

View file

@ -23,6 +23,10 @@ h3.header {
margin-top: 0 !important;
}
.mb-1 {
margin-bottom: 1rem;
}
.position-relative {
position: relative;
}
@ -114,6 +118,42 @@ h3.header {
display: flex;
}
.btn-switch-type {
background-color: #fff;
color: #42A5F5;
}
.btn-switch-type:focus {
background-color: inherit;
}
.btn-switch-type:hover {
background-color: #eee !important;
color: #42A5F5;
}
.btn-switch-type.active {
background-color: #42A5F5 !important;
color: #fff;
}
.file-dropzone {
font-size: 1.1rem;
border: 1px solid #ccc;
background: #f3f3f3;
padding: 1rem 2rem 1rem 1.5rem;
min-height: 220px;
position: relative;
}
.dropzone-content {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
}
.btn-action {
display: flex;
align-items: center;

View file

@ -30,6 +30,13 @@ document.addEventListener('DOMContentLoaded', function(){
suggestions: false,
isSuggesting: false,
supportedFilesFormat : [],
translationType: "text",
inputFile: false,
loadingFileTranslation: false,
translatedFileUrl: false,
filesTranslation: true,
},
mounted: function(){
var self = this;
@ -44,6 +51,8 @@ document.addEventListener('DOMContentLoaded', function(){
self.targetLang = self.settings.language.target.code;
self.charactersLimit = self.settings.charLimit;
self.suggestions = self.settings.suggestions;
self.supportedFilesFormat = self.settings.supportedFilesFormat;
self.filesTranslation = self.settings.filesTranslation;
}else {
self.error = "Cannot load /frontend/settings";
self.loading = false;
@ -139,7 +148,9 @@ document.addEventListener('DOMContentLoaded', function(){
'',
'console.log(await res.json());'].join("\n");
},
// Supported file extensions joined into one comma-separated string.
supportedFilesFormatFormatted: function() {
return this.supportedFilesFormat.join(', ');
},
isHtml: function(){
return htmlRegex.test(this.inputText);
},
@ -299,6 +310,67 @@ document.addEventListener('DOMContentLoaded', function(){
deleteText: function(e){
e.preventDefault();
this.inputText = this.translatedText = this.output = "";
},
// Record the selected translation mode (the template compares it
// against 'text' and 'files').
switchType: function(type) {
this.translationType = type;
},
handleInputFile: function(e) {
this.inputFile = e.target.files[0];
},
// Clear the selected file and reset the file-translation state.
removeFile: function(e) {
e.preventDefault()
this.inputFile = false;
this.translatedFileUrl = false;
this.loadingFileTranslation = false;
},
translateFile: function(e) {
e.preventDefault();
let self = this;
let translateFileRequest = new XMLHttpRequest();
translateFileRequest.open("POST", BaseUrl + "/translate_file", true);
let data = new FormData();
data.append("file", this.inputFile);
data.append("source", this.sourceLang);
data.append("target", this.targetLang);
data.append("api_key", localStorage.getItem("api_key") || "");
this.loadingFileTranslation = true
translateFileRequest.onload = function() {
if (translateFileRequest.readyState === 4 && translateFileRequest.status === 200) {
try{
self.loadingFileTranslation = false;
let res = JSON.parse(this.response);
if (res.translatedFileUrl){
self.translatedFileUrl = res.translatedFileUrl;
let link = document.createElement("a");
link.target = "_blank";
link.href = self.translatedFileUrl;
link.click();
}else{
throw new Error(res.error || "Unknown error");
}
}catch(e){
self.error = e.message;
self.loadingFileTranslation = false;
self.inputFile = false;
}
}
}
translateFileRequest.onerror = function() {
self.error = "Error while calling /translate_file";
self.loadingFileTranslation = false;
self.inputFile = false;
};
translateFileRequest.send(data);
}
}
});

File diff suppressed because one or more lines are too long

View file

@ -105,7 +105,10 @@
<div class="container">
<div class="row">
<h3 class="header center">Translation API</h3>
<div class="col s12 mb-1 center" v-if="filesTranslation === true">
<button type="button" class="btn btn-switch-type" @click="switchType('text')" :class="{'active': translationType === 'text'}"><i class="material-icons left">title</i>Translate text</button>
<button type="button" class="btn btn-switch-type" @click="switchType('files')" :class="{'active': translationType === 'files'}"><i class="material-icons left">description</i>Translate files</button>
</div>
<form class="col s12">
<div class="row mb-0">
<div class="col s6 language-select">
@ -130,7 +133,7 @@
</div>
</div>
<div class="row">
<div class="row" v-if="translationType === 'text'">
<div class="input-field textarea-container col s6">
<label for="textarea1" class="sr-only">
Text to translate
@ -143,7 +146,6 @@
<label>[[ inputText.length ]] / [[ charactersLimit ]]</label>
</div>
</div>
<div class="input-field textarea-container col s6">
<label for="textarea2" class="sr-only">
Translated text
@ -170,13 +172,49 @@
</div>
</div>
</div>
<div class="row" v-if="translationType === 'files'">
<div class="file-dropzone">
<div v-if="inputFile === false" class="dropzone-content">
<span>Supported file formats: [[ supportedFilesFormatFormatted ]]</span>
<form action="#">
<div class="file-field input-field">
<div class="btn">
<span>File</span>
<input type="file" :accept="supportedFilesFormatFormatted" @change="handleInputFile" ref="fileInputRef">
</div>
<div class="file-path-wrapper hidden">
<input class="file-path validate" type="text">
</div>
</div>
</form>
</div>
<div v-if="inputFile !== false" class="dropzone-content">
<div class="card">
<div class="card-content" style="padding-right: 6px;">
<div class="row mb-0">
<div class="col s12">
[[ inputFile.name ]]
<button v-if="loadingFileTranslation !== true" @click="removeFile" class="btn-flat">
<i class="material-icons">close</i>
</button>
</div>
</div>
</div>
</div>
<button @click="translateFile" v-if="translatedFileUrl === false && loadingFileTranslation === false" class="btn">Translate</button>
<a v-if="translatedFileUrl !== false" :href="translatedFileUrl" class="btn">Download</a>
<div class="progress" v-if="loadingFileTranslation">
<div class="indeterminate"></div>
</div>
</div>
</div>
</div>
</form>
</div>
</div>
</div>
<div class="section no-pad-bot">
<div class="section no-pad-bot" v-if="translationType !== 'files'">
<div class="container">
<div class="row center">
<div class="col s12 m12">

View file

@ -12,3 +12,4 @@ polyglot==16.7.4
appdirs==1.4.4
APScheduler==3.7.0
translatehtml==1.5.1
argos-translate-files==1.0.1

View file

@ -3,7 +3,7 @@
from setuptools import setup, find_packages
setup(
version='1.2.5',
version=open('VERSION').read().strip(),
name='libretranslate',
license='GNU Affero General Public License v3.0',
description='Free and Open Source Machine Translation API. Self-hosted, no limits, no ties to proprietary services.',