diff --git a/README.md b/README.md index e0a8598..daad3d7 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![Python versions](https://img.shields.io/pypi/pyversions/libretranslate)](https://pypi.org/project/libretranslate) [![Run tests](https://github.com/uav4geo/LibreTranslate/workflows/Run%20tests/badge.svg)](https://github.com/uav4geo/LibreTranslate/actions?query=workflow%3A%22Run+tests%22) [![Publish to DockerHub](https://github.com/uav4geo/LibreTranslate/workflows/Publish%20to%20DockerHub/badge.svg)](https://hub.docker.com/r/libretranslate/libretranslate) [![Publish to GitHub Container Registry](https://github.com/uav4geo/LibreTranslate/workflows/Publish%20to%20GitHub%20Container%20Registry/badge.svg)](https://github.com/uav4geo/LibreTranslate/actions?query=workflow%3A%22Publish+to+GitHub+Container+Registry%22) [![Awesome Humane Tech](https://raw.githubusercontent.com/humanetech-community/awesome-humane-tech/main/humane-tech-badge.svg?sanitize=true)](https://github.com/humanetech-community/awesome-humane-tech) -Free and Open Source Machine Translation API, entirely self-hosted. Unlike other APIs, it doesn't rely on proprietary providers such as Google or Azure to perform translations. +Free and Open Source Machine Translation API, entirely self-hosted. Unlike other APIs, it doesn't rely on proprietary providers such as Google or Azure to perform translations. Instead, its translation engine is powered by the open source [Argos Translate][argo] library. ![image](https://user-images.githubusercontent.com/1951843/121782367-23f90080-cb77-11eb-87fd-ed23a21b730f.png) @@ -51,6 +51,8 @@ Then open a web browser to http://localhost:5000 If you're on Windows, we recommend you [Run with Docker](#run-with-docker) instead. 
+On Ubuntu 20.04 you can also use the install script available on https://github.com/argosopentech/LibreTranslate-init + ## Build and Run If you want to make some changes to the code, you can build from source, and run the API: @@ -101,22 +103,39 @@ docker-compose up -d --build ## Arguments -| Argument | Description | Default | -| ------------- | ------------------------------ | -------------------- | -| --host | Set host to bind the server to | `127.0.0.1` | -| --port | Set port to bind the server to | `5000` | -| --char-limit | Set character limit | `No limit` | -| --req-limit | Set maximum number of requests per minute per client | `No limit` | -| --batch-limit | Set maximum number of texts to translate in a batch request | `No limit` | -| --ga-id | Enable Google Analytics on the API client page by providing an ID | `No tracking` | -| --debug | Enable debug environment | `False` | -| --ssl | Whether to enable SSL | `False` | -| --frontend-language-source | Set frontend default language - source | `en` | -| --frontend-language-target | Set frontend default language - target | `es` | -| --frontend-timeout | Set frontend translation timeout | `500` | -| --api-keys | Enable API keys database for per-user rate limits lookup | `Don't use API keys` | -| --require-api-key-origin | Require use of an API key for programmatic access to the API, unless the request origin matches this domain | `No restrictions on domain origin` | -| --load-only | Set available languages | `all from argostranslate` | +| Argument | Description | Default | Env. 
name | +| ------------- | ------------------------------ | -------------------- | ---------------------- | +| --host | Set host to bind the server to | `127.0.0.1` | LT_HOST | +| --port | Set port to bind the server to | `5000` | LT_PORT | +| --char-limit | Set character limit | `No limit` | LT_CHAR_LIMIT | +| --req-limit | Set maximum number of requests per minute per client | `No limit` | LT_REQ_LIMIT | +| --batch-limit | Set maximum number of texts to translate in a batch request | `No limit` | LT_BATCH_LIMIT | +| --ga-id | Enable Google Analytics on the API client page by providing an ID | `No tracking` | LT_GA_ID | +| --debug | Enable debug environment | `False` | LT_DEBUG | +| --ssl | Whether to enable SSL | `False` | LT_SSL | +| --frontend-language-source | Set frontend default language - source | `en` | LT_FRONTEND_LANGUAGE_SOURCE | +| --frontend-language-target | Set frontend default language - target | `es` | LT_FRONTEND_LANGUAGE_TARGET | +| --frontend-timeout | Set frontend translation timeout | `500` | LT_FRONTEND_TIMEOUT | +| --api-keys | Enable API keys database for per-user rate limits lookup | `Don't use API keys` | LT_API_KEYS | +| --require-api-key-origin | Require use of an API key for programmatic access to the API, unless the request origin matches this domain | `No restrictions on domain origin` | LT_REQUIRE_API_KEY_ORIGIN | +| --load-only | Set available languages | `all from argostranslate` | LT_LOAD_ONLY | + +Note that each argument has an equivalent environment variable that can be used instead. Environment variables override the default values but have lower priority than command-line arguments. They are particularly useful when running with Docker. Each variable name is the argument name in upper snake case with an `LT_` prefix (for example, `--char-limit` becomes `LT_CHAR_LIMIT`). 
+ +## Run with Gunicorn + +``` +pip install gunicorn +gunicorn --bind 0.0.0.0:5000 'wsgi:app' +``` + +You can pass application arguments directly to Gunicorn via: + + +``` +gunicorn --bind 0.0.0.0:5000 'wsgi:app(api_keys=True)' +``` + ## Manage API Keys @@ -153,6 +172,8 @@ You can use the LibreTranslate API using the following bindings: - .Net: https://github.com/sigaloid/LibreTranslate.Net - Go: https://github.com/SnakeSel/libretranslate - Python: https://github.com/argosopentech/LibreTranslate-py + - C++: https://github.com/argosopentech/LibreTranslate-cpp + - Unix: https://github.com/argosopentech/LibreTranslate-sh More coming soon! @@ -175,16 +196,24 @@ hooks: Then issue `./launcher rebuild app`. From the Discourse's admin panel then select "LibreTranslate" as a translation provider and set the relevant endpoint configurations. +## Mobile Apps + +- [LibreTranslater](https://gitlab.com/BeowuIf/libretranslater) is an Android app available on the Play Store (https://play.google.com/store/apps/details?id=de.beowulf.libretranslater) that uses the LibreTranslate API. + +## Web browser +- [minbrowser](https://minbrowser.org/) is a web browser with [integrated LibreTranslate support](https://github.com/argosopentech/argos-translate/discussions/158#discussioncomment-1141551). + ## Mirrors This is a list of online resources that serve the LibreTranslate API. Some require an API key. If you want to add a new URL, please open a pull request. 
URL |API Key Required|Contact|Cost --- | --- | --- | --- -[libretranslate.com](https://libretranslate.com)|:heavy_check_mark:|[UAV4GEO](https://uav4geo.com/contact)| $9 / month +[libretranslate.com](https://libretranslate.com)|:heavy_check_mark:|[UAV4GEO](https://uav4geo.com/contact)| [$9 / month](https://buy.stripe.com/28obLvdgGcIE5AQfYY), 80 requests / minute limit [libretranslate.de](https://libretranslate.de/)|-|- [translate.mentality.rip](https://translate.mentality.rip)|-|- [translate.astian.org](https://translate.astian.org/)|-|- +[translate.argosopentech.com](https://translate.argosopentech.com/)|-|- ## Roadmap @@ -256,8 +285,10 @@ To get a HTTPS subdomain certificate, install `certbot` (snap), run `sudo certbo ## Credits -This work is largely possible thanks to [Argos Translate](https://github.com/argosopentech/argos-translate), which powers the translation engine. +This work is largely possible thanks to [Argos Translate][argo], which powers the translation engine. ## License [GNU Affero General Public License v3](https://www.gnu.org/licenses/agpl-3.0.en.html) + +[argo]: https://github.com/argosopentech/argos-translate diff --git a/app/app.py b/app/app.py index c21a4e0..1bcc8c7 100644 --- a/app/app.py +++ b/app/app.py @@ -360,43 +360,61 @@ def create_app(args): ) if source_lang == "auto": - candidate_langs = detect_languages(q) + source_langs = [] + if batch: + auto_detect_texts = q + else: + auto_detect_texts = [q] - if args.debug: - print(candidate_langs) + overall_candidates = detect_languages(q) + + for text_to_check in auto_detect_texts: + if len(text_to_check) > 40: + candidate_langs = detect_languages(text_to_check) + else: + # Unable to accurately detect languages for short texts + candidate_langs = overall_candidates + source_langs.append(candidate_langs[0]["language"]) - source_lang = candidate_langs[0]["language"] + if args.debug: + print(text_to_check, candidate_langs) + print("Auto detected: %s" % candidate_langs[0]["language"]) + else: + if 
"""Default option values, overridable through ``LT_``-prefixed environment variables.

``DEFAULT_ARGUMENTS`` is computed once, when this module is imported, by
looking up ``LT_<NAME>`` in the process environment for every option listed
in ``_default_options_objects``.
"""
import os

# Prefix prepended to every option name to obtain its environment variable.
_prefix = 'LT_'


def _get_value_str(name, default_value):
    """Return environment variable ``name``, or ``default_value`` when it is unset."""
    env_value = os.environ.get(name)
    return default_value if env_value is None else env_value


def _get_value_int(name, default_value):
    """Return environment variable ``name`` parsed as an int.

    Falls back to ``default_value`` when the variable is unset (``KeyError``)
    or does not hold a valid integer (``ValueError``).
    """
    try:
        return int(os.environ[name])
    except (KeyError, ValueError):
        # Best effort: a missing or malformed value must not prevent start-up.
        # Only these two exceptions are expected here; anything else is a
        # genuine error and is allowed to propagate.
        return default_value


def _get_value_bool(name, default_value):
    """Return environment variable ``name`` interpreted as a boolean.

    Only the spellings ``FALSE``/``False``/``false``/``0`` and
    ``TRUE``/``True``/``true``/``1`` are recognised; an unset variable or any
    other value yields ``default_value``.
    """
    env_value = os.environ.get(name)
    if env_value in ['FALSE', 'False', 'false', '0']:
        return False
    if env_value in ['TRUE', 'True', 'true', '1']:
        return True
    return default_value


def _get_value(name, default_value, value_type):
    """Resolve option ``name`` from the environment according to ``value_type``.

    ``name`` is given without the ``LT_`` prefix. ``value_type`` is one of
    ``'str'``, ``'int'`` or ``'bool'``; for any other value the environment is
    not consulted and ``default_value`` is returned unchanged.
    """
    env_name = _prefix + name
    if value_type == 'str':
        return _get_value_str(env_name, default_value)
    if value_type == 'int':
        return _get_value_int(env_name, default_value)
    if value_type == 'bool':
        return _get_value_bool(env_name, default_value)
    return default_value


# One entry per option. The keys must match the parameter names of
# ``_get_value`` because each entry is expanded with ``**`` below.
_default_options_objects = [
    {
        'name': 'HOST',
        'default_value': '127.0.0.1',
        'value_type': 'str'
    },
    {
        'name': 'PORT',
        'default_value': 5000,
        'value_type': 'int'
    },
    {
        'name': 'CHAR_LIMIT',
        'default_value': -1,
        'value_type': 'int'
    },
    {
        'name': 'REQ_LIMIT',
        'default_value': -1,
        'value_type': 'int'
    },
    {
        'name': 'DAILY_REQ_LIMIT',
        'default_value': -1,
        'value_type': 'int'
    },
    {
        'name': 'REQ_FLOOD_THRESHOLD',
        'default_value': -1,
        'value_type': 'int'
    },
    {
        'name': 'BATCH_LIMIT',
        'default_value': -1,
        'value_type': 'int'
    },
    {
        'name': 'GA_ID',
        'default_value': None,
        'value_type': 'str'
    },
    {
        'name': 'DEBUG',
        'default_value': False,
        'value_type': 'bool'
    },
    {
        # Default is None rather than False, so an unset LT_SSL yields None.
        'name': 'SSL',
        'default_value': None,
        'value_type': 'bool'
    },
    {
        'name': 'FRONTEND_LANGUAGE_SOURCE',
        'default_value': 'en',
        'value_type': 'str'
    },
    {
        'name': 'FRONTEND_LANGUAGE_TARGET',
        'default_value': 'es',
        'value_type': 'str'
    },
    {
        'name': 'FRONTEND_TIMEOUT',
        'default_value': 500,
        'value_type': 'int'
    },
    {
        'name': 'API_KEYS',
        'default_value': False,
        'value_type': 'bool'
    },
    {
        'name': 'REQUIRE_API_KEY_ORIGIN',
        'default_value': '',
        'value_type': 'str'
    },
    {
        'name': 'LOAD_ONLY',
        'default_value': None,
        'value_type': 'str'
    }
]


# Maps each option name (without the LT_ prefix) to its resolved value.
DEFAULT_ARGUMENTS = {obj['name']: _get_value(**obj) for obj in _default_options_objects}
[] for t in text: try: - candidates.extend(Detector(t).languages) + d = Detector(t).languages + for i in range(len(d)): + d[i].text_length = len(t) + candidates.extend(d) except UnknownLanguage: pass # total read bytes of the provided text - read_bytes_total = sum(c.read_bytes for c in candidates) + text_length_total = sum(c.text_length for c in candidates) # only use candidates that are supported by argostranslate candidate_langs = list( - filter(lambda l: l.read_bytes != 0 and l.code in __lang_codes, candidates) + filter(lambda l: l.text_length != 0 and l.code in __lang_codes, candidates) ) # this happens if no language could be detected @@ -50,7 +53,7 @@ def detect_languages(text): # if more than one is present, calculate the average confidence lang = lc[0] lang.confidence = sum(l.confidence for l in lc) / len(lc) - lang.read_bytes = sum(l.read_bytes for l in lc) + lang.text_length = sum(l.text_length for l in lc) temp_average_list.append(lang) elif lc: # otherwise just add it to the temporary list @@ -62,7 +65,7 @@ def detect_languages(text): # sort the candidates descending based on the detected confidence candidate_langs.sort( - key=lambda l: (l.confidence * l.read_bytes) / read_bytes_total, reverse=True + key=lambda l: (l.confidence * l.text_length) / text_length_total, reverse=True ) return [{"confidence": l.confidence, "language": l.code} for l in candidate_langs] diff --git a/app/main.py b/app/main.py index a4ac6f9..d9b5b44 100644 --- a/app/main.py +++ b/app/main.py @@ -1,7 +1,9 @@ import argparse +import sys import operator from app.app import create_app +from app.default_values import DEFAULT_ARGUMENTS as DEFARGS def main(): @@ -9,40 +11,40 @@ def main(): description="LibreTranslate - Free and Open Source Translation API" ) parser.add_argument( - "--host", type=str, help="Hostname (%(default)s)", default="127.0.0.1" + "--host", type=str, help="Hostname (%(default)s)", default=DEFARGS['HOST'] ) - parser.add_argument("--port", type=int, help="Port 
(%(default)s)", default=5000) + parser.add_argument("--port", type=int, help="Port (%(default)s)", default=DEFARGS['PORT']) parser.add_argument( "--char-limit", - default=-1, + default=DEFARGS['CHAR_LIMIT'], type=int, metavar="", help="Set character limit (%(default)s)", ) parser.add_argument( "--req-limit", - default=-1, + default=DEFARGS['REQ_LIMIT'], type=int, metavar="", help="Set the default maximum number of requests per minute per client (%(default)s)", ) parser.add_argument( "--daily-req-limit", - default=-1, + default=DEFARGS['DAILY_REQ_LIMIT'], type=int, metavar="", help="Set the default maximum number of requests per day per client, in addition to req-limit. (%(default)s)", ) parser.add_argument( "--req-flood-threshold", - default=-1, + default=DEFARGS['REQ_FLOOD_THRESHOLD'], type=int, metavar="", help="Set the maximum number of request limit offences per 4 weeks that a client can exceed before being banned. (%(default)s)", ) parser.add_argument( "--batch-limit", - default=-1, + default=DEFARGS['BATCH_LIMIT'], type=int, metavar="", help="Set maximum number of texts to translate in a batch request (%(default)s)", @@ -50,52 +52,53 @@ def main(): parser.add_argument( "--ga-id", type=str, - default=None, + default=DEFARGS['GA_ID'], metavar="", help="Enable Google Analytics on the API client page by providing an ID (%(default)s)", ) parser.add_argument( - "--debug", default=False, action="store_true", help="Enable debug environment" + "--debug", default=DEFARGS['DEBUG'], action="store_true", help="Enable debug environment" ) parser.add_argument( - "--ssl", default=None, action="store_true", help="Whether to enable SSL" + "--ssl", default=DEFARGS['SSL'], action="store_true", help="Whether to enable SSL" ) parser.add_argument( "--frontend-language-source", type=str, - default="en", + default=DEFARGS['FRONTEND_LANGUAGE_SOURCE'], metavar="", help="Set frontend default language - source (%(default)s)", ) parser.add_argument( "--frontend-language-target", type=str, 
- default="es", + default=DEFARGS['FRONTEND_LANGUAGE_TARGET'], metavar="", help="Set frontend default language - target (%(default)s)", ) parser.add_argument( "--frontend-timeout", type=int, - default=500, + default=DEFARGS['FRONTEND_TIMEOUT'], metavar="", help="Set frontend translation timeout (%(default)s)", ) parser.add_argument( "--api-keys", - default=False, + default=DEFARGS['API_KEYS'], action="store_true", help="Enable API keys database for per-user rate limits lookup", ) parser.add_argument( "--require-api-key-origin", type=str, - default="", + default=DEFARGS['REQUIRE_API_KEY_ORIGIN'], help="Require use of an API key for programmatic access to the API, unless the request origin matches this domain", ) parser.add_argument( "--load-only", type=operator.methodcaller("split", ","), + default=DEFARGS['LOAD_ONLY'], metavar="", help="Set available languages (ar,de,en,es,fr,ga,hi,it,ja,ko,pt,ru,zh)", ) @@ -103,17 +106,20 @@ def main(): args = parser.parse_args() app = create_app(args) - if args.debug: - app.run(host=args.host, port=args.port) + if sys.argv[0] == '--wsgi': + return app else: - from waitress import serve + if args.debug: + app.run(host=args.host, port=args.port) + else: + from waitress import serve - serve( - app, - host=args.host, - port=args.port, - url_scheme="https" if args.ssl else "http", - ) + serve( + app, + host=args.host, + port=args.port, + url_scheme="https" if args.ssl else "http", + ) if __name__ == "__main__": diff --git a/app/static/css/main.css b/app/static/css/main.css index a9e7d89..71dd74e 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -7,6 +7,10 @@ select { font-family: Arial, Helvetica, sans-serif !important; } +#app { + min-height: 80vh; +} + h3.header { margin-bottom: 2.5rem; } diff --git a/app/templates/index.html b/app/templates/index.html index eb14b42..fe5ac16 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -135,7 +135,7 @@ - + @@ -148,7 +148,7 @@ - + diff --git a/wsgi.py 
from app import main


def _build_argv(options):
    """Translate keyword options into a command-line style argument list.

    The first element is the ``'--wsgi'`` marker that ``main()`` looks for in
    ``sys.argv[0]``. Underscores in option names become dashes.
    """
    argv = ['--wsgi']
    for key, value in options.items():
        flag = "--" + key.replace("_", "-")
        if isinstance(value, bool):
            # Boolean options are switches: pass the flag when True and omit
            # it entirely when False, instead of passing a literal "False".
            if value:
                argv.append(flag)
        else:
            # argparse only handles string arguments, so stringify values
            # such as integers before handing them over.
            argv.extend((flag, str(value)))
    return argv


def app(*args, **kwargs):
    """WSGI entry point that can also be used as an application factory.

    With keyword arguments (``'wsgi:app(api_keys=True)'``) the keywords are
    turned into command-line options and the object returned by ``main()`` is
    returned. Without keyword arguments (``'wsgi:app'``) that object is
    invoked immediately with the positional arguments and its result is
    returned.
    """
    import sys

    # main() parses its options from sys.argv, so the options are placed there.
    sys.argv = _build_argv(kwargs)

    # NOTE(review): in the no-kwargs form main() runs on every call, so the
    # application appears to be rebuilt per call -- confirm this is intended.
    instance = main()

    if len(kwargs) == 0:
        return instance(*args, **kwargs)
    else:
        return instance