mirror of
https://github.com/LibreTranslate/LibreTranslate.git
synced 2024-11-22 16:01:01 +00:00
use polyglot for detecting the language
This commit is contained in:
parent
9301ad0bda
commit
51980f6ff5
2 changed files with 40 additions and 23 deletions
24
app/app.py
24
app/app.py
|
@ -6,6 +6,7 @@ from langdetect import detect_langs
|
||||||
from langdetect import DetectorFactory
|
from langdetect import DetectorFactory
|
||||||
from pkg_resources import resource_filename
|
from pkg_resources import resource_filename
|
||||||
from .api_keys import Database
|
from .api_keys import Database
|
||||||
|
from app.language import detect_languages
|
||||||
|
|
||||||
DetectorFactory.seed = 0 # deterministic
|
DetectorFactory.seed = 0 # deterministic
|
||||||
|
|
||||||
|
@ -57,11 +58,6 @@ def create_app(args):
|
||||||
from app.language import languages
|
from app.language import languages
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
# For faster access
|
|
||||||
language_map = {}
|
|
||||||
for l in languages:
|
|
||||||
language_map[l.code] = l.name
|
|
||||||
|
|
||||||
if args.debug:
|
if args.debug:
|
||||||
app.config['TEMPLATES_AUTO_RELOAD'] = True
|
app.config['TEMPLATES_AUTO_RELOAD'] = True
|
||||||
|
|
||||||
|
@ -271,19 +267,12 @@ def create_app(args):
|
||||||
abort(400, description="Invalid request: Request (%d) exceeds character limit (%d)" % (chars, args.char_limit))
|
abort(400, description="Invalid request: Request (%d) exceeds character limit (%d)" % (chars, args.char_limit))
|
||||||
|
|
||||||
if source_lang == 'auto':
|
if source_lang == 'auto':
|
||||||
candidate_langs = list(filter(lambda l: l.lang in language_map, detect_langs(q)))
|
candidate_langs = detect_languages(q)
|
||||||
|
|
||||||
if len(candidate_langs) > 0:
|
|
||||||
candidate_langs.sort(key=lambda l: l.prob, reverse=True)
|
|
||||||
|
|
||||||
if args.debug:
|
if args.debug:
|
||||||
print(candidate_langs)
|
print(candidate_langs)
|
||||||
|
|
||||||
source_lang = next(iter([l.code for l in languages if l.code == candidate_langs[0].lang]), None)
|
source_lang = candidate_langs[0]["language"]
|
||||||
if not source_lang:
|
|
||||||
source_lang = 'en'
|
|
||||||
else:
|
|
||||||
source_lang = 'en'
|
|
||||||
|
|
||||||
if args.debug:
|
if args.debug:
|
||||||
print("Auto detected: %s" % source_lang)
|
print("Auto detected: %s" % source_lang)
|
||||||
|
@ -385,12 +374,7 @@ def create_app(args):
|
||||||
if not q:
|
if not q:
|
||||||
abort(400, description="Invalid request: missing q parameter")
|
abort(400, description="Invalid request: missing q parameter")
|
||||||
|
|
||||||
candidate_langs = list(filter(lambda l: l.lang in language_map, detect_langs(q)))
|
return jsonify(detect_languages(q))
|
||||||
candidate_langs.sort(key=lambda l: l.prob, reverse=True)
|
|
||||||
return jsonify([{
|
|
||||||
'confidence': l.prob,
|
|
||||||
'language': l.lang
|
|
||||||
} for l in candidate_langs])
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/frontend/settings")
|
@app.route("/frontend/settings")
|
||||||
|
|
|
@ -1,3 +1,36 @@
|
||||||
from argostranslate import translate
|
from argostranslate import translate
|
||||||
|
from polyglot.detect.base import Detector
|
||||||
|
|
||||||
|
|
||||||
languages = translate.load_installed_languages()
|
languages = translate.load_installed_languages()
|
||||||
|
|
||||||
|
|
||||||
|
# codes of every installed language; detect_languages() filters against these
__lang_codes = [lang.code for lang in languages]
|
||||||
|
|
||||||
|
|
||||||
|
def detect_languages(text):
    """Detect which installed languages *text* is most likely written in.

    Args:
        text: The string to analyse.

    Returns:
        A non-empty list of ``{'confidence': ..., 'language': ...}`` dicts,
        sorted by descending confidence. Only languages whose code appears
        in ``__lang_codes`` are reported. If no usable candidate remains,
        the list contains a single ``"en"`` entry with confidence ``0.0``.
    """
    # quiet=True stops polyglot from raising UnknownLanguage on short or
    # ambiguous input, so the "en" fallback below can actually be reached
    # instead of the exception propagating to the caller.
    detected = Detector(text, quiet=True).languages

    # get the candidates: skip entries with zero read_bytes and languages
    # that are not installed
    candidates = [
        lang for lang in detected
        if lang.read_bytes != 0 and lang.code in __lang_codes
    ]

    # this happens if no language can be detected
    if not candidates:
        # use language "en" by default but with zero confidence
        return [{'confidence': 0.0, 'language': "en"}]

    # sort the candidates descending based on the detected confidence
    candidates.sort(key=lambda lang: lang.confidence, reverse=True)

    return [
        {'confidence': lang.confidence, 'language': lang.code}
        for lang in candidates
    ]
|
||||||
|
|
Loading…
Reference in a new issue