mirror of
https://github.com/LibreTranslate/LibreTranslate.git
synced 2024-11-22 07:51:00 +00:00
commit
92df4c3e6b
3 changed files with 23 additions and 37 deletions
|
@ -554,22 +554,8 @@ def create_app(args):
|
||||||
)
|
)
|
||||||
|
|
||||||
if source_lang == "auto":
|
if source_lang == "auto":
|
||||||
source_langs = []
|
candidate_langs = detect_languages(q if batch else [q])
|
||||||
auto_detect_texts = q if batch else [q]
|
source_langs = [candidate_langs[0]]
|
||||||
|
|
||||||
overall_candidates = detect_languages(q)
|
|
||||||
|
|
||||||
for text_to_check in auto_detect_texts:
|
|
||||||
if len(text_to_check) > 40:
|
|
||||||
candidate_langs = detect_languages(text_to_check)
|
|
||||||
else:
|
|
||||||
# Unable to accurately detect languages for short texts
|
|
||||||
candidate_langs = overall_candidates
|
|
||||||
source_langs.append(candidate_langs[0])
|
|
||||||
|
|
||||||
if args.debug:
|
|
||||||
print(text_to_check, candidate_langs)
|
|
||||||
print("Auto detected: %s" % candidate_langs[0]["language"])
|
|
||||||
else:
|
else:
|
||||||
if batch:
|
if batch:
|
||||||
source_langs = [ {"confidence": 100.0, "language": source_lang} for text in q]
|
source_langs = [ {"confidence": 100.0, "language": source_lang} for text in q]
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
from functools import lru_cache
|
|
||||||
|
|
||||||
import linguars
|
from langdetect import DetectorFactory
|
||||||
|
|
||||||
|
DetectorFactory.seed = 0
|
||||||
|
|
||||||
|
from langdetect import detect_langs
|
||||||
from lexilang.detector import detect as lldetect
|
from lexilang.detector import detect as lldetect
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,34 +15,31 @@ class Language:
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return (f"code: {self.code:<9} confidence: {self.confidence:>5.1f} ")
|
return (f"code: {self.code:<9} confidence: {self.confidence:>5.1f} ")
|
||||||
|
|
||||||
@lru_cache(maxsize=None)
|
def check_lang(langcodes, lang):
|
||||||
def load_detector(langcodes = ()):
|
return normalized_lang_code(lang) in langcodes
|
||||||
languages = []
|
|
||||||
for lc in langcodes:
|
|
||||||
if lc == 'zt':
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
languages.append(linguars.Language.from_iso_code_639_1(lc))
|
|
||||||
except Exception:
|
|
||||||
print(f"{lc} is not supported by lingua")
|
|
||||||
pass # Not supported
|
|
||||||
|
|
||||||
return linguars.LanguageDetector(languages=languages)
|
|
||||||
|
|
||||||
|
def normalized_lang_code(lang):
|
||||||
|
code = lang.lang
|
||||||
|
# Handle zh-cn
|
||||||
|
if code.startswith("zh"):
|
||||||
|
code = "zh"
|
||||||
|
return code
|
||||||
|
|
||||||
class Detector:
|
class Detector:
|
||||||
def __init__(self, langcodes = ()):
|
def __init__(self, langcodes = ()):
|
||||||
self.langcodes = langcodes
|
self.langcodes = langcodes
|
||||||
self.detector = load_detector(langcodes)
|
|
||||||
|
|
||||||
def detect(self, text):
|
def detect(self, text):
|
||||||
if len(text) < 18:
|
if len(text) < 20:
|
||||||
code, conf = lldetect(text, self.langcodes)
|
code, conf = lldetect(text, self.langcodes)
|
||||||
if conf > 0:
|
if conf > 0:
|
||||||
return [Language(code, round(conf * 100))]
|
return [Language(code, round(conf * 100))]
|
||||||
|
|
||||||
top_3_choices = self.detector.confidence(text)[:3]
|
top_3_choices = [lang for lang in detect_langs(text) if check_lang(self.langcodes, lang)][:3]
|
||||||
if top_3_choices[0][1] == 0:
|
if not len(top_3_choices):
|
||||||
|
return [Language("en", 0)]
|
||||||
|
if top_3_choices[0].prob == 0:
|
||||||
return [Language("en", 0)]
|
return [Language("en", 0)]
|
||||||
return [Language(lang.iso_code_639_1, round(conf * 100)) for lang, conf in top_3_choices]
|
|
||||||
|
return [Language(normalized_lang_code(lang), round(lang.prob * 100)) for lang in top_3_choices]
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ dependencies = [
|
||||||
"Flask-Session ==0.4.0",
|
"Flask-Session ==0.4.0",
|
||||||
"waitress ==2.1.2",
|
"waitress ==2.1.2",
|
||||||
"expiringdict ==1.2.2",
|
"expiringdict ==1.2.2",
|
||||||
"linguars==0.4.0",
|
"langdetect==1.0.9",
|
||||||
"lexilang==1.0.1",
|
"lexilang==1.0.1",
|
||||||
"morfessor ==2.0.6",
|
"morfessor ==2.0.6",
|
||||||
"appdirs ==1.4.4",
|
"appdirs ==1.4.4",
|
||||||
|
|
Loading…
Reference in a new issue