mirror of
https://github.com/LibreTranslate/LibreTranslate.git
synced 2024-11-25 01:11:00 +00:00
Add lexilang for language detection on short texts
This commit is contained in:
parent
c9592a236a
commit
6ff5bba000
3 changed files with 9 additions and 1 deletion
|
@@ -1,6 +1,7 @@
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
|
||||||
import linguars
|
import linguars
|
||||||
|
from lexilang.detector import detect as lldetect
|
||||||
|
|
||||||
|
|
||||||
class Language:
|
class Language:
|
||||||
|
@@ -26,9 +27,15 @@ def load_detector(langcodes = ()):
|
||||||
|
|
||||||
class Detector:
|
class Detector:
|
||||||
def __init__(self, langcodes = ()):
|
def __init__(self, langcodes = ()):
|
||||||
|
self.langcodes = langcodes
|
||||||
self.detector = load_detector(langcodes)
|
self.detector = load_detector(langcodes)
|
||||||
|
|
||||||
def detect(self, text):
|
def detect(self, text):
|
||||||
|
if len(text) < 18:
|
||||||
|
code, conf = lldetect(text, self.langcodes)
|
||||||
|
if conf > 0:
|
||||||
|
return [Language(code, round(conf * 100))]
|
||||||
|
|
||||||
top_3_choices = self.detector.confidence(text)[:3]
|
top_3_choices = self.detector.confidence(text)[:3]
|
||||||
if top_3_choices[0][1] == 0:
|
if top_3_choices[0][1] == 0:
|
||||||
return [Language("en", 0)]
|
return [Language("en", 0)]
|
||||||
|
|
|
@@ -18,7 +18,7 @@ def load_languages():
|
||||||
@lru_cache(maxsize=None)
|
@lru_cache(maxsize=None)
|
||||||
def load_lang_codes():
|
def load_lang_codes():
|
||||||
languages = load_languages()
|
languages = load_languages()
|
||||||
return (l.code for l in languages)
|
return tuple(l.code for l in languages)
|
||||||
|
|
||||||
def detect_languages(text):
|
def detect_languages(text):
|
||||||
# detect batch processing
|
# detect batch processing
|
||||||
|
|
|
@@ -43,6 +43,7 @@ dependencies = [
|
||||||
"waitress ==2.1.2",
|
"waitress ==2.1.2",
|
||||||
"expiringdict ==1.2.2",
|
"expiringdict ==1.2.2",
|
||||||
"linguars==0.4.0",
|
"linguars==0.4.0",
|
||||||
|
"lexilang==1.0.1",
|
||||||
"morfessor ==2.0.6",
|
"morfessor ==2.0.6",
|
||||||
"appdirs ==1.4.4",
|
"appdirs ==1.4.4",
|
||||||
"APScheduler ==3.9.1",
|
"APScheduler ==3.9.1",
|
||||||
|
|
Loading…
Reference in a new issue