Fix pycld2 error

This commit is contained in:
Piero Toffanin 2023-10-09 15:46:50 -04:00
parent aac2d9820a
commit b825370a88
2 changed files with 13 additions and 2 deletions

View file

@@ -1,5 +1,7 @@
# Originally adapted from https://github.com/aboSamoor/polyglot/blob/master/polyglot/base.py # Originally adapted from https://github.com/aboSamoor/polyglot/blob/master/polyglot/base.py
import unicodedata
import pycld2 as cld2 import pycld2 as cld2
@@ -52,7 +54,16 @@ class Detector:
text (string): A snippet of text, the longer it is the more reliable we text (string): A snippet of text, the longer it is the more reliable we
can detect the language used to write the text. can detect the language used to write the text.
""" """
try:
reliable, index, top_3_choices = cld2.detect(text, bestEffort=False) reliable, index, top_3_choices = cld2.detect(text, bestEffort=False)
except cld2.error as e:
if "input contains invalid UTF-8" in str(e):
# Fix for https://github.com/LibreTranslate/LibreTranslate/issues/514
# related to https://github.com/aboSamoor/polyglot/issues/71#issuecomment-707997790
text = ''.join([l for l in text if unicodedata.category(str(l))[0] not in ('S', 'M', 'C')])
reliable, index, top_3_choices = cld2.detect(text, bestEffort=False)
else:
raise e
if not reliable: if not reliable:
self.reliable = False self.reliable = False

View file

@@ -162,7 +162,7 @@ h3.header {
left: 2px; left: 2px;
} }
.locale-panel a:hovselecter{ .locale-panel a:hover{
background-color: transparent !important; background-color: transparent !important;
} }