mirror of
https://github.com/searxng/searxng.git
synced 2024-11-26 12:51:02 +00:00
Merge pull request #2347 from return42/mod-lang-detection
If language recognition fails use the Accept-Language
This commit is contained in:
commit
45529f51a1
5 changed files with 106 additions and 71 deletions
|
@ -8,9 +8,10 @@
|
||||||
from base64 import urlsafe_b64encode, urlsafe_b64decode
|
from base64 import urlsafe_b64encode, urlsafe_b64decode
|
||||||
from zlib import compress, decompress
|
from zlib import compress, decompress
|
||||||
from urllib.parse import parse_qs, urlencode
|
from urllib.parse import parse_qs, urlencode
|
||||||
from typing import Iterable, Dict, List
|
from typing import Iterable, Dict, List, Optional
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
|
import babel
|
||||||
|
|
||||||
from searx import settings, autocomplete
|
from searx import settings, autocomplete
|
||||||
from searx.enginelib import Engine
|
from searx.enginelib import Engine
|
||||||
|
@ -287,10 +288,65 @@ class PluginsSetting(BooleanChoices):
|
||||||
return [item[len('plugin_') :] for item in items]
|
return [item[len('plugin_') :] for item in items]
|
||||||
|
|
||||||
|
|
||||||
|
class ClientPref:
    """Container to assemble client preferences and settings."""

    # hint: searx.webapp.get_client_settings should be moved into this class

    locale: Optional[babel.Locale]
    """Locale preferred by the client (``None`` if no preference is known)."""

    def __init__(self, locale: Optional[babel.Locale] = None):
        self.locale = locale

    @property
    def locale_tag(self) -> Optional[str]:
        """Language tag of :py:obj:`ClientPref.locale`.

        The tag is ``<language>`` or ``<language>-<territory>`` when the locale
        has a territory.  ``None`` is returned if no locale is set.
        """
        if self.locale is None:
            return None
        tag = self.locale.language
        if self.locale.territory:
            tag += '-' + self.locale.territory
        return tag

    @classmethod
    def from_http_request(cls, http_request: flask.Request):
        """Build ClientPref object from HTTP request.

        - `Accept-Language used for locale setting
          <https://www.w3.org/International/questions/qa-accept-lang-locales.en>`__

        The locale with the highest q-value is selected; on equal q-values the
        entry listed first in the header wins.  Entries whose language tag or
        q-value can't be parsed are ignored.  If the header is missing, empty
        or does not contain any usable entry, the locale is ``None``.
        """
        al_header = http_request.headers.get("Accept-Language")
        if not al_header:
            return cls(locale=None)

        pairs = []
        for item in al_header.split(','):
            # An entry without an explicit weight gets the default 'q=1'.
            # fmt: off
            lang, qvalue = [_.strip() for _ in (item.split(';') + ['q=1',])[:2]]
            # fmt: on
            try:
                qvalue = float(qvalue.split('=')[-1])
                locale = babel.Locale.parse(lang, sep='-')
            except (ValueError, babel.core.UnknownLocaleError):
                # skip entries like '*' or malformed tags / q-values
                continue
            pairs.append((locale, qvalue))

        if not pairs:
            # The header was sent, but none of its entries is usable: behave
            # as if the client has no preference instead of raising an
            # IndexError on pairs[0].
            return cls(locale=None)

        # The sort is stable, so entries with equal q-value keep header order.
        pairs.sort(reverse=True, key=lambda x: x[1])
        return cls(locale=pairs[0][0])
|
||||||
|
|
||||||
|
|
||||||
class Preferences:
|
class Preferences:
|
||||||
"""Validates and saves preferences to cookies"""
|
"""Validates and saves preferences to cookies"""
|
||||||
|
|
||||||
def __init__(self, themes: List[str], categories: List[str], engines: Dict[str, Engine], plugins: Iterable[Plugin]):
|
def __init__(
|
||||||
|
self,
|
||||||
|
themes: List[str],
|
||||||
|
categories: List[str],
|
||||||
|
engines: Dict[str, Engine],
|
||||||
|
plugins: Iterable[Plugin],
|
||||||
|
client: Optional[ClientPref] = None,
|
||||||
|
):
|
||||||
|
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.key_value_settings: Dict[str, Setting] = {
|
self.key_value_settings: Dict[str, Setting] = {
|
||||||
|
@ -414,6 +470,7 @@ class Preferences:
|
||||||
self.engines = EnginesSetting('engines', engines=engines.values())
|
self.engines = EnginesSetting('engines', engines=engines.values())
|
||||||
self.plugins = PluginsSetting('plugins', plugins=plugins)
|
self.plugins = PluginsSetting('plugins', plugins=plugins)
|
||||||
self.tokens = SetSetting('tokens')
|
self.tokens = SetSetting('tokens')
|
||||||
|
self.client = client or ClientPref()
|
||||||
self.unknown_params: Dict[str, str] = {}
|
self.unknown_params: Dict[str, str] = {}
|
||||||
|
|
||||||
def get_as_url_params(self):
|
def get_as_url_params(self):
|
||||||
|
|
|
@ -22,7 +22,6 @@ from searx.network import initialize as initialize_network, check_network_config
|
||||||
from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
|
from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
|
||||||
from searx.search.processors import PROCESSORS, initialize as initialize_processors
|
from searx.search.processors import PROCESSORS, initialize as initialize_processors
|
||||||
from searx.search.checker import initialize as initialize_checker
|
from searx.search.checker import initialize as initialize_checker
|
||||||
from searx.utils import detect_language
|
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('search')
|
logger = logger.getChild('search')
|
||||||
|
@ -40,57 +39,19 @@ def initialize(settings_engines=None, enable_checker=False, check_network=False,
|
||||||
initialize_checker()
|
initialize_checker()
|
||||||
|
|
||||||
|
|
||||||
def replace_auto_language(search_query: SearchQuery):
|
|
||||||
"""
|
|
||||||
Do nothing except if `search_query.lang` is "auto".
|
|
||||||
In this case:
|
|
||||||
* the value "auto" is replaced by the detected language of the query.
|
|
||||||
The default value is "all" when no language is detected.
|
|
||||||
* `search_query.locale` is updated accordingly
|
|
||||||
|
|
||||||
Use :py:obj:`searx.utils.detect_language` with `only_search_languages=True` to keep
|
|
||||||
only languages supported by the engines.
|
|
||||||
"""
|
|
||||||
if search_query.lang != 'auto':
|
|
||||||
return
|
|
||||||
|
|
||||||
detected_lang = detect_language(search_query.query, threshold=0.3, only_search_languages=True)
|
|
||||||
if detected_lang is None:
|
|
||||||
# fallback to 'all' if no language has been detected
|
|
||||||
search_query.lang = 'all'
|
|
||||||
search_query.locale = None
|
|
||||||
return
|
|
||||||
search_query.lang = detected_lang
|
|
||||||
try:
|
|
||||||
search_query.locale = babel.Locale.parse(search_query.lang)
|
|
||||||
except babel.core.UnknownLocaleError:
|
|
||||||
search_query.locale = None
|
|
||||||
|
|
||||||
|
|
||||||
class Search:
|
class Search:
|
||||||
"""Search information container"""
|
"""Search information container"""
|
||||||
|
|
||||||
__slots__ = "search_query", "result_container", "start_time", "actual_timeout"
|
__slots__ = "search_query", "result_container", "start_time", "actual_timeout"
|
||||||
|
|
||||||
def __init__(self, search_query: SearchQuery):
|
def __init__(self, search_query: SearchQuery):
|
||||||
"""Initialize the Search
|
"""Initialize the Search"""
|
||||||
|
|
||||||
search_query is copied
|
|
||||||
"""
|
|
||||||
# init vars
|
# init vars
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
self.search_query = search_query
|
||||||
self.result_container = ResultContainer()
|
self.result_container = ResultContainer()
|
||||||
self.start_time = None
|
self.start_time = None
|
||||||
self.actual_timeout = None
|
self.actual_timeout = None
|
||||||
self.search_query = copy(search_query)
|
|
||||||
self.update_search_query(self.search_query)
|
|
||||||
|
|
||||||
def update_search_query(self, search_query: SearchQuery):
|
|
||||||
"""Update search_query.
|
|
||||||
|
|
||||||
call replace_auto_language to replace the "auto" language
|
|
||||||
"""
|
|
||||||
replace_auto_language(search_query)
|
|
||||||
|
|
||||||
def search_external_bang(self):
|
def search_external_bang(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -31,7 +31,7 @@ search:
|
||||||
autocomplete_min: 4
|
autocomplete_min: 4
|
||||||
# Default search language - leave blank to detect from browser information or
|
# Default search language - leave blank to detect from browser information or
|
||||||
# use codes from 'languages.py'
|
# use codes from 'languages.py'
|
||||||
default_lang: ""
|
default_lang: "auto"
|
||||||
# Available languages
|
# Available languages
|
||||||
# languages:
|
# languages:
|
||||||
# - all
|
# - all
|
||||||
|
|
|
@ -6,6 +6,7 @@ from searx.query import RawTextQuery
|
||||||
from searx.engines import categories, engines
|
from searx.engines import categories, engines
|
||||||
from searx.search import SearchQuery, EngineRef
|
from searx.search import SearchQuery, EngineRef
|
||||||
from searx.preferences import Preferences, is_locked
|
from searx.preferences import Preferences, is_locked
|
||||||
|
from searx.utils import detect_language
|
||||||
|
|
||||||
|
|
||||||
# remove duplicate queries.
|
# remove duplicate queries.
|
||||||
|
@ -214,7 +215,27 @@ def parse_engine_data(form):
|
||||||
|
|
||||||
def get_search_query_from_webapp(
|
def get_search_query_from_webapp(
|
||||||
preferences: Preferences, form: Dict[str, str]
|
preferences: Preferences, form: Dict[str, str]
|
||||||
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]:
|
) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef], str]:
|
||||||
|
"""Assemble data from preferences and request.form (from the HTML form) needed
|
||||||
|
in a search query.
|
||||||
|
|
||||||
|
The returned tuple consists of:
|
||||||
|
|
||||||
|
1. instance of :py:obj:`searx.search.SearchQuery`
|
||||||
|
2. instance of :py:obj:`searx.query.RawTextQuery`
|
||||||
|
3. list of :py:obj:`searx.search.EngineRef` instances
|
||||||
|
4. string with the *selected locale* of the query
|
||||||
|
|
||||||
|
About language/locale: if the client selects the alias ``auto`` the
|
||||||
|
``SearchQuery`` object is built from the :py:obj:`detected language
|
||||||
|
<searx.utils.detect_language>`. If language recognition does not have a
|
||||||
|
match the language preferred by the :py:obj:`Preferences.client` is used.
|
||||||
|
If client does not have a preference, the default ``all`` is used.
|
||||||
|
|
||||||
|
The *selected locale* in the tuple always represents the selected
|
||||||
|
language/locale and might differ from the language recognition.
|
||||||
|
|
||||||
|
"""
|
||||||
# no text for the query ?
|
# no text for the query ?
|
||||||
if not form.get('q'):
|
if not form.get('q'):
|
||||||
raise SearxParameterException('q', '')
|
raise SearxParameterException('q', '')
|
||||||
|
@ -229,13 +250,19 @@ def get_search_query_from_webapp(
|
||||||
# set query
|
# set query
|
||||||
query = raw_text_query.getQuery()
|
query = raw_text_query.getQuery()
|
||||||
query_pageno = parse_pageno(form)
|
query_pageno = parse_pageno(form)
|
||||||
query_lang = parse_lang(preferences, form, raw_text_query)
|
|
||||||
query_safesearch = parse_safesearch(preferences, form)
|
query_safesearch = parse_safesearch(preferences, form)
|
||||||
query_time_range = parse_time_range(form)
|
query_time_range = parse_time_range(form)
|
||||||
query_timeout = parse_timeout(form, raw_text_query)
|
query_timeout = parse_timeout(form, raw_text_query)
|
||||||
external_bang = raw_text_query.external_bang
|
external_bang = raw_text_query.external_bang
|
||||||
engine_data = parse_engine_data(form)
|
engine_data = parse_engine_data(form)
|
||||||
|
|
||||||
|
query_lang = parse_lang(preferences, form, raw_text_query)
|
||||||
|
selected_locale = query_lang
|
||||||
|
|
||||||
|
if query_lang == 'auto':
|
||||||
|
query_lang = detect_language(query, threshold=0.8, only_search_languages=True)
|
||||||
|
query_lang = query_lang or preferences.client.locale_tag or 'all'
|
||||||
|
|
||||||
if not is_locked('categories') and raw_text_query.specific:
|
if not is_locked('categories') and raw_text_query.specific:
|
||||||
# if engines are calculated from query,
|
# if engines are calculated from query,
|
||||||
# set categories by using that information
|
# set categories by using that information
|
||||||
|
@ -265,4 +292,5 @@ def get_search_query_from_webapp(
|
||||||
raw_text_query,
|
raw_text_query,
|
||||||
query_engineref_list_unknown,
|
query_engineref_list_unknown,
|
||||||
query_engineref_list_notoken,
|
query_engineref_list_notoken,
|
||||||
|
selected_locale,
|
||||||
)
|
)
|
||||||
|
|
|
@ -84,6 +84,7 @@ from searx.webutils import (
|
||||||
from searx.webadapter import (
|
from searx.webadapter import (
|
||||||
get_search_query_from_webapp,
|
get_search_query_from_webapp,
|
||||||
get_selected_categories,
|
get_selected_categories,
|
||||||
|
parse_lang,
|
||||||
)
|
)
|
||||||
from searx.utils import (
|
from searx.utils import (
|
||||||
html_to_text,
|
html_to_text,
|
||||||
|
@ -96,6 +97,7 @@ from searx.plugins import Plugin, plugins, initialize as plugin_initialize
|
||||||
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
||||||
from searx.preferences import (
|
from searx.preferences import (
|
||||||
Preferences,
|
Preferences,
|
||||||
|
ClientPref,
|
||||||
ValidationException,
|
ValidationException,
|
||||||
)
|
)
|
||||||
from searx.answerers import (
|
from searx.answerers import (
|
||||||
|
@ -221,16 +223,9 @@ babel = Babel(app, locale_selector=get_locale)
|
||||||
|
|
||||||
|
|
||||||
def _get_browser_language(req, lang_list):
|
def _get_browser_language(req, lang_list):
|
||||||
for lang in req.headers.get("Accept-Language", "en").split(","):
|
client = ClientPref.from_http_request(req)
|
||||||
if ';' in lang:
|
locale = match_locale(client.locale_tag, lang_list, fallback='en')
|
||||||
lang = lang.split(';')[0]
|
|
||||||
if '-' in lang:
|
|
||||||
lang_parts = lang.split('-')
|
|
||||||
lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper())
|
|
||||||
locale = match_locale(lang, lang_list, fallback=None)
|
|
||||||
if locale is not None:
|
|
||||||
return locale
|
return locale
|
||||||
return 'en'
|
|
||||||
|
|
||||||
|
|
||||||
def _get_locale_rfc5646(locale):
|
def _get_locale_rfc5646(locale):
|
||||||
|
@ -446,11 +441,7 @@ def render(template_name: str, **kwargs):
|
||||||
kwargs['rtl'] = True
|
kwargs['rtl'] = True
|
||||||
|
|
||||||
if 'current_language' not in kwargs:
|
if 'current_language' not in kwargs:
|
||||||
_locale = request.preferences.get_value('language')
|
kwargs['current_language'] = parse_lang(request.preferences, {}, RawTextQuery('', []))
|
||||||
if _locale in ('auto', 'all'):
|
|
||||||
kwargs['current_language'] = _locale
|
|
||||||
else:
|
|
||||||
kwargs['current_language'] = match_locale(_locale, settings['search']['languages'])
|
|
||||||
|
|
||||||
# values from settings
|
# values from settings
|
||||||
kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
|
kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html']
|
||||||
|
@ -512,7 +503,10 @@ def pre_request():
|
||||||
request.timings = [] # pylint: disable=assigning-non-slot
|
request.timings = [] # pylint: disable=assigning-non-slot
|
||||||
request.errors = [] # pylint: disable=assigning-non-slot
|
request.errors = [] # pylint: disable=assigning-non-slot
|
||||||
|
|
||||||
preferences = Preferences(themes, list(categories.keys()), engines, plugins) # pylint: disable=redefined-outer-name
|
client_pref = ClientPref.from_http_request(request)
|
||||||
|
# pylint: disable=redefined-outer-name
|
||||||
|
preferences = Preferences(themes, list(categories.keys()), engines, plugins, client_pref)
|
||||||
|
|
||||||
user_agent = request.headers.get('User-Agent', '').lower()
|
user_agent = request.headers.get('User-Agent', '').lower()
|
||||||
if 'webkit' in user_agent and 'android' in user_agent:
|
if 'webkit' in user_agent and 'android' in user_agent:
|
||||||
preferences.key_value_settings['method'].value = 'GET'
|
preferences.key_value_settings['method'].value = 'GET'
|
||||||
|
@ -681,7 +675,9 @@ def search():
|
||||||
raw_text_query = None
|
raw_text_query = None
|
||||||
result_container = None
|
result_container = None
|
||||||
try:
|
try:
|
||||||
search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form)
|
search_query, raw_text_query, _, _, selected_locale = get_search_query_from_webapp(
|
||||||
|
request.preferences, request.form
|
||||||
|
)
|
||||||
# search = Search(search_query) # without plugins
|
# search = Search(search_query) # without plugins
|
||||||
search = SearchWithPlugins(search_query, request.user_plugins, request) # pylint: disable=redefined-outer-name
|
search = SearchWithPlugins(search_query, request.user_plugins, request) # pylint: disable=redefined-outer-name
|
||||||
|
|
||||||
|
@ -812,13 +808,6 @@ def search():
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if search_query.lang in ('auto', 'all'):
|
|
||||||
current_language = search_query.lang
|
|
||||||
else:
|
|
||||||
current_language = match_locale(
|
|
||||||
search_query.lang, settings['search']['languages'], fallback=request.preferences.get_value("language")
|
|
||||||
)
|
|
||||||
|
|
||||||
# search_query.lang contains the user choice (all, auto, en, ...)
|
# search_query.lang contains the user choice (all, auto, en, ...)
|
||||||
# when the user choice is "auto", search.search_query.lang contains the detected language
|
# when the user choice is "auto", search.search_query.lang contains the detected language
|
||||||
# otherwise it is equals to search_query.lang
|
# otherwise it is equals to search_query.lang
|
||||||
|
@ -841,7 +830,7 @@ def search():
|
||||||
result_container.unresponsive_engines
|
result_container.unresponsive_engines
|
||||||
),
|
),
|
||||||
current_locale = request.preferences.get_value("locale"),
|
current_locale = request.preferences.get_value("locale"),
|
||||||
current_language = current_language,
|
current_language = selected_locale,
|
||||||
search_language = match_locale(
|
search_language = match_locale(
|
||||||
search.search_query.lang,
|
search.search_query.lang,
|
||||||
settings['search']['languages'],
|
settings['search']['languages'],
|
||||||
|
|
Loading…
Reference in a new issue