mirror of
https://github.com/searxng/searxng.git
synced 2025-01-25 01:28:08 +00:00
make search language handling less strict
languages.py can change, so users may query a language that is no longer on the list, even if it is still recognized by a few engines. Also made the `no` and `nb` language codes equivalent because they seem to return the same results, though most engines will only support one or the other.
This commit is contained in:
parent
805fb02ed1
commit
fd65c12921
8 changed files with 17 additions and 15 deletions
File diff suppressed because one or more lines are too long
|
@ -94,6 +94,8 @@ def _fetch_supported_languages(resp):
|
||||||
options = dom.xpath('//div[@id="limit-languages"]//input')
|
options = dom.xpath('//div[@id="limit-languages"]//input')
|
||||||
for option in options:
|
for option in options:
|
||||||
code = option.xpath('./@id')[0].replace('_', '-')
|
code = option.xpath('./@id')[0].replace('_', '-')
|
||||||
|
if code == 'nb':
|
||||||
|
code = 'no'
|
||||||
supported_languages.append(code)
|
supported_languages.append(code)
|
||||||
|
|
||||||
return supported_languages
|
return supported_languages
|
||||||
|
|
|
@ -47,6 +47,8 @@ def request(query, params):
|
||||||
|
|
||||||
# add language tag if specified
|
# add language tag if specified
|
||||||
if params['language'] != 'all':
|
if params['language'] != 'all':
|
||||||
|
if params['language'] == 'no' or params['language'].startswith('no-'):
|
||||||
|
params['language'] = params['language'].replace('no', 'nb', 1)
|
||||||
if params['language'].find('-') < 0:
|
if params['language'].find('-') < 0:
|
||||||
# tries to get a country code from language
|
# tries to get a country code from language
|
||||||
for lang in supported_languages:
|
for lang in supported_languages:
|
||||||
|
@ -118,6 +120,8 @@ def _fetch_supported_languages(resp):
|
||||||
|
|
||||||
supported_languages = []
|
supported_languages = []
|
||||||
for lang in regions_json['languages'].values():
|
for lang in regions_json['languages'].values():
|
||||||
|
if lang['code'] == 'nb':
|
||||||
|
lang['code'] = 'no'
|
||||||
for country in lang['countries']:
|
for country in lang['countries']:
|
||||||
supported_languages.append(lang['code'] + '-' + country)
|
supported_languages.append(lang['code'] + '-' + country)
|
||||||
|
|
||||||
|
|
|
@ -120,6 +120,8 @@ def _fetch_supported_languages(resp):
|
||||||
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
|
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
|
||||||
for option in options:
|
for option in options:
|
||||||
code = option.xpath('./@data-val')[0]
|
code = option.xpath('./@data-val')[0]
|
||||||
|
if code.startswith('nb-'):
|
||||||
|
code = code.replace('nb', 'no', 1)
|
||||||
supported_languages.append(code)
|
supported_languages.append(code)
|
||||||
|
|
||||||
return supported_languages
|
return supported_languages
|
||||||
|
|
|
@ -57,6 +57,7 @@ language_codes = (
|
||||||
(u"nl", u"Nederlands", u"", u"Dutch"),
|
(u"nl", u"Nederlands", u"", u"Dutch"),
|
||||||
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
|
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
|
||||||
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
|
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
|
||||||
|
(u"no-NO", u"Norsk", u"", u"Norwegian"),
|
||||||
(u"pl-PL", u"Polski", u"", u"Polish"),
|
(u"pl-PL", u"Polski", u"", u"Polish"),
|
||||||
(u"pt", u"Português", u"", u"Portuguese"),
|
(u"pt", u"Português", u"", u"Portuguese"),
|
||||||
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
|
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
|
||||||
|
|
|
@ -107,6 +107,8 @@ class SearchLanguageSetting(EnumStringSetting):
|
||||||
pass
|
pass
|
||||||
elif lang in self.choices:
|
elif lang in self.choices:
|
||||||
data = lang
|
data = lang
|
||||||
|
elif data == 'nb-NO':
|
||||||
|
data = 'no-NO'
|
||||||
elif data == 'ar-XA':
|
elif data == 'ar-XA':
|
||||||
data = 'ar-SA'
|
data = 'ar-SA'
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -24,7 +24,7 @@ from searx.engines import (
|
||||||
import string
|
import string
|
||||||
import re
|
import re
|
||||||
|
|
||||||
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(\-[A-Z]{2})?$')
|
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
|
||||||
|
|
||||||
|
|
||||||
class RawTextQuery(object):
|
class RawTextQuery(object):
|
||||||
|
@ -68,7 +68,7 @@ class RawTextQuery(object):
|
||||||
|
|
||||||
# this force a language
|
# this force a language
|
||||||
if query_part[0] == ':':
|
if query_part[0] == ':':
|
||||||
lang = query_part[1:].lower()
|
lang = query_part[1:].lower().replace('_', '-')
|
||||||
|
|
||||||
# user may set a valid, yet not selectable language
|
# user may set a valid, yet not selectable language
|
||||||
if VALID_LANGUAGE_CODE.match(lang):
|
if VALID_LANGUAGE_CODE.match(lang):
|
||||||
|
@ -86,7 +86,7 @@ class RawTextQuery(object):
|
||||||
or lang_id.startswith(lang)\
|
or lang_id.startswith(lang)\
|
||||||
or lang == lang_name\
|
or lang == lang_name\
|
||||||
or lang == english_name\
|
or lang == english_name\
|
||||||
or lang.replace('_', ' ') == country:
|
or lang.replace('-', ' ') == country:
|
||||||
parse_next = True
|
parse_next = True
|
||||||
self.languages.append(lang_id)
|
self.languages.append(lang_id)
|
||||||
# to ensure best match (first match is not necessarily the best one)
|
# to ensure best match (first match is not necessarily the best one)
|
||||||
|
|
|
@ -27,20 +27,16 @@ from searx.engines import (
|
||||||
)
|
)
|
||||||
from searx.answerers import ask
|
from searx.answerers import ask
|
||||||
from searx.utils import gen_useragent
|
from searx.utils import gen_useragent
|
||||||
from searx.query import RawTextQuery, SearchQuery
|
from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
|
||||||
from searx.results import ResultContainer
|
from searx.results import ResultContainer
|
||||||
from searx import logger
|
from searx import logger
|
||||||
from searx.plugins import plugins
|
from searx.plugins import plugins
|
||||||
from searx.languages import language_codes
|
|
||||||
from searx.exceptions import SearxParameterException
|
from searx.exceptions import SearxParameterException
|
||||||
|
|
||||||
logger = logger.getChild('search')
|
logger = logger.getChild('search')
|
||||||
|
|
||||||
number_of_searches = 0
|
number_of_searches = 0
|
||||||
|
|
||||||
language_code_set = set(l[0].lower() for l in language_codes)
|
|
||||||
language_code_set.add('all')
|
|
||||||
|
|
||||||
|
|
||||||
def send_http_request(engine, request_params, start_time, timeout_limit):
|
def send_http_request(engine, request_params, start_time, timeout_limit):
|
||||||
# for page_load_time stats
|
# for page_load_time stats
|
||||||
|
@ -219,7 +215,7 @@ def get_search_query_from_webapp(preferences, form):
|
||||||
query_lang = preferences.get_value('language')
|
query_lang = preferences.get_value('language')
|
||||||
|
|
||||||
# check language
|
# check language
|
||||||
if query_lang.lower() not in language_code_set:
|
if not VALID_LANGUAGE_CODE.match(query_lang):
|
||||||
raise SearxParameterException('language', query_lang)
|
raise SearxParameterException('language', query_lang)
|
||||||
|
|
||||||
# get safesearch
|
# get safesearch
|
||||||
|
@ -371,11 +367,6 @@ class Search(object):
|
||||||
if search_query.pageno > 1 and not engine.paging:
|
if search_query.pageno > 1 and not engine.paging:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# if search-language is set and engine does not
|
|
||||||
# provide language-support, skip
|
|
||||||
if search_query.lang != 'all' and not engine.language_support:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# if time_range is not supported, skip
|
# if time_range is not supported, skip
|
||||||
if search_query.time_range and not engine.time_range_support:
|
if search_query.time_range and not engine.time_range_support:
|
||||||
continue
|
continue
|
||||||
|
|
Loading…
Reference in a new issue