make search language handling less strict

languages.py can change over time, so a user may query with a language that
is no longer on the list, even though a few engines still recognize it.

Also made `no` and `nb` equivalent, because they seem to return the same
results, though most engines will only support one or the other.
This commit is contained in:
marc 2017-03-01 17:11:51 -06:00 committed by Adam Tauber
parent 805fb02ed1
commit fd65c12921
8 changed files with 17 additions and 15 deletions

File diff suppressed because one or more lines are too long

View file

@ -94,6 +94,8 @@ def _fetch_supported_languages(resp):
options = dom.xpath('//div[@id="limit-languages"]//input') options = dom.xpath('//div[@id="limit-languages"]//input')
for option in options: for option in options:
code = option.xpath('./@id')[0].replace('_', '-') code = option.xpath('./@id')[0].replace('_', '-')
if code == 'nb':
code = 'no'
supported_languages.append(code) supported_languages.append(code)
return supported_languages return supported_languages

View file

@ -47,6 +47,8 @@ def request(query, params):
# add language tag if specified # add language tag if specified
if params['language'] != 'all': if params['language'] != 'all':
if params['language'] == 'no' or params['language'].startswith('no-'):
params['language'] = params['language'].replace('no', 'nb', 1)
if params['language'].find('-') < 0: if params['language'].find('-') < 0:
# tries to get a country code from language # tries to get a country code from language
for lang in supported_languages: for lang in supported_languages:
@ -118,6 +120,8 @@ def _fetch_supported_languages(resp):
supported_languages = [] supported_languages = []
for lang in regions_json['languages'].values(): for lang in regions_json['languages'].values():
if lang['code'] == 'nb':
lang['code'] = 'no'
for country in lang['countries']: for country in lang['countries']:
supported_languages.append(lang['code'] + '-' + country) supported_languages.append(lang['code'] + '-' + country)

View file

@ -120,6 +120,8 @@ def _fetch_supported_languages(resp):
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a') options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
for option in options: for option in options:
code = option.xpath('./@data-val')[0] code = option.xpath('./@data-val')[0]
if code.startswith('nb-'):
code = code.replace('nb', 'no', 1)
supported_languages.append(code) supported_languages.append(code)
return supported_languages return supported_languages

View file

@ -57,6 +57,7 @@ language_codes = (
(u"nl", u"Nederlands", u"", u"Dutch"), (u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"), (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"no-NO", u"Norsk", u"", u"Norwegian"),
(u"pl-PL", u"Polski", u"", u"Polish"), (u"pl-PL", u"Polski", u"", u"Polish"),
(u"pt", u"Português", u"", u"Portuguese"), (u"pt", u"Português", u"", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"), (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),

View file

@ -107,6 +107,8 @@ class SearchLanguageSetting(EnumStringSetting):
pass pass
elif lang in self.choices: elif lang in self.choices:
data = lang data = lang
elif data == 'nb-NO':
data = 'no-NO'
elif data == 'ar-XA': elif data == 'ar-XA':
data = 'ar-SA' data = 'ar-SA'
else: else:

View file

@ -24,7 +24,7 @@ from searx.engines import (
import string import string
import re import re
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(\-[A-Z]{2})?$') VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
class RawTextQuery(object): class RawTextQuery(object):
@ -68,7 +68,7 @@ class RawTextQuery(object):
# this force a language # this force a language
if query_part[0] == ':': if query_part[0] == ':':
lang = query_part[1:].lower() lang = query_part[1:].lower().replace('_', '-')
# user may set a valid, yet not selectable language # user may set a valid, yet not selectable language
if VALID_LANGUAGE_CODE.match(lang): if VALID_LANGUAGE_CODE.match(lang):
@ -86,7 +86,7 @@ class RawTextQuery(object):
or lang_id.startswith(lang)\ or lang_id.startswith(lang)\
or lang == lang_name\ or lang == lang_name\
or lang == english_name\ or lang == english_name\
or lang.replace('_', ' ') == country: or lang.replace('-', ' ') == country:
parse_next = True parse_next = True
self.languages.append(lang_id) self.languages.append(lang_id)
# to ensure best match (first match is not necessarily the best one) # to ensure best match (first match is not necessarily the best one)

View file

@ -27,20 +27,16 @@ from searx.engines import (
) )
from searx.answerers import ask from searx.answerers import ask
from searx.utils import gen_useragent from searx.utils import gen_useragent
from searx.query import RawTextQuery, SearchQuery from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
from searx.results import ResultContainer from searx.results import ResultContainer
from searx import logger from searx import logger
from searx.plugins import plugins from searx.plugins import plugins
from searx.languages import language_codes
from searx.exceptions import SearxParameterException from searx.exceptions import SearxParameterException
logger = logger.getChild('search') logger = logger.getChild('search')
number_of_searches = 0 number_of_searches = 0
language_code_set = set(l[0].lower() for l in language_codes)
language_code_set.add('all')
def send_http_request(engine, request_params, start_time, timeout_limit): def send_http_request(engine, request_params, start_time, timeout_limit):
# for page_load_time stats # for page_load_time stats
@ -219,7 +215,7 @@ def get_search_query_from_webapp(preferences, form):
query_lang = preferences.get_value('language') query_lang = preferences.get_value('language')
# check language # check language
if query_lang.lower() not in language_code_set: if not VALID_LANGUAGE_CODE.match(query_lang):
raise SearxParameterException('language', query_lang) raise SearxParameterException('language', query_lang)
# get safesearch # get safesearch
@ -371,11 +367,6 @@ class Search(object):
if search_query.pageno > 1 and not engine.paging: if search_query.pageno > 1 and not engine.paging:
continue continue
# if search-language is set and engine does not
# provide language-support, skip
if search_query.lang != 'all' and not engine.language_support:
continue
# if time_range is not supported, skip # if time_range is not supported, skip
if search_query.time_range and not engine.time_range_support: if search_query.time_range and not engine.time_range_support:
continue continue