From f4525880ed5f965ac4c241638933842a27a2acf7 Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 11:47:27 +0200 Subject: [PATCH 01/14] [enh] dictionary engine added --- searx/engines/dictionary.py | 70 +++++++++++++++++++++++++++++++++++++ searx/settings.yml | 4 +++ 2 files changed, 74 insertions(+) create mode 100644 searx/engines/dictionary.py diff --git a/searx/engines/dictionary.py b/searx/engines/dictionary.py new file mode 100644 index 000000000..1849322f5 --- /dev/null +++ b/searx/engines/dictionary.py @@ -0,0 +1,70 @@ +import re +from lxml import html +from searx.engines.xpath import extract_text +from searx.languages import language_codes + +categories = [] +url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' +weight = 100 + +parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.+)', re.I) +results_xpath = './/table[@id="r"]/tr' + + +def request(query, params): + m = parser_re.match(unicode(query, 'utf8')) + if not m: + return params + + from_lang, to_lang, query = m.groups() + + if len(from_lang) == 2: + lan = filter(lambda x: x[0][:2] == from_lang, language_codes) + if lan: + from_lang = lan[0][1].lower() + else: + return params + + if len(to_lang) == 2: + lan = filter(lambda x: x[0][:2] == to_lang, language_codes) + if lan: + to_lang = lan[0][1].lower() + else: + return params + + params['url'] = url.format(from_lang=from_lang, to_lang=to_lang,query=query) + params['from_lang'] = from_lang + params['to_lang'] = to_lang + params['query'] = query + + return params + +def response(resp): + results = [] + answers = [] + + dom = html.fromstring(resp.text) + + for result in dom.xpath(results_xpath)[1:]: + try: + from_result, to_results_raw = result.xpath('./td') + except: + continue + + to_results = [] + for to_result in to_results_raw.xpath('./p/a'): + t = to_result.text_content() + if t.strip(): + to_results.append(to_result.text_content()) + + results.append({ + 'answer': u'{0} - {1}'.format( + from_result.text_content(), + '; '.join(to_results) + ), + 'url': url + }) + + return results + + diff --git a/searx/settings.yml b/searx/settings.yml index 2c034a863..b998e0c0d 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -495,6 +495,10 @@ engines: timeout: 6.0 categories : science + - name : dictionary + engine : dictionary + shortcut : dc + #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images # engine : blekko_images From 3f4cc2146c81e12a890b8ea4c4ac5ad600f34618 Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 12:34:20 +0200 Subject: [PATCH 02/14] [enh] return results instead of answers --- searx/engines/dictionary.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/searx/engines/dictionary.py b/searx/engines/dictionary.py index 1849322f5..e3abaa1a4 100644 --- a/searx/engines/dictionary.py +++ b/searx/engines/dictionary.py @@ -3,7 +3,7 @@ from lxml import html from searx.engines.xpath import extract_text from searx.languages import language_codes -categories = [] +categories = ['general'] url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' weight = 100 @@ -41,7 +41,6 @@ def request(query, params): def response(resp): results = [] - answers = [] dom = html.fromstring(resp.text) @@ -58,11 +57,9 @@ def response(resp): to_results.append(to_result.text_content()) results.append({ - 'answer': u'{0} - {1}'.format( - from_result.text_content(), - '; '.join(to_results) - ), - 'url': url + 'url': resp.url, + 'title': from_result.text_content(), + 'content': '; '.join(to_results) }) return results From b808a2e26670e06d6f912f7d169a9c59ee7ac8ee Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 12:37:26 +0200 Subject: [PATCH 03/14] [fix] don't merge with suggestions --- searx/engines/dictionary.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/searx/engines/dictionary.py b/searx/engines/dictionary.py index e3abaa1a4..080f7b4a4 100644 --- a/searx/engines/dictionary.py +++ b/searx/engines/dictionary.py @@ -1,4 +1,5 @@ import re +from urlparse import urljoin from lxml import html from searx.engines.xpath import extract_text from searx.languages import language_codes @@ -44,7 +45,7 @@ def response(resp): dom = html.fromstring(resp.text) - for result in dom.xpath(results_xpath)[1:]: + for k, result in enumerate(dom.xpath(results_xpath)[1:]): try: from_result, to_results_raw = result.xpath('./td') except: @@ -57,7 +58,7 @@ def response(resp): to_results.append(to_result.text_content()) results.append({ - 'url': resp.url, + 'url': urljoin(resp.url, '?%d' % k), 'title': from_result.text_content(), 'content': '; '.join(to_results) }) From 84ff6e289ea608207755b01bc648575a87ea55ba Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 12:46:18 +0200 Subject: [PATCH 04/14] [enh] filter non-existing language code/name containing requests --- searx/engines/dictionary.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/searx/engines/dictionary.py b/searx/engines/dictionary.py index 080f7b4a4..b255463aa 100644 --- a/searx/engines/dictionary.py +++ b/searx/engines/dictionary.py @@ -25,6 +25,9 @@ def request(query, params): from_lang = lan[0][1].lower() else: return params + elif from_lang.lower() not in [x[1].lower() for x in language_codes]: + return params + if len(to_lang) == 2: lan = filter(lambda x: x[0][:2] == to_lang, language_codes) @@ -32,6 +35,8 @@ def request(query, params): to_lang = lan[0][1].lower() else: return params + elif to_lang.lower() not in [x[1].lower() for x in language_codes]: + return params params['url'] = url.format(from_lang=from_lang, to_lang=to_lang,query=query) params['from_lang'] = from_lang From 5416f0f248e1c8072c69b4a272af07bd4c0d8e5e Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 12:50:56 +0200 Subject: [PATCH 05/14] [enh] dictionary engine renamed to dictzone --- searx/engines/{dictionary.py => dictzone.py} | 0 searx/settings.yml | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename searx/engines/{dictionary.py => dictzone.py} (100%) diff --git a/searx/engines/dictionary.py b/searx/engines/dictzone.py similarity index 100% rename from searx/engines/dictionary.py rename to searx/engines/dictzone.py diff --git a/searx/settings.yml b/searx/settings.yml index 8c552eb02..ce768ce81 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -495,8 +495,8 @@ engines: timeout: 6.0 categories : science - - name : dictionary - engine : dictionary + - name : dictzone + engine : dictzone shortcut : dc #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ From bc806bfab1cc75279dc912bf443dc39178a872dd Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 14:12:46 +0200 Subject: [PATCH 06/14] [fix] no lambda anymore, cgi.escape --- searx/engines/dictzone.py | 45 ++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index b255463aa..f68f44887 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -1,6 +1,7 @@ import re from urlparse import urljoin from lxml import html +from cgi import escape from searx.engines.xpath import extract_text from searx.languages import language_codes @@ -12,6 +13,19 @@ parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.+)', re.I) results_xpath = './/table[@id="r"]/tr' +def is_valid_lang(lang): + is_abbr = (len(lang) == 2) + if is_abbr: + for l in language_codes: + if l[0][:2] == lang.lower(): + return (True, l[1].lower()) + return False + else: + for l in language_codes: + if l[1].lower() == lang.lower(): + return (True, l[1].lower()) + return False + def request(query, params): m = parser_re.match(unicode(query, 'utf8')) if not m: @@ -19,28 +33,15 @@ def request(query, params): from_lang, to_lang, query = m.groups() - if len(from_lang) == 2: - lan = filter(lambda x: x[0][:2] == from_lang, language_codes) - if lan: - from_lang = lan[0][1].lower() - else: - return params - elif from_lang.lower() not in [x[1].lower() for x in language_codes]: + from_lang = is_valid_lang(from_lang) + to_lang = is_valid_lang(to_lang) + + if not from_lang or not to_lang: return params - - if len(to_lang) == 2: - lan = filter(lambda x: x[0][:2] == to_lang, language_codes) - if lan: - to_lang = lan[0][1].lower() - else: - return params - elif to_lang.lower() not in [x[1].lower() for x in language_codes]: - return params - - params['url'] = url.format(from_lang=from_lang, to_lang=to_lang,query=query) - params['from_lang'] = from_lang - params['to_lang'] = to_lang + params['url'] = url.format(from_lang=from_lang[1], to_lang=to_lang[1],query=query) + params['from_lang'] = from_lang[1] + params['to_lang'] = to_lang[1] params['query'] = query return params @@ -64,8 +65,8 @@ def response(resp): results.append({ 'url': urljoin(resp.url, '?%d' % k), - 'title': from_result.text_content(), - 'content': '; '.join(to_results) + 'title': escape(from_result.text_content()), + 'content': escape('; '.join(to_results)) }) return results From 7bf1013c1591c1af177063477fb4ac9ed178ff2a Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 14:24:08 +0200 Subject: [PATCH 07/14] [enh] removed missing params; [fix] pep8 --- searx/engines/dictzone.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index f68f44887..212218343 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -26,6 +26,7 @@ def is_valid_lang(lang): return (True, l[1].lower()) return False + def request(query, params): m = parser_re.match(unicode(query, 'utf8')) if not m: @@ -39,13 +40,13 @@ def request(query, params): if not from_lang or not to_lang: return params - params['url'] = url.format(from_lang=from_lang[1], to_lang=to_lang[1],query=query) - params['from_lang'] = from_lang[1] - params['to_lang'] = to_lang[1] - params['query'] = query + params['url'] = url.format(from_lang=from_lang[1], + to_lang=to_lang[1], + query=query) return params + def response(resp): results = [] @@ -70,5 +71,3 @@ def response(resp): }) return results - - From 22bd39fd42e469339ff1ccac9f8c16cc00f52211 Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 15:07:47 +0200 Subject: [PATCH 08/14] [fix] only 1-word search triggers the engine --- searx/engines/dictzone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 212218343..b58d7ec62 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -9,7 +9,7 @@ categories = ['general'] url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' weight = 100 -parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.+)', re.I) +parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) results_xpath = './/table[@id="r"]/tr' From ab471fd13b3891a5a924e8c2cd18a1079e7ac8e0 Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 15:40:07 +0200 Subject: [PATCH 09/14] [enh] mymemory translated engine added for multi-word translations --- searx/engines/translated.py | 63 +++++++++++++++++++++++++++++++++++++ searx/settings.yml | 8 +++++ 2 files changed, 71 insertions(+) create mode 100644 searx/engines/translated.py diff --git a/searx/engines/translated.py b/searx/engines/translated.py new file mode 100644 index 000000000..9f194b76b --- /dev/null +++ b/searx/engines/translated.py @@ -0,0 +1,63 @@ +import re +from urlparse import urljoin +from lxml import html +from cgi import escape +from searx.engines.xpath import extract_text +from searx.languages import language_codes + +categories = ['general'] +url = 'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}' +web_url = 'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' +weight = 100 + +parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) + +def is_valid_lang(lang): + is_abbr = (len(lang) == 2) + if is_abbr: + for l in language_codes: + if l[0][:2] == lang.lower(): + return (True, l[0][:2], l[1].lower()) + return False + else: + for l in language_codes: + if l[1].lower() == lang.lower(): + return (True, l[0][:2], l[1].lower()) + return False + + +def request(query, params): + m = parser_re.match(unicode(query, 'utf8')) + if not m: + return params + + from_lang, to_lang, query = m.groups() + + from_lang = is_valid_lang(from_lang) + to_lang = is_valid_lang(to_lang) + + if not from_lang or not to_lang: + return params + + params['url'] = url.format(from_lang=from_lang[1], + to_lang=to_lang[1], + query=query) + params['query'] = query + params['from_lang'] = from_lang + params['to_lang'] = to_lang + + return params + + +def response(resp): + results = [] + results.append({ + 'url': escape(web_url.format(from_lang=resp.search_params['from_lang'][2], + to_lang=resp.search_params['to_lang'][2], + query=resp.search_params['query'])), + 'title': escape('[{0}-{1}] {2}'.format(resp.search_params['from_lang'][1], + resp.search_params['to_lang'][1], + resp.search_params['query'])), + 'content': escape(resp.json()['responseData']['translatedText']) + }) + return results diff --git a/searx/settings.yml b/searx/settings.yml index ce768ce81..ba5824bb1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -499,6 +499,14 @@ engines: engine : dictzone shortcut : dc + - name : mymemory translated + engine : translated + shortcut : tl + timeout : 5.0 + # You can use without an API key, but you are limited to 1000 words/day + # See : http://mymemory.translated.net/doc/usagelimits.php + # api_key : '' + #The blekko technology and team have joined IBM Watson! -> https://blekko.com/ # - name : blekko images # engine : blekko_images From c051e6a2c3e97419983d552594a6a8340339c1d5 Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 15:44:05 +0200 Subject: [PATCH 10/14] [fix] pep8 --- searx/engines/translated.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 9f194b76b..2f535140c 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -6,12 +6,14 @@ from searx.engines.xpath import extract_text from searx.languages import language_codes categories = ['general'] -url = 'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}' +url = 'http://api.mymemory.translated.net/get?q={query}' \ + '&langpair={from_lang}|{to_lang}' web_url = 'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' weight = 100 parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) + def is_valid_lang(lang): is_abbr = (len(lang) == 2) if is_abbr: @@ -52,12 +54,14 @@ def request(query, params): def response(resp): results = [] results.append({ - 'url': escape(web_url.format(from_lang=resp.search_params['from_lang'][2], - to_lang=resp.search_params['to_lang'][2], - query=resp.search_params['query'])), - 'title': escape('[{0}-{1}] {2}'.format(resp.search_params['from_lang'][1], - resp.search_params['to_lang'][1], - resp.search_params['query'])), + 'url': escape(web_url.format( + from_lang=resp.search_params['from_lang'][2], + to_lang=resp.search_params['to_lang'][2], + query=resp.search_params['query'])), + 'title': escape('[{0}-{1}] {2}'.format( + resp.search_params['from_lang'][1], + resp.search_params['to_lang'][1], + resp.search_params['query'])), 'content': escape(resp.json()['responseData']['translatedText']) }) return results From 8c72a22757290754fc15fecb82dd157f6ea56a7f Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 16:12:34 +0200 Subject: [PATCH 11/14] [enh] api_key usage, disable the engine by default --- searx/engines/translated.py | 10 ++++++++-- searx/settings.yml | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 2f535140c..3be9d4adf 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -7,11 +7,12 @@ from searx.languages import language_codes categories = ['general'] url = 'http://api.mymemory.translated.net/get?q={query}' \ - '&langpair={from_lang}|{to_lang}' + '&langpair={from_lang}|{to_lang}{key}' web_url = 'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' weight = 100 parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) +api_key = '' def is_valid_lang(lang): @@ -41,9 +42,14 @@ def request(query, params): if not from_lang or not to_lang: return params + if api_key: + key_form = '&key=' + api_key + else: + key_form = '' params['url'] = url.format(from_lang=from_lang[1], to_lang=to_lang[1], - query=query) + query=query, + key=key_form) params['query'] = query params['from_lang'] = from_lang params['to_lang'] = to_lang diff --git a/searx/settings.yml b/searx/settings.yml index ba5824bb1..63a4b0acf 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -7,7 +7,7 @@ search: autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default server: - port : 8888 + port : 8889 bind_address : "127.0.0.1" # address to listen on secret_key : "ultrasecretkey" # change this! base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/" @@ -503,6 +503,7 @@ engines: engine : translated shortcut : tl timeout : 5.0 + disabled : True # You can use without an API key, but you are limited to 1000 words/day # See : http://mymemory.translated.net/doc/usagelimits.php # api_key : '' From 5ed9846bbf9ac990b35b4860d5e63bd689f41de1 Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 16:35:12 +0200 Subject: [PATCH 12/14] [fix] revert the port change --- searx/settings.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/settings.yml b/searx/settings.yml index 63a4b0acf..4969c4ac6 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -7,7 +7,7 @@ search: autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default server: - port : 8889 + port : 8888 bind_address : "127.0.0.1" # address to listen on secret_key : "ultrasecretkey" # change this! base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/" From b7d578ae8041658fe6f088eb337f42238c25e2f5 Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 16:36:04 +0200 Subject: [PATCH 13/14] [enh] engine header comments --- searx/engines/dictzone.py | 11 +++++++++++ searx/engines/translated.py | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index b58d7ec62..2c2ec3abc 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -1,3 +1,14 @@ +""" + Dictzone + + @website https://dictzone.com/ + @provide-api no + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content +""" + import re from urlparse import urljoin from lxml import html diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 3be9d4adf..1b75e4f4e 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -1,3 +1,13 @@ +""" + MyMemory Translated + + @website https://mymemory.translated.net/ + @provide-api yes (https://mymemory.translated.net/doc/spec.php) + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" import re from urlparse import urljoin from lxml import html From 983415bc38937a637e9b2aae191f2e087765800b Mon Sep 17 00:00:00 2001 From: potato Date: Tue, 6 Sep 2016 16:43:48 +0200 Subject: [PATCH 14/14] [enh] is_valid_lang moved to utils --- searx/engines/dictzone.py | 20 +++----------------- searx/engines/translated.py | 16 +--------------- searx/utils.py | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 32 deletions(-) diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 2c2ec3abc..5de6c5b98 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -14,7 +14,7 @@ from urlparse import urljoin from lxml import html from cgi import escape from searx.engines.xpath import extract_text -from searx.languages import language_codes +from searx.utils import is_valid_lang categories = ['general'] url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' @@ -24,20 +24,6 @@ parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) results_xpath = './/table[@id="r"]/tr' -def is_valid_lang(lang): - is_abbr = (len(lang) == 2) - if is_abbr: - for l in language_codes: - if l[0][:2] == lang.lower(): - return (True, l[1].lower()) - return False - else: - for l in language_codes: - if l[1].lower() == lang.lower(): - return (True, l[1].lower()) - return False - - def request(query, params): m = parser_re.match(unicode(query, 'utf8')) if not m: @@ -51,8 +37,8 @@ def request(query, params): if not from_lang or not to_lang: return params - params['url'] = url.format(from_lang=from_lang[1], - to_lang=to_lang[1], + params['url'] = url.format(from_lang=from_lang[2], + to_lang=to_lang[2], query=query) return params diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 1b75e4f4e..3a077ae8e 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -13,7 +13,7 @@ from urlparse import urljoin from lxml import html from cgi import escape from searx.engines.xpath import extract_text -from searx.languages import language_codes +from searx.utils import is_valid_lang categories = ['general'] url = 'http://api.mymemory.translated.net/get?q={query}' \ @@ -25,20 +25,6 @@ parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) api_key = '' -def is_valid_lang(lang): - is_abbr = (len(lang) == 2) - if is_abbr: - for l in language_codes: - if l[0][:2] == lang.lower(): - return (True, l[0][:2], l[1].lower()) - return False - else: - for l in language_codes: - if l[1].lower() == lang.lower(): - return (True, l[0][:2], l[1].lower()) - return False - - def request(query, params): m = parser_re.match(unicode(query, 'utf8')) if not m: diff --git a/searx/utils.py b/searx/utils.py index 744142e36..b3806d3fd 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -9,6 +9,7 @@ from HTMLParser import HTMLParser from random import choice from searx.version import VERSION_STRING +from searx.languages import language_codes from searx import settings from searx import logger @@ -255,3 +256,17 @@ def get_torrent_size(filesize, filesize_multiplier): filesize = None return filesize + + +def is_valid_lang(lang): + is_abbr = (len(lang) == 2) + if is_abbr: + for l in language_codes: + if l[0][:2] == lang.lower(): + return (True, l[0][:2], l[1].lower()) + return False + else: + for l in language_codes: + if l[1].lower() == lang.lower(): + return (True, l[0][:2], l[1].lower()) + return False