[enh] add supported_languages on engines and auto-generate languages.py

This commit is contained in:
marc 2016-08-05 23:34:56 -05:00
parent e58949b76f
commit 149802c569
34 changed files with 666 additions and 128 deletions

View file

@ -81,17 +81,17 @@ def searx_bang(full_query):
engine_query = full_query.getSearchQuery()[1:] engine_query = full_query.getSearchQuery()[1:]
for lc in language_codes: for lc in language_codes:
lang_id, lang_name, country = map(str.lower, lc) lang_id, lang_name, country, english_name = map(str.lower, lc)
# check if query starts with language-id # check if query starts with language-id
if lang_id.startswith(engine_query): if lang_id.startswith(engine_query):
if len(engine_query) <= 2: if len(engine_query) <= 2:
results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0])) results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
else: else:
results.append(':{lang_id}'.format(lang_id=lang_id)) results.append(':{lang_id}'.format(lang_id=lang_id))
# check if query starts with language name # check if query starts with language name
if lang_name.startswith(engine_query): if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
results.append(':{lang_name}'.format(lang_name=lang_name)) results.append(':{lang_name}'.format(lang_name=lang_name))
# check if query starts with country # check if query starts with country

View file

@ -38,6 +38,7 @@ engine_shortcuts = {}
engine_default_args = {'paging': False, engine_default_args = {'paging': False,
'categories': ['general'], 'categories': ['general'],
'language_support': True, 'language_support': True,
'supported_languages': [],
'safesearch': False, 'safesearch': False,
'timeout': settings['outgoing']['request_timeout'], 'timeout': settings['outgoing']['request_timeout'],
'shortcut': '-', 'shortcut': '-',

View file

@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on # cut 'en' from 'en_US', 'de' from 'de_CH', and so on
def locale_to_lang_code(locale): def locale_to_lang_code(locale):
if locale.find('_') >= 0: if locale.find('-') >= 0:
locale = locale.split('_')[0] locale = locale.split('-')[0]
return locale return locale
@ -95,6 +95,7 @@ main_langs = {
'uk': 'Українська', 'uk': 'Українська',
'zh': '简体中文' 'zh': '简体中文'
} }
supported_languages = dict(lang_urls, **main_langs)
# do search-request # do search-request

View file

@ -32,7 +32,7 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1 offset = (params['pageno'] - 1) * 10 + 1
if params['language'] != 'all': if params['language'] != 'all':
query = u'language:{} {}'.format(params['language'].split('_')[0].upper(), query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
query.decode('utf-8')).encode('utf-8') query.decode('utf-8')).encode('utf-8')
search_path = search_string.format( search_path = search_string.format(

View file

@ -19,6 +19,7 @@ from urllib import urlencode
from lxml import html from lxml import html
from json import loads from json import loads
import re import re
from searx.engines.bing import supported_languages
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']
@ -53,7 +54,7 @@ def request(query, params):
if params['language'] == 'all': if params['language'] == 'all':
language = 'en-US' language = 'en-US'
else: else:
language = params['language'].replace('_', '-') language = params['language']
search_path = search_string.format( search_path = search_string.format(
query=urlencode({'q': query}), query=urlencode({'q': query}),

View file

@ -17,6 +17,7 @@ from datetime import datetime
from dateutil import parser from dateutil import parser
from lxml import etree from lxml import etree
from searx.utils import list_get from searx.utils import list_get
from searx.engines.bing import supported_languages
# engine dependent config # engine dependent config
categories = ['news'] categories = ['news']
@ -74,7 +75,7 @@ def request(query, params):
if params['language'] == 'all': if params['language'] == 'all':
language = 'en-US' language = 'en-US'
else: else:
language = params['language'].replace('_', '-') language = params['language']
params['url'] = _get_url(query, language, offset, params['time_range']) params['url'] = _get_url(query, language, offset, params['time_range'])

View file

@ -22,6 +22,13 @@ from searx.languages import language_codes
categories = ['general'] categories = ['general']
paging = True paging = True
language_support = True language_support = True
supported_languages = ["es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA", "ca-CT",
"es-CL", "zh-CN", "es-CO", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE",
"el-GR", "tzh-HK", "hu-HU", "en-IN", "id-ID", "en-ID", "en-IE", "he-IL", "it-IT", "jp-JP",
"kr-KR", "es-XL", "lv-LV", "lt-LT", "ms-MY", "en-MY", "es-MX", "nl-NL", "en-NZ", "no-NO",
"es-PE", "en-PH", "tl-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU", "ar-XA", "en-XA", "en-SG",
"sk-SK", "sl-SL", "en-ZA", "es-ES", "ca-ES", "sv-SE", "de-CH", "fr-CH", "it-CH", "tzh-TW",
"th-TH", "tr-TR", "uk-UA", "en-UK", "en-US", "es-US", "vi-VN"]
time_range_support = True time_range_support = True
# search-url # search-url
@ -46,10 +53,23 @@ def request(query, params):
offset = (params['pageno'] - 1) * 30 offset = (params['pageno'] - 1) * 30
# custom fixes for languages
if params['language'] == 'all': if params['language'] == 'all':
locale = None locale = None
elif params['language'][:2] == 'ja':
locale = 'jp-jp'
elif params['language'] == 'zh-TW':
locale = 'tw-tzh'
elif params['language'] == 'zh-HK':
locale = 'hk-tzh'
elif params['language'][-2:] == 'SA':
locale = 'xa' + params['language'].split('-')[0]
elif params['language'][-2:] == 'GB':
locale = 'uk' + params['language'].split('-')[0]
elif params['language'] == 'es-419':
locale = 'xl-es'
else: else:
locale = params['language'].split('_') locale = params['language'].split('-')
if len(locale) == 2: if len(locale) == 2:
# country code goes first # country code goes first
locale = locale[1].lower() + '-' + locale[0].lower() locale = locale[1].lower() + '-' + locale[0].lower()
@ -58,7 +78,25 @@ def request(query, params):
locale = locale[0].lower() locale = locale[0].lower()
lang_codes = [x[0] for x in language_codes] lang_codes = [x[0] for x in language_codes]
for lc in lang_codes: for lc in lang_codes:
lc = lc.split('_') lc = lc.split('-')
if locale == lc[0] and len(lc) == 2:
locale = lc[1].lower() + '-' + lc[0].lower()
break
if locale:
params['url'] = url.format(
query=urlencode({'q': query, 'kl': locale}), offset=offset)
else:
locale = params['language'].split('-')
if len(locale) == 2:
# country code goes first
locale = locale[1].lower() + '-' + locale[0].lower()
else:
# tries to get a country code from language
locale = locale[0].lower()
lang_codes = [x[0] for x in language_codes]
for lc in lang_codes:
lc = lc.split('-')
if locale == lc[0]: if locale == lc[0]:
locale = lc[1].lower() + '-' + lc[0].lower() locale = lc[1].lower() + '-' + lc[0].lower()
break break

View file

@ -4,6 +4,7 @@ from re import compile, sub
from lxml import html from lxml import html
from searx.utils import html_to_text from searx.utils import html_to_text
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import supported_languages
url = 'https://api.duckduckgo.com/'\ url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
def request(query, params): def request(query, params):
params['url'] = url.format(query=urlencode({'q': query})) params['url'] = url.format(query=urlencode({'q': query}))
params['headers']['Accept-Language'] = params['language'] params['headers']['Accept-Language'] = params['language'].split('-')[0]
return params return params

View file

@ -48,7 +48,7 @@ def request(query, params):
if params['language'] == 'all': if params['language'] == 'all':
language = 'xx' language = 'xx'
else: else:
language = params['language'][0:2] language = params['language'].split('-')[0]
if params['safesearch'] >= 1: if params['safesearch'] >= 1:
safesearch = 1 safesearch = 1

View file

@ -23,6 +23,20 @@ categories = ['general']
paging = True paging = True
language_support = True language_support = True
use_locale_domain = True use_locale_domain = True
supported_languages = ['de', 'en', 'es', 'es_419', 'fr', 'hr', 'it', 'nl', 'pl', 'pt-BR',
'pt-PT', 'vi', 'tr', 'ru', 'ar', 'th', 'ko', 'zh-CN', 'zh-TW', 'ja',
'ach', 'af', 'ak', 'az', 'ms', 'ban', 'xx_bork', 'bs', 'br', 'ca',
'ceb', 'ckb', 'cs', 'sn', 'co', 'cy', 'da', 'yo', 'et', 'xx_elmer',
'eo', 'eu', 'ee', 'tl', 'fo', 'gaa', 'ga', 'gd', 'gl', 'gn', 'xx_hacker',
'ht', 'ha', 'haw', 'bem', 'ig', 'rn', 'id', 'ia', 'zu', 'is', 'jw', 'rw',
'sw', 'tlh', 'kg', 'mfe', 'kri', 'la', 'lv', 'to', 'lt', 'ln', 'loz',
'lua', 'lg', 'hu', 'mg', 'mt', 'mi', 'pcm', 'no', 'nso', 'ny', 'nn',
'uz', 'oc', 'om', 'xx_pirate', 'pt', 'ro', 'mo', 'rm', 'qu', 'nyn', 'crs',
'sq', 'sd', 'sk', 'sl', 'so', 'st', 'sr_ME', 'sr_Latn', 'su', 'fi', 'sv',
'tg', 'tt', 'tn', 'tum', 'tk', 'tw', 'fy', 'wo', 'xh', 'el', 'be', 'bg',
'ky', 'kk', 'mk', 'mn', 'sr', 'uk', 'ka', 'hy', 'yi', 'iw', 'ug', 'ur',
'ps', 'fa', 'ti', 'am', 'ne', 'mr', 'hi', 'bn', 'pa', 'gu', 'or', 'ta',
'te', 'kn', 'ml', 'si', 'lo', 'my', 'km', 'chr']
time_range_support = True time_range_support = True
# based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests

View file

@ -12,6 +12,8 @@
from lxml import html from lxml import html
from urllib import urlencode from urllib import urlencode
from json import loads
from searx.engines.google import supported_languages
# search-url # search-url
categories = ['news'] categories = ['news']
@ -50,7 +52,7 @@ def request(query, params):
search_options=urlencode(search_options)) search_options=urlencode(search_options))
if params['language'] != 'all': if params['language'] != 'all':
language_array = params['language'].lower().split('_') language_array = params['language'].lower().split('-')
params['url'] += '&lr=lang_' + language_array[0] params['url'] += '&lr=lang_' + language_array[0]
return params return params

View file

@ -15,6 +15,7 @@
from json import loads from json import loads
from string import Formatter from string import Formatter
from urllib import urlencode, quote from urllib import urlencode, quote
from searx.engines.wikipedia import supported_engines
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']
@ -46,7 +47,7 @@ def request(query, params):
if params['language'] == 'all': if params['language'] == 'all':
language = 'en' language = 'en'
else: else:
language = params['language'].split('_')[0] language = params['language'].split('-')[0]
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)] # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
if any(x[1] == 'language' for x in format_strings): if any(x[1] == 'language' for x in format_strings):

View file

@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}' result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# list of supported languages # list of supported languages
allowed_languages = ['de', 'en', 'fr', 'it'] supported_languages = ['de', 'en', 'fr', 'it']
# do search-request # do search-request
@ -37,7 +37,7 @@ def request(query, params):
if params['language'] != 'all': if params['language'] != 'all':
language = params['language'].split('_')[0] language = params['language'].split('_')[0]
if language in allowed_languages: if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent # using searx User-Agent

View file

@ -47,7 +47,7 @@ def request(query, params):
# set language if specified # set language if specified
if params['language'] != 'all': if params['language'] != 'all':
params['data']['with_language'] = ('lang_' + params['language'].split('_')[0]) params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
return params return params

View file

@ -43,8 +43,13 @@ def response(resp):
search_lang = "" search_lang = ""
if resp.search_params['language'] != 'all': # dirty fix for languages named differently on their site
search_lang = [lc[1] if resp.search_params['language'][:2] == 'fa':
search_lang = 'Farsi'
elif resp.search_params['language'] == 'pt_BR':
search_lang = 'Brazilian'
elif resp.search_params['language'] != 'all':
search_lang = [lc[3]
for lc in language_codes for lc in language_codes
if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]

View file

@ -36,8 +36,8 @@ def request(query, params):
ui_language = 'browser' ui_language = 'browser'
region = 'browser' region = 'browser'
else: else:
region = params['language'].replace('_', '-') region = params['language']
ui_language = params['language'].split('_')[0] ui_language = params['language'].split('-')[0]
search_path = search_string.format( search_path = search_string.format(
query=urlencode({'query': query, query=urlencode({'query': query,

View file

@ -40,7 +40,7 @@ def request(query, params):
# set language if specified # set language if specified
if params['language'] != 'all': if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('_')[0] params['cookies']['lang'] = params['language'].split('-')[0]
else: else:
params['cookies']['lang'] = 'en' params['cookies']['lang'] = 'en'

View file

@ -14,6 +14,8 @@
from searx import logger from searx import logger
from searx.poolrequests import get from searx.poolrequests import get
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale
from searx.engines.wikipedia import supported_languages
from json import loads from json import loads
from lxml.html import fromstring from lxml.html import fromstring

View file

@ -13,6 +13,36 @@
from json import loads from json import loads
from urllib import urlencode, quote from urllib import urlencode, quote
supported_languages = ["en", "sv", "ceb", "de", "nl", "fr", "ru", "it", "es", "war",
"pl", "vi", "ja", "pt", "zh", "uk", "ca", "fa", "no", "sh",
"ar", "fi", "hu", "id", "ro", "cs", "ko", "sr", "ms", "tr",
"eu", "eo", "min", "bg", "da", "kk", "sk", "hy", "he", "zh-min-nan",
"lt", "hr", "sl", "et", "ce", "gl", "nn", "uz", "la", "vo",
"el", "simple", "be", "az", "th", "ur", "ka", "hi", "oc", "ta",
"mk", "mg", "new", "lv", "cy", "bs", "tt", "tl", "te", "pms",
"be-tarask", "br", "sq", "ky", "ht", "jv", "tg", "ast", "zh-yue", "lb",
"mr", "ml", "bn", "pnb", "is", "af", "sco", "ga", "ba", "fy",
"cv", "lmo", "sw", "my", "an", "yo", "ne", "io", "gu", "nds",
"scn", "bpy", "pa", "ku", "als", "kn", "bar", "ia", "qu", "su",
"ckb", "bat-smg", "mn", "arz", "nap", "wa", "bug", "gd", "yi", "map-bms",
"am", "mzn", "fo", "si", "nah", "li", "sah", "vec", "hsb", "or",
"os", "mrj", "sa", "hif", "mhr", "roa-tara", "azb", "pam", "ilo",
"sd", "ps", "se", "mi", "bh", "eml", "bcl", "xmf", "diq", "hak",
"gan", "glk", "vls", "nds-nl", "rue", "bo", "fiu-vro", "co", "sc",
"tk", "csb", "lrc", "vep", "wuu", "km", "szl", "gv", "crh", "kv",
"zh-classical", "frr", "zea", "as", "so", "kw", "nso", "ay", "stq",
"udm", "cdo", "nrm", "ie", "koi", "rm", "pcd", "myv", "mt", "fur",
"ace", "lad", "gn", "lij", "dsb", "dv", "cbk-zam", "ext", "gom",
"kab", "ksh", "ang", "mai", "mwl", "lez", "gag", "ln", "ug", "pi",
"pag", "frp", "sn", "nv", "av", "pfl", "haw", "xal", "krc", "kaa",
"rw", "bxr", "pdc", "to", "kl", "nov", "arc", "kbd", "lo", "bjn",
"pap", "ha", "tet", "ki", "tyv", "tpi", "na", "lbe", "ig", "jbo",
"roa-rup", "ty", "jam", "za", "kg", "mdf", "lg", "wo", "srn", "ab",
"ltg", "zu", "sm", "chr", "om", "tn", "chy", "rmy", "cu", "tw", "tum",
"xh", "bi", "rn", "pih", "got", "ss", "pnt", "bm", "ch", "mo", "ts",
"ady", "iu", "st", "ee", "ny", "fj", "ks", "ak", "ik", "sg", "ve",
"dz", "ff", "ti", "cr", "ng", "cho", "kj", "mh", "ho", "ii", "aa", "mus", "hz", "kr"]
# search-url # search-url
base_url = 'https://{language}.wikipedia.org/' base_url = 'https://{language}.wikipedia.org/'
search_postfix = 'w/api.php?'\ search_postfix = 'w/api.php?'\
@ -28,10 +58,11 @@ search_postfix = 'w/api.php?'\
# set language in base_url # set language in base_url
def url_lang(lang): def url_lang(lang):
if lang == 'all': lang = lang.split('-')[0]
if lang == 'all' or lang not in supported_languages:
language = 'en' language = 'en'
else: else:
language = lang.split('_')[0] language = lang
return base_url.format(language=language) return base_url.format(language=language)

View file

@ -53,7 +53,7 @@ def request(query, params):
# add language tag if specified # add language tag if specified
if params['language'] != 'all': if params['language'] != 'all':
params['url'] += '&lr=lang_' + params['language'].split('_')[0] params['url'] += '&lr=lang_' + params['language'].split('-')[0]
return params return params

View file

@ -20,6 +20,10 @@ from searx.engines.xpath import extract_text, extract_url
categories = ['general'] categories = ['general']
paging = True paging = True
language_support = True language_support = True
supported_languages = ["ar", "bg", "ca", "szh", "tzh", "hr", "cs", "da", "nl", "en",
"et", "fi", "fr", "de", "el", "he", "hu", "is", "id", "it", "ja",
"ko", "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sk", "sr",
"sl", "es", "sv", "th", "tr"]
time_range_support = True time_range_support = True
# search-url # search-url
@ -72,7 +76,13 @@ def _get_url(query, offset, language, time_range):
def _get_language(params): def _get_language(params):
if params['language'] == 'all': if params['language'] == 'all':
return 'en' return 'en'
return params['language'].split('_')[0] elif params['language'][:2] == 'zh':
if params['language'] == 'zh' or params['language'] == 'zh-CH':
return 'szh'
else:
return 'tzh'
else:
return params['language'].split('-')[0]
# do search-request # do search-request

View file

@ -12,7 +12,7 @@
from urllib import urlencode from urllib import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url from searx.engines.yahoo import parse_url, supported_languages
from datetime import datetime, timedelta from datetime import datetime, timedelta
import re import re
from dateutil import parser from dateutil import parser

View file

@ -36,7 +36,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
def request(query, params): def request(query, params):
lang = params['language'].split('_')[0] lang = params['language'].split('-')[0]
host = base_url.format(tld=language_map.get(lang) or default_tld) host = base_url.format(tld=language_map.get(lang) or default_tld)
params['url'] = host + search_url.format(page=params['pageno'] - 1, params['url'] = host + search_url.format(page=params['pageno'] - 1,
query=urlencode({'text': query})) query=urlencode({'text': query}))

View file

@ -36,7 +36,7 @@ def request(query, params):
# add language tag if specified # add language tag if specified
if params['language'] != 'all': if params['language'] != 'all':
params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0] params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
return params return params

View file

@ -1,78 +1,390 @@
''' # -*- coding: utf-8 -*-
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
# list of language codes # list of language codes
# this file is generated automatically by utils/update_search_languages.py
language_codes = ( language_codes = (
("ar_XA", "Arabic", "Arabia"), (u"gv", u"Gaelg", u"", u"Manx"),
("bg_BG", "Bulgarian", "Bulgaria"), (u"sco", u"Scots", u"", u"Scots"),
("cs_CZ", "Czech", "Czech Republic"), (u"scn", u"Sicilianu", u"", u"Sicilian"),
("da_DK", "Danish", "Denmark"), (u"gu", u"ગુજરાતી", u"", u"Gujarati"),
("de_AT", "German", "Austria"), (u"gd", u"Gàidhlig", u"", u"Scottish Gaelic"),
("de_CH", "German", "Switzerland"), (u"ga", u"Gaeilge", u"", u"Irish"),
("de_DE", "German", "Germany"), (u"gn", u"Avañe'", u"", u"Guarani"),
("el_GR", "Greek", "Greece"), (u"gl", u"Galego", u"", u"Galician"),
("en_AU", "English", "Australia"), (u"als", u"Alemannisch", u"", u"Alemannic"),
("en_CA", "English", "Canada"), (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
("en_GB", "English", "United Kingdom"), (u"vep", u"Vepsän", u"", u"Vepsian"),
("en_ID", "English", "Indonesia"), (u"ty", u"Reo Mā`ohi", u"", u"Tahitian"),
("en_IE", "English", "Ireland"), (u"tw", u"Twi", u"", u"Twi"),
("en_IN", "English", "India"), (u"tt", u"Tatarça / Татарча", u"", u"Tatar"),
("en_MY", "English", "Malaysia"), (u"tr", u"Türkçe", u"", u"Turkish"),
("en_NZ", "English", "New Zealand"), (u"ts", u"Xitsonga", u"", u"Tsonga"),
("en_PH", "English", "Philippines"), (u"tn", u"Setswana", u"", u"Tswana"),
("en_SG", "English", "Singapore"), (u"to", u"faka Tonga", u"", u"Tongan"),
("en_US", "English", "United States"), (u"tl", u"Tagalog", u"", u"Tagalog"),
("en_XA", "English", "Arabia"), (u"vec", u"Vèneto", u"", u"Venetian"),
("en_ZA", "English", "South Africa"), (u"th", u"ไทย", u"", u"Thai"),
("es_AR", "Spanish", "Argentina"), (u"ti", u"ትግርኛ", u"", u"Tigrinya"),
("es_CL", "Spanish", "Chile"), (u"tg", u"Тоҷикӣ", u"", u"Tajik"),
("es_ES", "Spanish", "Spain"), (u"te", u"తెలుగు", u"", u"Telugu"),
("es_MX", "Spanish", "Mexico"), (u"ta", u"தமிழ்", u"", u"Tamil"),
("es_US", "Spanish", "United States"), (u"lrc", u"لۊری شومالی", u"", u"Northern Luri"),
("es_XL", "Spanish", "Latin America"), (u"en-NZ", u"English", u"", u"English"),
("et_EE", "Estonian", "Estonia"), (u"got", u"𐌲𐌿𐍄𐌹𐍃𐌺", u"", u"Gothic"),
("fi_FI", "Finnish", "Finland"), (u"vls", u"West-Vlams", u"", u"West Flemish"),
("fr_BE", "French", "Belgium"), (u"ro", u"Română", u"", u"Romanian"),
("fr_CA", "French", "Canada"), (u"bxr", u"Буряад", u"", u"Buryat"),
("fr_CH", "French", "Switzerland"), (u"fiu-vro", u"Võro", u"", u"Võro"),
("fr_FR", "French", "France"), (u"diq", u"Zazaki", u"", u"Zazaki"),
("he_IL", "Hebrew", "Israel"), (u"zh", u"中文", u"", u"Chinese"),
("hr_HR", "Croatian", "Croatia"), (u"pms", u"Piemontèis", u"", u"Piedmontese"),
("hu_HU", "Hungarian", "Hungary"), (u"za", u"Cuengh", u"", u"Zhuang"),
("it_IT", "Italian", "Italy"), (u"zh-HK", u"中文", u"", u"Chinese"),
("ja_JP", "Japanese", "Japan"), (u"zu", u"isiZulu", u"", u"Zulu"),
("ko_KR", "Korean", "Korea"), (u"tet", u"Tetun", u"", u"Tetum"),
("lt_LT", "Lithuanian", "Lithuania"), (u"es-PE", u"Español", u"", u"Spanish"),
("lv_LV", "Latvian", "Latvia"), (u"new", u"नेपाल भाषा", u"", u"Newar"),
("nb_NO", "Norwegian", "Norway"), (u"lez", u"Лезги чІал (Lezgi čal)", u"", u"Lezgian"),
("nl_BE", "Dutch", "Belgium"), (u"glk", u"گیلکی", u"", u"Gilaki"),
("nl_NL", "Dutch", "Netherlands"), (u"ko-KR", u"한국어", u"", u"Korean"),
("oc_OC", "Occitan", "Occitan"), (u"id-ID", u"Bahasa Indonesia", u"", u"Indonesian"),
("pl_PL", "Polish", "Poland"), (u"cho", u"Choctaw", u"", u"Choctaw"),
("pt_BR", "Portuguese", "Brazil"), (u"chr", u"ᏣᎳᎩ", u"", u"Cherokee"),
("pt_PT", "Portuguese", "Portugal"), (u"vi", u"Tiếng Việt", u"", u"Vietnamese"),
("ro_RO", "Romanian", "Romania"), (u"chy", u"Tsetsêhestâhese", u"", u"Cheyenne"),
("ru_RU", "Russian", "Russia"), (u"is", u"Íslenska", u"", u"Icelandic"),
("sk_SK", "Slovak", "Slovak Republic"), (u"tk", u"تركمن / Туркмен", u"", u"Turkmen"),
("sl_SL", "Slovenian", "Slovenia"), (u"da-DK", u"Dansk", u"", u"Danish"),
("sv_SE", "Swedish", "Sweden"), (u"pfl", u"Pälzisch", u"", u"Palatinate German"),
("th_TH", "Thai", "Thailand"), (u"hu-HU", u"Magyar", u"", u"Hungarian"),
("tr_TR", "Turkish", "Turkey"), (u"he-IL", u"עברית", u"", u"Hebrew"),
("uk_UA", "Ukrainian", "Ukraine"), (u"mg", u"Malagasy", u"", u"Malagasy"),
("zh_CN", "Chinese", "China"), (u"ml", u"മലയാളം", u"", u"Malayalam"),
("zh_HK", "Chinese", "Hong Kong SAR"), (u"mo", u"Молдовеняскэ", u"", u"Moldovan"),
("zh_TW", "Chinese", "Taiwan")) (u"mn", u"Монгол", u"", u"Mongolian"),
(u"mi", u"Māori", u"", u"Maori"),
(u"mh", u"Ebon", u"", u"Marshallese"),
(u"mk", u"Македонски", u"", u"Macedonian"),
(u"mt", u"Malti", u"", u"Maltese"),
(u"ms", u"Bahasa Melayu", u"", u"Malay"),
(u"mr", u"मराठी", u"", u"Marathi"),
(u"mwl", u"Mirandés", u"", u"Mirandese"),
(u"my", u"မြန်မာဘာသာ", u"", u"Burmese"),
(u"en-PH", u"English", u"", u"English"),
(u"srn", u"Sranantongo", u"", u"Sranan"),
(u"pl-PL", u"Polski", u"", u"Polish"),
(u"sl-SL", u"Slovenščina", u"", u"Slovenian"),
(u"csb", u"Kaszëbsczi", u"", u"Kashubian"),
(u"cbk-zam", u"Chavacano de Zamboanga", u"", u"Zamboanga Chavacano"),
(u"nyn", u"Runyankore", u"", u""),
(u"ig", u"Igbo", u"", u"Igbo"),
(u"fr", u"Français", u"", u"French"),
(u"lad", u"Dzhudezmo", u"", u"Ladino"),
(u"fy", u"Frysk", u"", u"West Frisian"),
(u"fa", u"فارسی", u"", u"Persian"),
(u"ff", u"Fulfulde", u"", u"Fula"),
(u"mai", u"मैथिली", u"", u"Maithili"),
(u"fi", u"Suomi", u"", u"Finnish"),
(u"fj", u"Na Vosa Vakaviti", u"", u"Fijian"),
(u"fo", u"Føroyskt", u"", u"Faroese"),
(u"ss", u"SiSwati", u"", u"Swati"),
(u"roa-tara", u"Tarandíne", u"", u"Tarantino"),
(u"sq", u"Shqip", u"", u"Albanian"),
(u"sw", u"Kiswahili", u"", u"Swahili"),
(u"sv", u"Svenska", u"", u"Swedish"),
(u"su", u"Basa Sunda", u"", u"Sundanese"),
(u"st", u"Sesotho", u"", u"Sesotho"),
(u"sk", u"Slovenčina", u"", u"Slovak"),
(u"si", u"සිංහල", u"", u"Sinhalese"),
(u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
(u"so", u"Soomaali", u"", u"Somali"),
(u"sn", u"chiShona", u"", u"Shona"),
(u"sm", u"Gagana Samoa", u"", u"Samoan"),
(u"sl", u"Slovenščina", u"", u"Slovenian"),
(u"sc", u"Sardu", u"", u"Sardinian"),
(u"pt-BR", u"português (Brasil)", u"", u""),
(u"sa", u"संस्कृतम्", u"", u"Sanskrit"),
(u"sg", u"Sängö", u"", u"Sango"),
(u"se", u"Sámegiella", u"", u"Northern Sami"),
(u"sd", u"سنڌي، سندھی ، सिन्ध", u"", u"Sindhi"),
(u"fr-CH", u"Français", u"", u"French"),
(u"zea", u"Zeêuws", u"", u"Zeelandic"),
(u"it-CH", u"Italiano", u"", u"Italian"),
(u"wuu", u"吴语", u"", u"Wu"),
(u"fr-CA", u"Français", u"", u"French"),
(u"ar-XA", u"العربية", u"", u"Arabic"),
(u"kbd", u"Адыгэбзэ (Adighabze)", u"", u"Kabardian Circassian"),
(u"no-NO", u"Norsk (Bokmål)", u"", u"Norwegian (Bokmål)"),
(u"ca-ES", u"Català", u"", u"Catalan"),
(u"lg", u"Luganda", u"", u"Luganda"),
(u"lb", u"Lëtzebuergesch", u"", u"Luxembourgish"),
(u"la", u"Latina", u"", u"Latin"),
(u"ln", u"Lingala", u"", u"Lingala"),
(u"lo", u"ລາວ", u"", u"Lao"),
(u"de-CH", u"Deutsch", u"", u"German"),
(u"li", u"Limburgs", u"", u"Limburgish"),
(u"lv", u"Latviešu", u"", u"Latvian"),
(u"lt", u"Lietuvių", u"", u"Lithuanian"),
(u"pcm", u"Nigerian Pidgin", u"", u""),
(u"pcd", u"Picard", u"", u"Picard"),
(u"yi", u"ייִדיש", u"", u"Yiddish"),
(u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
(u"yo", u"Yorùbá", u"", u"Yoruba"),
(u"ro-RO", u"Română", u"", u"Romanian"),
(u"bar", u"Boarisch", u"", u"Bavarian"),
(u"nov", u"Novial", u"", u"Novial"),
(u"sr-ME", u"srpski (Crna Gora)", u"", u""),
(u"es-CL", u"Español", u"", u"Spanish"),
(u"es-CO", u"Español", u"", u"Spanish"),
(u"nl-NL", u"Nederlands", u"", u"Dutch"),
(u"map-bms", u"Basa Banyumasan", u"", u"Banyumasan"),
(u"el", u"Ελληνικά", u"", u"Greek"),
(u"eo", u"Esperanto", u"", u"Esperanto"),
(u"en", u"English", u"", u"English"),
(u"ee", u"Eʋegbe", u"", u"Ewe"),
(u"mdf", u"Мокшень (Mokshanj Kälj)", u"", u"Moksha"),
(u"eu", u"Euskara", u"", u"Basque"),
(u"et", u"Eesti", u"", u"Estonian"),
(u"es", u"Español", u"", u"Spanish"),
(u"gom", u"गोवा कोंकणी / Gova Konknni", u"", u"Goan Konkani"),
(u"ru", u"Русский", u"", u"Russian"),
(u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
(u"rm", u"Rumantsch", u"", u"Romansh"),
(u"rn", u"Kirundi", u"", u"Kirundi"),
(u"es-419", u"español (Latinoamérica)", u"", u""),
(u"dsb", u"Dolnoserbski", u"", u"Lower Sorbian"),
(u"ast", u"Asturianu", u"", u"Asturian"),
(u"lmo", u"Lumbaart", u"", u"Lombard"),
(u"ltg", u"Latgaļu", u"", u"Latgalian"),
(u"xh", u"isiXhosa", u"", u"Xhosa"),
(u"en-CA", u"English", u"", u"English"),
(u"koi", u"Перем Коми (Perem Komi)", u"", u"Komi-Permyak"),
(u"tr-TR", u"Türkçe", u"", u"Turkish"),
(u"pnt", u"Ποντιακά", u"", u"Pontic"),
(u"es-XL", u"Español", u"", u"Spanish"),
(u"fi-FI", u"Suomi", u"", u"Finnish"),
(u"pnb", u"شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", u"", u"Western Punjabi"),
(u"udm", u"Удмурт кыл", u"", u"Udmurt"),
(u"bem", u"Ichibemba", u"", u""),
(u"roa-rup", u"Armãneashce", u"", u"Aromanian"),
(u"sr-Latn", u"srpski (latinica)", u"", u""),
(u"stq", u"Seeltersk", u"", u"Saterland Frisian"),
(u"sr", u"Српски / Srpski", u"", u"Serbian"),
(u"ang", u"Englisc", u"", u"Anglo-Saxon"),
(u"ru-RU", u"Русский", u"", u"Russian"),
(u"lbe", u"Лакку", u"", u"Lak"),
(u"min", u"Minangkabau", u"", u"Minangkabau"),
(u"es-US", u"Español", u"", u"Spanish"),
(u"lij", u"Líguru", u"", u"Ligurian"),
(u"kab", u"Taqbaylit", u"", u"Kabyle"),
(u"kaa", u"Qaraqalpaqsha", u"", u"Karakalpak"),
(u"fr-FR", u"Français", u"", u"French"),
(u"tyv", u"Тыва", u"", u"Tuvan"),
(u"ka", u"ქართული", u"", u"Georgian"),
(u"kg", u"KiKongo", u"", u"Kongo"),
(u"ckb", u"Soranî / کوردی", u"", u"Sorani"),
(u"kk", u"Қазақша", u"", u"Kazakh"),
(u"kj", u"Kuanyama", u"", u"Kuanyama"),
(u"ki", u"Gĩkũyũ", u"", u"Kikuyu"),
(u"ko", u"한국어", u"", u"Korean"),
(u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
(u"tpi", u"Tok Pisin", u"", u"Tok Pisin"),
(u"kl", u"Kalaallisut", u"", u"Greenlandic"),
(u"ks", u"कश्मीरी / كشميري", u"", u"Kashmiri"),
(u"kr", u"Kanuri", u"", u"Kanuri"),
(u"ext", u"Estremeñu", u"", u"Extremaduran"),
(u"kw", u"Kernewek/Karnuack", u"", u"Cornish"),
(u"kv", u"Коми", u"", u"Komi"),
(u"mrj", u"Кырык Мары (Kyryk Mary)", u"", u"Hill Mari"),
(u"ky", u"Кыргызча", u"", u"Kirghiz"),
(u"szl", u"Ślůnski", u"", u"Silesian"),
(u"cdo", u"Mìng-dĕ̤ng-ngṳ̄", u"", u"Min Dong"),
(u"en-GB", u"English", u"", u"English"),
(u"xmf", u"მარგალური (Margaluri)", u"", u"Mingrelian"),
(u"jam", u"Jamaican Creole English", u"", u"Patois"),
(u"ar-SA", u"العربية", u"", u"Arabic"),
(u"ksh", u"Ripoarisch", u"", u"Ripuarian"),
(u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
(u"de", u"Deutsch", u"", u"German"),
(u"da", u"Dansk", u"", u"Danish"),
(u"dz", u"ཇོང་ཁ", u"", u"Dzongkha"),
(u"hif", u"Fiji Hindi", u"", u"Fiji Hindi"),
(u"dv", u"ދިވެހިބަސް", u"", u"Divehi"),
(u"crs", u"Seychellois Creole", u"", u""),
(u"qu", u"Runa Simi", u"", u"Quechua"),
(u"eml", u"Emiliàn e rumagnòl", u"", u"Emilian-Romagnol"),
(u"ban", u"Balinese", u"", u""),
(u"crh", u"Qırımtatarca", u"", u"Crimean Tatar"),
(u"arz", u"مصرى (Maṣri)", u"", u"Egyptian Arabic"),
(u"rmy", u"romani - रोमानी", u"", u"Romani"),
(u"arc", u"ܐܪܡܝܐ", u"", u"Aramaic"),
(u"th-TH", u"ไทย", u"", u"Thai"),
(u"mus", u"Muskogee", u"", u"Muscogee"),
(u"lua", u"Luba-Lulua", u"", u""),
(u"en-ZA", u"English", u"", u"English"),
(u"wa", u"Walon", u"", u"Walloon"),
(u"wo", u"Wolof", u"", u"Wolof"),
(u"jv", u"Basa Jawa", u"", u"Javanese"),
(u"jw", u"Javanese", u"", u""),
(u"fr-BE", u"Français", u"", u"French"),
(u"tum", u"chiTumbuka", u"", u"Tumbuka"),
(u"ja", u"日本語", u"", u"Japanese"),
(u"pt-PT", u"português (Portugal)", u"", u""),
(u"ilo", u"Ilokano", u"", u"Ilokano"),
(u"tlh", u"Klingon", u"", u""),
(u"pdc", u"Deitsch", u"", u"Pennsylvania German"),
(u"aa", u"Afar", u"", u"Afar"),
(u"ch", u"Chamoru", u"", u"Chamorro"),
(u"co", u"Corsu", u"", u"Corsican"),
(u"simple", u"Simple English", u"", u"Simple English"),
(u"ca", u"Català", u"", u"Catalan"),
(u"xx-pirate", u"Pirate", u"", u""),
(u"ce", u"Нохчийн", u"", u"Chechen"),
(u"cy", u"Cymraeg", u"", u"Welsh"),
(u"sah", u"Саха тыла (Saxa Tyla)", u"", u"Sakha"),
(u"cs", u"Čeština", u"", u"Czech"),
(u"cr", u"Nehiyaw", u"", u"Cree"),
(u"bg-BG", u"Български", u"", u"Bulgarian"),
(u"cv", u"Чăваш", u"", u"Chuvash"),
(u"cu", u"Словѣньскъ", u"", u"Old Church Slavonic"),
(u"ps", u"پښتو", u"", u"Pashto"),
(u"pt", u"Português", u"", u"Portuguese"),
(u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
(u"frr", u"Nordfriisk", u"", u"North Frisian"),
(u"frp", u"Arpitan", u"", u"Franco-Provençal"),
(u"xal", u"Хальмг", u"", u"Kalmyk"),
(u"pi", u"पाऴि", u"", u"Pali"),
(u"it-IT", u"Italiano", u"", u"Italian"),
(u"pl", u"Polski", u"", u"Polish"),
(u"nrm", u"Nouormand/Normaund", u"", u"Norman"),
(u"en-US", u"English", u"", u"English"),
(u"gan", u"贛語", u"", u"Gan"),
(u"bat-smg", u"Žemaitėška", u"", u"Samogitian"),
(u"en-UK", u"English", u"", u"English"),
(u"gag", u"Gagauz", u"", u"Gagauz"),
(u"an", u"Aragonés", u"", u"Aragonese"),
(u"gaa", u"Ga", u"", u""),
(u"fur", u"Furlan", u"", u"Friulian"),
(u"kr-KR", u"Kanuri", u"", u"Kanuri"),
(u"zh-CN", u"中文 (简体)", u"", u""),
(u"tl-PH", u"Tagalog", u"", u"Tagalog"),
(u"en-IN", u"English", u"", u"English"),
(u"ve", u"Tshivenda", u"", u"Venda"),
(u"en-ID", u"English", u"", u"English"),
(u"en-IE", u"English", u"", u"English"),
(u"xx-bork", u"Bork, bork, bork!", u"", u""),
(u"iu", u"ᐃᓄᒃᑎᑐᑦ", u"", u"Inuktitut"),
(u"it", u"Italiano", u"", u"Italian"),
(u"iw", u"עברית", u"", u""),
(u"vo", u"Volapük", u"", u"Volapük"),
(u"ii", u"ꆇꉙ", u"", u"Sichuan Yi"),
(u"ik", u"Iñupiak", u"", u"Inupiak"),
(u"io", u"Ido", u"", u"Ido"),
(u"ia", u"Interlingua", u"", u"Interlingua"),
(u"ja-JP", u"日本語", u"", u"Japanese"),
(u"ie", u"Interlingue", u"", u"Interlingue"),
(u"id", u"Bahasa Indonesia", u"", u"Indonesian"),
(u"nds-nl", u"Nedersaksisch", u"", u"Dutch Low Saxon"),
(u"pap", u"Papiamentu", u"", u"Papiamentu"),
(u"pag", u"Pangasinan", u"", u"Pangasinan"),
(u"pam", u"Kapampangan", u"", u"Kapampangan"),
(u"lv-LV", u"Latviešu", u"", u"Latvian"),
(u"mzn", u"مَزِروني", u"", u"Mazandarani"),
(u"nl-BE", u"Nederlands", u"", u"Dutch"),
(u"sk-SK", u"Slovenčina", u"", u"Slovak"),
(u"zh-TW", u"中文 (繁體)", u"", u""),
(u"es-MX", u"Español", u"", u"Spanish"),
(u"de-DE", u"Deutsch", u"", u"German"),
(u"jbo", u"Lojban", u"", u"Lojban"),
(u"mfe", u"kreol morisien", u"", u""),
(u"hak", u"Hak-kâ-fa / 客家話", u"", u"Hakka"),
(u"ny", u"Chichewa", u"", u"Chichewa"),
(u"ady", u"Адыгэбзэ", u"", u"Adyghe"),
(u"haw", u"Hawai`i", u"", u"Hawaiian"),
(u"el-GR", u"Ελληνικά", u"", u"Greek"),
(u"bpy", u"ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", u"", u"Bishnupriya Manipuri"),
(u"mhr", u"Олык Марий (Olyk Marij)", u"", u"Meadow Mari"),
(u"ca-CT", u"Català", u"", u"Catalan"),
(u"en-MY", u"English", u"", u"English"),
(u"sv-SE", u"Svenska", u"", u"Swedish"),
(u"de-AT", u"Deutsch", u"", u"German"),
(u"xx-elmer", u"Elmer Fudd", u"", u""),
(u"hsb", u"Hornjoserbsce", u"", u"Upper Sorbian"),
(u"be", u"Беларуская", u"", u"Belarusian"),
(u"bg", u"Български", u"", u"Bulgarian"),
(u"ba", u"Башҡорт", u"", u"Bashkir"),
(u"bm", u"Bamanankan", u"", u"Bambara"),
(u"bn", u"বাংলা", u"", u"Bengali"),
(u"bo", u"བོད་སྐད", u"", u"Tibetan"),
(u"bh", u"भोजपुरी", u"", u"Bihari"),
(u"bi", u"Bislama", u"", u"Bislama"),
(u"rue", u"Русиньскый", u"", u"Rusyn"),
(u"et-EE", u"Eesti", u"", u"Estonian"),
(u"br", u"Brezhoneg", u"", u"Breton"),
(u"bs", u"Bosanski", u"", u"Bosnian"),
(u"om", u"Oromoo", u"", u"Oromo"),
(u"ace", u"Bahsa Acèh", u"", u"Acehnese"),
(u"es-AR", u"Español", u"", u"Spanish"),
(u"ach", u"Acoli", u"", u""),
(u"oc", u"Occitan", u"", u"Occitan"),
(u"kri", u"Krio (Sierra Leone)", u"", u""),
(u"be-tarask", u"Беларуская (тарашкевіца)", u"", u"Belarusian (Taraškievica)"),
(u"krc", u"Къарачай-Малкъар (Qarachay-Malqar)", u"", u"Karachay-Balkar"),
(u"nds", u"Plattdüütsch", u"", u"Low Saxon"),
(u"os", u"Иронау", u"", u"Ossetian"),
(u"or", u"ଓଡ଼ିଆ", u"", u"Oriya"),
(u"nso", u"Sepedi", u"", u"Northern Sotho"),
(u"bjn", u"Bahasa Banjar", u"", u"Banjar"),
(u"xx-hacker", u"Hacker", u"", u""),
(u"zh-min-nan", u"Bân-lâm-gú", u"", u"Min Nan"),
(u"pa", u"ਪੰਜਾਬੀ", u"", u"Punjabi"),
(u"loz", u"Lozi", u"", u""),
(u"war", u"Winaray", u"", u"Waray-Waray"),
(u"hz", u"Otsiherero", u"", u"Herero"),
(u"hy", u"Հայերեն", u"", u"Armenian"),
(u"hr", u"Hrvatski", u"", u"Croatian"),
(u"ht", u"Krèyol ayisyen", u"", u"Haitian"),
(u"hu", u"Magyar", u"", u"Hungarian"),
(u"hi", u"हिन्दी", u"", u"Hindi"),
(u"ho", u"Hiri Motu", u"", u"Hiri Motu"),
(u"ha", u"هَوُسَ", u"", u"Hausa"),
(u"bug", u"Basa Ugi", u"", u"Buginese"),
(u"he", u"עברית", u"", u"Hebrew"),
(u"hr-HR", u"Hrvatski", u"", u"Croatian"),
(u"uz", u"Ozbek", u"", u"Uzbek"),
(u"azb", u"تۆرکجه", u"", u"South Azerbaijani"),
(u"ur", u"اردو", u"", u"Urdu"),
(u"uk", u"Українська", u"", u"Ukrainian"),
(u"ug", u"ئۇيغۇر تىلى", u"", u"Uyghur"),
(u"pih", u"Norfuk", u"", u"Norfolk"),
(u"ab", u"Аҧсуа", u"", u"Abkhazian"),
(u"af", u"Afrikaans", u"", u"Afrikaans"),
(u"ak", u"Akana", u"", u"Akan"),
(u"am", u"አማርኛ", u"", u"Amharic"),
(u"myv", u"Эрзянь (Erzjanj Kelj)", u"", u"Erzya"),
(u"as", u"অসমীয়া", u"", u"Assamese"),
(u"ar", u"العربية", u"", u"Arabic"),
(u"km", u"ភាសាខ្មែរ", u"", u"Khmer"),
(u"uk-UA", u"Українська", u"", u"Ukrainian"),
(u"av", u"Авар", u"", u"Avar"),
(u"ay", u"Aymar", u"", u"Aymara"),
(u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
(u"es-ES", u"Español", u"", u"Spanish"),
(u"nl", u"Nederlands", u"", u"Dutch"),
(u"nn", u"Nynorsk", u"", u"Norwegian (Nynorsk)"),
(u"no", u"Norsk (Bokmål)", u"", u"Norwegian (Bokmål)"),
(u"na", u"dorerin Naoero", u"", u"Nauruan"),
(u"nah", u"Nāhuatl", u"", u"Nahuatl"),
(u"ne", u"नेपाली", u"", u"Nepali"),
(u"ng", u"Oshiwambo", u"", u"Ndonga"),
(u"en-AU", u"English", u"", u"English"),
(u"nap", u"Nnapulitano", u"", u"Neapolitan"),
(u"nv", u"Diné bizaad", u"", u"Navajo"),
(u"ku", u"Kurdî / كوردی", u"", u"Kurdish"),
(u"cs-CZ", u"Čeština", u"", u"Czech"),
(u"zh-yue", u"粵語", u"", u"Cantonese"),
(u"en-SG", u"English", u"", u"English"),
(u"zh-classical", u"古文 / 文言文", u"", u"Classical Chinese"),
(u"bcl", u"Bikol", u"", u"Central Bicolano"),
(u"en-XA", u"English", u"", u"English")
)

View file

@ -71,21 +71,24 @@ class RawTextQuery(object):
# check if any language-code is equal with # check if any language-code is equal with
# declared language-codes # declared language-codes
for lc in language_codes: for lc in language_codes:
lang_id, lang_name, country = map(str.lower, lc) lang_id, lang_name, country, english_name = map(unicode.lower, lc)
# if correct language-code is found # if correct language-code is found
# set it as new search-language # set it as new search-language
if lang == lang_id\ if lang == lang_id\
or lang_id.startswith(lang)\ or lang_id.startswith(lang)\
or lang == lang_name\ or lang == lang_name\
or lang == english_name\
or lang.replace('_', ' ') == country: or lang.replace('_', ' ') == country:
parse_next = True parse_next = True
self.languages.append(lang) self.languages.append(lang_id)
# to ensure best match (first match is not necessarily the best one)
if lang == lang_id:
break break
# this force a engine or category # this force a engine or category
if query_part[0] == '!' or query_part[0] == '?': if query_part[0] == '!' or query_part[0] == '?':
prefix = query_part[1:].replace('_', ' ') prefix = query_part[1:].replace('-', ' ')
# check if prefix is equal with engine shortcut # check if prefix is equal with engine shortcut
if prefix in engine_shortcuts: if prefix in engine_shortcuts:

View file

@ -13,9 +13,9 @@
<legend>{{ _('Search language') }}</legend> <legend>{{ _('Search language') }}</legend>
<p> <p>
<select name='language'> <select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %} {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %} {% endfor %}
</select> </select>
</p> </p>

View file

@ -14,9 +14,9 @@
<legend>{{ _('Search language') }}</legend> <legend>{{ _('Search language') }}</legend>
<p> <p>
<select name='language'> <select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %} {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %} {% endfor %}
</select> </select>
</p> </p>

View file

@ -6,4 +6,5 @@
<div id="advanced-search-container"> <div id="advanced-search-container">
{% include 'oscar/categories.html' %} {% include 'oscar/categories.html' %}
{% include 'oscar/time-range.html' %} {% include 'oscar/time-range.html' %}
{% include 'oscar/languages.html' %}
</div> </div>

View file

@ -0,0 +1,12 @@
{% if preferences %}
<select class="form-control" name='language'>
{% else %}
<select class="time_range" name='language'>
{% endif %}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
</option>
{% endfor %}
</select>

View file

@ -40,12 +40,7 @@
{% set language_label = _('Search language') %} {% set language_label = _('Search language') %}
{% set language_info = _('What language do you prefer for search?') %} {% set language_info = _('What language do you prefer for search?') %}
{{ preferences_item_header(language_info, language_label, rtl) }} {{ preferences_item_header(language_info, language_label, rtl) }}
<select class="form-control" name='language'> {% include 'oscar/languages.html' %}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
{% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
{% endfor %}
</select>
{{ preferences_item_footer(language_info, language_label, rtl) }} {{ preferences_item_footer(language_info, language_label, rtl) }}
{% set locale_label = _('Interface language') %} {% set locale_label = _('Interface language') %}
@ -153,6 +148,7 @@
<th>{{ _("Allow") }}</th> <th>{{ _("Allow") }}</th>
<th>{{ _("Engine name") }}</th> <th>{{ _("Engine name") }}</th>
<th>{{ _("Shortcut") }}</th> <th>{{ _("Shortcut") }}</th>
<th>{{ _("Language support") }}</th>
<th>{{ _("SafeSearch") }}</th> <th>{{ _("SafeSearch") }}</th>
<th>{{ _("Time range") }}</th> <th>{{ _("Time range") }}</th>
<th>{{ _("Avg. time") }}</th> <th>{{ _("Avg. time") }}</th>
@ -161,6 +157,7 @@
<th>{{ _("Max time") }}</th> <th>{{ _("Max time") }}</th>
<th>{{ _("Avg. time") }}</th> <th>{{ _("Avg. time") }}</th>
<th>{{ _("SafeSearch") }}</th> <th>{{ _("SafeSearch") }}</th>
<th>{{ _("Language support") }}</th>
<th>{{ _("Shortcut") }}</th> <th>{{ _("Shortcut") }}</th>
<th>{{ _("Engine name") }}</th> <th>{{ _("Engine name") }}</th>
<th>{{ _("Allow") }}</th> <th>{{ _("Allow") }}</th>
@ -175,6 +172,7 @@
</td> </td>
<th>{{ search_engine.name }}</th> <th>{{ search_engine.name }}</th>
<td>{{ shortcuts[search_engine.name] }}</td> <td>{{ shortcuts[search_engine.name] }}</td>
<td><input type="checkbox" {{ "checked" if current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td> <td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
@ -183,6 +181,7 @@
<td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td> <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
<td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
<td>{{ shortcuts[search_engine.name] }}</td> <td>{{ shortcuts[search_engine.name] }}</td>
<th>{{ search_engine.name }}</th> <th>{{ search_engine.name }}</th>
<td class="onoff-checkbox"> <td class="onoff-checkbox">

View file

@ -9,9 +9,9 @@
<legend>{{ _('Search language') }}</legend> <legend>{{ _('Search language') }}</legend>
<p> <p>
<select name='language'> <select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %} {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option> <option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %} {% endfor %}
</select> </select>
</p> </p>

View file

@ -330,6 +330,10 @@ def render(template_name, override_theme=None, **kwargs):
kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
kwargs['language_codes'] = language_codes
if 'current_language' not in kwargs:
kwargs['current_language'] = request.preferences.get_value('language')
# override url_for function in templates # override url_for function in templates
kwargs['url_for'] = url_for_theme kwargs['url_for'] = url_for_theme
@ -510,6 +514,7 @@ def index():
answers=result_container.answers, answers=result_container.answers,
infoboxes=result_container.infoboxes, infoboxes=result_container.infoboxes,
paging=result_container.paging, paging=result_container.paging,
current_language=search.lang,
base_url=get_base_url(), base_url=get_base_url(),
theme=get_current_theme_name(), theme=get_current_theme_name(),
favicons=global_favicons[themes.index(get_current_theme_name())] favicons=global_favicons[themes.index(get_current_theme_name())]
@ -552,7 +557,7 @@ def autocompleter():
if not language or language == 'all': if not language or language == 'all':
language = 'en' language = 'en'
else: else:
language = language.split('_')[0] language = language.split('-')[0]
# run autocompletion # run autocompletion
raw_results.extend(completer(raw_text_query.getSearchQuery(), language)) raw_results.extend(completer(raw_text_query.getSearchQuery(), language))
@ -615,9 +620,7 @@ def preferences():
return render('preferences.html', return render('preferences.html',
locales=settings['locales'], locales=settings['locales'],
current_locale=get_locale(), current_locale=get_locale(),
current_language=lang,
image_proxy=image_proxy, image_proxy=image_proxy,
language_codes=language_codes,
engines_by_category=categories, engines_by_category=categories,
stats=stats, stats=stats,
answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers], answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
@ -627,7 +630,8 @@ def preferences():
themes=themes, themes=themes,
plugins=plugins, plugins=plugins,
allowed_plugins=allowed_plugins, allowed_plugins=allowed_plugins,
theme=get_current_theme_name()) theme=get_current_theme_name(),
preferences=True)
@app.route('/image_proxy', methods=['GET']) @app.route('/image_proxy', methods=['GET'])

99
utils/update_languages.py Normal file
View file

@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
# This script generates languages.py by merging
# each engine's supported languages into one list.
#
# The language's native names are obtained from
# Wikipedia's supported languages.
#
# Output file (languages.py) is written in current directory
# to avoid overwriting in case something goes wrong.
from requests import get
from re import sub
from lxml.html import fromstring
from json import loads
from sys import path
path.append('../searx')
from searx.engines import engines
# Pages scraped for language names.
# wiki_languages_url is fetched by get_wikipedia_languages() and
# google_languages_url by get_google_languages().
wiki_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
google_languages_url = 'https://www.google.com/preferences?#languages'
# NOTE(review): not referenced by any function visible in this script.
google_json_name = 'google.preferences.langMap'
# Shared accumulator filled in by the functions below:
# language code -> (native name, country, english name)
languages = {}
# Get language names from Wikipedia.
def get_wikipedia_languages():
    """Collect language names from Wikipedia's list of Wikipedias.

    Downloads ``wiki_languages_url``, walks every table whose class
    contains "sortable" and, for each data row, reads the language code
    (4th cell), the native name (3rd cell) and the English name
    (2nd cell) from the first link inside the cell.

    Codes already present in the module-level ``languages`` dict are
    left untouched; new ones are stored as
    ``(native name, '', english name)``.
    """
    page = fromstring(get(wiki_languages_url).text)
    for table in page.xpath('//table[contains(@class,"sortable")]'):
        # the first row of every table is its header, skip it
        for row in table.xpath('.//tr')[1:]:
            cells = row.xpath('./td')
            code = cells[3].xpath('./a')[0].text
            name = cells[2].xpath('./a')[0].text
            english_name = cells[1].xpath('./a')[0].text
            # keep an existing entry, only add codes not seen before
            languages.setdefault(code, (name, '', english_name))
# Get language names from Google.
def get_google_languages():
    """Collect language names from Google's preferences page.

    Downloads ``google_languages_url`` and reads every ``<option>`` of
    the ``<select name="hl">`` element: the option's ``value`` is used
    as the language code and its text, minus the last character, as the
    name.

    Codes already present in the module-level ``languages`` dict are
    left untouched; new ones are stored as ``(name, '', '')``.
    """
    page = fromstring(get(google_languages_url).text)
    for option in page.xpath('//select[@name="hl"]/option'):
        code = option.xpath('./@value')[0]
        # the option label carries one trailing character that is dropped
        name = option.text[:-1]
        # keep an existing entry, only add codes not seen before
        languages.setdefault(code, (name, '', ''))
# Join all language lists.
# iterate all languages supported by each engine
def join_language_lists():
    """Add every locale supported by any engine to ``languages``.

    Each entry of an engine's ``supported_languages`` is normalised to
    use ``-`` instead of ``_`` as separator.  A locale that is not yet
    known inherits the ``(name, country, english name)`` tuple of its
    base language (the part before the first ``-``).  When the base
    language is unknown as well, ``"<engine name>: <locale>"`` is
    printed and the locale is skipped.
    """
    for engine_name in engines:
        for locale in engines[engine_name].supported_languages:
            locale = locale.replace('_', '-')
            if locale in languages:
                continue

            # try to get the language name from the base language
            language = languages.get(locale.split('-')[0])
            if language is None:
                # report locales nothing is known about
                # (single-argument call form prints the same text on
                # Python 2 and Python 3)
                print(engine_name + ": " + locale)
                continue

            languages[locale] = language
# Write languages.py.
def write_languages_file():
    """Write the collected languages to ``languages.py``.

    The file is created in the current working directory and defines
    ``language_codes``, a tuple holding one
    ``(code, name, country, english name)`` tuple per entry of the
    module-level ``languages`` dict.  The content is encoded as UTF-8.
    """
    header = '# -*- coding: utf-8 -*-\n'\
        + '# list of language codes\n'\
        + '# this file is generated automatically by utils/update_languages.py\n'\
        + '\nlanguage_codes = ('

    entries = []
    for code in languages:
        (name, country, english) = languages[code]
        # country[1:-1] drops the first and last character of the
        # country string; every country stored by this script is empty,
        # so this currently has no effect
        entries.append('\n (u"' + code + '"'
                       + ', u"' + name + '"'
                       + ', u"' + country[1:-1] + '"'
                       + ', u"' + english + '")')

    # joining (instead of appending a comma after every entry and
    # cutting the last character off) keeps the opening parenthesis
    # intact when there are no languages at all
    body = ','.join(entries)
    if len(entries) == 1:
        # a lone entry needs a trailing comma, otherwise the outer
        # parentheses would not form a tuple of tuples
        body += ','
    file_content = header + body + '\n)\n'

    # binary mode, since the content is written as encoded bytes;
    # the with-block closes the file even if writing fails
    with open('languages.py', 'wb') as new_file:
        new_file.write(file_content.encode('utf8'))
def main():
    """Run all generation steps in order.

    Wikipedia is queried before Google because both only add codes that
    are not known yet, so the names found first are the ones kept.  The
    engines' locales are merged afterwards and the result is written to
    languages.py.
    """
    steps = (
        get_wikipedia_languages,
        get_google_languages,
        join_language_lists,
        write_languages_file,
    )
    for step in steps:
        step()


if __name__ == "__main__":
    main()