From 149802c56926bf48520c98932c4c36b8152b3d2d Mon Sep 17 00:00:00 2001 From: marc Date: Fri, 5 Aug 2016 23:34:56 -0500 Subject: [PATCH 01/14] [enh] add supported_languages on engines and auto-generate languages.py --- searx/autocomplete.py | 6 +- searx/engines/__init__.py | 1 + searx/engines/archlinux.py | 5 +- searx/engines/bing.py | 2 +- searx/engines/bing_images.py | 3 +- searx/engines/bing_news.py | 3 +- searx/engines/duckduckgo.py | 42 +- searx/engines/duckduckgo_definitions.py | 3 +- searx/engines/gigablast.py | 2 +- searx/engines/google.py | 14 + searx/engines/google_news.py | 4 +- searx/engines/mediawiki.py | 3 +- searx/engines/photon.py | 4 +- searx/engines/startpage.py | 2 +- searx/engines/subtitleseeker.py | 9 +- searx/engines/swisscows.py | 4 +- searx/engines/twitter.py | 2 +- searx/engines/wikidata.py | 2 + searx/engines/wikipedia.py | 35 +- searx/engines/yacy.py | 2 +- searx/engines/yahoo.py | 12 +- searx/engines/yahoo_news.py | 2 +- searx/engines/yandex.py | 2 +- searx/engines/youtube_api.py | 2 +- searx/languages.py | 464 +++++++++++++++++---- searx/query.py | 11 +- searx/templates/courgette/preferences.html | 6 +- searx/templates/legacy/preferences.html | 6 +- searx/templates/oscar/advanced.html | 1 + searx/templates/oscar/languages.html | 12 + searx/templates/oscar/preferences.html | 11 +- searx/templates/pix-art/preferences.html | 6 +- searx/webapp.py | 12 +- utils/update_languages.py | 99 +++++ 34 files changed, 666 insertions(+), 128 deletions(-) create mode 100644 searx/templates/oscar/languages.html create mode 100644 utils/update_languages.py diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 527104041..dff8a69a0 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -81,17 +81,17 @@ def searx_bang(full_query): engine_query = full_query.getSearchQuery()[1:] for lc in language_codes: - lang_id, lang_name, country = map(str.lower, lc) + lang_id, lang_name, country, english_name = map(str.lower, lc) # check if query 
starts with language-id if lang_id.startswith(engine_query): if len(engine_query) <= 2: - results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0])) + results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0])) else: results.append(':{lang_id}'.format(lang_id=lang_id)) # check if query starts with language name - if lang_name.startswith(engine_query): + if lang_name.startswith(engine_query) or english_name.startswith(engine_query): results.append(':{lang_name}'.format(lang_name=lang_name)) # check if query starts with country diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 87b1b0eb4..ab3677984 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -38,6 +38,7 @@ engine_shortcuts = {} engine_default_args = {'paging': False, 'categories': ['general'], 'language_support': True, + 'supported_languages': [], 'safesearch': False, 'timeout': settings['outgoing']['request_timeout'], 'shortcut': '-', diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index 5ba512766..dca825790 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a' # cut 'en' from 'en_US', 'de' from 'de_CH', and so on def locale_to_lang_code(locale): - if locale.find('_') >= 0: - locale = locale.split('_')[0] + if locale.find('-') >= 0: + locale = locale.split('-')[0] return locale @@ -95,6 +95,7 @@ main_langs = { 'uk': 'Українська', 'zh': '简体中文' } +supported_languages = dict(lang_urls, **main_langs) # do search-request diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 58db61251..052b66448 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -32,7 +32,7 @@ def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 if params['language'] != 'all': - query = u'language:{} {}'.format(params['language'].split('_')[0].upper(), + query = u'language:{} {}'.format(params['language'].split('-')[0].upper(), 
query.decode('utf-8')).encode('utf-8') search_path = search_string.format( diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 4dd362cb3..c0deaf6b2 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -19,6 +19,7 @@ from urllib import urlencode from lxml import html from json import loads import re +from searx.engines.bing import supported_languages # engine dependent config categories = ['images'] @@ -53,7 +54,7 @@ def request(query, params): if params['language'] == 'all': language = 'en-US' else: - language = params['language'].replace('_', '-') + language = params['language'] search_path = search_string.format( query=urlencode({'q': query}), diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 4e7c33129..4bac5bbce 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -17,6 +17,7 @@ from datetime import datetime from dateutil import parser from lxml import etree from searx.utils import list_get +from searx.engines.bing import supported_languages # engine dependent config categories = ['news'] @@ -74,7 +75,7 @@ def request(query, params): if params['language'] == 'all': language = 'en-US' else: - language = params['language'].replace('_', '-') + language = params['language'] params['url'] = _get_url(query, language, offset, params['time_range']) diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 9959a52e6..a1cb5882c 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -22,6 +22,13 @@ from searx.languages import language_codes categories = ['general'] paging = True language_support = True +supported_languages = ["es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "bg-BG", "en-CA", "fr-CA", "ca-CT", + "es-CL", "zh-CN", "es-CO", "hr-HR", "cs-CZ", "da-DK", "et-EE", "fi-FI", "fr-FR", "de-DE", + "el-GR", "tzh-HK", "hu-HU", "en-IN", "id-ID", "en-ID", "en-IE", "he-IL", "it-IT", "jp-JP", + "kr-KR", "es-XL", "lv-LV", 
"lt-LT", "ms-MY", "en-MY", "es-MX", "nl-NL", "en-NZ", "no-NO", + "es-PE", "en-PH", "tl-PH", "pl-PL", "pt-PT", "ro-RO", "ru-RU", "ar-XA", "en-XA", "en-SG", + "sk-SK", "sl-SL", "en-ZA", "es-ES", "ca-ES", "sv-SE", "de-CH", "fr-CH", "it-CH", "tzh-TW", + "th-TH", "tr-TR", "uk-UA", "en-UK", "en-US", "es-US", "vi-VN"] time_range_support = True # search-url @@ -46,10 +53,23 @@ def request(query, params): offset = (params['pageno'] - 1) * 30 + # custom fixes for languages if params['language'] == 'all': locale = None + elif params['language'][:2] == 'ja': + locale = 'jp-jp' + elif params['language'] == 'zh-TW': + locale = 'tw-tzh' + elif params['language'] == 'zh-HK': + locale = 'hk-tzh' + elif params['language'][-2:] == 'SA': + locale = 'xa' + params['language'].split('-')[0] + elif params['language'][-2:] == 'GB': + locale = 'uk' + params['language'].split('-')[0] + elif params['language'] == 'es-419': + locale = 'xl-es' else: - locale = params['language'].split('_') + locale = params['language'].split('-') if len(locale) == 2: # country code goes first locale = locale[1].lower() + '-' + locale[0].lower() @@ -58,7 +78,25 @@ def request(query, params): locale = locale[0].lower() lang_codes = [x[0] for x in language_codes] for lc in lang_codes: - lc = lc.split('_') + lc = lc.split('-') + if locale == lc[0] and len(lc) == 2: + locale = lc[1].lower() + '-' + lc[0].lower() + break + + if locale: + params['url'] = url.format( + query=urlencode({'q': query, 'kl': locale}), offset=offset) + else: + locale = params['language'].split('-') + if len(locale) == 2: + # country code goes first + locale = locale[1].lower() + '-' + locale[0].lower() + else: + # tries to get a country code from language + locale = locale[0].lower() + lang_codes = [x[0] for x in language_codes] + for lc in lang_codes: + lc = lc.split('-') if locale == lc[0]: locale = lc[1].lower() + '-' + lc[0].lower() break diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 
208ccca28..23a2f3be3 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -4,6 +4,7 @@ from re import compile, sub from lxml import html from searx.utils import html_to_text from searx.engines.xpath import extract_text +from searx.engines.duckduckgo import supported_languages url = 'https://api.duckduckgo.com/'\ + '?{query}&format=json&pretty=0&no_redirect=1&d=1' @@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult): def request(query, params): params['url'] = url.format(query=urlencode({'q': query})) - params['headers']['Accept-Language'] = params['language'] + params['headers']['Accept-Language'] = params['language'].split('-')[0] return params diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 5430eb3ba..e139842fa 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -48,7 +48,7 @@ def request(query, params): if params['language'] == 'all': language = 'xx' else: - language = params['language'][0:2] + language = params['language'].split('-')[0] if params['safesearch'] >= 1: safesearch = 1 diff --git a/searx/engines/google.py b/searx/engines/google.py index a02b6940e..375e627ba 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -23,6 +23,20 @@ categories = ['general'] paging = True language_support = True use_locale_domain = True +supported_languages = ['de', 'en', 'es', 'es_419', 'fr', 'hr', 'it', 'nl', 'pl', 'pt-BR', + 'pt-PT', 'vi', 'tr', 'ru', 'ar', 'th', 'ko', 'zh-CN', 'zh-TW', 'ja', + 'ach', 'af', 'ak', 'az', 'ms', 'ban', 'xx_bork', 'bs', 'br', 'ca', + 'ceb', 'ckb', 'cs', 'sn', 'co', 'cy', 'da', 'yo', 'et', 'xx_elmer', + 'eo', 'eu', 'ee', 'tl', 'fo', 'gaa', 'ga', 'gd', 'gl', 'gn', 'xx_hacker', + 'ht', 'ha', 'haw', 'bem', 'ig', 'rn', 'id', 'ia', 'zu', 'is', 'jw', 'rw', + 'sw', 'tlh', 'kg', 'mfe', 'kri', 'la', 'lv', 'to', 'lt', 'ln', 'loz', + 'lua', 'lg', 'hu', 'mg', 'mt', 'mi', 'pcm', 'no', 'nso', 'ny', 'nn', + 'uz', 'oc', 'om', 'xx_pirate', 
'pt', 'ro', 'mo', 'rm', 'qu', 'nyn', 'crs', + 'sq', 'sd', 'sk', 'sl', 'so', 'st', 'sr_ME', 'sr_Latn', 'su', 'fi', 'sv', + 'tg', 'tt', 'tn', 'tum', 'tk', 'tw', 'fy', 'wo', 'xh', 'el', 'be', 'bg', + 'ky', 'kk', 'mk', 'mn', 'sr', 'uk', 'ka', 'hy', 'yi', 'iw', 'ug', 'ur', + 'ps', 'fa', 'ti', 'am', 'ne', 'mr', 'hi', 'bn', 'pa', 'gu', 'or', 'ta', + 'te', 'kn', 'ml', 'si', 'lo', 'my', 'km', 'chr'] time_range_support = True # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 37253c6a7..6d1430248 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -12,6 +12,8 @@ from lxml import html from urllib import urlencode +from json import loads +from searx.engines.google import supported_languages # search-url categories = ['news'] @@ -50,7 +52,7 @@ def request(query, params): search_options=urlencode(search_options)) if params['language'] != 'all': - language_array = params['language'].lower().split('_') + language_array = params['language'].lower().split('-') params['url'] += '&lr=lang_' + language_array[0] return params diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 26d3720d9..b17cb38e4 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -15,6 +15,7 @@ from json import loads from string import Formatter from urllib import urlencode, quote +from searx.engines.wikipedia import supported_engines # engine dependent config categories = ['general'] @@ -46,7 +47,7 @@ def request(query, params): if params['language'] == 'all': language = 'en' else: - language = params['language'].split('_')[0] + language = params['language'].split('-')[0] # format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)] if any(x[1] == 'language' for x in format_strings): diff --git a/searx/engines/photon.py b/searx/engines/photon.py index 2197005e5..a029bbfef 100644 --- a/searx/engines/photon.py +++ 
b/searx/engines/photon.py @@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}' result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}' # list of supported languages -allowed_languages = ['de', 'en', 'fr', 'it'] +supported_languages = ['de', 'en', 'fr', 'it'] # do search-request @@ -37,7 +37,7 @@ def request(query, params): if params['language'] != 'all': language = params['language'].split('_')[0] - if language in allowed_languages: + if language in supported_languages: params['url'] = params['url'] + "&lang=" + language # using searx User-Agent diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 6f6eae1cf..54aafdee5 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -47,7 +47,7 @@ def request(query, params): # set language if specified if params['language'] != 'all': - params['data']['with_language'] = ('lang_' + params['language'].split('_')[0]) + params['data']['with_language'] = ('lang_' + params['language'].split('-')[0]) return params diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index daba68be7..2c0a94f08 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -43,8 +43,13 @@ def response(resp): search_lang = "" - if resp.search_params['language'] != 'all': - search_lang = [lc[1] + # dirty fix for languages named differently on their site + if resp.search_params['language'][:2] == 'fa': + search_lang = 'Farsi' + elif resp.search_params['language'] == 'pt_BR': + search_lang = 'Brazilian' + elif resp.search_params['language'] != 'all': + search_lang = [lc[3] for lc in language_codes if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py index 72184e428..68632a15a 100644 --- a/searx/engines/swisscows.py +++ b/searx/engines/swisscows.py @@ -36,8 +36,8 @@ def request(query, params): ui_language = 'browser' region = 'browser' else: - region = 
params['language'].replace('_', '-') - ui_language = params['language'].split('_')[0] + region = params['language'] + ui_language = params['language'].split('-')[0] search_path = search_string.format( query=urlencode({'query': query, diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index 36efac186..6cca05f70 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -40,7 +40,7 @@ def request(query, params): # set language if specified if params['language'] != 'all': - params['cookies']['lang'] = params['language'].split('_')[0] + params['cookies']['lang'] = params['language'].split('-')[0] else: params['cookies']['lang'] = 'en' diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 91040e218..edb6d75fe 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -14,6 +14,8 @@ from searx import logger from searx.poolrequests import get from searx.engines.xpath import extract_text +from searx.utils import format_date_by_locale +from searx.engines.wikipedia import supported_languages from json import loads from lxml.html import fromstring diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 70191d22b..fdba5ed68 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -13,6 +13,36 @@ from json import loads from urllib import urlencode, quote +supported_languages = ["en", "sv", "ceb", "de", "nl", "fr", "ru", "it", "es", "war", + "pl", "vi", "ja", "pt", "zh", "uk", "ca", "fa", "no", "sh", + "ar", "fi", "hu", "id", "ro", "cs", "ko", "sr", "ms", "tr", + "eu", "eo", "min", "bg", "da", "kk", "sk", "hy", "he", "zh-min-nan", + "lt", "hr", "sl", "et", "ce", "gl", "nn", "uz", "la", "vo", + "el", "simple", "be", "az", "th", "ur", "ka", "hi", "oc", "ta", + "mk", "mg", "new", "lv", "cy", "bs", "tt", "tl", "te", "pms", + "be-tarask", "br", "sq", "ky", "ht", "jv", "tg", "ast", "zh-yue", "lb", + "mr", "ml", "bn", "pnb", "is", "af", "sco", "ga", "ba", "fy", + "cv", "lmo", "sw", "my", 
"an", "yo", "ne", "io", "gu", "nds", + "scn", "bpy", "pa", "ku", "als", "kn", "bar", "ia", "qu", "su", + "ckb", "bat-smg", "mn", "arz", "nap", "wa", "bug", "gd", "yi", "map-bms", + "am", "mzn", "fo", "si", "nah", "li", "sah", "vec", "hsb", "or", + "os", "mrj", "sa", "hif", "mhr", "roa-tara", "azb", "pam", "ilo", + "sd", "ps", "se", "mi", "bh", "eml", "bcl", "xmf", "diq", "hak", + "gan", "glk", "vls", "nds-nl", "rue", "bo", "fiu-vro", "co", "sc", + "tk", "csb", "lrc", "vep", "wuu", "km", "szl", "gv", "crh", "kv", + "zh-classical", "frr", "zea", "as", "so", "kw", "nso", "ay", "stq", + "udm", "cdo", "nrm", "ie", "koi", "rm", "pcd", "myv", "mt", "fur", + "ace", "lad", "gn", "lij", "dsb", "dv", "cbk-zam", "ext", "gom", + "kab", "ksh", "ang", "mai", "mwl", "lez", "gag", "ln", "ug", "pi", + "pag", "frp", "sn", "nv", "av", "pfl", "haw", "xal", "krc", "kaa", + "rw", "bxr", "pdc", "to", "kl", "nov", "arc", "kbd", "lo", "bjn", + "pap", "ha", "tet", "ki", "tyv", "tpi", "na", "lbe", "ig", "jbo", + "roa-rup", "ty", "jam", "za", "kg", "mdf", "lg", "wo", "srn", "ab", + "ltg", "zu", "sm", "chr", "om", "tn", "chy", "rmy", "cu", "tw", "tum", + "xh", "bi", "rn", "pih", "got", "ss", "pnt", "bm", "ch", "mo", "ts", + "ady", "iu", "st", "ee", "ny", "fj", "ks", "ak", "ik", "sg", "ve", + "dz", "ff", "ti", "cr", "ng", "cho", "kj", "mh", "ho", "ii", "aa", "mus", "hz", "kr"] + # search-url base_url = 'https://{language}.wikipedia.org/' search_postfix = 'w/api.php?'\ @@ -28,10 +58,11 @@ search_postfix = 'w/api.php?'\ # set language in base_url def url_lang(lang): - if lang == 'all': + lang = lang.split('-')[0] + if lang == 'all' or lang not in supported_languages: language = 'en' else: - language = lang.split('_')[0] + language = lang return base_url.format(language=language) diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 92cf881c0..7b1b6b35d 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -53,7 +53,7 @@ def request(query, params): # add language tag if 
specified if params['language'] != 'all': - params['url'] += '&lr=lang_' + params['language'].split('_')[0] + params['url'] += '&lr=lang_' + params['language'].split('-')[0] return params diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 2bb34b83d..c00e42368 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -20,6 +20,10 @@ from searx.engines.xpath import extract_text, extract_url categories = ['general'] paging = True language_support = True +supported_languages = ["ar", "bg", "ca", "szh", "tzh", "hr", "cs", "da", "nl", "en", + "et", "fi", "fr", "de", "el", "he", "hu", "is", "id", "it", "ja", + "ko", "lv", "lt", "no", "fa", "pl", "pt", "ro", "ru", "sk", "sr", + "sl", "es", "sv", "th", "tr"] time_range_support = True # search-url @@ -72,7 +76,13 @@ def _get_url(query, offset, language, time_range): def _get_language(params): if params['language'] == 'all': return 'en' - return params['language'].split('_')[0] + elif params['language'][:2] == 'zh': + if params['language'] == 'zh' or params['language'] == 'zh-CH': + return 'szh' + else: + return 'tzh' + else: + return params['language'].split('-')[0] # do search-request diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index e91c1d34e..613513e59 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -12,7 +12,7 @@ from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text, extract_url -from searx.engines.yahoo import parse_url +from searx.engines.yahoo import parse_url, supported_languages from datetime import datetime, timedelta import re from dateutil import parser diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py index b83a747f9..eee345c45 100644 --- a/searx/engines/yandex.py +++ b/searx/engines/yandex.py @@ -36,7 +36,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga def request(query, params): - lang = params['language'].split('_')[0] + lang = 
params['language'].split('-')[0] host = base_url.format(tld=language_map.get(lang) or default_tld) params['url'] = host + search_url.format(page=params['pageno'] - 1, query=urlencode({'text': query})) diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index 8fd939a25..1dfca5166 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -36,7 +36,7 @@ def request(query, params): # add language tag if specified if params['language'] != 'all': - params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0] + params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0] return params diff --git a/searx/languages.py b/searx/languages.py index 70459a577..011b11c02 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -1,78 +1,390 @@ -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. 
- -(C) 2013- by Adam Tauber, -''' - +# -*- coding: utf-8 -*- # list of language codes +# this file is generated automatically by utils/update_languages.py + language_codes = ( - ("ar_XA", "Arabic", "Arabia"), - ("bg_BG", "Bulgarian", "Bulgaria"), - ("cs_CZ", "Czech", "Czech Republic"), - ("da_DK", "Danish", "Denmark"), - ("de_AT", "German", "Austria"), - ("de_CH", "German", "Switzerland"), - ("de_DE", "German", "Germany"), - ("el_GR", "Greek", "Greece"), - ("en_AU", "English", "Australia"), - ("en_CA", "English", "Canada"), - ("en_GB", "English", "United Kingdom"), - ("en_ID", "English", "Indonesia"), - ("en_IE", "English", "Ireland"), - ("en_IN", "English", "India"), - ("en_MY", "English", "Malaysia"), - ("en_NZ", "English", "New Zealand"), - ("en_PH", "English", "Philippines"), - ("en_SG", "English", "Singapore"), - ("en_US", "English", "United States"), - ("en_XA", "English", "Arabia"), - ("en_ZA", "English", "South Africa"), - ("es_AR", "Spanish", "Argentina"), - ("es_CL", "Spanish", "Chile"), - ("es_ES", "Spanish", "Spain"), - ("es_MX", "Spanish", "Mexico"), - ("es_US", "Spanish", "United States"), - ("es_XL", "Spanish", "Latin America"), - ("et_EE", "Estonian", "Estonia"), - ("fi_FI", "Finnish", "Finland"), - ("fr_BE", "French", "Belgium"), - ("fr_CA", "French", "Canada"), - ("fr_CH", "French", "Switzerland"), - ("fr_FR", "French", "France"), - ("he_IL", "Hebrew", "Israel"), - ("hr_HR", "Croatian", "Croatia"), - ("hu_HU", "Hungarian", "Hungary"), - ("it_IT", "Italian", "Italy"), - ("ja_JP", "Japanese", "Japan"), - ("ko_KR", "Korean", "Korea"), - ("lt_LT", "Lithuanian", "Lithuania"), - ("lv_LV", "Latvian", "Latvia"), - ("nb_NO", "Norwegian", "Norway"), - ("nl_BE", "Dutch", "Belgium"), - ("nl_NL", "Dutch", "Netherlands"), - ("oc_OC", "Occitan", "Occitan"), - ("pl_PL", "Polish", "Poland"), - ("pt_BR", "Portuguese", "Brazil"), - ("pt_PT", "Portuguese", "Portugal"), - ("ro_RO", "Romanian", "Romania"), - ("ru_RU", "Russian", "Russia"), - ("sk_SK", "Slovak", 
"Slovak Republic"), - ("sl_SL", "Slovenian", "Slovenia"), - ("sv_SE", "Swedish", "Sweden"), - ("th_TH", "Thai", "Thailand"), - ("tr_TR", "Turkish", "Turkey"), - ("uk_UA", "Ukrainian", "Ukraine"), - ("zh_CN", "Chinese", "China"), - ("zh_HK", "Chinese", "Hong Kong SAR"), - ("zh_TW", "Chinese", "Taiwan")) + (u"gv", u"Gaelg", u"", u"Manx"), + (u"sco", u"Scots", u"", u"Scots"), + (u"scn", u"Sicilianu", u"", u"Sicilian"), + (u"gu", u"ગુજરાતી", u"", u"Gujarati"), + (u"gd", u"Gàidhlig", u"", u"Scottish Gaelic"), + (u"ga", u"Gaeilge", u"", u"Irish"), + (u"gn", u"Avañe'ẽ", u"", u"Guarani"), + (u"gl", u"Galego", u"", u"Galician"), + (u"als", u"Alemannisch", u"", u"Alemannic"), + (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"), + (u"vep", u"Vepsän", u"", u"Vepsian"), + (u"ty", u"Reo Mā`ohi", u"", u"Tahitian"), + (u"tw", u"Twi", u"", u"Twi"), + (u"tt", u"Tatarça / Татарча", u"", u"Tatar"), + (u"tr", u"Türkçe", u"", u"Turkish"), + (u"ts", u"Xitsonga", u"", u"Tsonga"), + (u"tn", u"Setswana", u"", u"Tswana"), + (u"to", u"faka Tonga", u"", u"Tongan"), + (u"tl", u"Tagalog", u"", u"Tagalog"), + (u"vec", u"Vèneto", u"", u"Venetian"), + (u"th", u"ไทย", u"", u"Thai"), + (u"ti", u"ትግርኛ", u"", u"Tigrinya"), + (u"tg", u"Тоҷикӣ", u"", u"Tajik"), + (u"te", u"తెలుగు", u"", u"Telugu"), + (u"ta", u"தமிழ்", u"", u"Tamil"), + (u"lrc", u"لۊری شومالی", u"", u"Northern Luri"), + (u"en-NZ", u"English", u"", u"English"), + (u"got", u"𐌲𐌿𐍄𐌹𐍃𐌺", u"", u"Gothic"), + (u"vls", u"West-Vlams", u"", u"West Flemish"), + (u"ro", u"Română", u"", u"Romanian"), + (u"bxr", u"Буряад", u"", u"Buryat"), + (u"fiu-vro", u"Võro", u"", u"Võro"), + (u"diq", u"Zazaki", u"", u"Zazaki"), + (u"zh", u"中文", u"", u"Chinese"), + (u"pms", u"Piemontèis", u"", u"Piedmontese"), + (u"za", u"Cuengh", u"", u"Zhuang"), + (u"zh-HK", u"中文", u"", u"Chinese"), + (u"zu", u"isiZulu", u"", u"Zulu"), + (u"tet", u"Tetun", u"", u"Tetum"), + (u"es-PE", u"Español", u"", u"Spanish"), + (u"new", u"नेपाल भाषा", u"", u"Newar"), + (u"lez", u"Лезги чІал (Lezgi 
č’al)", u"", u"Lezgian"), + (u"glk", u"گیلکی", u"", u"Gilaki"), + (u"ko-KR", u"한국어", u"", u"Korean"), + (u"id-ID", u"Bahasa Indonesia", u"", u"Indonesian"), + (u"cho", u"Choctaw", u"", u"Choctaw"), + (u"chr", u"ᏣᎳᎩ", u"", u"Cherokee"), + (u"vi", u"Tiếng Việt", u"", u"Vietnamese"), + (u"chy", u"Tsetsêhestâhese", u"", u"Cheyenne"), + (u"is", u"Íslenska", u"", u"Icelandic"), + (u"tk", u"تركمن / Туркмен", u"", u"Turkmen"), + (u"da-DK", u"Dansk", u"", u"Danish"), + (u"pfl", u"Pälzisch", u"", u"Palatinate German"), + (u"hu-HU", u"Magyar", u"", u"Hungarian"), + (u"he-IL", u"עברית", u"", u"Hebrew"), + (u"mg", u"Malagasy", u"", u"Malagasy"), + (u"ml", u"മലയാളം", u"", u"Malayalam"), + (u"mo", u"Молдовеняскэ", u"", u"Moldovan"), + (u"mn", u"Монгол", u"", u"Mongolian"), + (u"mi", u"Māori", u"", u"Maori"), + (u"mh", u"Ebon", u"", u"Marshallese"), + (u"mk", u"Македонски", u"", u"Macedonian"), + (u"mt", u"Malti", u"", u"Maltese"), + (u"ms", u"Bahasa Melayu", u"", u"Malay"), + (u"mr", u"मराठी", u"", u"Marathi"), + (u"mwl", u"Mirandés", u"", u"Mirandese"), + (u"my", u"မြန်မာဘာသာ", u"", u"Burmese"), + (u"en-PH", u"English", u"", u"English"), + (u"srn", u"Sranantongo", u"", u"Sranan"), + (u"pl-PL", u"Polski", u"", u"Polish"), + (u"sl-SL", u"Slovenščina", u"", u"Slovenian"), + (u"csb", u"Kaszëbsczi", u"", u"Kashubian"), + (u"cbk-zam", u"Chavacano de Zamboanga", u"", u"Zamboanga Chavacano"), + (u"nyn", u"Runyankore", u"", u""), + (u"ig", u"Igbo", u"", u"Igbo"), + (u"fr", u"Français", u"", u"French"), + (u"lad", u"Dzhudezmo", u"", u"Ladino"), + (u"fy", u"Frysk", u"", u"West Frisian"), + (u"fa", u"فارسی", u"", u"Persian"), + (u"ff", u"Fulfulde", u"", u"Fula"), + (u"mai", u"मैथिली", u"", u"Maithili"), + (u"fi", u"Suomi", u"", u"Finnish"), + (u"fj", u"Na Vosa Vakaviti", u"", u"Fijian"), + (u"fo", u"Føroyskt", u"", u"Faroese"), + (u"ss", u"SiSwati", u"", u"Swati"), + (u"roa-tara", u"Tarandíne", u"", u"Tarantino"), + (u"sq", u"Shqip", u"", u"Albanian"), + (u"sw", u"Kiswahili", u"", 
u"Swahili"), + (u"sv", u"Svenska", u"", u"Swedish"), + (u"su", u"Basa Sunda", u"", u"Sundanese"), + (u"st", u"Sesotho", u"", u"Sesotho"), + (u"sk", u"Slovenčina", u"", u"Slovak"), + (u"si", u"සිංහල", u"", u"Sinhalese"), + (u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"), + (u"so", u"Soomaali", u"", u"Somali"), + (u"sn", u"chiShona", u"", u"Shona"), + (u"sm", u"Gagana Samoa", u"", u"Samoan"), + (u"sl", u"Slovenščina", u"", u"Slovenian"), + (u"sc", u"Sardu", u"", u"Sardinian"), + (u"pt-BR", u"português (Brasil)", u"", u""), + (u"sa", u"संस्कृतम्", u"", u"Sanskrit"), + (u"sg", u"Sängö", u"", u"Sango"), + (u"se", u"Sámegiella", u"", u"Northern Sami"), + (u"sd", u"سنڌي، سندھی ، सिन्ध", u"", u"Sindhi"), + (u"fr-CH", u"Français", u"", u"French"), + (u"zea", u"Zeêuws", u"", u"Zeelandic"), + (u"it-CH", u"Italiano", u"", u"Italian"), + (u"wuu", u"吴语", u"", u"Wu"), + (u"fr-CA", u"Français", u"", u"French"), + (u"ar-XA", u"العربية", u"", u"Arabic"), + (u"kbd", u"Адыгэбзэ (Adighabze)", u"", u"Kabardian Circassian"), + (u"no-NO", u"Norsk (Bokmål)", u"", u"Norwegian (Bokmål)"), + (u"ca-ES", u"Català", u"", u"Catalan"), + (u"lg", u"Luganda", u"", u"Luganda"), + (u"lb", u"Lëtzebuergesch", u"", u"Luxembourgish"), + (u"la", u"Latina", u"", u"Latin"), + (u"ln", u"Lingala", u"", u"Lingala"), + (u"lo", u"ລາວ", u"", u"Lao"), + (u"de-CH", u"Deutsch", u"", u"German"), + (u"li", u"Limburgs", u"", u"Limburgish"), + (u"lv", u"Latviešu", u"", u"Latvian"), + (u"lt", u"Lietuvių", u"", u"Lithuanian"), + (u"pcm", u"Nigerian Pidgin", u"", u""), + (u"pcd", u"Picard", u"", u"Picard"), + (u"yi", u"ייִדיש", u"", u"Yiddish"), + (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"), + (u"yo", u"Yorùbá", u"", u"Yoruba"), + (u"ro-RO", u"Română", u"", u"Romanian"), + (u"bar", u"Boarisch", u"", u"Bavarian"), + (u"nov", u"Novial", u"", u"Novial"), + (u"sr-ME", u"srpski (Crna Gora)", u"", u""), + (u"es-CL", u"Español", u"", u"Spanish"), + (u"es-CO", u"Español", u"", u"Spanish"), + (u"nl-NL", 
u"Nederlands", u"", u"Dutch"), + (u"map-bms", u"Basa Banyumasan", u"", u"Banyumasan"), + (u"el", u"Ελληνικά", u"", u"Greek"), + (u"eo", u"Esperanto", u"", u"Esperanto"), + (u"en", u"English", u"", u"English"), + (u"ee", u"Eʋegbe", u"", u"Ewe"), + (u"mdf", u"Мокшень (Mokshanj Kälj)", u"", u"Moksha"), + (u"eu", u"Euskara", u"", u"Basque"), + (u"et", u"Eesti", u"", u"Estonian"), + (u"es", u"Español", u"", u"Spanish"), + (u"gom", u"गोवा कोंकणी / Gova Konknni", u"", u"Goan Konkani"), + (u"ru", u"Русский", u"", u"Russian"), + (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"), + (u"rm", u"Rumantsch", u"", u"Romansh"), + (u"rn", u"Kirundi", u"", u"Kirundi"), + (u"es-419", u"español (Latinoamérica)", u"", u""), + (u"dsb", u"Dolnoserbski", u"", u"Lower Sorbian"), + (u"ast", u"Asturianu", u"", u"Asturian"), + (u"lmo", u"Lumbaart", u"", u"Lombard"), + (u"ltg", u"Latgaļu", u"", u"Latgalian"), + (u"xh", u"isiXhosa", u"", u"Xhosa"), + (u"en-CA", u"English", u"", u"English"), + (u"koi", u"Перем Коми (Perem Komi)", u"", u"Komi-Permyak"), + (u"tr-TR", u"Türkçe", u"", u"Turkish"), + (u"pnt", u"Ποντιακά", u"", u"Pontic"), + (u"es-XL", u"Español", u"", u"Spanish"), + (u"fi-FI", u"Suomi", u"", u"Finnish"), + (u"pnb", u"شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", u"", u"Western Punjabi"), + (u"udm", u"Удмурт кыл", u"", u"Udmurt"), + (u"bem", u"Ichibemba", u"", u""), + (u"roa-rup", u"Armãneashce", u"", u"Aromanian"), + (u"sr-Latn", u"srpski (latinica)", u"", u""), + (u"stq", u"Seeltersk", u"", u"Saterland Frisian"), + (u"sr", u"Српски / Srpski", u"", u"Serbian"), + (u"ang", u"Englisc", u"", u"Anglo-Saxon"), + (u"ru-RU", u"Русский", u"", u"Russian"), + (u"lbe", u"Лакку", u"", u"Lak"), + (u"min", u"Minangkabau", u"", u"Minangkabau"), + (u"es-US", u"Español", u"", u"Spanish"), + (u"lij", u"Líguru", u"", u"Ligurian"), + (u"kab", u"Taqbaylit", u"", u"Kabyle"), + (u"kaa", u"Qaraqalpaqsha", u"", u"Karakalpak"), + (u"fr-FR", u"Français", u"", u"French"), + (u"tyv", u"Тыва", u"", u"Tuvan"), + (u"ka", 
u"ქართული", u"", u"Georgian"), + (u"kg", u"KiKongo", u"", u"Kongo"), + (u"ckb", u"Soranî / کوردی", u"", u"Sorani"), + (u"kk", u"Қазақша", u"", u"Kazakh"), + (u"kj", u"Kuanyama", u"", u"Kuanyama"), + (u"ki", u"Gĩkũyũ", u"", u"Kikuyu"), + (u"ko", u"한국어", u"", u"Korean"), + (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"), + (u"tpi", u"Tok Pisin", u"", u"Tok Pisin"), + (u"kl", u"Kalaallisut", u"", u"Greenlandic"), + (u"ks", u"कश्मीरी / كشميري", u"", u"Kashmiri"), + (u"kr", u"Kanuri", u"", u"Kanuri"), + (u"ext", u"Estremeñu", u"", u"Extremaduran"), + (u"kw", u"Kernewek/Karnuack", u"", u"Cornish"), + (u"kv", u"Коми", u"", u"Komi"), + (u"mrj", u"Кырык Мары (Kyryk Mary)", u"", u"Hill Mari"), + (u"ky", u"Кыргызча", u"", u"Kirghiz"), + (u"szl", u"Ślůnski", u"", u"Silesian"), + (u"cdo", u"Mìng-dĕ̤ng-ngṳ̄", u"", u"Min Dong"), + (u"en-GB", u"English", u"", u"English"), + (u"xmf", u"მარგალური (Margaluri)", u"", u"Mingrelian"), + (u"jam", u"Jamaican Creole English", u"", u"Patois"), + (u"ar-SA", u"العربية", u"", u"Arabic"), + (u"ksh", u"Ripoarisch", u"", u"Ripuarian"), + (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"), + (u"de", u"Deutsch", u"", u"German"), + (u"da", u"Dansk", u"", u"Danish"), + (u"dz", u"ཇོང་ཁ", u"", u"Dzongkha"), + (u"hif", u"Fiji Hindi", u"", u"Fiji Hindi"), + (u"dv", u"ދިވެހިބަސް", u"", u"Divehi"), + (u"crs", u"Seychellois Creole", u"", u""), + (u"qu", u"Runa Simi", u"", u"Quechua"), + (u"eml", u"Emiliàn e rumagnòl", u"", u"Emilian-Romagnol"), + (u"ban", u"Balinese", u"", u""), + (u"crh", u"Qırımtatarca", u"", u"Crimean Tatar"), + (u"arz", u"مصرى (Maṣri)", u"", u"Egyptian Arabic"), + (u"rmy", u"romani - रोमानी", u"", u"Romani"), + (u"arc", u"ܐܪܡܝܐ", u"", u"Aramaic"), + (u"th-TH", u"ไทย", u"", u"Thai"), + (u"mus", u"Muskogee", u"", u"Muscogee"), + (u"lua", u"Luba-Lulua", u"", u""), + (u"en-ZA", u"English", u"", u"English"), + (u"wa", u"Walon", u"", u"Walloon"), + (u"wo", u"Wolof", u"", u"Wolof"), + (u"jv", u"Basa Jawa", u"", u"Javanese"), + (u"jw", u"Javanese", u"", u""), + 
(u"fr-BE", u"Français", u"", u"French"), + (u"tum", u"chiTumbuka", u"", u"Tumbuka"), + (u"ja", u"日本語", u"", u"Japanese"), + (u"pt-PT", u"português (Portugal)", u"", u""), + (u"ilo", u"Ilokano", u"", u"Ilokano"), + (u"tlh", u"Klingon", u"", u""), + (u"pdc", u"Deitsch", u"", u"Pennsylvania German"), + (u"aa", u"Afar", u"", u"Afar"), + (u"ch", u"Chamoru", u"", u"Chamorro"), + (u"co", u"Corsu", u"", u"Corsican"), + (u"simple", u"Simple English", u"", u"Simple English"), + (u"ca", u"Català", u"", u"Catalan"), + (u"xx-pirate", u"Pirate", u"", u""), + (u"ce", u"Нохчийн", u"", u"Chechen"), + (u"cy", u"Cymraeg", u"", u"Welsh"), + (u"sah", u"Саха тыла (Saxa Tyla)", u"", u"Sakha"), + (u"cs", u"Čeština", u"", u"Czech"), + (u"cr", u"Nehiyaw", u"", u"Cree"), + (u"bg-BG", u"Български", u"", u"Bulgarian"), + (u"cv", u"Чăваш", u"", u"Chuvash"), + (u"cu", u"Словѣньскъ", u"", u"Old Church Slavonic"), + (u"ps", u"پښتو", u"", u"Pashto"), + (u"pt", u"Português", u"", u"Portuguese"), + (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"), + (u"frr", u"Nordfriisk", u"", u"North Frisian"), + (u"frp", u"Arpitan", u"", u"Franco-Provençal"), + (u"xal", u"Хальмг", u"", u"Kalmyk"), + (u"pi", u"पाऴि", u"", u"Pali"), + (u"it-IT", u"Italiano", u"", u"Italian"), + (u"pl", u"Polski", u"", u"Polish"), + (u"nrm", u"Nouormand/Normaund", u"", u"Norman"), + (u"en-US", u"English", u"", u"English"), + (u"gan", u"贛語", u"", u"Gan"), + (u"bat-smg", u"Žemaitėška", u"", u"Samogitian"), + (u"en-UK", u"English", u"", u"English"), + (u"gag", u"Gagauz", u"", u"Gagauz"), + (u"an", u"Aragonés", u"", u"Aragonese"), + (u"gaa", u"Ga", u"", u""), + (u"fur", u"Furlan", u"", u"Friulian"), + (u"kr-KR", u"Kanuri", u"", u"Kanuri"), + (u"zh-CN", u"中文 (简体)", u"", u""), + (u"tl-PH", u"Tagalog", u"", u"Tagalog"), + (u"en-IN", u"English", u"", u"English"), + (u"ve", u"Tshivenda", u"", u"Venda"), + (u"en-ID", u"English", u"", u"English"), + (u"en-IE", u"English", u"", u"English"), + (u"xx-bork", u"Bork, bork, bork!", u"", u""), + (u"iu", 
u"ᐃᓄᒃᑎᑐᑦ", u"", u"Inuktitut"), + (u"it", u"Italiano", u"", u"Italian"), + (u"iw", u"עברית", u"", u""), + (u"vo", u"Volapük", u"", u"Volapük"), + (u"ii", u"ꆇꉙ", u"", u"Sichuan Yi"), + (u"ik", u"Iñupiak", u"", u"Inupiak"), + (u"io", u"Ido", u"", u"Ido"), + (u"ia", u"Interlingua", u"", u"Interlingua"), + (u"ja-JP", u"日本語", u"", u"Japanese"), + (u"ie", u"Interlingue", u"", u"Interlingue"), + (u"id", u"Bahasa Indonesia", u"", u"Indonesian"), + (u"nds-nl", u"Nedersaksisch", u"", u"Dutch Low Saxon"), + (u"pap", u"Papiamentu", u"", u"Papiamentu"), + (u"pag", u"Pangasinan", u"", u"Pangasinan"), + (u"pam", u"Kapampangan", u"", u"Kapampangan"), + (u"lv-LV", u"Latviešu", u"", u"Latvian"), + (u"mzn", u"مَزِروني", u"", u"Mazandarani"), + (u"nl-BE", u"Nederlands", u"", u"Dutch"), + (u"sk-SK", u"Slovenčina", u"", u"Slovak"), + (u"zh-TW", u"中文 (繁體)", u"", u""), + (u"es-MX", u"Español", u"", u"Spanish"), + (u"de-DE", u"Deutsch", u"", u"German"), + (u"jbo", u"Lojban", u"", u"Lojban"), + (u"mfe", u"kreol morisien", u"", u""), + (u"hak", u"Hak-kâ-fa / 客家話", u"", u"Hakka"), + (u"ny", u"Chichewa", u"", u"Chichewa"), + (u"ady", u"Адыгэбзэ", u"", u"Adyghe"), + (u"haw", u"Hawai`i", u"", u"Hawaiian"), + (u"el-GR", u"Ελληνικά", u"", u"Greek"), + (u"bpy", u"ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", u"", u"Bishnupriya Manipuri"), + (u"mhr", u"Олык Марий (Olyk Marij)", u"", u"Meadow Mari"), + (u"ca-CT", u"Català", u"", u"Catalan"), + (u"en-MY", u"English", u"", u"English"), + (u"sv-SE", u"Svenska", u"", u"Swedish"), + (u"de-AT", u"Deutsch", u"", u"German"), + (u"xx-elmer", u"Elmer Fudd", u"", u""), + (u"hsb", u"Hornjoserbsce", u"", u"Upper Sorbian"), + (u"be", u"Беларуская", u"", u"Belarusian"), + (u"bg", u"Български", u"", u"Bulgarian"), + (u"ba", u"Башҡорт", u"", u"Bashkir"), + (u"bm", u"Bamanankan", u"", u"Bambara"), + (u"bn", u"বাংলা", u"", u"Bengali"), + (u"bo", u"བོད་སྐད", u"", u"Tibetan"), + (u"bh", u"भोजपुरी", u"", u"Bihari"), + (u"bi", u"Bislama", u"", u"Bislama"), + (u"rue", u"Русиньскый", u"", 
u"Rusyn"), + (u"et-EE", u"Eesti", u"", u"Estonian"), + (u"br", u"Brezhoneg", u"", u"Breton"), + (u"bs", u"Bosanski", u"", u"Bosnian"), + (u"om", u"Oromoo", u"", u"Oromo"), + (u"ace", u"Bahsa Acèh", u"", u"Acehnese"), + (u"es-AR", u"Español", u"", u"Spanish"), + (u"ach", u"Acoli", u"", u""), + (u"oc", u"Occitan", u"", u"Occitan"), + (u"kri", u"Krio (Sierra Leone)", u"", u""), + (u"be-tarask", u"Беларуская (тарашкевіца)", u"", u"Belarusian (Taraškievica)"), + (u"krc", u"Къарачай-Малкъар (Qarachay-Malqar)", u"", u"Karachay-Balkar"), + (u"nds", u"Plattdüütsch", u"", u"Low Saxon"), + (u"os", u"Иронау", u"", u"Ossetian"), + (u"or", u"ଓଡ଼ିଆ", u"", u"Oriya"), + (u"nso", u"Sepedi", u"", u"Northern Sotho"), + (u"bjn", u"Bahasa Banjar", u"", u"Banjar"), + (u"xx-hacker", u"Hacker", u"", u""), + (u"zh-min-nan", u"Bân-lâm-gú", u"", u"Min Nan"), + (u"pa", u"ਪੰਜਾਬੀ", u"", u"Punjabi"), + (u"loz", u"Lozi", u"", u""), + (u"war", u"Winaray", u"", u"Waray-Waray"), + (u"hz", u"Otsiherero", u"", u"Herero"), + (u"hy", u"Հայերեն", u"", u"Armenian"), + (u"hr", u"Hrvatski", u"", u"Croatian"), + (u"ht", u"Krèyol ayisyen", u"", u"Haitian"), + (u"hu", u"Magyar", u"", u"Hungarian"), + (u"hi", u"हिन्दी", u"", u"Hindi"), + (u"ho", u"Hiri Motu", u"", u"Hiri Motu"), + (u"ha", u"هَوُسَ", u"", u"Hausa"), + (u"bug", u"Basa Ugi", u"", u"Buginese"), + (u"he", u"עברית", u"", u"Hebrew"), + (u"hr-HR", u"Hrvatski", u"", u"Croatian"), + (u"uz", u"O‘zbek", u"", u"Uzbek"), + (u"azb", u"تۆرکجه", u"", u"South Azerbaijani"), + (u"ur", u"اردو", u"", u"Urdu"), + (u"uk", u"Українська", u"", u"Ukrainian"), + (u"ug", u"ئۇيغۇر تىلى", u"", u"Uyghur"), + (u"pih", u"Norfuk", u"", u"Norfolk"), + (u"ab", u"Аҧсуа", u"", u"Abkhazian"), + (u"af", u"Afrikaans", u"", u"Afrikaans"), + (u"ak", u"Akana", u"", u"Akan"), + (u"am", u"አማርኛ", u"", u"Amharic"), + (u"myv", u"Эрзянь (Erzjanj Kelj)", u"", u"Erzya"), + (u"as", u"অসমীয়া", u"", u"Assamese"), + (u"ar", u"العربية", u"", u"Arabic"), + (u"km", u"ភាសាខ្មែរ", u"", u"Khmer"), + 
(u"uk-UA", u"Українська", u"", u"Ukrainian"), + (u"av", u"Авар", u"", u"Avar"), + (u"ay", u"Aymar", u"", u"Aymara"), + (u"az", u"Azərbaycanca", u"", u"Azerbaijani"), + (u"es-ES", u"Español", u"", u"Spanish"), + (u"nl", u"Nederlands", u"", u"Dutch"), + (u"nn", u"Nynorsk", u"", u"Norwegian (Nynorsk)"), + (u"no", u"Norsk (Bokmål)", u"", u"Norwegian (Bokmål)"), + (u"na", u"dorerin Naoero", u"", u"Nauruan"), + (u"nah", u"Nāhuatl", u"", u"Nahuatl"), + (u"ne", u"नेपाली", u"", u"Nepali"), + (u"ng", u"Oshiwambo", u"", u"Ndonga"), + (u"en-AU", u"English", u"", u"English"), + (u"nap", u"Nnapulitano", u"", u"Neapolitan"), + (u"nv", u"Diné bizaad", u"", u"Navajo"), + (u"ku", u"Kurdî / كوردی", u"", u"Kurdish"), + (u"cs-CZ", u"Čeština", u"", u"Czech"), + (u"zh-yue", u"粵語", u"", u"Cantonese"), + (u"en-SG", u"English", u"", u"English"), + (u"zh-classical", u"古文 / 文言文", u"", u"Classical Chinese"), + (u"bcl", u"Bikol", u"", u"Central Bicolano"), + (u"en-XA", u"English", u"", u"English") +) diff --git a/searx/query.py b/searx/query.py index b8afba6ed..dcef1924d 100644 --- a/searx/query.py +++ b/searx/query.py @@ -71,21 +71,24 @@ class RawTextQuery(object): # check if any language-code is equal with # declared language-codes for lc in language_codes: - lang_id, lang_name, country = map(str.lower, lc) + lang_id, lang_name, country, english_name = map(unicode.lower, lc) # if correct language-code is found # set it as new search-language if lang == lang_id\ or lang_id.startswith(lang)\ or lang == lang_name\ + or lang == english_name\ or lang.replace('_', ' ') == country: parse_next = True - self.languages.append(lang) - break + self.languages.append(lang_id) + # to ensure best match (first match is not necessarily the best one) + if lang == lang_id: + break # this force a engine or category if query_part[0] == '!' 
or query_part[0] == '?': - prefix = query_part[1:].replace('_', ' ') + prefix = query_part[1:].replace('-', ' ') # check if prefix is equal with engine shortcut if prefix in engine_shortcuts: diff --git a/searx/templates/courgette/preferences.html b/searx/templates/courgette/preferences.html index ba4d0c650..56a6e0202 100644 --- a/searx/templates/courgette/preferences.html +++ b/searx/templates/courgette/preferences.html @@ -13,9 +13,9 @@ {{ _('Search language') }}

diff --git a/searx/templates/legacy/preferences.html b/searx/templates/legacy/preferences.html index 30d632c93..f418dcd5b 100644 --- a/searx/templates/legacy/preferences.html +++ b/searx/templates/legacy/preferences.html @@ -14,9 +14,9 @@ {{ _('Search language') }}

diff --git a/searx/templates/oscar/advanced.html b/searx/templates/oscar/advanced.html index 2c694cf5f..b4a054a11 100644 --- a/searx/templates/oscar/advanced.html +++ b/searx/templates/oscar/advanced.html @@ -6,4 +6,5 @@
{% include 'oscar/categories.html' %} {% include 'oscar/time-range.html' %} + {% include 'oscar/languages.html' %}
diff --git a/searx/templates/oscar/languages.html b/searx/templates/oscar/languages.html new file mode 100644 index 000000000..3aa6376a2 --- /dev/null +++ b/searx/templates/oscar/languages.html @@ -0,0 +1,12 @@ +{% if preferences %} + +{% endif %} + + {% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %} + + {% endfor %} + diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html index 6ad795095..18bd7c212 100644 --- a/searx/templates/oscar/preferences.html +++ b/searx/templates/oscar/preferences.html @@ -40,12 +40,7 @@ {% set language_label = _('Search language') %} {% set language_info = _('What language do you prefer for search?') %} {{ preferences_item_header(language_info, language_label, rtl) }} - + {% include 'oscar/languages.html' %} {{ preferences_item_footer(language_info, language_label, rtl) }} {% set locale_label = _('Interface language') %} @@ -153,6 +148,7 @@ {{ _("Allow") }} {{ _("Engine name") }} {{ _("Shortcut") }} + {{ _("Language support") }} {{ _("SafeSearch") }} {{ _("Time range") }} {{ _("Avg. time") }} @@ -161,6 +157,7 @@ {{ _("Max time") }} {{ _("Avg. time") }} {{ _("SafeSearch") }} + {{ _("Language support") }} {{ _("Shortcut") }} {{ _("Engine name") }} {{ _("Allow") }} @@ -175,6 +172,7 @@ {{ search_engine.name }} {{ shortcuts[search_engine.name] }} + {{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }} @@ -183,6 +181,7 @@ {{ search_engine.timeout }} {{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }} + {{ shortcuts[search_engine.name] }} {{ search_engine.name }} diff --git a/searx/templates/pix-art/preferences.html b/searx/templates/pix-art/preferences.html index ea5557b07..05876dedf 100644 --- a/searx/templates/pix-art/preferences.html +++ b/searx/templates/pix-art/preferences.html @@ -9,9 +9,9 @@ {{ _('Search language') }}

diff --git a/searx/webapp.py b/searx/webapp.py index 096e1f269..c4a35a970 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -330,6 +330,10 @@ def render(template_name, override_theme=None, **kwargs): kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) + kwargs['language_codes'] = language_codes + if 'current_language' not in kwargs: + kwargs['current_language'] = request.preferences.get_value('language') + # override url_for function in templates kwargs['url_for'] = url_for_theme @@ -510,6 +514,7 @@ def index(): answers=result_container.answers, infoboxes=result_container.infoboxes, paging=result_container.paging, + current_language=search.lang, base_url=get_base_url(), theme=get_current_theme_name(), favicons=global_favicons[themes.index(get_current_theme_name())] @@ -552,7 +557,7 @@ def autocompleter(): if not language or language == 'all': language = 'en' else: - language = language.split('_')[0] + language = language.split('-')[0] # run autocompletion raw_results.extend(completer(raw_text_query.getSearchQuery(), language)) @@ -615,9 +620,7 @@ def preferences(): return render('preferences.html', locales=settings['locales'], current_locale=get_locale(), - current_language=lang, image_proxy=image_proxy, - language_codes=language_codes, engines_by_category=categories, stats=stats, answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers], @@ -627,7 +630,8 @@ def preferences(): themes=themes, plugins=plugins, allowed_plugins=allowed_plugins, - theme=get_current_theme_name()) + theme=get_current_theme_name(), + preferences=True) @app.route('/image_proxy', methods=['GET']) diff --git a/utils/update_languages.py b/utils/update_languages.py new file mode 100644 index 000000000..5c340b01e --- /dev/null +++ b/utils/update_languages.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- + +# This script generates languages.py from +# intersecting each engine's supported languages. 
+# +# The language's native names are obtained from +# Wikipedia's supported languages. +# +# Output file (languages.py) is written in current directory +# to avoid overwriting in case something goes wrong. + +from requests import get +from re import sub +from lxml.html import fromstring +from json import loads +from sys import path +path.append('../searx') +from searx.engines import engines + +# list of language names +wiki_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' +google_languages_url = 'https://www.google.com/preferences?#languages' + +google_json_name = 'google.preferences.langMap' + +languages = {} + +# Get language names from Wikipedia. +def get_wikipedia_languages(): + response = get(wiki_languages_url) + dom = fromstring(response.text) + tables = dom.xpath('//table[contains(@class,"sortable")]') + for table in tables: + # exclude header row + trs = table.xpath('.//tr')[1:] + for tr in trs: + td = tr.xpath('./td') + code = td[3].xpath('./a')[0].text + name = td[2].xpath('./a')[0].text + english_name = td[1].xpath('./a')[0].text + + if code not in languages: + languages[code] = (name, '', english_name) + +# Get language names from Google. +def get_google_languages(): + response = get(google_languages_url) + dom = fromstring(response.text) + options = dom.xpath('//select[@name="hl"]/option') + for option in options: + code = option.xpath('./@value')[0] + name = option.text[:-1] + + if code not in languages: + languages[code] = (name, '', '') + +# Join all language lists. 
+# iterate all languages supported by each engine +def join_language_lists(): + for engine_name in engines: + for locale in engines[engine_name].supported_languages: + locale = locale.replace('_', '-') + if locale not in languages: + # try to get language name + language = languages.get(locale.split('-')[0], None) + if language == None: + print engine_name + ": " + locale + continue + + (name, country, english) = language + languages[locale] = (name, country, english) + +# Write languages.py. +def write_languages_file(): + new_file = open('languages.py', 'w') + file_content = '# -*- coding: utf-8 -*-\n' + file_content += '# list of language codes\n' + file_content += '# this file is generated automatically by utils/update_search_languages.py\n' + file_content += '\nlanguage_codes = (' + for code in languages: + (name, country, english) = languages[code] + file_content += '\n (u"' + code + '"'\ + + ', u"' + name + '"'\ + + ', u"' + country[1:-1] + '"'\ + + ', u"' + english + '"),' + # remove last comma + file_content = file_content[:-1] + file_content += '\n)\n' + new_file.write(file_content.encode('utf8')) + new_file.close() + +def main(): + get_wikipedia_languages() + get_google_languages() + join_language_lists() + write_languages_file() + +if __name__ == "__main__": + main() From c677aee58a4eca1015262eb24530620a333ddcef Mon Sep 17 00:00:00 2001 From: marc Date: Sat, 6 Aug 2016 22:19:21 -0500 Subject: [PATCH 02/14] filter langauges --- searx/engines/gigablast.py | 10 +++++- searx/engines/google.py | 34 ++++++++++--------- searx/languages.py | 68 ++++++++++---------------------------- utils/update_languages.py | 41 +++++++++++++++++++---- 4 files changed, 81 insertions(+), 72 deletions(-) diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index e139842fa..125ffa0a6 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -40,6 +40,12 @@ url_xpath = './/url' title_xpath = './/title' content_xpath = './/sum' +supported_languages 
= ["en", "fr", "es", "ru", "tr", "ja", "zh-CN", "zh-TW", "ko", "de", + "nl", "it", "fi", "sv", "no", "pt", "vi", "ar", "he", "id", "el", + "th", "hi", "bn", "pl", "tl", "la", "eo", "ca", "bg", "tx", "sr", + "hu", "da", "lt", "cs", "gl", "ka", "gd", "go", "ro", "ga", "lv", + "hy", "is", "ag", "gv", "io", "fa", "te", "vv", "mg", "ku", "lb", "et"] + # do search-request def request(query, params): @@ -48,7 +54,9 @@ def request(query, params): if params['language'] == 'all': language = 'xx' else: - language = params['language'].split('-')[0] + language = params['language'].replace('-', '_').lower() + if language.split('-')[0] != 'zh': + language = language.split('-')[0] if params['safesearch'] >= 1: safesearch = 1 diff --git a/searx/engines/google.py b/searx/engines/google.py index 375e627ba..31035be69 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -23,20 +23,20 @@ categories = ['general'] paging = True language_support = True use_locale_domain = True -supported_languages = ['de', 'en', 'es', 'es_419', 'fr', 'hr', 'it', 'nl', 'pl', 'pt-BR', - 'pt-PT', 'vi', 'tr', 'ru', 'ar', 'th', 'ko', 'zh-CN', 'zh-TW', 'ja', - 'ach', 'af', 'ak', 'az', 'ms', 'ban', 'xx_bork', 'bs', 'br', 'ca', - 'ceb', 'ckb', 'cs', 'sn', 'co', 'cy', 'da', 'yo', 'et', 'xx_elmer', - 'eo', 'eu', 'ee', 'tl', 'fo', 'gaa', 'ga', 'gd', 'gl', 'gn', 'xx_hacker', - 'ht', 'ha', 'haw', 'bem', 'ig', 'rn', 'id', 'ia', 'zu', 'is', 'jw', 'rw', - 'sw', 'tlh', 'kg', 'mfe', 'kri', 'la', 'lv', 'to', 'lt', 'ln', 'loz', - 'lua', 'lg', 'hu', 'mg', 'mt', 'mi', 'pcm', 'no', 'nso', 'ny', 'nn', - 'uz', 'oc', 'om', 'xx_pirate', 'pt', 'ro', 'mo', 'rm', 'qu', 'nyn', 'crs', - 'sq', 'sd', 'sk', 'sl', 'so', 'st', 'sr_ME', 'sr_Latn', 'su', 'fi', 'sv', - 'tg', 'tt', 'tn', 'tum', 'tk', 'tw', 'fy', 'wo', 'xh', 'el', 'be', 'bg', - 'ky', 'kk', 'mk', 'mn', 'sr', 'uk', 'ka', 'hy', 'yi', 'iw', 'ug', 'ur', - 'ps', 'fa', 'ti', 'am', 'ne', 'mr', 'hi', 'bn', 'pa', 'gu', 'or', 'ta', - 'te', 'kn', 'ml', 'si', 'lo', 'my', 'km', 
'chr'] +supported_languages = ["ach", "af", "ak", "az", "ms", "ban", "xx-bork", "bs", "br", "ca", + "ceb", "ckb", "cs", "sn", "co", "cy", "da", "de", "yo", "et", + "xx-elmer", "en", "es", "es-419", "eo", "eu", "ee", "tl", "fo", "fr", + "gaa", "ga", "gd", "gl", "gn", "xx-hacker", "ht", "ha", "hr", "haw", + "bem", "ig", "rn", "id", "ia", "zu", "is", "it", "jw", "rw", "sw", + "tlh", "kg", "mfe", "kri", "la", "lv", "to", "lt", "ln", "loz", + "lua", "lg", "hu", "mg", "mt", "mi", "nl", "pcm", "no", "nso", + "ny", "nn", "uz", "oc", "om", "xx-pirate", "pl", "pt-BR", "pt-PT", + "ro", "rm", "qu", "nyn", "crs", "sq", "sd", "sk", "sl", "so", "st", + "sr-ME", "sr-Latn", "su", "fi", "sv", "tg", "tt", "vi", "tn", "tum", + "tr", "tk", "tw", "fy", "wo", "xh", "el", "be", "bg", "ky", "kk", "mk", + "mn", "ru", "sr", "uk", "ka", "hy", "yi", "iw", "ug", "ur", "ar", "ps", + "fa", "ti", "am", "ne", "mr", "hi", "bn", "pa", "gu", "or", "ta", "te", + "kn", "ml", "si", "th", "lo", "my", "km", "chr", "ko", "zh-CN", "zh-TW", "ja"] time_range_support = True # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests @@ -181,8 +181,12 @@ def request(query, params): language = 'en' country = 'US' url_lang = '' + elif params['language'][:2] == 'jv': + language = 'jw' + country = 'ID' + url_lang = 'lang_jw' else: - language_array = params['language'].lower().split('_') + language_array = params['language'].lower().split('-') if len(language_array) == 2: country = language_array[1] else: diff --git a/searx/languages.py b/searx/languages.py index 011b11c02..dddd380d4 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -32,9 +32,7 @@ language_codes = ( (u"en-NZ", u"English", u"", u"English"), (u"got", u"𐌲𐌿𐍄𐌹𐍃𐌺", u"", u"Gothic"), (u"vls", u"West-Vlams", u"", u"West Flemish"), - (u"ro", u"Română", u"", u"Romanian"), (u"bxr", u"Буряад", u"", u"Buryat"), - (u"fiu-vro", u"Võro", u"", u"Võro"), (u"diq", u"Zazaki", u"", u"Zazaki"), (u"zh", u"中文", u"", u"Chinese"), (u"pms", 
u"Piemontèis", u"", u"Piedmontese"), @@ -42,28 +40,25 @@ language_codes = ( (u"zh-HK", u"中文", u"", u"Chinese"), (u"zu", u"isiZulu", u"", u"Zulu"), (u"tet", u"Tetun", u"", u"Tetum"), - (u"es-PE", u"Español", u"", u"Spanish"), + (u"zh-CN", u"中文 (简体)", u"", u""), (u"new", u"नेपाल भाषा", u"", u"Newar"), + (u"ro-RO", u"Română", u"", u"Romanian"), (u"lez", u"Лезги чІал (Lezgi č’al)", u"", u"Lezgian"), (u"glk", u"گیلکی", u"", u"Gilaki"), (u"ko-KR", u"한국어", u"", u"Korean"), (u"id-ID", u"Bahasa Indonesia", u"", u"Indonesian"), - (u"cho", u"Choctaw", u"", u"Choctaw"), (u"chr", u"ᏣᎳᎩ", u"", u"Cherokee"), (u"vi", u"Tiếng Việt", u"", u"Vietnamese"), (u"chy", u"Tsetsêhestâhese", u"", u"Cheyenne"), - (u"is", u"Íslenska", u"", u"Icelandic"), (u"tk", u"تركمن / Туркмен", u"", u"Turkmen"), (u"da-DK", u"Dansk", u"", u"Danish"), (u"pfl", u"Pälzisch", u"", u"Palatinate German"), (u"hu-HU", u"Magyar", u"", u"Hungarian"), - (u"he-IL", u"עברית", u"", u"Hebrew"), (u"mg", u"Malagasy", u"", u"Malagasy"), (u"ml", u"മലയാളം", u"", u"Malayalam"), (u"mo", u"Молдовеняскэ", u"", u"Moldovan"), (u"mn", u"Монгол", u"", u"Mongolian"), (u"mi", u"Māori", u"", u"Maori"), - (u"mh", u"Ebon", u"", u"Marshallese"), (u"mk", u"Македонски", u"", u"Macedonian"), (u"mt", u"Malti", u"", u"Maltese"), (u"ms", u"Bahasa Melayu", u"", u"Malay"), @@ -75,7 +70,6 @@ language_codes = ( (u"pl-PL", u"Polski", u"", u"Polish"), (u"sl-SL", u"Slovenščina", u"", u"Slovenian"), (u"csb", u"Kaszëbsczi", u"", u"Kashubian"), - (u"cbk-zam", u"Chavacano de Zamboanga", u"", u"Zamboanga Chavacano"), (u"nyn", u"Runyankore", u"", u""), (u"ig", u"Igbo", u"", u"Igbo"), (u"fr", u"Français", u"", u"French"), @@ -88,7 +82,7 @@ language_codes = ( (u"fj", u"Na Vosa Vakaviti", u"", u"Fijian"), (u"fo", u"Føroyskt", u"", u"Faroese"), (u"ss", u"SiSwati", u"", u"Swati"), - (u"roa-tara", u"Tarandíne", u"", u"Tarantino"), + (u"sr", u"Српски / Srpski", u"", u"Serbian"), (u"sq", u"Shqip", u"", u"Albanian"), (u"sw", u"Kiswahili", u"", u"Swahili"), (u"sv", 
u"Svenska", u"", u"Swedish"), @@ -104,7 +98,7 @@ language_codes = ( (u"sc", u"Sardu", u"", u"Sardinian"), (u"pt-BR", u"português (Brasil)", u"", u""), (u"sa", u"संस्कृतम्", u"", u"Sanskrit"), - (u"sg", u"Sängö", u"", u"Sango"), + (u"he-IL", u"עברית", u"", u"Hebrew"), (u"se", u"Sámegiella", u"", u"Northern Sami"), (u"sd", u"سنڌي، سندھی ، सिन्ध", u"", u"Sindhi"), (u"fr-CH", u"Français", u"", u"French"), @@ -112,7 +106,6 @@ language_codes = ( (u"it-CH", u"Italiano", u"", u"Italian"), (u"wuu", u"吴语", u"", u"Wu"), (u"fr-CA", u"Français", u"", u"French"), - (u"ar-XA", u"العربية", u"", u"Arabic"), (u"kbd", u"Адыгэбзэ (Adighabze)", u"", u"Kabardian Circassian"), (u"no-NO", u"Norsk (Bokmål)", u"", u"Norwegian (Bokmål)"), (u"ca-ES", u"Català", u"", u"Catalan"), @@ -130,14 +123,13 @@ language_codes = ( (u"yi", u"ייִדיש", u"", u"Yiddish"), (u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"), (u"yo", u"Yorùbá", u"", u"Yoruba"), - (u"ro-RO", u"Română", u"", u"Romanian"), + (u"ru-RU", u"Русский", u"", u"Russian"), (u"bar", u"Boarisch", u"", u"Bavarian"), (u"nov", u"Novial", u"", u"Novial"), (u"sr-ME", u"srpski (Crna Gora)", u"", u""), (u"es-CL", u"Español", u"", u"Spanish"), (u"es-CO", u"Español", u"", u"Spanish"), (u"nl-NL", u"Nederlands", u"", u"Dutch"), - (u"map-bms", u"Basa Banyumasan", u"", u"Banyumasan"), (u"el", u"Ελληνικά", u"", u"Greek"), (u"eo", u"Esperanto", u"", u"Esperanto"), (u"en", u"English", u"", u"English"), @@ -151,7 +143,7 @@ language_codes = ( (u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"), (u"rm", u"Rumantsch", u"", u"Romansh"), (u"rn", u"Kirundi", u"", u"Kirundi"), - (u"es-419", u"español (Latinoamérica)", u"", u""), + (u"ro", u"Română", u"", u"Romanian"), (u"dsb", u"Dolnoserbski", u"", u"Lower Sorbian"), (u"ast", u"Asturianu", u"", u"Asturian"), (u"lmo", u"Lumbaart", u"", u"Lombard"), @@ -161,17 +153,13 @@ language_codes = ( (u"koi", u"Перем Коми (Perem Komi)", u"", u"Komi-Permyak"), (u"tr-TR", u"Türkçe", u"", u"Turkish"), (u"pnt", u"Ποντιακά", u"", 
u"Pontic"), - (u"es-XL", u"Español", u"", u"Spanish"), (u"fi-FI", u"Suomi", u"", u"Finnish"), (u"pnb", u"شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", u"", u"Western Punjabi"), - (u"udm", u"Удмурт кыл", u"", u"Udmurt"), + (u"ar-SA", u"العربية", u"", u"Arabic"), (u"bem", u"Ichibemba", u"", u""), - (u"roa-rup", u"Armãneashce", u"", u"Aromanian"), - (u"sr-Latn", u"srpski (latinica)", u"", u""), + (u"en-ZA", u"English", u"", u"English"), (u"stq", u"Seeltersk", u"", u"Saterland Frisian"), - (u"sr", u"Српски / Srpski", u"", u"Serbian"), (u"ang", u"Englisc", u"", u"Anglo-Saxon"), - (u"ru-RU", u"Русский", u"", u"Russian"), (u"lbe", u"Лакку", u"", u"Lak"), (u"min", u"Minangkabau", u"", u"Minangkabau"), (u"es-US", u"Español", u"", u"Spanish"), @@ -184,14 +172,13 @@ language_codes = ( (u"kg", u"KiKongo", u"", u"Kongo"), (u"ckb", u"Soranî / کوردی", u"", u"Sorani"), (u"kk", u"Қазақша", u"", u"Kazakh"), - (u"kj", u"Kuanyama", u"", u"Kuanyama"), + (u"sg", u"Sängö", u"", u"Sango"), (u"ki", u"Gĩkũyũ", u"", u"Kikuyu"), (u"ko", u"한국어", u"", u"Korean"), (u"kn", u"ಕನ್ನಡ", u"", u"Kannada"), (u"tpi", u"Tok Pisin", u"", u"Tok Pisin"), (u"kl", u"Kalaallisut", u"", u"Greenlandic"), (u"ks", u"कश्मीरी / كشميري", u"", u"Kashmiri"), - (u"kr", u"Kanuri", u"", u"Kanuri"), (u"ext", u"Estremeñu", u"", u"Extremaduran"), (u"kw", u"Kernewek/Karnuack", u"", u"Cornish"), (u"kv", u"Коми", u"", u"Komi"), @@ -202,7 +189,7 @@ language_codes = ( (u"en-GB", u"English", u"", u"English"), (u"xmf", u"მარგალური (Margaluri)", u"", u"Mingrelian"), (u"jam", u"Jamaican Creole English", u"", u"Patois"), - (u"ar-SA", u"العربية", u"", u"Arabic"), + (u"udm", u"Удмурт кыл", u"", u"Udmurt"), (u"ksh", u"Ripoarisch", u"", u"Ripuarian"), (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"), (u"de", u"Deutsch", u"", u"German"), @@ -219,13 +206,10 @@ language_codes = ( (u"rmy", u"romani - रोमानी", u"", u"Romani"), (u"arc", u"ܐܪܡܝܐ", u"", u"Aramaic"), (u"th-TH", u"ไทย", u"", u"Thai"), - (u"mus", u"Muskogee", u"", u"Muscogee"), (u"lua", 
u"Luba-Lulua", u"", u""), - (u"en-ZA", u"English", u"", u"English"), (u"wa", u"Walon", u"", u"Walloon"), (u"wo", u"Wolof", u"", u"Wolof"), (u"jv", u"Basa Jawa", u"", u"Javanese"), - (u"jw", u"Javanese", u"", u""), (u"fr-BE", u"Français", u"", u"French"), (u"tum", u"chiTumbuka", u"", u"Tumbuka"), (u"ja", u"日本語", u"", u"Japanese"), @@ -233,12 +217,9 @@ language_codes = ( (u"ilo", u"Ilokano", u"", u"Ilokano"), (u"tlh", u"Klingon", u"", u""), (u"pdc", u"Deitsch", u"", u"Pennsylvania German"), - (u"aa", u"Afar", u"", u"Afar"), (u"ch", u"Chamoru", u"", u"Chamorro"), (u"co", u"Corsu", u"", u"Corsican"), - (u"simple", u"Simple English", u"", u"Simple English"), (u"ca", u"Català", u"", u"Catalan"), - (u"xx-pirate", u"Pirate", u"", u""), (u"ce", u"Нохчийн", u"", u"Chechen"), (u"cy", u"Cymraeg", u"", u"Welsh"), (u"sah", u"Саха тыла (Saxa Tyla)", u"", u"Sakha"), @@ -254,30 +235,26 @@ language_codes = ( (u"frp", u"Arpitan", u"", u"Franco-Provençal"), (u"xal", u"Хальмг", u"", u"Kalmyk"), (u"pi", u"पाऴि", u"", u"Pali"), - (u"it-IT", u"Italiano", u"", u"Italian"), + (u"ak", u"Akana", u"", u"Akan"), (u"pl", u"Polski", u"", u"Polish"), (u"nrm", u"Nouormand/Normaund", u"", u"Norman"), (u"en-US", u"English", u"", u"English"), (u"gan", u"贛語", u"", u"Gan"), - (u"bat-smg", u"Žemaitėška", u"", u"Samogitian"), - (u"en-UK", u"English", u"", u"English"), (u"gag", u"Gagauz", u"", u"Gagauz"), (u"an", u"Aragonés", u"", u"Aragonese"), (u"gaa", u"Ga", u"", u""), (u"fur", u"Furlan", u"", u"Friulian"), - (u"kr-KR", u"Kanuri", u"", u"Kanuri"), - (u"zh-CN", u"中文 (简体)", u"", u""), + (u"es-PE", u"Español", u"", u"Spanish"), (u"tl-PH", u"Tagalog", u"", u"Tagalog"), (u"en-IN", u"English", u"", u"English"), (u"ve", u"Tshivenda", u"", u"Venda"), (u"en-ID", u"English", u"", u"English"), (u"en-IE", u"English", u"", u"English"), - (u"xx-bork", u"Bork, bork, bork!", u"", u""), + (u"is", u"Íslenska", u"", u"Icelandic"), (u"iu", u"ᐃᓄᒃᑎᑐᑦ", u"", u"Inuktitut"), (u"it", u"Italiano", u"", u"Italian"), (u"iw", 
u"עברית", u"", u""), (u"vo", u"Volapük", u"", u"Volapük"), - (u"ii", u"ꆇꉙ", u"", u"Sichuan Yi"), (u"ik", u"Iñupiak", u"", u"Inupiak"), (u"io", u"Ido", u"", u"Ido"), (u"ia", u"Interlingua", u"", u"Interlingua"), @@ -288,7 +265,7 @@ language_codes = ( (u"pap", u"Papiamentu", u"", u"Papiamentu"), (u"pag", u"Pangasinan", u"", u"Pangasinan"), (u"pam", u"Kapampangan", u"", u"Kapampangan"), - (u"lv-LV", u"Latviešu", u"", u"Latvian"), + (u"nl", u"Nederlands", u"", u"Dutch"), (u"mzn", u"مَزِروني", u"", u"Mazandarani"), (u"nl-BE", u"Nederlands", u"", u"Dutch"), (u"sk-SK", u"Slovenčina", u"", u"Slovak"), @@ -297,6 +274,7 @@ language_codes = ( (u"de-DE", u"Deutsch", u"", u"German"), (u"jbo", u"Lojban", u"", u"Lojban"), (u"mfe", u"kreol morisien", u"", u""), + (u"tcy", u"Tulu", u"", u"ತುಳು"), (u"hak", u"Hak-kâ-fa / 客家話", u"", u"Hakka"), (u"ny", u"Chichewa", u"", u"Chichewa"), (u"ady", u"Адыгэбзэ", u"", u"Adyghe"), @@ -308,7 +286,6 @@ language_codes = ( (u"en-MY", u"English", u"", u"English"), (u"sv-SE", u"Svenska", u"", u"Swedish"), (u"de-AT", u"Deutsch", u"", u"German"), - (u"xx-elmer", u"Elmer Fudd", u"", u""), (u"hsb", u"Hornjoserbsce", u"", u"Upper Sorbian"), (u"be", u"Беларуская", u"", u"Belarusian"), (u"bg", u"Български", u"", u"Bulgarian"), @@ -328,25 +305,20 @@ language_codes = ( (u"ach", u"Acoli", u"", u""), (u"oc", u"Occitan", u"", u"Occitan"), (u"kri", u"Krio (Sierra Leone)", u"", u""), - (u"be-tarask", u"Беларуская (тарашкевіца)", u"", u"Belarusian (Taraškievica)"), (u"krc", u"Къарачай-Малкъар (Qarachay-Malqar)", u"", u"Karachay-Balkar"), (u"nds", u"Plattdüütsch", u"", u"Low Saxon"), (u"os", u"Иронау", u"", u"Ossetian"), (u"or", u"ଓଡ଼ିଆ", u"", u"Oriya"), (u"nso", u"Sepedi", u"", u"Northern Sotho"), (u"bjn", u"Bahasa Banjar", u"", u"Banjar"), - (u"xx-hacker", u"Hacker", u"", u""), - (u"zh-min-nan", u"Bân-lâm-gú", u"", u"Min Nan"), (u"pa", u"ਪੰਜਾਬੀ", u"", u"Punjabi"), (u"loz", u"Lozi", u"", u""), (u"war", u"Winaray", u"", u"Waray-Waray"), - (u"hz", u"Otsiherero", u"", 
u"Herero"), (u"hy", u"Հայերեն", u"", u"Armenian"), (u"hr", u"Hrvatski", u"", u"Croatian"), (u"ht", u"Krèyol ayisyen", u"", u"Haitian"), (u"hu", u"Magyar", u"", u"Hungarian"), (u"hi", u"हिन्दी", u"", u"Hindi"), - (u"ho", u"Hiri Motu", u"", u"Hiri Motu"), (u"ha", u"هَوُسَ", u"", u"Hausa"), (u"bug", u"Basa Ugi", u"", u"Buginese"), (u"he", u"עברית", u"", u"Hebrew"), @@ -359,7 +331,7 @@ language_codes = ( (u"pih", u"Norfuk", u"", u"Norfolk"), (u"ab", u"Аҧсуа", u"", u"Abkhazian"), (u"af", u"Afrikaans", u"", u"Afrikaans"), - (u"ak", u"Akana", u"", u"Akan"), + (u"it-IT", u"Italiano", u"", u"Italian"), (u"am", u"አማርኛ", u"", u"Amharic"), (u"myv", u"Эрзянь (Erzjanj Kelj)", u"", u"Erzya"), (u"as", u"অসমীয়া", u"", u"Assamese"), @@ -370,21 +342,17 @@ language_codes = ( (u"ay", u"Aymar", u"", u"Aymara"), (u"az", u"Azərbaycanca", u"", u"Azerbaijani"), (u"es-ES", u"Español", u"", u"Spanish"), - (u"nl", u"Nederlands", u"", u"Dutch"), + (u"lv-LV", u"Latviešu", u"", u"Latvian"), (u"nn", u"Nynorsk", u"", u"Norwegian (Nynorsk)"), (u"no", u"Norsk (Bokmål)", u"", u"Norwegian (Bokmål)"), (u"na", u"dorerin Naoero", u"", u"Nauruan"), (u"nah", u"Nāhuatl", u"", u"Nahuatl"), (u"ne", u"नेपाली", u"", u"Nepali"), - (u"ng", u"Oshiwambo", u"", u"Ndonga"), (u"en-AU", u"English", u"", u"English"), (u"nap", u"Nnapulitano", u"", u"Neapolitan"), (u"nv", u"Diné bizaad", u"", u"Navajo"), (u"ku", u"Kurdî / كوردی", u"", u"Kurdish"), (u"cs-CZ", u"Čeština", u"", u"Czech"), - (u"zh-yue", u"粵語", u"", u"Cantonese"), (u"en-SG", u"English", u"", u"English"), - (u"zh-classical", u"古文 / 文言文", u"", u"Classical Chinese"), - (u"bcl", u"Bikol", u"", u"Central Bicolano"), - (u"en-XA", u"English", u"", u"English") + (u"bcl", u"Bikol", u"", u"Central Bicolano") ) diff --git a/utils/update_languages.py b/utils/update_languages.py index 5c340b01e..cb230c210 100644 --- a/utils/update_languages.py +++ b/utils/update_languages.py @@ -11,7 +11,7 @@ from requests import get from re import sub -from lxml.html import fromstring 
+from lxml.html import fromstring, tostring from json import loads from sys import path path.append('../searx') @@ -25,6 +25,28 @@ google_json_name = 'google.preferences.langMap' languages = {} + +# To filter out invalid codes and dialects. +def valid_code(lang_code): + # filter invalid codes + if lang_code[:2] == 'xx'\ + or lang_code == 'jw'\ + or lang_code[-2:] == 'UK'\ + or lang_code[-2:] == 'XA'\ + or lang_code[-2:] == 'XL': + return False + + # filter dialects + lang_code = lang_code.split('-') + if len(lang_code) > 2 or len(lang_code[0]) > 3: + return False + if len(lang_code) == 2 and len(lang_code[1]) > 2: + print lang_code + return False + + return True + + # Get language names from Wikipedia. def get_wikipedia_languages(): response = get(wiki_languages_url) @@ -38,10 +60,13 @@ def get_wikipedia_languages(): code = td[3].xpath('./a')[0].text name = td[2].xpath('./a')[0].text english_name = td[1].xpath('./a')[0].text + articles = int(td[4].xpath('./a/b')[0].text.replace(',','')) - if code not in languages: + # exclude languages with few articles and language variants + if code not in languages and articles >= 100 and valid_code(code): languages[code] = (name, '', english_name) + # Get language names from Google. def get_google_languages(): response = get(google_languages_url) @@ -51,25 +76,27 @@ def get_google_languages(): code = option.xpath('./@value')[0] name = option.text[:-1] - if code not in languages: + if code not in languages and valid_code(code): languages[code] = (name, '', '') + # Join all language lists. 
# iterate all languages supported by each engine def join_language_lists(): for engine_name in engines: for locale in engines[engine_name].supported_languages: locale = locale.replace('_', '-') - if locale not in languages: + if locale not in languages and valid_code(locale): # try to get language name language = languages.get(locale.split('-')[0], None) if language == None: - print engine_name + ": " + locale + # print engine_name + ": " + locale continue (name, country, english) = language languages[locale] = (name, country, english) + # Write languages.py. def write_languages_file(): new_file = open('languages.py', 'w') @@ -81,7 +108,7 @@ def write_languages_file(): (name, country, english) = languages[code] file_content += '\n (u"' + code + '"'\ + ', u"' + name + '"'\ - + ', u"' + country[1:-1] + '"'\ + + ', u"' + country + '"'\ + ', u"' + english + '"),' # remove last comma file_content = file_content[:-1] @@ -89,11 +116,13 @@ def write_languages_file(): new_file.write(file_content.encode('utf8')) new_file.close() + def main(): get_wikipedia_languages() get_google_languages() join_language_lists() write_languages_file() + if __name__ == "__main__": main() From a11230819ff76312b4528b2bfb6e03d0560075b1 Mon Sep 17 00:00:00 2001 From: marc Date: Tue, 20 Sep 2016 15:19:02 -0500 Subject: [PATCH 03/14] automatic search when language is selected --- searx/static/plugins/js/search_on_category_select.js | 5 +++++ searx/templates/oscar/languages.html | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/searx/static/plugins/js/search_on_category_select.js b/searx/static/plugins/js/search_on_category_select.js index 19aeef944..a76fd1266 100644 --- a/searx/static/plugins/js/search_on_category_select.js +++ b/searx/static/plugins/js/search_on_category_select.js @@ -15,5 +15,10 @@ $(document).ready(function() { $('#search_form').submit(); } }); + $('#language').change(function(e) { + if($('#q').val()) { + $('#search_form').submit(); + } + }); } }); diff --git 
a/searx/templates/oscar/languages.html b/searx/templates/oscar/languages.html index 3aa6376a2..03df06823 100644 --- a/searx/templates/oscar/languages.html +++ b/searx/templates/oscar/languages.html @@ -1,7 +1,7 @@ {% if preferences %} + + +
+
+
+
+ + + + + """ + response = mock.Mock(text=html) + languages = bing._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 3) + self.assertIn('es', languages) + self.assertIn('pt-BR', languages) + self.assertIn('pt-PT', languages) diff --git a/tests/unit/engines/test_dailymotion.py b/tests/unit/engines/test_dailymotion.py index 368b3a7a5..72071af66 100644 --- a/tests/unit/engines/test_dailymotion.py +++ b/tests/unit/engines/test_dailymotion.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from collections import defaultdict import mock from searx.engines import dailymotion @@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase): results = dailymotion.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) + + def test_fetch_supported_languages(self): + json = r""" + {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans", + "localized_name":"Afrikaans","display_name":"Afrikaans"}, + {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629", + "localized_name":"Arabic","display_name":"Arabic"}, + {"code":"la","name":"Latin","native_name":null, + "localized_name":"Latin","display_name":"Latin"} + ]} + """ + response = mock.Mock(text=json) + languages = dailymotion._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 3) + self.assertIn('af', languages) + self.assertIn('ar', languages) + self.assertIn('la', languages) + + self.assertEqual(type(languages['af']), dict) + self.assertEqual(type(languages['ar']), dict) + self.assertEqual(type(languages['la']), dict) + + self.assertIn('name', languages['af']) + self.assertIn('name', languages['ar']) + self.assertNotIn('name', languages['la']) + + self.assertIn('english_name', languages['af']) + self.assertIn('english_name', languages['ar']) + self.assertIn('english_name', languages['la']) + + 
self.assertEqual(languages['af']['name'], 'Afrikaans') + self.assertEqual(languages['af']['english_name'], 'Afrikaans') + self.assertEqual(languages['ar']['name'], u'العربية') + self.assertEqual(languages['ar']['english_name'], 'Arabic') + self.assertEqual(languages['la']['english_name'], 'Latin') diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py index 2aeaa1880..7d6abad22 100644 --- a/tests/unit/engines/test_duckduckgo.py +++ b/tests/unit/engines/test_duckduckgo.py @@ -84,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase): self.assertEqual(results[0]['title'], 'This is the title') self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű') self.assertEqual(results[0]['content'], 'This should be the content.') + + def test_fetch_supported_languages(self): + js = """some code...regions:{ + "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)" + }some more code...""" + response = mock.Mock(text=js) + languages = duckduckgo._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 5) + self.assertIn('wt-WT', languages) + self.assertIn('es-AR', languages) + self.assertIn('en-AU', languages) + self.assertIn('de-AT', languages) + self.assertIn('fr-BE', languages) diff --git a/tests/unit/engines/test_gigablast.py b/tests/unit/engines/test_gigablast.py index 0723b064c..74bf89333 100644 --- a/tests/unit/engines/test_gigablast.py +++ b/tests/unit/engines/test_gigablast.py @@ -89,3 +89,28 @@ class TestGigablastEngine(SearxTestCase): self.assertEqual(results[0]['title'], 'South by Southwest 2016') self.assertEqual(results[0]['url'], 'www.sxsw.com') self.assertEqual(results[0]['content'], 'This should be the content.') + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + results = gigablast._fetch_supported_languages(response) + self.assertEqual(type(results), list) + 
self.assertEqual(len(results), 0) + + html = """ + + + + + + + + + + """ + response = mock.Mock(text=html) + languages = gigablast._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 2) + self.assertIn('en', languages) + self.assertIn('fr', languages) diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py index c83eb3bf0..0d56b1e7b 100644 --- a/tests/unit/engines/test_google.py +++ b/tests/unit/engines/test_google.py @@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase): self.assertEqual(results[0]['title'], '') self.assertEqual(results[0]['content'], '') self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg') + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + languages = google._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 0) + + html = u""" + + + + + + + + + +
+ + + + + + + + +
+ + + """ + response = mock.Mock(text=html) + languages = google._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 3) + + self.assertIn('en', languages) + self.assertIn('zh-CN', languages) + self.assertIn('zh-TW', languages) + + self.assertEquals(type(languages['en']), dict) + self.assertEquals(type(languages['zh-CN']), dict) + self.assertEquals(type(languages['zh-TW']), dict) + + self.assertIn('name', languages['en']) + self.assertIn('name', languages['zh-CN']) + self.assertIn('name', languages['zh-TW']) + + self.assertEquals(languages['en']['name'], 'English') + self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)') + self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)') diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py index dbbc044da..27f33d70a 100644 --- a/tests/unit/engines/test_swisscows.py +++ b/tests/unit/engines/test_swisscows.py @@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase): self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg') self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png') self.assertEqual(results[2]['template'], 'images.html') + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + languages = swisscows._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 0) + + html = """ + +
+
+
    +
  • +
  • +
  • +
+
+
+ + """ + response = mock.Mock(text=html) + languages = swisscows._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 3) + self.assertIn('de-CH', languages) + self.assertIn('fr-CH', languages) diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index 0057277c5..988080b6a 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -164,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase): self.assertEqual(len(results), 2) self.assertEqual(results[1]['infobox'], u'披頭四樂隊') self.assertIn(u'披头士乐队...', results[1]['content']) + + def test_fetch_supported_languages(self): + html = u"""""" + response = mock.Mock(text=html) + languages = wikipedia._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 0) + + html = u""" + + +
+
+

Table header

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
NLanguageLanguage (local)WikiArticles
2SwedishSvenskasv3000000
3CebuanoSinugboanong Binisayaceb3000000
+

Table header

+ + + + + + + + + + + + + + + + + + + +
NLanguageLanguage (local)WikiArticles
2Norwegian (Bokmål)Norsk (Bokmål)no100000
+
+
+ + + """ + response = mock.Mock(text=html) + languages = wikipedia._fetch_supported_languages(response) + self.assertEqual(type(languages), dict) + self.assertEqual(len(languages), 3) + + self.assertIn('sv', languages) + self.assertIn('ceb', languages) + self.assertIn('no', languages) + + self.assertEqual(type(languages['sv']), dict) + self.assertEqual(type(languages['ceb']), dict) + self.assertEqual(type(languages['no']), dict) + + self.assertIn('name', languages['sv']) + self.assertIn('english_name', languages['sv']) + self.assertIn('articles', languages['sv']) + + self.assertEqual(languages['sv']['name'], 'Svenska') + self.assertEqual(languages['sv']['english_name'], 'Swedish') + self.assertEqual(languages['sv']['articles'], 3000000) + self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya') + self.assertEqual(languages['ceb']['english_name'], 'Cebuano') + self.assertEqual(languages['ceb']['articles'], 3000000) + self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)') + self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)') + self.assertEqual(languages['no']['articles'], 100000) diff --git a/tests/unit/engines/test_yahoo.py b/tests/unit/engines/test_yahoo.py index 303295e2f..82c4d99bb 100644 --- a/tests/unit/engines/test_yahoo.py +++ b/tests/unit/engines/test_yahoo.py @@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase): results = yahoo.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) + + def test_fetch_supported_languages(self): + html = """""" + response = mock.Mock(text=html) + results = yahoo._fetch_supported_languages(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + html = """ + +
+
+ + + + + + + +
+
+ + """ + response = mock.Mock(text=html) + languages = yahoo._fetch_supported_languages(response) + self.assertEqual(type(languages), list) + self.assertEqual(len(languages), 3) + self.assertIn('ar', languages) + self.assertIn('zh-chs', languages) + self.assertIn('zh-cht', languages) diff --git a/utils/fetch_languages.py b/utils/fetch_languages.py index ae4a2def9..aed875daf 100644 --- a/utils/fetch_languages.py +++ b/utils/fetch_languages.py @@ -84,7 +84,7 @@ def fetch_supported_languages(): # write json file f = io.open(engines_languages_file, "w", encoding="utf-8") - f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8"))) + f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8"))) f.close() @@ -110,18 +110,22 @@ def join_language_lists(): else: languages[locale] = {} - # get locales that have no name yet + # get locales that have no name or country yet for locale in languages.keys(): if not languages[locale].get('name'): - # try to get language and country names + # try to get language names name = languages.get(locale.split('-')[0], {}).get('name', None) if name: languages[locale]['name'] = name - languages[locale]['country'] = get_country_name(locale) or '' languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '') else: # filter out locales with no name del languages[locale] + continue + + # try to get country name + if locale.find('-') > 0 and not languages[locale].get('country'): + languages[locale]['country'] = get_country_name(locale) or '' # Remove countryless language if language is featured in only one country. 
From 4a1ff56389d6ad560594ba82b448aef1d70bbbf4 Mon Sep 17 00:00:00 2001 From: marc Date: Fri, 16 Dec 2016 22:14:14 -0600 Subject: [PATCH 13/14] minor fixes in utils/fetch_languages.py --- searx/data/engines_languages.json | 2 +- searx/engines/wikipedia.py | 3 +- searx/languages.py | 4 +- searx/templates/oscar/preferences.html | 4 +- utils/fetch_languages.py | 55 ++++++++++++++------------ 5 files changed, 36 insertions(+), 32 deletions(-) diff --git a/searx/data/engines_languages.json b/searx/data/engines_languages.json index 243f8777c..6d669fb2f 100644 --- a/searx/data/engines_languages.json +++ b/searx/data/engines_languages.json @@ -1 +1 @@ -{"google news": {"el": {"name": "Ελληνικά"}, "eo": {"name": "Esperanto"}, "en": {"name": "English"}, "af": {"name": "Afrikaans"}, "vi": {"name": "Tiếng Việt"}, "ca": {"name": "Català"}, "it": {"name": "Italiano"}, "iw": {"name": "עברית"}, "hy": {"name": "Հայերեն"}, "cs": {"name": "Čeština"}, "et": {"name": "Eesti"}, "id": {"name": "Indonesia"}, "es": {"name": "Español"}, "ru": {"name": "Русский"}, "nl": {"name": "Nederlands"}, "pt": {"name": "Português"}, "no": {"name": "Norsk"}, "tr": {"name": "Türkçe"}, "lt": {"name": "Lietuvių"}, "lv": {"name": "Latviešu"}, "tl": {"name": "Filipino"}, "zh-TW": {"name": "中文 (繁體)"}, "th": {"name": "ไทย"}, "ro": {"name": "Română"}, "is": {"name": "Íslenska"}, "pl": {"name": "Polski"}, "be": {"name": "Беларуская"}, "fr": {"name": "Français"}, "bg": {"name": "Български"}, "hr": {"name": "Hrvatski"}, "de": {"name": "Deutsch"}, "ko": {"name": "한국어"}, "da": {"name": "Dansk"}, "fa": {"name": "فارسی"}, "hi": {"name": "हिन्दी"}, "fi": {"name": "Suomi"}, "hu": {"name": "Magyar"}, "ja": {"name": "日本語"}, "sr": {"name": "Српски"}, "sw": {"name": "Kiswahili"}, "sv": {"name": "Svenska"}, "sk": {"name": "Slovenčina"}, "zh-CN": {"name": "中文 (简体)"}, "ar": {"name": "العربية"}, "uk": {"name": "Українська"}, "sl": {"name": "Slovenščina"}}, "dailymotion": {"gv": {"english_name": "Manx"}, "gu": {"name": "ગુજરાતી", 
"english_name": "Gujarati"}, "gd": {"english_name": "Gaelic, Scottish"}, "ga": {"name": "Gaeilge", "english_name": "Irish"}, "gn": {"english_name": "Guarani"}, "gl": {"name": "Galego", "english_name": "Galician"}, "lg": {"english_name": "Ganda"}, "lb": {"english_name": "Luxembourgish"}, "la": {"english_name": "Latin"}, "ln": {"english_name": "Lingala"}, "lo": {"english_name": "Lao"}, "tt": {"name": "Татарча", "english_name": "Tatar"}, "tr": {"name": "Türkçe", "english_name": "Turkish"}, "ts": {"english_name": "Tsonga"}, "li": {"english_name": "Limburgan"}, "lv": {"name": "Latviešu", "english_name": "Latvian"}, "to": {"english_name": "Tonga (Tonga Islands)"}, "lt": {"name": "Lietuvių", "english_name": "Lithuanian"}, "lu": {"english_name": "Luba-Katanga"}, "tk": {"english_name": "Turkmen"}, "th": {"name": "ไทย", "english_name": "Thai"}, "ti": {"name": "ትግርኛ", "english_name": "Tigrinya"}, "tg": {"english_name": "Tajik"}, "te": {"english_name": "Telugu"}, "ta": {"name": "தமிழ்", "english_name": "Tamil"}, "yi": {"english_name": "Yiddish"}, "yo": {"english_name": "Yoruba"}, "de": {"name": "Deutsch", "english_name": "German"}, "da": {"name": "Dansk", "english_name": "Danish"}, "dz": {"english_name": "Dzongkha"}, "st": {"english_name": "Sotho, Southern"}, "dv": {"english_name": "Dhivehi"}, "qu": {"english_name": "Quechua"}, "el": {"name": "Ελληνικά", "english_name": "Greek, Modern (1453-)"}, "eo": {"name": "Esperanto", "english_name": "Esperanto"}, "en": {"english_name": "English"}, "zh": {"name": "中文", "english_name": "Chinese"}, "ee": {"english_name": "Ewe"}, "za": {"english_name": "Zhuang"}, "mh": {"english_name": "Marshallese"}, "uk": {"name": "українська", "english_name": "Ukrainian"}, "eu": {"name": "Euskara", "english_name": "Basque"}, "et": {"name": "Eesti", "english_name": "Estonian"}, "es": {"name": "Español", "english_name": "Spanish"}, "ru": {"name": "русский", "english_name": "Russian"}, "rw": {"name": "Ikinyarwanda", "english_name": "Kinyarwanda"}, "rm": 
{"english_name": "Romansh"}, "rn": {"english_name": "Rundi"}, "ro": {"name": "Română", "english_name": "Romanian"}, "bn": {"name": "বাংলা", "english_name": "Bengali"}, "be": {"english_name": "Belarusian"}, "bg": {"name": "Български", "english_name": "Bulgarian"}, "ba": {"english_name": "Bashkir"}, "wa": {"name": "Walon", "english_name": "Walloon"}, "wo": {"english_name": "Wolof"}, "bm": {"english_name": "Bambara"}, "jv": {"english_name": "Javanese"}, "bo": {"english_name": "Tibetan"}, "bi": {"english_name": "Bislama"}, "br": {"name": "Brezhoneg", "english_name": "Breton"}, "bs": {"name": "Bosnian", "english_name": "Bosnian"}, "ja": {"name": "日本語", "english_name": "Japanese"}, "om": {"english_name": "Oromo"}, "oj": {"english_name": "Ojibwa"}, "ty": {"english_name": "Tahitian"}, "oc": {"name": "Occitan", "english_name": "Occitan"}, "tw": {"english_name": "Twi"}, "os": {"english_name": "Ossetian"}, "or": {"name": "Oriya", "english_name": "Oriya"}, "xh": {"name": "Xhosa", "english_name": "Xhosa"}, "ch": {"english_name": "Chamorro"}, "co": {"english_name": "Corsican"}, "ca": {"name": "Català", "english_name": "Catalan"}, "ce": {"english_name": "Chechen"}, "cy": {"name": "Cymraeg", "english_name": "Welsh"}, "cs": {"name": "čeština", "english_name": "Czech"}, "cr": {"english_name": "Cree"}, "cv": {"english_name": "Chuvash"}, "cu": {"english_name": "Slavic, Church"}, "ve": {"name": "Venda", "english_name": "Venda"}, "ps": {"name": "Pushto", "english_name": "Pushto"}, "pt": {"name": "Português", "english_name": "Portuguese"}, "tl": {"english_name": "Tagalog"}, "pa": {"name": "ਪੰਜਾਬੀ", "english_name": "Panjabi"}, "vi": {"name": "Tiếng Việt", "english_name": "Vietnamese"}, "pi": {"english_name": "Pali"}, "is": {"name": "Íslenska", "english_name": "Icelandic"}, "pl": {"name": "polski", "english_name": "Polish"}, "hz": {"english_name": "Herero"}, "hy": {"english_name": "Armenian"}, "hr": {"name": "hrvatski", "english_name": "Croatian"}, "iu": {"english_name": "Inuktitut"}, 
"ht": {"english_name": "Haitian"}, "hu": {"name": "magyar", "english_name": "Hungarian"}, "hi": {"name": "हिंदी", "english_name": "Hindi"}, "ho": {"english_name": "Hiri Motu"}, "ha": {"english_name": "Hausa"}, "he": {"name": "עברית", "english_name": "Hebrew"}, "mg": {"english_name": "Malagasy"}, "uz": {"english_name": "Uzbek"}, "ml": {"english_name": "Malayalam"}, "mn": {"name": "Монгол", "english_name": "Mongolian"}, "mi": {"name": "Reo Māori", "english_name": "Maori"}, "ik": {"english_name": "Inupiaq"}, "mk": {"name": "Македонски", "english_name": "Macedonian"}, "ur": {"english_name": "Urdu"}, "mt": {"name": "Malti", "english_name": "Maltese"}, "ms": {"name": "Malay", "english_name": "Malay"}, "mr": {"name": "मराठी", "english_name": "Marathi"}, "ug": {"english_name": "Uighur"}, "my": {"english_name": "Burmese"}, "sq": {"english_name": "Albanian"}, "ae": {"english_name": "Avestan"}, "ss": {"english_name": "Swati"}, "af": {"name": "Afrikaans", "english_name": "Afrikaans"}, "tn": {"english_name": "Tswana"}, "sw": {"english_name": "Swahili (macrolanguage)"}, "ak": {"english_name": "Akan"}, "am": {"name": "አማርኛ", "english_name": "Amharic"}, "it": {"name": "Italiano", "english_name": "Italian"}, "an": {"english_name": "Aragonese"}, "ii": {"english_name": "Yi, Sichuan"}, "ia": {"english_name": "Interlingua"}, "as": {"english_name": "Assamese"}, "ar": {"name": "العربية", "english_name": "Arabic"}, "su": {"english_name": "Sundanese"}, "io": {"english_name": "Ido"}, "av": {"english_name": "Avaric"}, "ay": {"english_name": "Aymara"}, "az": {"name": "Azerbaijani", "english_name": "Azerbaijani"}, "ie": {"english_name": "Interlingue"}, "id": {"name": "Indonesian", "english_name": "Indonesian"}, "ig": {"english_name": "Igbo"}, "sk": {"name": "Slovenský", "english_name": "Slovak"}, "sr": {"name": "српски", "english_name": "Serbian"}, "nl": {"name": "Nederlands", "english_name": "Dutch"}, "nn": {"name": "Norwegian Nynorsk", "english_name": "Norwegian Nynorsk"}, "no": 
{"english_name": "Norwegian"}, "na": {"english_name": "Nauru"}, "nb": {"name": "Norwegian Bokmål", "english_name": "Norwegian Bokmål"}, "nd": {"english_name": "Ndebele, North"}, "ne": {"english_name": "Nepali (macrolanguage)"}, "ng": {"english_name": "Ndonga"}, "ny": {"english_name": "Nyanja"}, "vo": {"english_name": "Volapük"}, "zu": {"name": "Isi-Zulu", "english_name": "Zulu"}, "so": {"english_name": "Somali"}, "nr": {"english_name": "Ndebele, South"}, "nv": {"english_name": "Navajo"}, "sn": {"english_name": "Shona"}, "fr": {"name": "français", "english_name": "French"}, "sm": {"english_name": "Samoan"}, "fy": {"english_name": "Frisian, Western"}, "sv": {"name": "Svenska", "english_name": "Swedish"}, "fa": {"name": "فارسی", "english_name": "Persian"}, "ff": {"english_name": "Fulah"}, "fi": {"name": "suomi", "english_name": "Finnish"}, "fj": {"english_name": "Fijian"}, "sa": {"english_name": "Sanskrit"}, "fo": {"english_name": "Faroese"}, "ka": {"english_name": "Georgian"}, "kg": {"english_name": "Kongo"}, "kk": {"english_name": "Kazakh"}, "kj": {"english_name": "Kuanyama"}, "ki": {"english_name": "Kikuyu"}, "ko": {"name": "한국어", "english_name": "Korean"}, "kn": {"name": "ಕನ್ನಡ", "english_name": "Kannada"}, "km": {"english_name": "Khmer, Central"}, "kl": {"english_name": "Kalaallisut"}, "ks": {"english_name": "Kashmiri"}, "kr": {"english_name": "Kanuri"}, "si": {"english_name": "Sinhala"}, "sh": {"name": "Serbo-Croatian", "english_name": "Serbo-Croatian"}, "kw": {"english_name": "Cornish"}, "kv": {"english_name": "Komi"}, "ku": {"english_name": "Kurdish"}, "sl": {"name": "slovenščina", "english_name": "Slovenian"}, "sc": {"english_name": "Sardinian"}, "ky": {"english_name": "Kirghiz"}, "sg": {"english_name": "Sango"}, "se": {"english_name": "Sami, Northern"}, "sd": {"english_name": "Sindhi"}}, "google": {"el": {"name": "Ελληνικά"}, "eo": {"name": "Esperanto"}, "en": {"name": "English"}, "af": {"name": "Afrikaans"}, "vi": {"name": "Tiếng Việt"}, "ca": {"name": 
"Català"}, "it": {"name": "Italiano"}, "iw": {"name": "עברית"}, "hy": {"name": "Հայերեն"}, "cs": {"name": "Čeština"}, "et": {"name": "Eesti"}, "id": {"name": "Indonesia"}, "es": {"name": "Español"}, "ru": {"name": "Русский"}, "nl": {"name": "Nederlands"}, "pt": {"name": "Português"}, "no": {"name": "Norsk"}, "tr": {"name": "Türkçe"}, "lt": {"name": "Lietuvių"}, "lv": {"name": "Latviešu"}, "tl": {"name": "Filipino"}, "zh-TW": {"name": "中文 (繁體)"}, "th": {"name": "ไทย"}, "ro": {"name": "Română"}, "is": {"name": "Íslenska"}, "pl": {"name": "Polski"}, "be": {"name": "Беларуская"}, "fr": {"name": "Français"}, "bg": {"name": "Български"}, "hr": {"name": "Hrvatski"}, "de": {"name": "Deutsch"}, "ko": {"name": "한국어"}, "da": {"name": "Dansk"}, "fa": {"name": "فارسی"}, "hi": {"name": "हिन्दी"}, "fi": {"name": "Suomi"}, "hu": {"name": "Magyar"}, "ja": {"name": "日本語"}, "sr": {"name": "Српски"}, "sw": {"name": "Kiswahili"}, "sv": {"name": "Svenska"}, "sk": {"name": "Slovenčina"}, "zh-CN": {"name": "中文 (简体)"}, "ar": {"name": "العربية"}, "uk": {"name": "Українська"}, "sl": {"name": "Slovenščina"}}, "duckduckgo": ["da-DK", "vi-VN", "en-SG", "sl-SL", "en-XA", "tzh-HK", "en-UK", "ro-RO", "en-MY", "el-GR", "it-CH", "hu-HU", "fr-FR", "en-PH", "tl-PH", "fr-CA", "fi-FI", "et-EE", "sv-SE", "es-XL", "th-TH", "sk-SK", "es-ES", "en-IE", "es-US", "es-PE", "nl-NL", "en-US", "de-DE", "de-AT", "wt-WT", "no-NO", "tr-TR", "ca-ES", "it-IT", "es-CO", "ru-RU", "ca-CT", "en-ZA", "en-CA", "jp-JP", "es-MX", "id-ID", "es-AR", "he-IL", "kr-KR", "en-AU", "ms-MY", "pl-PL", "lv-LV", "bg-BG", "zh-CN", "en-NZ", "lt-LT", "tzh-TW", "hr-HR", "pt-PT", "fr-BE", "de-CH", "cs-CZ", "en-IN", "nl-BE", "fr-CH", "en-ID", "ar-XA", "pt-BR", "uk-UA", "es-CL"], "bing": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", "it", "ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", 
"vi"], "wikipedia": {"sco": {"articles": 41758, "name": "Scots", "english_name": "Scots"}, "scn": {"articles": 25373, "name": "Sicilianu", "english_name": "Sicilian"}, "gu": {"articles": 26688, "name": "ગુજરાતી", "english_name": "Gujarati"}, "gd": {"articles": 14250, "name": "Gàidhlig", "english_name": "Scottish Gaelic"}, "ga": {"articles": 38830, "name": "Gaeilge", "english_name": "Irish"}, "gl": {"articles": 134702, "name": "Galego", "english_name": "Galician"}, "als": {"articles": 22350, "name": "Alemannisch", "english_name": "Alemannic"}, "lb": {"articles": 47313, "name": "Lëtzebuergesch", "english_name": "Luxembourgish"}, "la": {"articles": 125692, "name": "Latina", "english_name": "Latin"}, "tt": {"articles": 69908, "name": "Tatarça / Татарча", "english_name": "Tatar"}, "tr": {"articles": 287464, "name": "Türkçe", "english_name": "Turkish"}, "li": {"articles": 11552, "name": "Limburgs", "english_name": "Limburgish"}, "lv": {"articles": 74371, "name": "Latviešu", "english_name": "Latvian"}, "tl": {"articles": 65689, "name": "Tagalog", "english_name": "Tagalog"}, "vec": {"articles": 10855, "name": "Vèneto", "english_name": "Venetian"}, "th": {"articles": 113239, "name": "ไทย", "english_name": "Thai"}, "tg": {"articles": 67388, "name": "Тоҷикӣ", "english_name": "Tajik"}, "te": {"articles": 66228, "name": "తెలుగు", "english_name": "Telugu"}, "ta": {"articles": 89570, "name": "தமிழ்", "english_name": "Tamil"}, "yi": {"articles": 13591, "name": "ייִדיש", "english_name": "Yiddish"}, "ceb": {"articles": 3535780, "name": "Sinugboanong Binisaya", "english_name": "Cebuano"}, "yo": {"articles": 31494, "name": "Yorùbá", "english_name": "Yoruba"}, "de": {"articles": 2009282, "name": "Deutsch", "english_name": "German"}, "da": {"articles": 221813, "name": "Dansk", "english_name": "Danish"}, "qu": {"articles": 19810, "name": "Runa Simi", "english_name": "Quechua"}, "bar": {"articles": 21968, "name": "Boarisch", "english_name": "Bavarian"}, "kn": {"articles": 21607, "name": 
"ಕನ್ನಡ", "english_name": "Kannada"}, "bpy": {"articles": 25067, "name": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", "english_name": "Bishnupriya Manipuri"}, "el": {"articles": 124371, "name": "Ελληνικά", "english_name": "Greek"}, "eo": {"articles": 235590, "name": "Esperanto", "english_name": "Esperanto"}, "en": {"articles": 5308212, "name": "English", "english_name": "English"}, "zh": {"articles": 915374, "name": "中文", "english_name": "Chinese"}, "pms": {"articles": 63988, "name": "Piemontèis", "english_name": "Piedmontese"}, "arz": {"articles": 16103, "name": "مصرى (Maṣri)", "english_name": "Egyptian Arabic"}, "eu": {"articles": 261872, "name": "Euskara", "english_name": "Basque"}, "et": {"articles": 151596, "name": "Eesti", "english_name": "Estonian"}, "es": {"articles": 1301888, "name": "Español", "english_name": "Spanish"}, "ba": {"articles": 36625, "name": "Башҡорт", "english_name": "Bashkir"}, "ru": {"articles": 1359387, "name": "Русский", "english_name": "Russian"}, "new": {"articles": 72176, "name": "नेपाल भाषा", "english_name": "Newar"}, "ro": {"articles": 373081, "name": "Română", "english_name": "Romanian"}, "jv": {"articles": 49676, "name": "Basa Jawa", "english_name": "Javanese"}, "hsb": {"articles": 10909, "name": "Hornjoserbsce", "english_name": "Upper Sorbian"}, "be": {"articles": 123483, "name": "Беларуская", "english_name": "Belarusian"}, "bg": {"articles": 223764, "name": "Български", "english_name": "Bulgarian"}, "uk": {"articles": 667063, "name": "Українська", "english_name": "Ukrainian"}, "wa": {"articles": 14315, "name": "Walon", "english_name": "Walloon"}, "ast": {"articles": 47719, "name": "Asturianu", "english_name": "Asturian"}, "bn": {"articles": 46064, "name": "বাংলা", "english_name": "Bengali"}, "map-bms": {"articles": 13275, "name": "Basa Banyumasan", "english_name": "Banyumasan"}, "br": {"articles": 60626, "name": "Brezhoneg", "english_name": "Breton"}, "bs": {"articles": 72084, "name": "Bosanski", "english_name": "Bosnian"}, "ja": 
{"articles": 1041648, "name": "日本語", "english_name": "Japanese"}, "oc": {"articles": 84329, "name": "Occitan", "english_name": "Occitan"}, "be-tarask": {"articles": 59873, "name": "Беларуская (тарашкевіца)", "english_name": "Belarusian (Taraškievica)"}, "nds": {"articles": 25733, "name": "Plattdüütsch", "english_name": "Low Saxon"}, "os": {"articles": 10293, "name": "Иронау", "english_name": "Ossetian"}, "or": {"articles": 11725, "name": "ଓଡ଼ିଆ", "english_name": "Oriya"}, "simple": {"articles": 121800, "name": "Simple English", "english_name": "Simple English"}, "ca": {"articles": 528757, "name": "Català", "english_name": "Catalan"}, "lmo": {"articles": 34558, "name": "Lumbaart", "english_name": "Lombard"}, "ce": {"articles": 158923, "name": "Нохчийн", "english_name": "Chechen"}, "cy": {"articles": 89273, "name": "Cymraeg", "english_name": "Welsh"}, "cs": {"articles": 369105, "name": "Čeština", "english_name": "Czech"}, "cv": {"articles": 36501, "name": "Чăваш", "english_name": "Chuvash"}, "pt": {"articles": 949123, "name": "Português", "english_name": "Portuguese"}, "lt": {"articles": 180377, "name": "Lietuvių", "english_name": "Lithuanian"}, "zh-min-nan": {"articles": 201884, "name": "Bân-lâm-gú", "english_name": "Min Nan"}, "pa": {"articles": 24072, "name": "ਪੰਜਾਬੀ", "english_name": "Punjabi"}, "war": {"articles": 1261970, "name": "Winaray", "english_name": "Waray-Waray"}, "pl": {"articles": 1197597, "name": "Polski", "english_name": "Polish"}, "hy": {"articles": 212747, "name": "Հայերեն", "english_name": "Armenian"}, "an": {"articles": 31832, "name": "Aragonés", "english_name": "Aragonese"}, "hr": {"articles": 171047, "name": "Hrvatski", "english_name": "Croatian"}, "ht": {"articles": 51108, "name": "Krèyol ayisyen", "english_name": "Haitian"}, "hu": {"articles": 399927, "name": "Magyar", "english_name": "Hungarian"}, "bat-smg": {"articles": 15940, "name": "Žemaitėška", "english_name": "Samogitian"}, "hi": {"articles": 114396, "name": "हिन्दी", "english_name": 
"Hindi"}, "pnb": {"articles": 42670, "name": "شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", "english_name": "Western Punjabi"}, "bug": {"articles": 14116, "name": "Basa Ugi", "english_name": "Buginese"}, "he": {"articles": 199250, "name": "עברית", "english_name": "Hebrew"}, "mg": {"articles": 82826, "name": "Malagasy", "english_name": "Malagasy"}, "uz": {"articles": 128744, "name": "O‘zbek", "english_name": "Uzbek"}, "ml": {"articles": 46807, "name": "മലയാളം", "english_name": "Malayalam"}, "azb": {"articles": 11827, "name": "تۆرکجه", "english_name": "South Azerbaijani"}, "mn": {"articles": 16285, "name": "Монгол", "english_name": "Mongolian"}, "mk": {"articles": 87543, "name": "Македонски", "english_name": "Macedonian"}, "ur": {"articles": 110819, "name": "اردو", "english_name": "Urdu"}, "ms": {"articles": 286189, "name": "Bahasa Melayu", "english_name": "Malay"}, "mr": {"articles": 45052, "name": "मराठी", "english_name": "Marathi"}, "my": {"articles": 33576, "name": "မြန်မာဘာသာ", "english_name": "Burmese"}, "sah": {"articles": 10965, "name": "Саха тыла (Saxa Tyla)", "english_name": "Sakha"}, "af": {"articles": 43005, "name": "Afrikaans", "english_name": "Afrikaans"}, "vi": {"articles": 1151596, "name": "Tiếng Việt", "english_name": "Vietnamese"}, "is": {"articles": 41503, "name": "Íslenska", "english_name": "Icelandic"}, "am": {"articles": 13297, "name": "አማርኛ", "english_name": "Amharic"}, "it": {"articles": 1317642, "name": "Italiano", "english_name": "Italian"}, "vo": {"articles": 120413, "name": "Volapük", "english_name": "Volapük"}, "ar": {"articles": 453687, "name": "العربية", "english_name": "Arabic"}, "io": {"articles": 26845, "name": "Ido", "english_name": "Ido"}, "ia": {"articles": 19784, "name": "Interlingua", "english_name": "Interlingua"}, "az": {"articles": 111510, "name": "Azərbaycanca", "english_name": "Azerbaijani"}, "id": {"articles": 390261, "name": "Bahasa Indonesia", "english_name": "Indonesian"}, "nl": {"articles": 1885863, "name": "Nederlands", 
"english_name": "Dutch"}, "nn": {"articles": 131704, "name": "Nynorsk", "english_name": "Norwegian (Nynorsk)"}, "no": {"articles": 458295, "name": "Norsk (Bokmål)", "english_name": "Norwegian (Bokmål)"}, "nah": {"articles": 10429, "name": "Nāhuatl", "english_name": "Nahuatl"}, "ne": {"articles": 29279, "name": "नेपाली", "english_name": "Nepali"}, "nap": {"articles": 14400, "name": "Nnapulitano", "english_name": "Neapolitan"}, "fr": {"articles": 1823284, "name": "Français", "english_name": "French"}, "mrj": {"articles": 10164, "name": "Кырык Мары (Kyryk Mary)", "english_name": "Hill Mari"}, "zh-yue": {"articles": 49372, "name": "粵語", "english_name": "Cantonese"}, "fy": {"articles": 36467, "name": "Frysk", "english_name": "West Frisian"}, "fa": {"articles": 516655, "name": "فارسی", "english_name": "Persian"}, "fi": {"articles": 405189, "name": "Suomi", "english_name": "Finnish"}, "mzn": {"articles": 12362, "name": "مَزِروني", "english_name": "Mazandarani"}, "sa": {"articles": 10198, "name": "संस्कृतम्", "english_name": "Sanskrit"}, "fo": {"articles": 12373, "name": "Føroyskt", "english_name": "Faroese"}, "ka": {"articles": 111160, "name": "ქართული", "english_name": "Georgian"}, "ckb": {"articles": 18218, "name": "Soranî / کوردی", "english_name": "Sorani"}, "kk": {"articles": 217488, "name": "Қазақша", "english_name": "Kazakh"}, "sr": {"articles": 342526, "name": "Српски / Srpski", "english_name": "Serbian"}, "sq": {"articles": 62453, "name": "Shqip", "english_name": "Albanian"}, "min": {"articles": 221961, "name": "Minangkabau", "english_name": "Minangkabau"}, "ko": {"articles": 367197, "name": "한국어", "english_name": "Korean"}, "sv": {"articles": 3783412, "name": "Svenska", "english_name": "Swedish"}, "su": {"articles": 19163, "name": "Basa Sunda", "english_name": "Sundanese"}, "sk": {"articles": 215364, "name": "Slovenčina", "english_name": "Slovak"}, "si": {"articles": 12831, "name": "සිංහල", "english_name": "Sinhalese"}, "sh": {"articles": 436549, "name": 
"Srpskohrvatski / Српскохрватски", "english_name": "Serbo-Croatian"}, "ku": {"articles": 22370, "name": "Kurdî / كوردی", "english_name": "Kurdish"}, "sl": {"articles": 153986, "name": "Slovenščina", "english_name": "Slovenian"}, "ky": {"articles": 60035, "name": "Кыргызча", "english_name": "Kirghiz"}, "sw": {"articles": 34775, "name": "Kiswahili", "english_name": "Swahili"}}, "bing news": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", "it", "ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", "vi"], "yahoo news": ["ar", "bg", "zh-chs", "zh-cht", "hr", "cs", "da", "nl", "en", "et", "fi", "fr", "de", "el", "he", "hu", "it", "ja", "ko", "lv", "lt", "no", "pl", "pt", "ro", "ru", "sk", "sl", "es", "sv", "th", "tr"], "swisscows": ["browser", "ar-SA", "es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "en-CA", "fr-CA", "es-CL", "zh-CN", "da-DK", "fi-FI", "fr-FR", "de-DE", "zh-HK", "en-IN", "en-IE", "it-IT", "ja-JP", "ko-KR", "en-MY", "es-MX", "nl-NL", "en-NZ", "nb-NO", "en-PH", "pl-PL", "pt-PT", "ru-RU", "en-ZA", "es-ES", "sv-SE", "de-CH", "fr-CH", "zh-TW", "tr-TR", "en-GB", "en-US", "es-US"], "wikidata": {"sco": {"articles": 41758, "name": "Scots", "english_name": "Scots"}, "scn": {"articles": 25373, "name": "Sicilianu", "english_name": "Sicilian"}, "gu": {"articles": 26688, "name": "ગુજરાતી", "english_name": "Gujarati"}, "gd": {"articles": 14250, "name": "Gàidhlig", "english_name": "Scottish Gaelic"}, "ga": {"articles": 38830, "name": "Gaeilge", "english_name": "Irish"}, "gl": {"articles": 134702, "name": "Galego", "english_name": "Galician"}, "als": {"articles": 22350, "name": "Alemannisch", "english_name": "Alemannic"}, "lb": {"articles": 47313, "name": "Lëtzebuergesch", "english_name": "Luxembourgish"}, "la": {"articles": 125692, "name": "Latina", "english_name": "Latin"}, "tt": {"articles": 69908, "name": 
"Tatarça / Татарча", "english_name": "Tatar"}, "tr": {"articles": 287464, "name": "Türkçe", "english_name": "Turkish"}, "li": {"articles": 11552, "name": "Limburgs", "english_name": "Limburgish"}, "lv": {"articles": 74371, "name": "Latviešu", "english_name": "Latvian"}, "tl": {"articles": 65689, "name": "Tagalog", "english_name": "Tagalog"}, "vec": {"articles": 10855, "name": "Vèneto", "english_name": "Venetian"}, "th": {"articles": 113239, "name": "ไทย", "english_name": "Thai"}, "tg": {"articles": 67388, "name": "Тоҷикӣ", "english_name": "Tajik"}, "te": {"articles": 66228, "name": "తెలుగు", "english_name": "Telugu"}, "ta": {"articles": 89570, "name": "தமிழ்", "english_name": "Tamil"}, "yi": {"articles": 13591, "name": "ייִדיש", "english_name": "Yiddish"}, "ceb": {"articles": 3535780, "name": "Sinugboanong Binisaya", "english_name": "Cebuano"}, "yo": {"articles": 31494, "name": "Yorùbá", "english_name": "Yoruba"}, "de": {"articles": 2009282, "name": "Deutsch", "english_name": "German"}, "da": {"articles": 221813, "name": "Dansk", "english_name": "Danish"}, "qu": {"articles": 19810, "name": "Runa Simi", "english_name": "Quechua"}, "bar": {"articles": 21968, "name": "Boarisch", "english_name": "Bavarian"}, "kn": {"articles": 21607, "name": "ಕನ್ನಡ", "english_name": "Kannada"}, "bpy": {"articles": 25067, "name": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী", "english_name": "Bishnupriya Manipuri"}, "el": {"articles": 124371, "name": "Ελληνικά", "english_name": "Greek"}, "eo": {"articles": 235590, "name": "Esperanto", "english_name": "Esperanto"}, "en": {"articles": 5308212, "name": "English", "english_name": "English"}, "zh": {"articles": 915374, "name": "中文", "english_name": "Chinese"}, "pms": {"articles": 63988, "name": "Piemontèis", "english_name": "Piedmontese"}, "arz": {"articles": 16103, "name": "مصرى (Maṣri)", "english_name": "Egyptian Arabic"}, "eu": {"articles": 261872, "name": "Euskara", "english_name": "Basque"}, "et": {"articles": 151596, "name": "Eesti", 
"english_name": "Estonian"}, "es": {"articles": 1301888, "name": "Español", "english_name": "Spanish"}, "ba": {"articles": 36625, "name": "Башҡорт", "english_name": "Bashkir"}, "ru": {"articles": 1359387, "name": "Русский", "english_name": "Russian"}, "new": {"articles": 72176, "name": "नेपाल भाषा", "english_name": "Newar"}, "ro": {"articles": 373081, "name": "Română", "english_name": "Romanian"}, "jv": {"articles": 49676, "name": "Basa Jawa", "english_name": "Javanese"}, "hsb": {"articles": 10909, "name": "Hornjoserbsce", "english_name": "Upper Sorbian"}, "be": {"articles": 123483, "name": "Беларуская", "english_name": "Belarusian"}, "bg": {"articles": 223764, "name": "Български", "english_name": "Bulgarian"}, "uk": {"articles": 667063, "name": "Українська", "english_name": "Ukrainian"}, "wa": {"articles": 14315, "name": "Walon", "english_name": "Walloon"}, "ast": {"articles": 47719, "name": "Asturianu", "english_name": "Asturian"}, "bn": {"articles": 46064, "name": "বাংলা", "english_name": "Bengali"}, "map-bms": {"articles": 13275, "name": "Basa Banyumasan", "english_name": "Banyumasan"}, "br": {"articles": 60626, "name": "Brezhoneg", "english_name": "Breton"}, "bs": {"articles": 72084, "name": "Bosanski", "english_name": "Bosnian"}, "ja": {"articles": 1041648, "name": "日本語", "english_name": "Japanese"}, "oc": {"articles": 84329, "name": "Occitan", "english_name": "Occitan"}, "be-tarask": {"articles": 59873, "name": "Беларуская (тарашкевіца)", "english_name": "Belarusian (Taraškievica)"}, "nds": {"articles": 25733, "name": "Plattdüütsch", "english_name": "Low Saxon"}, "os": {"articles": 10293, "name": "Иронау", "english_name": "Ossetian"}, "or": {"articles": 11725, "name": "ଓଡ଼ିଆ", "english_name": "Oriya"}, "simple": {"articles": 121800, "name": "Simple English", "english_name": "Simple English"}, "ca": {"articles": 528757, "name": "Català", "english_name": "Catalan"}, "lmo": {"articles": 34558, "name": "Lumbaart", "english_name": "Lombard"}, "ce": {"articles": 
158923, "name": "Нохчийн", "english_name": "Chechen"}, "cy": {"articles": 89273, "name": "Cymraeg", "english_name": "Welsh"}, "cs": {"articles": 369105, "name": "Čeština", "english_name": "Czech"}, "cv": {"articles": 36501, "name": "Чăваш", "english_name": "Chuvash"}, "pt": {"articles": 949123, "name": "Português", "english_name": "Portuguese"}, "lt": {"articles": 180377, "name": "Lietuvių", "english_name": "Lithuanian"}, "zh-min-nan": {"articles": 201884, "name": "Bân-lâm-gú", "english_name": "Min Nan"}, "pa": {"articles": 24072, "name": "ਪੰਜਾਬੀ", "english_name": "Punjabi"}, "war": {"articles": 1261970, "name": "Winaray", "english_name": "Waray-Waray"}, "pl": {"articles": 1197597, "name": "Polski", "english_name": "Polish"}, "hy": {"articles": 212747, "name": "Հայերեն", "english_name": "Armenian"}, "an": {"articles": 31832, "name": "Aragonés", "english_name": "Aragonese"}, "hr": {"articles": 171047, "name": "Hrvatski", "english_name": "Croatian"}, "ht": {"articles": 51108, "name": "Krèyol ayisyen", "english_name": "Haitian"}, "hu": {"articles": 399927, "name": "Magyar", "english_name": "Hungarian"}, "bat-smg": {"articles": 15940, "name": "Žemaitėška", "english_name": "Samogitian"}, "hi": {"articles": 114396, "name": "हिन्दी", "english_name": "Hindi"}, "pnb": {"articles": 42670, "name": "شاہ مکھی پنجابی (Shāhmukhī Pañjābī)", "english_name": "Western Punjabi"}, "bug": {"articles": 14116, "name": "Basa Ugi", "english_name": "Buginese"}, "he": {"articles": 199250, "name": "עברית", "english_name": "Hebrew"}, "mg": {"articles": 82826, "name": "Malagasy", "english_name": "Malagasy"}, "uz": {"articles": 128744, "name": "O‘zbek", "english_name": "Uzbek"}, "ml": {"articles": 46807, "name": "മലയാളം", "english_name": "Malayalam"}, "azb": {"articles": 11827, "name": "تۆرکجه", "english_name": "South Azerbaijani"}, "mn": {"articles": 16285, "name": "Монгол", "english_name": "Mongolian"}, "mk": {"articles": 87543, "name": "Македонски", "english_name": "Macedonian"}, "ur": 
{"articles": 110819, "name": "اردو", "english_name": "Urdu"}, "ms": {"articles": 286189, "name": "Bahasa Melayu", "english_name": "Malay"}, "mr": {"articles": 45052, "name": "मराठी", "english_name": "Marathi"}, "my": {"articles": 33576, "name": "မြန်မာဘာသာ", "english_name": "Burmese"}, "sah": {"articles": 10965, "name": "Саха тыла (Saxa Tyla)", "english_name": "Sakha"}, "af": {"articles": 43005, "name": "Afrikaans", "english_name": "Afrikaans"}, "vi": {"articles": 1151596, "name": "Tiếng Việt", "english_name": "Vietnamese"}, "is": {"articles": 41503, "name": "Íslenska", "english_name": "Icelandic"}, "am": {"articles": 13297, "name": "አማርኛ", "english_name": "Amharic"}, "it": {"articles": 1317642, "name": "Italiano", "english_name": "Italian"}, "vo": {"articles": 120413, "name": "Volapük", "english_name": "Volapük"}, "ar": {"articles": 453687, "name": "العربية", "english_name": "Arabic"}, "io": {"articles": 26845, "name": "Ido", "english_name": "Ido"}, "ia": {"articles": 19784, "name": "Interlingua", "english_name": "Interlingua"}, "az": {"articles": 111510, "name": "Azərbaycanca", "english_name": "Azerbaijani"}, "id": {"articles": 390261, "name": "Bahasa Indonesia", "english_name": "Indonesian"}, "nl": {"articles": 1885863, "name": "Nederlands", "english_name": "Dutch"}, "nn": {"articles": 131704, "name": "Nynorsk", "english_name": "Norwegian (Nynorsk)"}, "no": {"articles": 458295, "name": "Norsk (Bokmål)", "english_name": "Norwegian (Bokmål)"}, "nah": {"articles": 10429, "name": "Nāhuatl", "english_name": "Nahuatl"}, "ne": {"articles": 29279, "name": "नेपाली", "english_name": "Nepali"}, "nap": {"articles": 14400, "name": "Nnapulitano", "english_name": "Neapolitan"}, "fr": {"articles": 1823284, "name": "Français", "english_name": "French"}, "mrj": {"articles": 10164, "name": "Кырык Мары (Kyryk Mary)", "english_name": "Hill Mari"}, "zh-yue": {"articles": 49372, "name": "粵語", "english_name": "Cantonese"}, "fy": {"articles": 36467, "name": "Frysk", "english_name": 
"West Frisian"}, "fa": {"articles": 516655, "name": "فارسی", "english_name": "Persian"}, "fi": {"articles": 405189, "name": "Suomi", "english_name": "Finnish"}, "mzn": {"articles": 12362, "name": "مَزِروني", "english_name": "Mazandarani"}, "sa": {"articles": 10198, "name": "संस्कृतम्", "english_name": "Sanskrit"}, "fo": {"articles": 12373, "name": "Føroyskt", "english_name": "Faroese"}, "ka": {"articles": 111160, "name": "ქართული", "english_name": "Georgian"}, "ckb": {"articles": 18218, "name": "Soranî / کوردی", "english_name": "Sorani"}, "kk": {"articles": 217488, "name": "Қазақша", "english_name": "Kazakh"}, "sr": {"articles": 342526, "name": "Српски / Srpski", "english_name": "Serbian"}, "sq": {"articles": 62453, "name": "Shqip", "english_name": "Albanian"}, "min": {"articles": 221961, "name": "Minangkabau", "english_name": "Minangkabau"}, "ko": {"articles": 367197, "name": "한국어", "english_name": "Korean"}, "sv": {"articles": 3783412, "name": "Svenska", "english_name": "Swedish"}, "su": {"articles": 19163, "name": "Basa Sunda", "english_name": "Sundanese"}, "sk": {"articles": 215364, "name": "Slovenčina", "english_name": "Slovak"}, "si": {"articles": 12831, "name": "සිංහල", "english_name": "Sinhalese"}, "sh": {"articles": 436549, "name": "Srpskohrvatski / Српскохрватски", "english_name": "Serbo-Croatian"}, "ku": {"articles": 22370, "name": "Kurdî / كوردی", "english_name": "Kurdish"}, "sl": {"articles": 153986, "name": "Slovenščina", "english_name": "Slovenian"}, "ky": {"articles": 60035, "name": "Кыргызча", "english_name": "Kirghiz"}, "sw": {"articles": 34775, "name": "Kiswahili", "english_name": "Swahili"}}, "ddg definitions": ["da-DK", "vi-VN", "en-SG", "sl-SL", "en-XA", "tzh-HK", "en-UK", "ro-RO", "en-MY", "el-GR", "it-CH", "hu-HU", "fr-FR", "en-PH", "tl-PH", "fr-CA", "fi-FI", "et-EE", "sv-SE", "es-XL", "th-TH", "sk-SK", "es-ES", "en-IE", "es-US", "es-PE", "nl-NL", "en-US", "de-DE", "de-AT", "wt-WT", "no-NO", "tr-TR", "ca-ES", "it-IT", "es-CO", "ru-RU", 
"ca-CT", "en-ZA", "en-CA", "jp-JP", "es-MX", "id-ID", "es-AR", "he-IL", "kr-KR", "en-AU", "ms-MY", "pl-PL", "lv-LV", "bg-BG", "zh-CN", "en-NZ", "lt-LT", "tzh-TW", "hr-HR", "pt-PT", "fr-BE", "de-CH", "cs-CZ", "en-IN", "nl-BE", "fr-CH", "en-ID", "ar-XA", "pt-BR", "uk-UA", "es-CL"], "bing images": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", "it", "ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", "vi"], "yahoo": ["ar", "bg", "zh-chs", "zh-cht", "hr", "cs", "da", "nl", "en", "et", "fi", "fr", "de", "el", "he", "hu", "it", "ja", "ko", "lv", "lt", "no", "pl", "pt", "ro", "ru", "sk", "sl", "es", "sv", "th", "tr"], "gigablast": ["en", "fr", "es", "ru", "tr", "ja", "h_", "tw", "cn", "ko", "de", "nl", "it", "fi", "sv", "no", "pt", "vi", "ar", "he", "id", "el", "th", "hi", "bn", "pl", "tl", "la", "eo", "ca", "bg", "tx", "sr", "hu", "da", "lt", "cs", "gl", "ka", "gd", "go", "ro", "ga", "lv", "hy", "is", "ag", "gv", "io", "fa", "te", "vv", "mg", "ku", "lb", "et"]} \ No newline at end of file +{"google news": {"el": {"name": "Ελληνικά"}, "eo": {"name": "Esperanto"}, "en": {"name": "English"}, "af": {"name": "Afrikaans"}, "vi": {"name": "Tiếng Việt"}, "ca": {"name": "Català"}, "it": {"name": "Italiano"}, "iw": {"name": "עברית"}, "hy": {"name": "Հայերեն"}, "cs": {"name": "Čeština"}, "et": {"name": "Eesti"}, "id": {"name": "Indonesia"}, "es": {"name": "Español"}, "ru": {"name": "Русский"}, "nl": {"name": "Nederlands"}, "pt": {"name": "Português"}, "no": {"name": "Norsk"}, "tr": {"name": "Türkçe"}, "lt": {"name": "Lietuvių"}, "lv": {"name": "Latviešu"}, "tl": {"name": "Filipino"}, "zh-TW": {"name": "中文 (繁體)"}, "th": {"name": "ไทย"}, "ro": {"name": "Română"}, "is": {"name": "Íslenska"}, "pl": {"name": "Polski"}, "be": {"name": "Беларуская"}, "fr": {"name": "Français"}, "bg": {"name": "Български"}, "hr": {"name": "Hrvatski"}, 
"de": {"name": "Deutsch"}, "ko": {"name": "한국어"}, "da": {"name": "Dansk"}, "fa": {"name": "فارسی"}, "hi": {"name": "हिन्दी"}, "fi": {"name": "Suomi"}, "hu": {"name": "Magyar"}, "ja": {"name": "日本語"}, "sr": {"name": "Српски"}, "sw": {"name": "Kiswahili"}, "sv": {"name": "Svenska"}, "sk": {"name": "Slovenčina"}, "zh-CN": {"name": "中文 (简体)"}, "ar": {"name": "العربية"}, "uk": {"name": "Українська"}, "sl": {"name": "Slovenščina"}}, "dailymotion": {"gv": {"english_name": "Manx"}, "gu": {"name": "ગુજરાતી", "english_name": "Gujarati"}, "gd": {"english_name": "Gaelic, Scottish"}, "ga": {"name": "Gaeilge", "english_name": "Irish"}, "gn": {"english_name": "Guarani"}, "gl": {"name": "Galego", "english_name": "Galician"}, "lg": {"english_name": "Ganda"}, "lb": {"english_name": "Luxembourgish"}, "la": {"english_name": "Latin"}, "ln": {"english_name": "Lingala"}, "lo": {"english_name": "Lao"}, "tt": {"name": "Татарча", "english_name": "Tatar"}, "tr": {"name": "Türkçe", "english_name": "Turkish"}, "ts": {"english_name": "Tsonga"}, "li": {"english_name": "Limburgan"}, "lv": {"name": "Latviešu", "english_name": "Latvian"}, "to": {"english_name": "Tonga (Tonga Islands)"}, "lt": {"name": "Lietuvių", "english_name": "Lithuanian"}, "lu": {"english_name": "Luba-Katanga"}, "tk": {"english_name": "Turkmen"}, "th": {"name": "ไทย", "english_name": "Thai"}, "ti": {"name": "ትግርኛ", "english_name": "Tigrinya"}, "tg": {"english_name": "Tajik"}, "te": {"english_name": "Telugu"}, "ta": {"name": "தமிழ்", "english_name": "Tamil"}, "yi": {"english_name": "Yiddish"}, "yo": {"english_name": "Yoruba"}, "de": {"name": "Deutsch", "english_name": "German"}, "da": {"name": "Dansk", "english_name": "Danish"}, "dz": {"english_name": "Dzongkha"}, "st": {"english_name": "Sotho, Southern"}, "dv": {"english_name": "Dhivehi"}, "qu": {"english_name": "Quechua"}, "el": {"name": "Ελληνικά", "english_name": "Greek, Modern (1453-)"}, "eo": {"name": "Esperanto", "english_name": "Esperanto"}, "en": {"english_name": 
"English"}, "zh": {"name": "中文", "english_name": "Chinese"}, "ee": {"english_name": "Ewe"}, "za": {"english_name": "Zhuang"}, "mh": {"english_name": "Marshallese"}, "uk": {"name": "українська", "english_name": "Ukrainian"}, "eu": {"name": "Euskara", "english_name": "Basque"}, "et": {"name": "Eesti", "english_name": "Estonian"}, "es": {"name": "Español", "english_name": "Spanish"}, "ru": {"name": "русский", "english_name": "Russian"}, "rw": {"name": "Ikinyarwanda", "english_name": "Kinyarwanda"}, "rm": {"english_name": "Romansh"}, "rn": {"english_name": "Rundi"}, "ro": {"name": "Română", "english_name": "Romanian"}, "bn": {"name": "বাংলা", "english_name": "Bengali"}, "be": {"english_name": "Belarusian"}, "bg": {"name": "Български", "english_name": "Bulgarian"}, "ba": {"english_name": "Bashkir"}, "wa": {"name": "Walon", "english_name": "Walloon"}, "wo": {"english_name": "Wolof"}, "bm": {"english_name": "Bambara"}, "jv": {"english_name": "Javanese"}, "bo": {"english_name": "Tibetan"}, "bi": {"english_name": "Bislama"}, "br": {"name": "Brezhoneg", "english_name": "Breton"}, "bs": {"name": "Bosnian", "english_name": "Bosnian"}, "ja": {"name": "日本語", "english_name": "Japanese"}, "om": {"english_name": "Oromo"}, "oj": {"english_name": "Ojibwa"}, "ty": {"english_name": "Tahitian"}, "oc": {"name": "Occitan", "english_name": "Occitan"}, "tw": {"english_name": "Twi"}, "os": {"english_name": "Ossetian"}, "or": {"name": "Oriya", "english_name": "Oriya"}, "xh": {"name": "Xhosa", "english_name": "Xhosa"}, "ch": {"english_name": "Chamorro"}, "co": {"english_name": "Corsican"}, "ca": {"name": "Català", "english_name": "Catalan"}, "ce": {"english_name": "Chechen"}, "cy": {"name": "Cymraeg", "english_name": "Welsh"}, "cs": {"name": "čeština", "english_name": "Czech"}, "cr": {"english_name": "Cree"}, "cv": {"english_name": "Chuvash"}, "cu": {"english_name": "Slavic, Church"}, "ve": {"name": "Venda", "english_name": "Venda"}, "ps": {"name": "Pushto", "english_name": "Pushto"}, "pt": 
{"name": "Português", "english_name": "Portuguese"}, "tl": {"english_name": "Tagalog"}, "pa": {"name": "ਪੰਜਾਬੀ", "english_name": "Panjabi"}, "vi": {"name": "Tiếng Việt", "english_name": "Vietnamese"}, "pi": {"english_name": "Pali"}, "is": {"name": "Íslenska", "english_name": "Icelandic"}, "pl": {"name": "polski", "english_name": "Polish"}, "hz": {"english_name": "Herero"}, "hy": {"english_name": "Armenian"}, "hr": {"name": "hrvatski", "english_name": "Croatian"}, "iu": {"english_name": "Inuktitut"}, "ht": {"english_name": "Haitian"}, "hu": {"name": "magyar", "english_name": "Hungarian"}, "hi": {"name": "हिंदी", "english_name": "Hindi"}, "ho": {"english_name": "Hiri Motu"}, "ha": {"english_name": "Hausa"}, "he": {"name": "עברית", "english_name": "Hebrew"}, "mg": {"english_name": "Malagasy"}, "uz": {"english_name": "Uzbek"}, "ml": {"english_name": "Malayalam"}, "mn": {"name": "Монгол", "english_name": "Mongolian"}, "mi": {"name": "Reo Māori", "english_name": "Maori"}, "ik": {"english_name": "Inupiaq"}, "mk": {"name": "Македонски", "english_name": "Macedonian"}, "ur": {"english_name": "Urdu"}, "mt": {"name": "Malti", "english_name": "Maltese"}, "ms": {"name": "Malay", "english_name": "Malay"}, "mr": {"name": "मराठी", "english_name": "Marathi"}, "ug": {"english_name": "Uighur"}, "my": {"english_name": "Burmese"}, "sq": {"english_name": "Albanian"}, "ae": {"english_name": "Avestan"}, "ss": {"english_name": "Swati"}, "af": {"name": "Afrikaans", "english_name": "Afrikaans"}, "tn": {"english_name": "Tswana"}, "sw": {"english_name": "Swahili (macrolanguage)"}, "ak": {"english_name": "Akan"}, "am": {"name": "አማርኛ", "english_name": "Amharic"}, "it": {"name": "Italiano", "english_name": "Italian"}, "an": {"english_name": "Aragonese"}, "ii": {"english_name": "Yi, Sichuan"}, "ia": {"english_name": "Interlingua"}, "as": {"english_name": "Assamese"}, "ar": {"name": "العربية", "english_name": "Arabic"}, "su": {"english_name": "Sundanese"}, "io": {"english_name": "Ido"}, "av": 
{"english_name": "Avaric"}, "ay": {"english_name": "Aymara"}, "az": {"name": "Azerbaijani", "english_name": "Azerbaijani"}, "ie": {"english_name": "Interlingue"}, "id": {"name": "Indonesian", "english_name": "Indonesian"}, "ig": {"english_name": "Igbo"}, "sk": {"name": "Slovenský", "english_name": "Slovak"}, "sr": {"name": "српски", "english_name": "Serbian"}, "nl": {"name": "Nederlands", "english_name": "Dutch"}, "nn": {"name": "Norwegian Nynorsk", "english_name": "Norwegian Nynorsk"}, "no": {"english_name": "Norwegian"}, "na": {"english_name": "Nauru"}, "nb": {"name": "Norwegian Bokmål", "english_name": "Norwegian Bokmål"}, "nd": {"english_name": "Ndebele, North"}, "ne": {"english_name": "Nepali (macrolanguage)"}, "ng": {"english_name": "Ndonga"}, "ny": {"english_name": "Nyanja"}, "vo": {"english_name": "Volapük"}, "zu": {"name": "Isi-Zulu", "english_name": "Zulu"}, "so": {"english_name": "Somali"}, "nr": {"english_name": "Ndebele, South"}, "nv": {"english_name": "Navajo"}, "sn": {"english_name": "Shona"}, "fr": {"name": "français", "english_name": "French"}, "sm": {"english_name": "Samoan"}, "fy": {"english_name": "Frisian, Western"}, "sv": {"name": "Svenska", "english_name": "Swedish"}, "fa": {"name": "فارسی", "english_name": "Persian"}, "ff": {"english_name": "Fulah"}, "fi": {"name": "suomi", "english_name": "Finnish"}, "fj": {"english_name": "Fijian"}, "sa": {"english_name": "Sanskrit"}, "fo": {"english_name": "Faroese"}, "ka": {"english_name": "Georgian"}, "kg": {"english_name": "Kongo"}, "kk": {"english_name": "Kazakh"}, "kj": {"english_name": "Kuanyama"}, "ki": {"english_name": "Kikuyu"}, "ko": {"name": "한국어", "english_name": "Korean"}, "kn": {"name": "ಕನ್ನಡ", "english_name": "Kannada"}, "km": {"english_name": "Khmer, Central"}, "kl": {"english_name": "Kalaallisut"}, "ks": {"english_name": "Kashmiri"}, "kr": {"english_name": "Kanuri"}, "si": {"english_name": "Sinhala"}, "sh": {"name": "Serbo-Croatian", "english_name": "Serbo-Croatian"}, "kw": 
{"english_name": "Cornish"}, "kv": {"english_name": "Komi"}, "ku": {"english_name": "Kurdish"}, "sl": {"name": "slovenščina", "english_name": "Slovenian"}, "sc": {"english_name": "Sardinian"}, "ky": {"english_name": "Kirghiz"}, "sg": {"english_name": "Sango"}, "se": {"english_name": "Sami, Northern"}, "sd": {"english_name": "Sindhi"}}, "google": {"el": {"name": "Ελληνικά"}, "eo": {"name": "Esperanto"}, "en": {"name": "English"}, "af": {"name": "Afrikaans"}, "vi": {"name": "Tiếng Việt"}, "ca": {"name": "Català"}, "it": {"name": "Italiano"}, "iw": {"name": "עברית"}, "hy": {"name": "Հայերեն"}, "cs": {"name": "Čeština"}, "et": {"name": "Eesti"}, "id": {"name": "Indonesia"}, "es": {"name": "Español"}, "ru": {"name": "Русский"}, "nl": {"name": "Nederlands"}, "pt": {"name": "Português"}, "no": {"name": "Norsk"}, "tr": {"name": "Türkçe"}, "lt": {"name": "Lietuvių"}, "lv": {"name": "Latviešu"}, "tl": {"name": "Filipino"}, "zh-TW": {"name": "中文 (繁體)"}, "th": {"name": "ไทย"}, "ro": {"name": "Română"}, "is": {"name": "Íslenska"}, "pl": {"name": "Polski"}, "be": {"name": "Беларуская"}, "fr": {"name": "Français"}, "bg": {"name": "Български"}, "hr": {"name": "Hrvatski"}, "de": {"name": "Deutsch"}, "ko": {"name": "한국어"}, "da": {"name": "Dansk"}, "fa": {"name": "فارسی"}, "hi": {"name": "हिन्दी"}, "fi": {"name": "Suomi"}, "hu": {"name": "Magyar"}, "ja": {"name": "日本語"}, "sr": {"name": "Српски"}, "sw": {"name": "Kiswahili"}, "sv": {"name": "Svenska"}, "sk": {"name": "Slovenčina"}, "zh-CN": {"name": "中文 (简体)"}, "ar": {"name": "العربية"}, "uk": {"name": "Українська"}, "sl": {"name": "Slovenščina"}}, "duckduckgo": ["da-DK", "vi-VN", "en-SG", "sl-SL", "en-XA", "tzh-HK", "en-UK", "ro-RO", "en-MY", "el-GR", "it-CH", "hu-HU", "fr-FR", "en-PH", "tl-PH", "fr-CA", "fi-FI", "et-EE", "sv-SE", "es-XL", "th-TH", "sk-SK", "es-ES", "en-IE", "es-US", "es-PE", "nl-NL", "en-US", "de-DE", "de-AT", "wt-WT", "no-NO", "tr-TR", "ca-ES", "it-IT", "es-CO", "ru-RU", "ca-CT", "en-ZA", "en-CA", "jp-JP", "es-MX", 
"id-ID", "es-AR", "he-IL", "kr-KR", "en-AU", "ms-MY", "pl-PL", "lv-LV", "bg-BG", "zh-CN", "en-NZ", "lt-LT", "tzh-TW", "hr-HR", "pt-PT", "fr-BE", "de-CH", "cs-CZ", "en-IN", "nl-BE", "fr-CH", "en-ID", "ar-XA", "pt-BR", "uk-UA", "es-CL"], "bing": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", "it", "ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", "vi"], "wikipedia": {"el": {"articles": 124415, "name": "Ελληνικά", "english_name": "Greek"}, "eo": {"articles": 235634, "name": "Esperanto", "english_name": "Esperanto"}, "en": {"articles": 5309305, "name": "English", "english_name": "English"}, "zh": {"articles": 915635, "name": "中文", "english_name": "Chinese"}, "simple": {"articles": 121825, "name": "Simple English", "english_name": "Simple English"}, "vi": {"articles": 1151694, "name": "Tiếng Việt", "english_name": "Vietnamese"}, "ca": {"articles": 529048, "name": "Català", "english_name": "Catalan"}, "it": {"articles": 1318160, "name": "Italiano", "english_name": "Italian"}, "ce": {"articles": 159058, "name": "Нохчийн", "english_name": "Chechen"}, "vo": {"articles": 120413, "name": "Volapük", "english_name": "Volapük"}, "eu": {"articles": 261908, "name": "Euskara", "english_name": "Basque"}, "ar": {"articles": 453836, "name": "العربية", "english_name": "Arabic"}, "cs": {"articles": 369249, "name": "Čeština", "english_name": "Czech"}, "et": {"articles": 151639, "name": "Eesti", "english_name": "Estonian"}, "gl": {"articles": 134762, "name": "Galego", "english_name": "Galician"}, "id": {"articles": 390382, "name": "Bahasa Indonesia", "english_name": "Indonesian"}, "es": {"articles": 1302275, "name": "Español", "english_name": "Spanish"}, "ru": {"articles": 1359763, "name": "Русский", "english_name": "Russian"}, "az": {"articles": 111556, "name": "Azərbaycanca", "english_name": "Azerbaijani"}, "nl": {"articles": 
1886078, "name": "Nederlands", "english_name": "Dutch"}, "pt": {"articles": 949323, "name": "Português", "english_name": "Portuguese"}, "no": {"articles": 458365, "name": "Norsk (Bokmål)", "english_name": "Norwegian (Bokmål)"}, "tr": {"articles": 287474, "name": "Türkçe", "english_name": "Turkish"}, "zh-min-nan": {"articles": 201946, "name": "Bân-lâm-gú", "english_name": "Min Nan"}, "lt": {"articles": 180396, "name": "Lietuvių", "english_name": "Lithuanian"}, "th": {"articles": 113285, "name": "ไทย", "english_name": "Thai"}, "nn": {"articles": 131737, "name": "Nynorsk", "english_name": "Norwegian (Nynorsk)"}, "ro": {"articles": 373101, "name": "Română", "english_name": "Romanian"}, "war": {"articles": 1261974, "name": "Winaray", "english_name": "Waray-Waray"}, "pl": {"articles": 1197910, "name": "Polski", "english_name": "Polish"}, "be": {"articles": 123504, "name": "Беларуская", "english_name": "Belarusian"}, "fr": {"articles": 1823855, "name": "Français", "english_name": "French"}, "bg": {"articles": 223845, "name": "Български", "english_name": "Bulgarian"}, "la": {"articles": 125703, "name": "Latina", "english_name": "Latin"}, "ceb": {"articles": 3550855, "name": "Sinugboanong Binisaya", "english_name": "Cebuano"}, "hr": {"articles": 171093, "name": "Hrvatski", "english_name": "Croatian"}, "de": {"articles": 2009917, "name": "Deutsch", "english_name": "German"}, "hu": {"articles": 400092, "name": "Magyar", "english_name": "Hungarian"}, "fa": {"articles": 516845, "name": "فارسی", "english_name": "Persian"}, "hi": {"articles": 114434, "name": "हिन्दी", "english_name": "Hindi"}, "fi": {"articles": 405252, "name": "Suomi", "english_name": "Finnish"}, "hy": {"articles": 212809, "name": "Հայերեն", "english_name": "Armenian"}, "da": {"articles": 221877, "name": "Dansk", "english_name": "Danish"}, "ja": {"articles": 1041824, "name": "日本語", "english_name": "Japanese"}, "he": {"articles": 199341, "name": "עברית", "english_name": "Hebrew"}, "ka": {"articles": 111191, 
"name": "ქართული", "english_name": "Georgian"}, "ms": {"articles": 286242, "name": "Bahasa Melayu", "english_name": "Malay"}, "uz": {"articles": 128746, "name": "O‘zbek", "english_name": "Uzbek"}, "kk": {"articles": 217493, "name": "Қазақша", "english_name": "Kazakh"}, "sr": {"articles": 342597, "name": "Српски / Srpski", "english_name": "Serbian"}, "min": {"articles": 221961, "name": "Minangkabau", "english_name": "Minangkabau"}, "ko": {"articles": 367360, "name": "한국어", "english_name": "Korean"}, "sv": {"articles": 3783545, "name": "Svenska", "english_name": "Swedish"}, "ur": {"articles": 110877, "name": "اردو", "english_name": "Urdu"}, "sk": {"articles": 215374, "name": "Slovenčina", "english_name": "Slovak"}, "sh": {"articles": 436576, "name": "Srpskohrvatski / Српскохрватски", "english_name": "Serbo-Croatian"}, "uk": {"articles": 667399, "name": "Українська", "english_name": "Ukrainian"}, "sl": {"articles": 154006, "name": "Slovenščina", "english_name": "Slovenian"}}, "bing news": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", "it", "ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", "vi"], "yahoo news": ["ar", "bg", "zh-chs", "zh-cht", "hr", "cs", "da", "nl", "en", "et", "fi", "fr", "de", "el", "he", "hu", "it", "ja", "ko", "lv", "lt", "no", "pl", "pt", "ro", "ru", "sk", "sl", "es", "sv", "th", "tr"], "swisscows": ["browser", "ar-SA", "es-AR", "en-AU", "de-AT", "fr-BE", "nl-BE", "pt-BR", "en-CA", "fr-CA", "es-CL", "zh-CN", "da-DK", "fi-FI", "fr-FR", "de-DE", "zh-HK", "en-IN", "en-IE", "it-IT", "ja-JP", "ko-KR", "en-MY", "es-MX", "nl-NL", "en-NZ", "nb-NO", "en-PH", "pl-PL", "pt-PT", "ru-RU", "en-ZA", "es-ES", "sv-SE", "de-CH", "fr-CH", "zh-TW", "tr-TR", "en-GB", "en-US", "es-US"], "wikidata": {"el": {"articles": 124415, "name": "Ελληνικά", "english_name": "Greek"}, "eo": {"articles": 235634, "name": 
"Esperanto", "english_name": "Esperanto"}, "en": {"articles": 5309305, "name": "English", "english_name": "English"}, "zh": {"articles": 915635, "name": "中文", "english_name": "Chinese"}, "simple": {"articles": 121825, "name": "Simple English", "english_name": "Simple English"}, "vi": {"articles": 1151694, "name": "Tiếng Việt", "english_name": "Vietnamese"}, "ca": {"articles": 529048, "name": "Català", "english_name": "Catalan"}, "it": {"articles": 1318160, "name": "Italiano", "english_name": "Italian"}, "ce": {"articles": 159058, "name": "Нохчийн", "english_name": "Chechen"}, "vo": {"articles": 120413, "name": "Volapük", "english_name": "Volapük"}, "eu": {"articles": 261908, "name": "Euskara", "english_name": "Basque"}, "ar": {"articles": 453836, "name": "العربية", "english_name": "Arabic"}, "cs": {"articles": 369249, "name": "Čeština", "english_name": "Czech"}, "et": {"articles": 151639, "name": "Eesti", "english_name": "Estonian"}, "gl": {"articles": 134762, "name": "Galego", "english_name": "Galician"}, "id": {"articles": 390382, "name": "Bahasa Indonesia", "english_name": "Indonesian"}, "es": {"articles": 1302275, "name": "Español", "english_name": "Spanish"}, "ru": {"articles": 1359763, "name": "Русский", "english_name": "Russian"}, "az": {"articles": 111556, "name": "Azərbaycanca", "english_name": "Azerbaijani"}, "nl": {"articles": 1886078, "name": "Nederlands", "english_name": "Dutch"}, "pt": {"articles": 949323, "name": "Português", "english_name": "Portuguese"}, "no": {"articles": 458365, "name": "Norsk (Bokmål)", "english_name": "Norwegian (Bokmål)"}, "tr": {"articles": 287474, "name": "Türkçe", "english_name": "Turkish"}, "zh-min-nan": {"articles": 201946, "name": "Bân-lâm-gú", "english_name": "Min Nan"}, "lt": {"articles": 180396, "name": "Lietuvių", "english_name": "Lithuanian"}, "th": {"articles": 113285, "name": "ไทย", "english_name": "Thai"}, "nn": {"articles": 131737, "name": "Nynorsk", "english_name": "Norwegian (Nynorsk)"}, "ro": {"articles": 
373101, "name": "Română", "english_name": "Romanian"}, "war": {"articles": 1261974, "name": "Winaray", "english_name": "Waray-Waray"}, "pl": {"articles": 1197910, "name": "Polski", "english_name": "Polish"}, "be": {"articles": 123504, "name": "Беларуская", "english_name": "Belarusian"}, "fr": {"articles": 1823855, "name": "Français", "english_name": "French"}, "bg": {"articles": 223845, "name": "Български", "english_name": "Bulgarian"}, "la": {"articles": 125703, "name": "Latina", "english_name": "Latin"}, "ceb": {"articles": 3550855, "name": "Sinugboanong Binisaya", "english_name": "Cebuano"}, "hr": {"articles": 171093, "name": "Hrvatski", "english_name": "Croatian"}, "de": {"articles": 2009917, "name": "Deutsch", "english_name": "German"}, "hu": {"articles": 400092, "name": "Magyar", "english_name": "Hungarian"}, "fa": {"articles": 516845, "name": "فارسی", "english_name": "Persian"}, "hi": {"articles": 114434, "name": "हिन्दी", "english_name": "Hindi"}, "fi": {"articles": 405252, "name": "Suomi", "english_name": "Finnish"}, "hy": {"articles": 212809, "name": "Հայերեն", "english_name": "Armenian"}, "da": {"articles": 221877, "name": "Dansk", "english_name": "Danish"}, "ja": {"articles": 1041824, "name": "日本語", "english_name": "Japanese"}, "he": {"articles": 199341, "name": "עברית", "english_name": "Hebrew"}, "ka": {"articles": 111191, "name": "ქართული", "english_name": "Georgian"}, "ms": {"articles": 286242, "name": "Bahasa Melayu", "english_name": "Malay"}, "uz": {"articles": 128746, "name": "O‘zbek", "english_name": "Uzbek"}, "kk": {"articles": 217493, "name": "Қазақша", "english_name": "Kazakh"}, "sr": {"articles": 342597, "name": "Српски / Srpski", "english_name": "Serbian"}, "min": {"articles": 221961, "name": "Minangkabau", "english_name": "Minangkabau"}, "ko": {"articles": 367360, "name": "한국어", "english_name": "Korean"}, "sv": {"articles": 3783545, "name": "Svenska", "english_name": "Swedish"}, "ur": {"articles": 110877, "name": "اردو", "english_name": 
"Urdu"}, "sk": {"articles": 215374, "name": "Slovenčina", "english_name": "Slovak"}, "sh": {"articles": 436576, "name": "Srpskohrvatski / Српскохрватски", "english_name": "Serbo-Croatian"}, "uk": {"articles": 667399, "name": "Українська", "english_name": "Ukrainian"}, "sl": {"articles": 154006, "name": "Slovenščina", "english_name": "Slovenian"}}, "ddg definitions": ["da-DK", "vi-VN", "en-SG", "sl-SL", "en-XA", "tzh-HK", "en-UK", "ro-RO", "en-MY", "el-GR", "it-CH", "hu-HU", "fr-FR", "en-PH", "tl-PH", "fr-CA", "fi-FI", "et-EE", "sv-SE", "es-XL", "th-TH", "sk-SK", "es-ES", "en-IE", "es-US", "es-PE", "nl-NL", "en-US", "de-DE", "de-AT", "wt-WT", "no-NO", "tr-TR", "ca-ES", "it-IT", "es-CO", "ru-RU", "ca-CT", "en-ZA", "en-CA", "jp-JP", "es-MX", "id-ID", "es-AR", "he-IL", "kr-KR", "en-AU", "ms-MY", "pl-PL", "lv-LV", "bg-BG", "zh-CN", "en-NZ", "lt-LT", "tzh-TW", "hr-HR", "pt-PT", "fr-BE", "de-CH", "cs-CZ", "en-IN", "nl-BE", "fr-CH", "en-ID", "ar-XA", "pt-BR", "uk-UA", "es-CL"], "bing images": ["sq", "de", "ar", "bg", "ca", "cs", "zh-CHS", "zh-CHT", "ko", "hr", "da", "sk", "sl", "es", "et", "fi", "fr", "el", "he", "nl", "hu", "id", "en", "is", "it", "ja", "lv", "lt", "ms", "nb", "fa", "pl", "pt-BR", "pt-PT", "ro", "ru", "sr", "sv", "th", "tr", "uk", "vi"], "yahoo": ["ar", "bg", "zh-chs", "zh-cht", "hr", "cs", "da", "nl", "en", "et", "fi", "fr", "de", "el", "he", "hu", "it", "ja", "ko", "lv", "lt", "no", "pl", "pt", "ro", "ru", "sk", "sl", "es", "sv", "th", "tr"], "gigablast": ["en", "fr", "es", "ru", "tr", "ja", "h_", "tw", "cn", "ko", "de", "nl", "it", "fi", "sv", "no", "pt", "vi", "ar", "he", "id", "el", "th", "hi", "bn", "pl", "tl", "la", "eo", "ca", "bg", "tx", "sr", "hu", "da", "lt", "cs", "gl", "ka", "gd", "go", "ro", "ga", "lv", "hy", "is", "ag", "gv", "io", "fa", "te", "vv", "mg", "ku", "lb", "et"]} \ No newline at end of file diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 322e8d128..78acd349d 100644 --- a/searx/engines/wikipedia.py +++ 
b/searx/engines/wikipedia.py @@ -131,7 +131,8 @@ def _fetch_supported_languages(resp): name = td[2].xpath('./a')[0].text english_name = td[1].xpath('./a')[0].text articles = int(td[4].xpath('./a/b')[0].text.replace(',', '')) - if articles >= 10000: + # exclude languages with too few articles + if articles >= 100000: supported_languages[code] = {"name": name, "english_name": english_name, "articles": articles} return supported_languages diff --git a/searx/languages.py b/searx/languages.py index 63291ff9d..529e5b11f 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -124,8 +124,8 @@ language_codes = ( (u"war", u"Winaray", u"", u"Waray-Waray"), (u"xh", u"Xhosa", u"", u"Xhosa"), (u"zh", u"中文", u"", u"Chinese"), - (u"zh-CN", u"中文", u"中国", u""), + (u"zh-CN", u"中文", u"中国", u"Chinese"), (u"zh-HK", u"中文", u"香港", u"Chinese"), - (u"zh-TW", u"中文", u"台湾", u""), + (u"zh-TW", u"中文", u"台湾", u"Chinese"), (u"zu", u"Isi-Zulu", u"", u"Zulu") ) diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html index 18bd7c212..ade0376da 100644 --- a/searx/templates/oscar/preferences.html +++ b/searx/templates/oscar/preferences.html @@ -172,7 +172,7 @@ {{ search_engine.name }} {{ shortcuts[search_engine.name] }} - + {{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }} @@ -181,7 +181,7 @@ {{ search_engine.timeout }} {{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }} - + {{ shortcuts[search_engine.name] }} {{ search_engine.name }} diff --git a/utils/fetch_languages.py b/utils/fetch_languages.py index aed875daf..3510a3234 100644 --- a/utils/fetch_languages.py +++ b/utils/fetch_languages.py @@ -32,25 +32,28 @@ languages = {} def valid_code(lang_code): # filter invalid codes # sl-SL is technically not invalid, but still a mistake + invalid_codes = ['sl-SL', 'wt-WT', 'jw'] + invalid_countries = ['UK', 'XA', 'XL'] if lang_code[:2] == 'xx'\ - or lang_code == 'sl-SL'\ - or lang_code == 
'wt-WT'\ - or lang_code == 'jw'\ - or lang_code[-2:] == 'UK'\ - or lang_code[-2:] == 'XA'\ - or lang_code[-2:] == 'XL': - return False - - # filter dialects - lang_code = lang_code.split('-') - if len(lang_code) > 2 or len(lang_code[0]) > 3: - return False - if len(lang_code) == 2 and len(lang_code[1]) > 2: + or lang_code in invalid_codes\ + or lang_code[-2:] in invalid_countries\ + or is_dialect(lang_code): return False return True +# Language codes with any additional tags other than language and country. +def is_dialect(lang_code): + lang_code = lang_code.split('-') + if len(lang_code) > 2 or len(lang_code[0]) > 3: + return True + if len(lang_code) == 2 and len(lang_code[1]) > 2: + return True + + return False + + # Get country name in specified language. def get_country_name(locale): if geonames_user is '': @@ -83,19 +86,17 @@ def fetch_supported_languages(): print e # write json file - f = io.open(engines_languages_file, "w", encoding="utf-8") - f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8"))) - f.close() + with io.open(engines_languages_file, "w", encoding="utf-8") as f: + f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8"))) # Join all language lists. # Iterate all languages supported by each engine. 
def join_language_lists(): # include wikipedia first for more accurate language names - # exclude languages with too few articles languages.update({code: lang for code, lang in engines_languages['wikipedia'].iteritems() - if valid_code(code) and lang['articles'] >= 100000}) + if valid_code(code)}) for engine_name in engines_languages: for locale in engines_languages[engine_name]: @@ -104,25 +105,27 @@ def join_language_lists(): # if language is not on list or if it has no name yet if locale not in languages or not languages[locale].get('name'): - if isinstance(engines_languages[engine_name], dict) \ - and engines_languages[engine_name][locale].get('articles', float('inf')) >= 100000: + if isinstance(engines_languages[engine_name], dict): languages[locale] = engines_languages[engine_name][locale] else: languages[locale] = {} # get locales that have no name or country yet for locale in languages.keys(): + # try to get language names if not languages[locale].get('name'): - # try to get language names name = languages.get(locale.split('-')[0], {}).get('name', None) if name: languages[locale]['name'] = name - languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '') else: # filter out locales with no name del languages[locale] continue + # try to get language name in english + if not languages[locale].get('english_name'): + languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '') + # try to get country name if locale.find('-') > 0 and not languages[locale].get('country'): languages[locale]['country'] = get_country_name(locale) or '' @@ -145,10 +148,10 @@ def filter_single_country_languages(): # Write languages.py. 
def write_languages_file(): new_file = open(languages_file, 'w') - file_content = '# -*- coding: utf-8 -*-\n' - file_content += '# list of language codes\n' - file_content += '# this file is generated automatically by utils/update_search_languages.py\n' - file_content += '\nlanguage_codes = (' + file_content = '# -*- coding: utf-8 -*-\n'\ + + '# list of language codes\n'\ + + '# this file is generated automatically by utils/update_search_languages.py\n'\ + + '\nlanguage_codes = (' for code in sorted(languages): file_content += '\n (u"' + code + '"'\ + ', u"' + languages[code]['name'].split(' (')[0] + '"'\ From f30d5e87938275219852c94d57771e182bfa435b Mon Sep 17 00:00:00 2001 From: marc Date: Fri, 16 Dec 2016 22:16:59 -0600 Subject: [PATCH 14/14] [fix] supported languages column in preferences page --- searx/templates/oscar/preferences.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html index ade0376da..2a05ea3e3 100644 --- a/searx/templates/oscar/preferences.html +++ b/searx/templates/oscar/preferences.html @@ -172,7 +172,7 @@ {{ search_engine.name }} {{ shortcuts[search_engine.name] }} - + {{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }} @@ -181,7 +181,7 @@ {{ search_engine.timeout }} {{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }} - + {{ shortcuts[search_engine.name] }} {{ search_engine.name }}