From 858aa3e6043a5102aec1b05e94ef1d65059f8898 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 28 Oct 2022 19:12:59 +0200 Subject: [PATCH] [mod] wikipedia & wikidata: upgrade to data_type: traits_v1 BTW this fixes an issue in wikipedia: SearXNG's locales zh-TW and zh-HK are now using language `zh-classical` from wikipedia (and not `zh`). Signed-off-by: Markus Heiser --- docs/src/searx.engines.wikipedia.rst | 27 + searx/autocomplete.py | 31 +- searx/data/engine_traits.json | 2703 +------------------------- searx/engines/wikidata.py | 51 +- searx/engines/wikipedia.py | 252 +-- 5 files changed, 275 insertions(+), 2789 deletions(-) create mode 100644 docs/src/searx.engines.wikipedia.rst diff --git a/docs/src/searx.engines.wikipedia.rst b/docs/src/searx.engines.wikipedia.rst new file mode 100644 index 000000000..e644cd645 --- /dev/null +++ b/docs/src/searx.engines.wikipedia.rst @@ -0,0 +1,27 @@ +.. _wikimedia engines: + +========= +Wikimedia +========= + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + + +.. _wikipedia engine: + +Wikipedia +========= + +.. automodule:: searx.engines.wikipedia + :members: + +.. _wikidata engine: + +Wikidata +========= + +.. automodule:: searx.engines.wikidata + :members: diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 848600e57..53e19905c 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -143,14 +143,31 @@ def qwant(query, sxng_locale): return results -def wikipedia(query, lang): - # wikipedia autocompleter - url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json' +def wikipedia(query, sxng_locale): + """Autocomplete from Wikipedia. 
Supports Wikipedia's languages (aka netloc).""" + results = [] + eng_traits = engines['wikipedia'].traits + wiki_lang = eng_traits.get_language(sxng_locale, 'en') + wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org') - resp = loads(get(url.format(urlencode(dict(search=query)))).text) - if len(resp) > 1: - return resp[1] - return [] + url = 'https://{wiki_netloc}/w/api.php?{args}' + args = urlencode( + { + 'action': 'opensearch', + 'format': 'json', + 'formatversion': '2', + 'search': query, + 'namespace': '0', + 'limit': '10', + } + ) + resp = get(url.format(args=args, wiki_netloc=wiki_netloc)) + if resp.ok: + data = resp.json() + if len(data) > 1: + results = data[1] + + return results def yandex(query, _lang): diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index 8d4806c90..521486ddd 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -3467,11 +3467,12 @@ }, "wikidata": { "all_locale": null, - "custom": {}, - "data_type": "supported_languages", + "custom": { + "wiki_netloc": {} + }, + "data_type": "traits_v1", "languages": { "af": "af", - "ak": "tw", "am": "am", "ar": "ar", "as": "as", @@ -3479,15 +3480,12 @@ "be": "be", "bg": "bg", "bn": "bn", - "bo": "bo", "bs": "bs", "ca": "ca", - "chr": "chr", "ckb": "ckb", "cs": "cs", "da": "da", "de": "de", - "dsb": "dsb", "el": "el", "en": "en", "es": "es", @@ -3497,13 +3495,10 @@ "fil": "tl", "fo": "fo", "fr": "fr", - "fur": "fur", "fy": "fy", "gl": "gl", "gsw": "als", "gu": "gu", - "gv": "gv", - "haw": "haw", "he": "he", "hi": "hi", "hsb": "hsb", @@ -3515,16 +3510,9 @@ "ja": "ja", "jv": "jv", "ka": "ka", - "km": "km", "kn": "kn", "ko": "ko", - "ks": "ks", - "ksh": "ksh", - "kw": "kw", "lb": "lb", - "lg": "lg", - "ln": "ln", - "lo": "lo", "lt": "lt", "lv": "lv", "mai": "mai", @@ -3532,12 +3520,8 @@ "ml": "ml", "mn": "mn", "mr": "mr", - "ms": "ms", - "mt": "mt", - "nds": "nds-nl", "ne": "ne", "no": "no", - "om": "om", "or": "or", "os": 
"os", "pa": "pa", @@ -3545,1323 +3529,118 @@ "ps": "ps", "pt": "pt", "qu": "qu", - "rm": "rm", "ro": "ro", "ru": "ru", - "rw": "rw", "sa": "sa", "sah": "sah", "sd": "sd", - "se": "se", - "shi": "shi", "si": "si", "sk": "sk", "sl": "sl", - "smn": "smn", - "so": "so", "sq": "sq", "sr": "sr", "ta": "ta", "te": "te", "th": "th", - "tk": "tk", - "to": "to", "tr": "tr", - "ug": "ug", "uk": "uk", "ur": "ur", "uz": "uz", "vi": "vi", - "wo": "wo", - "xh": "xh", "yi": "yi", "zh": "zh", - "zh_Hans": "zh", "zh_Hant": "zh-classical" }, "regions": {}, - "supported_languages": { - "ab": { - "english_name": "Abkhazian", - "name": "\u0410\u0525\u0441\u0443\u0430" - }, - "ace": { - "english_name": "Acehnese", - "name": "Basa Ac\u00e8h" - }, - "ady": { - "english_name": "Adyghe", - "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d" - }, - "af": { - "english_name": "Afrikaans", - "name": "Afrikaans" - }, - "ak": { - "english_name": "Akan", - "name": "Akana" - }, - "als": { - "english_name": "Alemannic", - "name": "Alemannisch" - }, - "alt": { - "english_name": "Southern Altai", - "name": "\u0410\u043b\u0442\u0430\u0439" - }, - "am": { - "english_name": "Amharic", - "name": "\u12a0\u121b\u122d\u129b" - }, - "ami": { - "english_name": "Amis", - "name": "Pangcah" - }, - "an": { - "english_name": "Aragonese", - "name": "Aragon\u00e9s" - }, - "ang": { - "english_name": "Anglo-Saxon", - "name": "\u00c6nglisc" - }, - "ar": { - "english_name": "Arabic", - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "arc": { - "english_name": "Aramaic", - "name": "\u0710\u072a\u0721\u071d\u0710" - }, - "ary": { - "english_name": "Moroccan Arabic", - "name": "\u062f\u0627\u0631\u064a\u062c\u0629" - }, - "arz": { - "english_name": "Egyptian Arabic", - "name": "\u0645\u0635\u0631\u0649 (Ma\u1e63ri)" - }, - "as": { - "english_name": "Assamese", - "name": "\u0985\u09b8\u09ae\u09c0\u09af\u09bc\u09be" - }, - "ast": { - "english_name": "Asturian", - "name": "Asturianu" - }, - "atj": { - 
"english_name": "Atikamekw", - "name": "Atikamekw" - }, - "av": { - "english_name": "Avar", - "name": "\u0410\u0432\u0430\u0440" - }, - "avk": { - "english_name": "Kotava", - "name": "Kotava" - }, - "awa": { - "english_name": "Awadhi", - "name": "\u0905\u0935\u0927\u0940" - }, - "ay": { - "english_name": "Aymara", - "name": "Aymar" - }, - "az": { - "english_name": "Azerbaijani", - "name": "Az\u0259rbaycanca" - }, - "azb": { - "english_name": "South Azerbaijani", - "name": "\u062a\u06c6\u0631\u06a9\u062c\u0647" - }, - "ba": { - "english_name": "Bashkir", - "name": "\u0411\u0430\u0448\u04a1\u043e\u0440\u0442" - }, - "ban": { - "english_name": "Balinese", - "name": "Bali" - }, - "bar": { - "english_name": "Bavarian", - "name": "Boarisch" - }, - "bat-smg": { - "english_name": "Samogitian", - "name": "\u017demait\u0117\u0161ka" - }, - "bcl": { - "english_name": "Central Bicolano", - "name": "Bikol" - }, - "be": { - "english_name": "Belarusian", - "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "be-tarask": { - "english_name": "Belarusian (Tara\u0161kievica)", - "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)" - }, - "bg": { - "english_name": "Bulgarian", - "name": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "bh": { - "english_name": "Bhojpuri", - "name": "\u092d\u094b\u091c\u092a\u0941\u0930\u0940" - }, - "bi": { - "english_name": "Bislama", - "name": "Bislama" - }, - "bjn": { - "english_name": "Banjar", - "name": "Bahasa Banjar" - }, - "blk": { - "english_name": "Pa'O", - "name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f" - }, - "bm": { - "english_name": "Bambara", - "name": "Bamanankan" - }, - "bn": { - "english_name": "Bengali", - "name": "\u09ac\u09be\u0982\u09b2\u09be" - }, - "bo": { - "english_name": "Tibetan", - "name": "\u0f56\u0f7c\u0f51\u0f0b\u0f66\u0f90\u0f51" - }, - "bpy": { - 
"english_name": "Bishnupriya Manipuri", - "name": "\u0987\u09ae\u09be\u09b0 \u09a0\u09be\u09b0/\u09ac\u09bf\u09b7\u09cd\u09a3\u09c1\u09aa\u09cd\u09b0\u09bf\u09af\u09bc\u09be \u09ae\u09a3\u09bf\u09aa\u09c1\u09b0\u09c0" - }, - "br": { - "english_name": "Breton", - "name": "Brezhoneg" - }, - "bs": { - "english_name": "Bosnian", - "name": "Bosanski" - }, - "bug": { - "english_name": "Buginese", - "name": "Basa Ugi" - }, - "bxr": { - "english_name": "Buryat", - "name": "\u0411\u0443\u0440\u044f\u0430\u0434" - }, - "ca": { - "english_name": "Catalan", - "name": "Catal\u00e0" - }, - "cbk-zam": { - "english_name": "Zamboanga Chavacano", - "name": "Chavacano de Zamboanga" - }, - "cdo": { - "english_name": "Min Dong", - "name": "M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304" - }, - "ce": { - "english_name": "Chechen", - "name": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d" - }, - "ceb": { - "english_name": "Cebuano", - "name": "Sinugboanong Binisaya" - }, - "ch": { - "english_name": "Chamorro", - "name": "Chamoru" - }, - "chr": { - "english_name": "Cherokee", - "name": "\u13e3\u13b3\u13a9" - }, - "chy": { - "english_name": "Cheyenne", - "name": "Tsets\u00eahest\u00e2hese" - }, - "ckb": { - "english_name": "Sorani", - "name": "Soran\u00ee / \u06a9\u0648\u0631\u062f\u06cc" - }, - "co": { - "english_name": "Corsican", - "name": "Corsu" - }, - "cr": { - "english_name": "Cree", - "name": "Nehiyaw" - }, - "crh": { - "english_name": "Crimean Tatar", - "name": "Q\u0131r\u0131mtatarca" - }, - "cs": { - "english_name": "Czech", - "name": "\u010ce\u0161tina" - }, - "csb": { - "english_name": "Kashubian", - "name": "Kasz\u00ebbsczi" - }, - "cu": { - "english_name": "Old Church Slavonic", - "name": "\u0421\u043b\u043e\u0432\u0463\u043d\u044c\u0441\u043a\u044a" - }, - "cv": { - "english_name": "Chuvash", - "name": "\u0427\u0103\u0432\u0430\u0448" - }, - "cy": { - "english_name": "Welsh", - "name": "Cymraeg" - }, - "da": { - "english_name": "Danish", - "name": "Dansk" - }, - "dag": { - 
"english_name": "Dagbani", - "name": "Dagbanli" - }, - "de": { - "english_name": "German", - "name": "Deutsch" - }, - "din": { - "english_name": "Dinka", - "name": "Thu\u0254\u014bj\u00e4\u014b" - }, - "diq": { - "english_name": "Zazaki", - "name": "Zazaki" - }, - "dsb": { - "english_name": "Lower Sorbian", - "name": "Dolnoserbski" - }, - "dty": { - "english_name": "Doteli", - "name": "\u0921\u094b\u091f\u0947\u0932\u0940" - }, - "dv": { - "english_name": "Divehi", - "name": "\u078b\u07a8\u0788\u07ac\u0780\u07a8\u0784\u07a6\u0790\u07b0" - }, - "dz": { - "english_name": "Dzongkha", - "name": "\u0f47\u0f7c\u0f44\u0f0b\u0f41" - }, - "ee": { - "english_name": "Ewe", - "name": "E\u028begbe" - }, - "el": { - "english_name": "Greek", - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "eml": { - "english_name": "Emilian-Romagnol", - "name": "Emili\u00e0n e rumagn\u00f2l" - }, - "en": { - "english_name": "English", - "name": "English" - }, - "eo": { - "english_name": "Esperanto", - "name": "Esperanto" - }, - "es": { - "english_name": "Spanish", - "name": "Espa\u00f1ol" - }, - "et": { - "english_name": "Estonian", - "name": "Eesti" - }, - "eu": { - "english_name": "Basque", - "name": "Euskara" - }, - "ext": { - "english_name": "Extremaduran", - "name": "Estreme\u00f1u" - }, - "fa": { - "english_name": "Persian", - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "ff": { - "english_name": "Fula", - "name": "Fulfulde" - }, - "fi": { - "english_name": "Finnish", - "name": "Suomi" - }, - "fiu-vro": { - "english_name": "V\u00f5ro", - "name": "V\u00f5ro" - }, - "fj": { - "english_name": "Fijian", - "name": "Na Vosa Vakaviti" - }, - "fo": { - "english_name": "Faroese", - "name": "F\u00f8royskt" - }, - "fr": { - "english_name": "French", - "name": "Fran\u00e7ais" - }, - "frp": { - "english_name": "Franco-Proven\u00e7al", - "name": "Arpetan" - }, - "frr": { - "english_name": "North Frisian", - "name": "Nordfrasch" - }, - "fur": { - "english_name": "Friulian", - 
"name": "Furlan" - }, - "fy": { - "english_name": "West Frisian", - "name": "Frysk" - }, - "ga": { - "english_name": "Irish", - "name": "Gaeilge" - }, - "gag": { - "english_name": "Gagauz", - "name": "Gagauz" - }, - "gan": { - "english_name": "Gan", - "name": "\u8d1b\u8a9e" - }, - "gcr": { - "english_name": "Guianan Creole", - "name": "Kriy\u00f2l Gwiyannen" - }, - "gd": { - "english_name": "Scottish Gaelic", - "name": "G\u00e0idhlig" - }, - "gl": { - "english_name": "Galician", - "name": "Galego" - }, - "glk": { - "english_name": "Gilaki", - "name": "\u06af\u06cc\u0644\u06a9\u06cc" - }, - "gn": { - "english_name": "Guarani", - "name": "Ava\u00f1e'\u1ebd" - }, - "gom": { - "english_name": "Goan Konkani", - "name": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni" - }, - "gor": { - "english_name": "Gorontalo", - "name": "Hulontalo" - }, - "got": { - "english_name": "Gothic", - "name": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a" - }, - "gu": { - "english_name": "Gujarati", - "name": "\u0a97\u0ac1\u0a9c\u0ab0\u0abe\u0aa4\u0ac0" - }, - "guw": { - "english_name": "Gun", - "name": "Gungbe" - }, - "gv": { - "english_name": "Manx", - "name": "Gaelg" - }, - "ha": { - "english_name": "Hausa", - "name": "Hausa / \u0647\u064e\u0648\u064f\u0633\u064e" - }, - "hak": { - "english_name": "Hakka", - "name": "Hak-k\u00e2-fa / \u5ba2\u5bb6\u8a71" - }, - "haw": { - "english_name": "Hawaiian", - "name": "Hawai\u02bbi" - }, - "he": { - "english_name": "Hebrew", - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "hi": { - "english_name": "Hindi", - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hif": { - "english_name": "Fiji Hindi", - "name": "Fiji Hindi" - }, - "hr": { - "english_name": "Croatian", - "name": "Hrvatski" - }, - "hsb": { - "english_name": "Upper Sorbian", - "name": "Hornjoserbsce" - }, - "ht": { - "english_name": "Haitian", - "name": "Kr\u00e8yol ayisyen" - }, - "hu": { - "english_name": 
"Hungarian", - "name": "Magyar" - }, - "hy": { - "english_name": "Armenian", - "name": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "hyw": { - "english_name": "Western Armenian", - "name": "\u0531\u0580\u0565\u0582\u0574\u057f\u0561\u0570\u0561\u0575\u0565\u0580\u0567\u0576" - }, - "ia": { - "english_name": "Interlingua", - "name": "Interlingua" - }, - "id": { - "english_name": "Indonesian", - "name": "Bahasa Indonesia" - }, - "ie": { - "english_name": "Interlingue", - "name": "Interlingue" - }, - "ig": { - "english_name": "Igbo", - "name": "\u00ccgb\u00f2" - }, - "ik": { - "english_name": "Inupiak", - "name": "I\u00f1upiatun" - }, - "ilo": { - "english_name": "Ilokano", - "name": "Ilokano" - }, - "inh": { - "english_name": "Ingush", - "name": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439" - }, - "io": { - "english_name": "Ido", - "name": "Ido" - }, - "is": { - "english_name": "Icelandic", - "name": "\u00cdslenska" - }, - "it": { - "english_name": "Italian", - "name": "Italiano" - }, - "iu": { - "english_name": "Inuktitut", - "name": "\u1403\u14c4\u1483\u144e\u1450\u1466" - }, - "ja": { - "english_name": "Japanese", - "name": "\u65e5\u672c\u8a9e" - }, - "jam": { - "english_name": "Jamaican Patois", - "name": "Jumiekan Kryuol" - }, - "jbo": { - "english_name": "Lojban", - "name": "Lojban" - }, - "jv": { - "english_name": "Javanese", - "name": "Basa Jawa" - }, - "ka": { - "english_name": "Georgian", - "name": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8" - }, - "kaa": { - "english_name": "Karakalpak", - "name": "Qaraqalpaqsha" - }, - "kab": { - "english_name": "Kabyle", - "name": "Taqbaylit" - }, - "kbd": { - "english_name": "Kabardian Circassian", - "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d (Adighabze)" - }, - "kbp": { - "english_name": "Kabiye", - "name": "Kab\u0269y\u025b" - }, - "kcg": { - "english_name": "Tyap", - "name": "Tyap" - }, - "kg": { - "english_name": "Kongo", - "name": "Kik\u00f4ngo" - }, - "ki": { - "english_name": "Kikuyu", 
- "name": "G\u0129k\u0169y\u0169" - }, - "kk": { - "english_name": "Kazakh", - "name": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430" - }, - "kl": { - "english_name": "Greenlandic", - "name": "Kalaallisut" - }, - "km": { - "english_name": "Khmer", - "name": "\u1797\u17b6\u179f\u17b6\u1781\u17d2\u1798\u17c2\u179a" - }, - "kn": { - "english_name": "Kannada", - "name": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1" - }, - "ko": { - "english_name": "Korean", - "name": "\ud55c\uad6d\uc5b4" - }, - "koi": { - "english_name": "Komi-Permyak", - "name": "\u041f\u0435\u0440\u0435\u043c \u041a\u043e\u043c\u0438 (Perem Komi)" - }, - "krc": { - "english_name": "Karachay-Balkar", - "name": "\u041a\u044a\u0430\u0440\u0430\u0447\u0430\u0439-\u041c\u0430\u043b\u043a\u044a\u0430\u0440 (Qarachay-Malqar)" - }, - "ks": { - "english_name": "Kashmiri", - "name": "\u0915\u0936\u094d\u092e\u0940\u0930\u0940 / \u0643\u0634\u0645\u064a\u0631\u064a" - }, - "ksh": { - "english_name": "Ripuarian", - "name": "Ripoarisch" - }, - "ku": { - "english_name": "Kurdish", - "name": "Kurd\u00ee / \u0643\u0648\u0631\u062f\u06cc" - }, - "kv": { - "english_name": "Komi", - "name": "\u041a\u043e\u043c\u0438" - }, - "kw": { - "english_name": "Cornish", - "name": "Kernowek/Karnuack" - }, - "ky": { - "english_name": "Kyrgyz", - "name": "\u041a\u044b\u0440\u0433\u044b\u0437\u0447\u0430" - }, - "la": { - "english_name": "Latin", - "name": "Latina" - }, - "lad": { - "english_name": "Ladino", - "name": "Dzhudezmo" - }, - "lb": { - "english_name": "Luxembourgish", - "name": "L\u00ebtzebuergesch" - }, - "lbe": { - "english_name": "Lak", - "name": "\u041b\u0430\u043a\u043a\u0443" - }, - "lez": { - "english_name": "Lezgian", - "name": "\u041b\u0435\u0437\u0433\u0438 \u0447\u0406\u0430\u043b (Lezgi \u010d\u2019al)" - }, - "lfn": { - "english_name": "Lingua Franca Nova", - "name": "Lingua franca nova" - }, - "lg": { - "english_name": "Luganda", - "name": "Luganda" - }, - "li": { - "english_name": "Limburgish", - "name": "Limburgs" - }, - 
"lij": { - "english_name": "Ligurian", - "name": "L\u00ecgure" - }, - "lld": { - "english_name": "Ladin", - "name": "Lingaz" - }, - "lmo": { - "english_name": "Lombard", - "name": "Lumbaart" - }, - "ln": { - "english_name": "Lingala", - "name": "Lingala" - }, - "lo": { - "english_name": "Lao", - "name": "\u0ea5\u0eb2\u0ea7" - }, - "lt": { - "english_name": "Lithuanian", - "name": "Lietuvi\u0173" - }, - "ltg": { - "english_name": "Latgalian", - "name": "Latga\u013cu" - }, - "lv": { - "english_name": "Latvian", - "name": "Latvie\u0161u" - }, - "mad": { - "english_name": "Madurese", - "name": "Madhur\u00e2" - }, - "mai": { - "english_name": "Maithili", - "name": "\u092e\u0948\u0925\u093f\u0932\u0940" - }, - "map-bms": { - "english_name": "Banyumasan", - "name": "Basa Banyumasan" - }, - "mdf": { - "english_name": "Moksha", - "name": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c (Mokshanj K\u00e4lj)" - }, - "mg": { - "english_name": "Malagasy", - "name": "Malagasy" - }, - "mhr": { - "english_name": "Meadow Mari", - "name": "\u041e\u043b\u044b\u043a \u041c\u0430\u0440\u0438\u0439 (Olyk Marij)" - }, - "mi": { - "english_name": "Maori", - "name": "M\u0101ori" - }, - "min": { - "english_name": "Minangkabau", - "name": "Minangkabau" - }, - "mk": { - "english_name": "Macedonian", - "name": "\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438" - }, - "ml": { - "english_name": "Malayalam", - "name": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02" - }, - "mn": { - "english_name": "Mongolian", - "name": "\u041c\u043e\u043d\u0433\u043e\u043b" - }, - "mni": { - "english_name": "Meitei", - "name": "\uabc3\uabe4\uabc7\uabe9\uabc2\uabe3\uabdf" - }, - "mnw": { - "english_name": "Mon", - "name": "\u1019\u1014\u103a" - }, - "mr": { - "english_name": "Marathi", - "name": "\u092e\u0930\u093e\u0920\u0940" - }, - "mrj": { - "english_name": "Hill Mari", - "name": "\u041a\u044b\u0440\u044b\u043a \u041c\u0430\u0440\u044b (Kyryk Mary)" - }, - "ms": { - "english_name": "Malay", - "name": "Bahasa 
Melayu" - }, - "mt": { - "english_name": "Maltese", - "name": "Malti" - }, - "mwl": { - "english_name": "Mirandese", - "name": "Mirand\u00e9s" - }, - "my": { - "english_name": "Burmese", - "name": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c" - }, - "myv": { - "english_name": "Erzya", - "name": "\u042d\u0440\u0437\u044f\u043d\u044c (Erzjanj Kelj)" - }, - "mzn": { - "english_name": "Mazandarani", - "name": "\u0645\u064e\u0632\u0650\u0631\u0648\u0646\u064a" - }, - "na": { - "english_name": "Nauruan", - "name": "dorerin Naoero" - }, - "nah": { - "english_name": "Nahuatl", - "name": "N\u0101huatl" - }, - "nap": { - "english_name": "Neapolitan", - "name": "Nnapulitano" - }, - "nds": { - "english_name": "Low Saxon", - "name": "Plattd\u00fc\u00fctsch" - }, - "nds-nl": { - "english_name": "Dutch Low Saxon", - "name": "Nedersaksisch" - }, - "ne": { - "english_name": "Nepali", - "name": "\u0928\u0947\u092a\u093e\u0932\u0940" - }, - "new": { - "english_name": "Newar", - "name": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e" - }, - "nia": { - "english_name": "Nias", - "name": "Li Niha" - }, - "nl": { - "english_name": "Dutch", - "name": "Nederlands" - }, - "nn": { - "english_name": "Norwegian (Nynorsk)", - "name": "Nynorsk" - }, - "no": { - "english_name": "Norwegian (Bokm\u00e5l)", - "name": "Norsk (Bokm\u00e5l)" - }, - "nov": { - "english_name": "Novial", - "name": "Novial" - }, - "nqo": { - "english_name": "N\u2019Ko", - "name": "\u07d2\u07de\u07cf" - }, - "nrm": { - "english_name": "Norman", - "name": "Nouormand/Normaund" - }, - "nso": { - "english_name": "Northern Sotho", - "name": "Sepedi" - }, - "nv": { - "english_name": "Navajo", - "name": "Din\u00e9 bizaad" - }, - "ny": { - "english_name": "Chichewa", - "name": "Chichewa" - }, - "oc": { - "english_name": "Occitan", - "name": "Occitan" - }, - "olo": { - "english_name": "Livvi-Karelian", - "name": "Karjalan" - }, - "om": { - "english_name": "Oromo", - "name": "Oromoo" - }, - "or": { - 
"english_name": "Oriya", - "name": "\u0b13\u0b21\u0b3c\u0b3f\u0b06" - }, - "os": { - "english_name": "Ossetian", - "name": "\u0418\u0440\u043e\u043d\u0430\u0443" - }, - "pa": { - "english_name": "Punjabi", - "name": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40" - }, - "pag": { - "english_name": "Pangasinan", - "name": "Pangasinan" - }, - "pam": { - "english_name": "Kapampangan", - "name": "Kapampangan" - }, - "pap": { - "english_name": "Papiamentu", - "name": "Papiamentu" - }, - "pcd": { - "english_name": "Picard", - "name": "Picard" - }, - "pcm": { - "english_name": "Nigerian Pidgin", - "name": "Naij\u00e1" - }, - "pdc": { - "english_name": "Pennsylvania German", - "name": "Deitsch" - }, - "pfl": { - "english_name": "Palatinate German", - "name": "P\u00e4lzisch" - }, - "pi": { - "english_name": "Pali", - "name": "\u092a\u093e\u0934\u093f" - }, - "pih": { - "english_name": "Norfolk", - "name": "Norfuk" - }, - "pl": { - "english_name": "Polish", - "name": "Polski" - }, - "pms": { - "english_name": "Piedmontese", - "name": "Piemont\u00e8is" - }, - "pnb": { - "english_name": "Western Punjabi", - "name": "\u0634\u0627\u06c1 \u0645\u06a9\u06be\u06cc \u067e\u0646\u062c\u0627\u0628\u06cc (Sh\u0101hmukh\u012b Pa\u00f1j\u0101b\u012b)" - }, - "pnt": { - "english_name": "Pontic", - "name": "\u03a0\u03bf\u03bd\u03c4\u03b9\u03b1\u03ba\u03ac" - }, - "ps": { - "english_name": "Pashto", - "name": "\u067e\u069a\u062a\u0648" - }, - "pt": { - "english_name": "Portuguese", - "name": "Portugu\u00eas" - }, - "pwn": { - "english_name": "Paiwan", - "name": "Paiwan" - }, - "qu": { - "english_name": "Quechua", - "name": "Qichwa simi" - }, - "rm": { - "english_name": "Romansh", - "name": "Rumantsch" - }, - "rmy": { - "english_name": "Romani", - "name": "romani - \u0930\u094b\u092e\u093e\u0928\u0940" - }, - "rn": { - "english_name": "Kirundi", - "name": "Ikirundi" - }, - "ro": { - "english_name": "Romanian", - "name": "Rom\u00e2n\u0103" - }, - "roa-rup": { - "english_name": "Aromanian", - "name": 
"Arm\u00e3neashce" - }, - "roa-tara": { - "english_name": "Tarantino", - "name": "Tarand\u00edne" - }, - "ru": { - "english_name": "Russian", - "name": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "rue": { - "english_name": "Rusyn", - "name": "\u0420\u0443\u0441\u0438\u043d\u044c\u0441\u043a\u044b\u0439" - }, - "rw": { - "english_name": "Kinyarwanda", - "name": "Ikinyarwanda" - }, - "sa": { - "english_name": "Sanskrit", - "name": "\u0938\u0902\u0938\u094d\u0915\u0943\u0924\u092e\u094d" - }, - "sah": { - "english_name": "Sakha", - "name": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430 (Saxa Tyla)" - }, - "sat": { - "english_name": "Santali", - "name": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64" - }, - "sc": { - "english_name": "Sardinian", - "name": "Sardu" - }, - "scn": { - "english_name": "Sicilian", - "name": "Sicilianu" - }, - "sco": { - "english_name": "Scots", - "name": "Scots" - }, - "sd": { - "english_name": "Sindhi", - "name": "\u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc \u060c \u0938\u093f\u0928\u094d\u0927" - }, - "se": { - "english_name": "Northern Sami", - "name": "S\u00e1megiella" - }, - "sg": { - "english_name": "Sango", - "name": "S\u00e4ng\u00f6" - }, - "sh": { - "english_name": "Serbo-Croatian", - "name": "Srpskohrvatski / \u0421\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438" - }, - "shi": { - "english_name": "Tachelhit", - "name": "Tacl\u1e25it" - }, - "shn": { - "english_name": "Shan", - "name": "\u101c\u102d\u1075\u103a\u1088\u1010\u1086\u1038" - }, - "si": { - "english_name": "Sinhalese", - "name": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd" - }, - "simple": { - "english_name": "Simple English", - "name": "Simple English" - }, - "sk": { - "english_name": "Slovak", - "name": "Sloven\u010dina" - }, - "skr": { - "english_name": "Saraiki", - "name": "\u0633\u0631\u0627\u0626\u06cc\u06a9\u06cc" - }, - "sl": { - "english_name": "Slovenian", - "name": "Sloven\u0161\u010dina" - }, - "sm": { - 
"english_name": "Samoan", - "name": "Gagana Samoa" - }, - "smn": { - "english_name": "Inari Sami", - "name": "Anar\u00e2\u0161kiel\u00e2" - }, - "sn": { - "english_name": "Shona", - "name": "chiShona" - }, - "so": { - "english_name": "Somali", - "name": "Soomaali" - }, - "sq": { - "english_name": "Albanian", - "name": "Shqip" - }, - "sr": { - "english_name": "Serbian", - "name": "\u0421\u0440\u043f\u0441\u043a\u0438 / Srpski" - }, - "srn": { - "english_name": "Sranan", - "name": "Sranantongo" - }, - "ss": { - "english_name": "Swati", - "name": "SiSwati" - }, - "st": { - "english_name": "Sesotho", - "name": "Sesotho" - }, - "stq": { - "english_name": "Saterland Frisian", - "name": "Seeltersk" - }, - "su": { - "english_name": "Sundanese", - "name": "Basa Sunda" - }, - "sv": { - "english_name": "Swedish", - "name": "Svenska" - }, - "sw": { - "english_name": "Swahili", - "name": "Kiswahili" - }, - "szl": { - "english_name": "Silesian", - "name": "\u015al\u016fnski" - }, - "szy": { - "english_name": "Sakizaya", - "name": "Sakizaya" - }, - "ta": { - "english_name": "Tamil", - "name": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd" - }, - "tay": { - "english_name": "Atayal", - "name": "Tayal" - }, - "tcy": { - "english_name": "Tulu", - "name": "\u0ca4\u0cc1\u0cb3\u0cc1" - }, - "te": { - "english_name": "Telugu", - "name": "\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41" - }, - "tet": { - "english_name": "Tetum", - "name": "Tetun" - }, - "tg": { - "english_name": "Tajik", - "name": "\u0422\u043e\u04b7\u0438\u043a\u04e3" - }, - "th": { - "english_name": "Thai", - "name": "\u0e44\u0e17\u0e22" - }, - "ti": { - "english_name": "Tigrinya", - "name": "\u1275\u130d\u122d\u129b" - }, - "tk": { - "english_name": "Turkmen", - "name": "T\u00fcrkmen" - }, - "tl": { - "english_name": "Tagalog", - "name": "Tagalog" - }, - "tn": { - "english_name": "Tswana", - "name": "Setswana" - }, - "to": { - "english_name": "Tongan", - "name": "faka Tonga" - }, - "tpi": { - "english_name": "Tok Pisin", - "name": "Tok Pisin" 
- }, - "tr": { - "english_name": "Turkish", - "name": "T\u00fcrk\u00e7e" - }, - "trv": { - "english_name": "Seediq", - "name": "Taroko" - }, - "ts": { - "english_name": "Tsonga", - "name": "Xitsonga" - }, - "tt": { - "english_name": "Tatar", - "name": "Tatar\u00e7a / \u0422\u0430\u0442\u0430\u0440\u0447\u0430" - }, - "tum": { - "english_name": "Tumbuka", - "name": "chiTumbuka" - }, - "tw": { - "english_name": "Twi", - "name": "Twi" - }, - "ty": { - "english_name": "Tahitian", - "name": "Reo M\u0101`ohi" - }, - "tyv": { - "english_name": "Tuvan", - "name": "\u0422\u044b\u0432\u0430" - }, - "udm": { - "english_name": "Udmurt", - "name": "\u0423\u0434\u043c\u0443\u0440\u0442 \u043a\u044b\u043b" - }, - "ug": { - "english_name": "Uyghur", - "name": "\u0626\u06c7\u064a\u063a\u06c7\u0631 \u062a\u0649\u0644\u0649" - }, - "uk": { - "english_name": "Ukrainian", - "name": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "ur": { - "english_name": "Urdu", - "name": "\u0627\u0631\u062f\u0648" - }, - "uz": { - "english_name": "Uzbek", - "name": "O\u2018zbek" - }, - "ve": { - "english_name": "Venda", - "name": "Tshivenda" - }, - "vec": { - "english_name": "Venetian", - "name": "V\u00e8neto" - }, - "vep": { - "english_name": "Vepsian", - "name": "Veps\u00e4n" - }, - "vi": { - "english_name": "Vietnamese", - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "vls": { - "english_name": "West Flemish", - "name": "West-Vlams" - }, - "vo": { - "english_name": "Volap\u00fck", - "name": "Volap\u00fck" - }, - "wa": { - "english_name": "Walloon", - "name": "Walon" - }, - "war": { - "english_name": "Waray-Waray", - "name": "Winaray" - }, - "wo": { - "english_name": "Wolof", - "name": "Wolof" - }, - "wuu": { - "english_name": "Wu", - "name": "\u5434\u8bed" - }, - "xal": { - "english_name": "Kalmyk", - "name": "\u0425\u0430\u043b\u044c\u043c\u0433" - }, - "xh": { - "english_name": "Xhosa", - "name": "isiXhosa" - }, - "xmf": { - "english_name": "Mingrelian", - "name": 
"\u10db\u10d0\u10e0\u10d2\u10d0\u10da\u10e3\u10e0\u10d8 (Margaluri)" - }, - "yi": { - "english_name": "Yiddish", - "name": "\u05d9\u05d9\u05b4\u05d3\u05d9\u05e9" - }, - "yo": { - "english_name": "Yoruba", - "name": "Yor\u00f9b\u00e1" - }, - "za": { - "english_name": "Zhuang", - "name": "Cuengh" - }, - "zea": { - "english_name": "Zeelandic", - "name": "Ze\u00eauws" - }, - "zh": { - "english_name": "Chinese", - "name": "\u4e2d\u6587" - }, - "zh-classical": { - "english_name": "Classical Chinese", - "name": "\u53e4\u6587 / \u6587\u8a00\u6587" - }, - "zh-min-nan": { - "english_name": "Min Nan", - "name": "B\u00e2n-l\u00e2m-g\u00fa" - }, - "zh-yue": { - "english_name": "Cantonese", - "name": "\u7cb5\u8a9e" - }, - "zu": { - "english_name": "Zulu", - "name": "isiZulu" - } - } + "supported_languages": {} }, "wikipedia": { "all_locale": null, - "custom": {}, - "data_type": "supported_languages", + "custom": { + "wiki_netloc": { + "af": "af.wikipedia.org", + "als": "als.wikipedia.org", + "am": "am.wikipedia.org", + "ar": "ar.wikipedia.org", + "as": "as.wikipedia.org", + "az": "az.wikipedia.org", + "be": "be.wikipedia.org", + "bg": "bg.wikipedia.org", + "bn": "bn.wikipedia.org", + "bs": "bs.wikipedia.org", + "ca": "ca.wikipedia.org", + "ckb": "ckb.wikipedia.org", + "cs": "cs.wikipedia.org", + "da": "da.wikipedia.org", + "de": "de.wikipedia.org", + "el": "el.wikipedia.org", + "en": "en.wikipedia.org", + "es": "es.wikipedia.org", + "et": "et.wikipedia.org", + "fa": "fa.wikipedia.org", + "fi": "fi.wikipedia.org", + "fo": "fo.wikipedia.org", + "fr": "fr.wikipedia.org", + "fy": "fy.wikipedia.org", + "gl": "gl.wikipedia.org", + "gu": "gu.wikipedia.org", + "he": "he.wikipedia.org", + "hi": "hi.wikipedia.org", + "hsb": "hsb.wikipedia.org", + "hu": "hu.wikipedia.org", + "hy": "hy.wikipedia.org", + "id": "id.wikipedia.org", + "is": "is.wikipedia.org", + "it": "it.wikipedia.org", + "ja": "ja.wikipedia.org", + "jv": "jv.wikipedia.org", + "ka": "ka.wikipedia.org", + "kn": 
"kn.wikipedia.org", + "ko": "ko.wikipedia.org", + "lb": "lb.wikipedia.org", + "lt": "lt.wikipedia.org", + "lv": "lv.wikipedia.org", + "mai": "mai.wikipedia.org", + "mk": "mk.wikipedia.org", + "ml": "ml.wikipedia.org", + "mn": "mn.wikipedia.org", + "mr": "mr.wikipedia.org", + "ne": "ne.wikipedia.org", + "no": "no.wikipedia.org", + "or": "or.wikipedia.org", + "os": "os.wikipedia.org", + "pa": "pa.wikipedia.org", + "pl": "pl.wikipedia.org", + "ps": "ps.wikipedia.org", + "pt": "pt.wikipedia.org", + "qu": "qu.wikipedia.org", + "ro": "ro.wikipedia.org", + "ru": "ru.wikipedia.org", + "sa": "sa.wikipedia.org", + "sah": "sah.wikipedia.org", + "sd": "sd.wikipedia.org", + "si": "si.wikipedia.org", + "sk": "sk.wikipedia.org", + "sl": "sl.wikipedia.org", + "sq": "sq.wikipedia.org", + "sr": "sr.wikipedia.org", + "ta": "ta.wikipedia.org", + "te": "te.wikipedia.org", + "th": "th.wikipedia.org", + "tl": "tl.wikipedia.org", + "tr": "tr.wikipedia.org", + "uk": "uk.wikipedia.org", + "ur": "ur.wikipedia.org", + "uz": "uz.wikipedia.org", + "vi": "vi.wikipedia.org", + "yi": "yi.wikipedia.org", + "zh": "zh.wikipedia.org", + "zh-classical": "zh-classical.wikipedia.org" + } + }, + "data_type": "traits_v1", "languages": { "af": "af", - "ak": "tw", "am": "am", "ar": "ar", "as": "as", @@ -4869,15 +3648,12 @@ "be": "be", "bg": "bg", "bn": "bn", - "bo": "bo", "bs": "bs", "ca": "ca", - "chr": "chr", "ckb": "ckb", "cs": "cs", "da": "da", "de": "de", - "dsb": "dsb", "el": "el", "en": "en", "es": "es", @@ -4887,13 +3663,10 @@ "fil": "tl", "fo": "fo", "fr": "fr", - "fur": "fur", "fy": "fy", "gl": "gl", "gsw": "als", "gu": "gu", - "gv": "gv", - "haw": "haw", "he": "he", "hi": "hi", "hsb": "hsb", @@ -4905,16 +3678,9 @@ "ja": "ja", "jv": "jv", "ka": "ka", - "km": "km", "kn": "kn", "ko": "ko", - "ks": "ks", - "ksh": "ksh", - "kw": "kw", "lb": "lb", - "lg": "lg", - "ln": "ln", - "lo": "lo", "lt": "lt", "lv": "lv", "mai": "mai", @@ -4922,12 +3688,8 @@ "ml": "ml", "mn": "mn", "mr": "mr", - "ms": "ms", - 
"mt": "mt", - "nds": "nds-nl", "ne": "ne", "no": "no", - "om": "om", "or": "or", "os": "os", "pa": "pa", @@ -4935,1318 +3697,35 @@ "ps": "ps", "pt": "pt", "qu": "qu", - "rm": "rm", "ro": "ro", "ru": "ru", - "rw": "rw", "sa": "sa", "sah": "sah", "sd": "sd", - "se": "se", - "shi": "shi", "si": "si", "sk": "sk", "sl": "sl", - "smn": "smn", - "so": "so", "sq": "sq", "sr": "sr", "ta": "ta", "te": "te", "th": "th", - "tk": "tk", - "to": "to", "tr": "tr", - "ug": "ug", "uk": "uk", "ur": "ur", "uz": "uz", "vi": "vi", - "wo": "wo", - "xh": "xh", "yi": "yi", "zh": "zh", "zh_Hans": "zh", "zh_Hant": "zh-classical" }, "regions": {}, - "supported_languages": { - "ab": { - "english_name": "Abkhazian", - "name": "\u0410\u0525\u0441\u0443\u0430" - }, - "ace": { - "english_name": "Acehnese", - "name": "Basa Ac\u00e8h" - }, - "ady": { - "english_name": "Adyghe", - "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d" - }, - "af": { - "english_name": "Afrikaans", - "name": "Afrikaans" - }, - "ak": { - "english_name": "Akan", - "name": "Akana" - }, - "als": { - "english_name": "Alemannic", - "name": "Alemannisch" - }, - "alt": { - "english_name": "Southern Altai", - "name": "\u0410\u043b\u0442\u0430\u0439" - }, - "am": { - "english_name": "Amharic", - "name": "\u12a0\u121b\u122d\u129b" - }, - "ami": { - "english_name": "Amis", - "name": "Pangcah" - }, - "an": { - "english_name": "Aragonese", - "name": "Aragon\u00e9s" - }, - "ang": { - "english_name": "Anglo-Saxon", - "name": "\u00c6nglisc" - }, - "ar": { - "english_name": "Arabic", - "name": "\u0627\u0644\u0639\u0631\u0628\u064a\u0629" - }, - "arc": { - "english_name": "Aramaic", - "name": "\u0710\u072a\u0721\u071d\u0710" - }, - "ary": { - "english_name": "Moroccan Arabic", - "name": "\u062f\u0627\u0631\u064a\u062c\u0629" - }, - "arz": { - "english_name": "Egyptian Arabic", - "name": "\u0645\u0635\u0631\u0649 (Ma\u1e63ri)" - }, - "as": { - "english_name": "Assamese", - "name": "\u0985\u09b8\u09ae\u09c0\u09af\u09bc\u09be" - }, - 
"ast": { - "english_name": "Asturian", - "name": "Asturianu" - }, - "atj": { - "english_name": "Atikamekw", - "name": "Atikamekw" - }, - "av": { - "english_name": "Avar", - "name": "\u0410\u0432\u0430\u0440" - }, - "avk": { - "english_name": "Kotava", - "name": "Kotava" - }, - "awa": { - "english_name": "Awadhi", - "name": "\u0905\u0935\u0927\u0940" - }, - "ay": { - "english_name": "Aymara", - "name": "Aymar" - }, - "az": { - "english_name": "Azerbaijani", - "name": "Az\u0259rbaycanca" - }, - "azb": { - "english_name": "South Azerbaijani", - "name": "\u062a\u06c6\u0631\u06a9\u062c\u0647" - }, - "ba": { - "english_name": "Bashkir", - "name": "\u0411\u0430\u0448\u04a1\u043e\u0440\u0442" - }, - "ban": { - "english_name": "Balinese", - "name": "Bali" - }, - "bar": { - "english_name": "Bavarian", - "name": "Boarisch" - }, - "bat-smg": { - "english_name": "Samogitian", - "name": "\u017demait\u0117\u0161ka" - }, - "bcl": { - "english_name": "Central Bicolano", - "name": "Bikol" - }, - "be": { - "english_name": "Belarusian", - "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f" - }, - "be-tarask": { - "english_name": "Belarusian (Tara\u0161kievica)", - "name": "\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f (\u0442\u0430\u0440\u0430\u0448\u043a\u0435\u0432\u0456\u0446\u0430)" - }, - "bg": { - "english_name": "Bulgarian", - "name": "\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438" - }, - "bh": { - "english_name": "Bhojpuri", - "name": "\u092d\u094b\u091c\u092a\u0941\u0930\u0940" - }, - "bi": { - "english_name": "Bislama", - "name": "Bislama" - }, - "bjn": { - "english_name": "Banjar", - "name": "Bahasa Banjar" - }, - "blk": { - "english_name": "Pa'O", - "name": "\u1015\u1021\u102d\u102f\u101d\u103a\u108f\u1018\u102c\u108f\u101e\u102c\u108f" - }, - "bm": { - "english_name": "Bambara", - "name": "Bamanankan" - }, - "bn": { - "english_name": "Bengali", - "name": "\u09ac\u09be\u0982\u09b2\u09be" - }, - "bo": { - "english_name": "Tibetan", 
- "name": "\u0f56\u0f7c\u0f51\u0f0b\u0f66\u0f90\u0f51" - }, - "bpy": { - "english_name": "Bishnupriya Manipuri", - "name": "\u0987\u09ae\u09be\u09b0 \u09a0\u09be\u09b0/\u09ac\u09bf\u09b7\u09cd\u09a3\u09c1\u09aa\u09cd\u09b0\u09bf\u09af\u09bc\u09be \u09ae\u09a3\u09bf\u09aa\u09c1\u09b0\u09c0" - }, - "br": { - "english_name": "Breton", - "name": "Brezhoneg" - }, - "bs": { - "english_name": "Bosnian", - "name": "Bosanski" - }, - "bug": { - "english_name": "Buginese", - "name": "Basa Ugi" - }, - "bxr": { - "english_name": "Buryat", - "name": "\u0411\u0443\u0440\u044f\u0430\u0434" - }, - "ca": { - "english_name": "Catalan", - "name": "Catal\u00e0" - }, - "cbk-zam": { - "english_name": "Zamboanga Chavacano", - "name": "Chavacano de Zamboanga" - }, - "cdo": { - "english_name": "Min Dong", - "name": "M\u00ecng-d\u0115\u0324ng-ng\u1e73\u0304" - }, - "ce": { - "english_name": "Chechen", - "name": "\u041d\u043e\u0445\u0447\u0438\u0439\u043d" - }, - "ceb": { - "english_name": "Cebuano", - "name": "Sinugboanong Binisaya" - }, - "ch": { - "english_name": "Chamorro", - "name": "Chamoru" - }, - "chr": { - "english_name": "Cherokee", - "name": "\u13e3\u13b3\u13a9" - }, - "chy": { - "english_name": "Cheyenne", - "name": "Tsets\u00eahest\u00e2hese" - }, - "ckb": { - "english_name": "Sorani", - "name": "Soran\u00ee / \u06a9\u0648\u0631\u062f\u06cc" - }, - "co": { - "english_name": "Corsican", - "name": "Corsu" - }, - "cr": { - "english_name": "Cree", - "name": "Nehiyaw" - }, - "crh": { - "english_name": "Crimean Tatar", - "name": "Q\u0131r\u0131mtatarca" - }, - "cs": { - "english_name": "Czech", - "name": "\u010ce\u0161tina" - }, - "csb": { - "english_name": "Kashubian", - "name": "Kasz\u00ebbsczi" - }, - "cu": { - "english_name": "Old Church Slavonic", - "name": "\u0421\u043b\u043e\u0432\u0463\u043d\u044c\u0441\u043a\u044a" - }, - "cv": { - "english_name": "Chuvash", - "name": "\u0427\u0103\u0432\u0430\u0448" - }, - "cy": { - "english_name": "Welsh", - "name": "Cymraeg" - }, - "da": { 
- "english_name": "Danish", - "name": "Dansk" - }, - "dag": { - "english_name": "Dagbani", - "name": "Dagbanli" - }, - "de": { - "english_name": "German", - "name": "Deutsch" - }, - "din": { - "english_name": "Dinka", - "name": "Thu\u0254\u014bj\u00e4\u014b" - }, - "diq": { - "english_name": "Zazaki", - "name": "Zazaki" - }, - "dsb": { - "english_name": "Lower Sorbian", - "name": "Dolnoserbski" - }, - "dty": { - "english_name": "Doteli", - "name": "\u0921\u094b\u091f\u0947\u0932\u0940" - }, - "dv": { - "english_name": "Divehi", - "name": "\u078b\u07a8\u0788\u07ac\u0780\u07a8\u0784\u07a6\u0790\u07b0" - }, - "dz": { - "english_name": "Dzongkha", - "name": "\u0f47\u0f7c\u0f44\u0f0b\u0f41" - }, - "ee": { - "english_name": "Ewe", - "name": "E\u028begbe" - }, - "el": { - "english_name": "Greek", - "name": "\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac" - }, - "eml": { - "english_name": "Emilian-Romagnol", - "name": "Emili\u00e0n e rumagn\u00f2l" - }, - "en": { - "english_name": "English", - "name": "English" - }, - "eo": { - "english_name": "Esperanto", - "name": "Esperanto" - }, - "es": { - "english_name": "Spanish", - "name": "Espa\u00f1ol" - }, - "et": { - "english_name": "Estonian", - "name": "Eesti" - }, - "eu": { - "english_name": "Basque", - "name": "Euskara" - }, - "ext": { - "english_name": "Extremaduran", - "name": "Estreme\u00f1u" - }, - "fa": { - "english_name": "Persian", - "name": "\u0641\u0627\u0631\u0633\u06cc" - }, - "ff": { - "english_name": "Fula", - "name": "Fulfulde" - }, - "fi": { - "english_name": "Finnish", - "name": "Suomi" - }, - "fiu-vro": { - "english_name": "V\u00f5ro", - "name": "V\u00f5ro" - }, - "fj": { - "english_name": "Fijian", - "name": "Na Vosa Vakaviti" - }, - "fo": { - "english_name": "Faroese", - "name": "F\u00f8royskt" - }, - "fr": { - "english_name": "French", - "name": "Fran\u00e7ais" - }, - "frp": { - "english_name": "Franco-Proven\u00e7al", - "name": "Arpetan" - }, - "frr": { - "english_name": "North Frisian", - "name": 
"Nordfrasch" - }, - "fur": { - "english_name": "Friulian", - "name": "Furlan" - }, - "fy": { - "english_name": "West Frisian", - "name": "Frysk" - }, - "ga": { - "english_name": "Irish", - "name": "Gaeilge" - }, - "gag": { - "english_name": "Gagauz", - "name": "Gagauz" - }, - "gan": { - "english_name": "Gan", - "name": "\u8d1b\u8a9e" - }, - "gcr": { - "english_name": "Guianan Creole", - "name": "Kriy\u00f2l Gwiyannen" - }, - "gd": { - "english_name": "Scottish Gaelic", - "name": "G\u00e0idhlig" - }, - "gl": { - "english_name": "Galician", - "name": "Galego" - }, - "glk": { - "english_name": "Gilaki", - "name": "\u06af\u06cc\u0644\u06a9\u06cc" - }, - "gn": { - "english_name": "Guarani", - "name": "Ava\u00f1e'\u1ebd" - }, - "gom": { - "english_name": "Goan Konkani", - "name": "\u0917\u094b\u0902\u092f\u091a\u0940 \u0915\u094b\u0902\u0915\u0923\u0940 / G\u00f5ychi Konknni" - }, - "gor": { - "english_name": "Gorontalo", - "name": "Hulontalo" - }, - "got": { - "english_name": "Gothic", - "name": "\ud800\udf32\ud800\udf3f\ud800\udf44\ud800\udf39\ud800\udf43\ud800\udf3a" - }, - "gu": { - "english_name": "Gujarati", - "name": "\u0a97\u0ac1\u0a9c\u0ab0\u0abe\u0aa4\u0ac0" - }, - "guw": { - "english_name": "Gun", - "name": "Gungbe" - }, - "gv": { - "english_name": "Manx", - "name": "Gaelg" - }, - "ha": { - "english_name": "Hausa", - "name": "Hausa / \u0647\u064e\u0648\u064f\u0633\u064e" - }, - "hak": { - "english_name": "Hakka", - "name": "Hak-k\u00e2-fa / \u5ba2\u5bb6\u8a71" - }, - "haw": { - "english_name": "Hawaiian", - "name": "Hawai\u02bbi" - }, - "he": { - "english_name": "Hebrew", - "name": "\u05e2\u05d1\u05e8\u05d9\u05ea" - }, - "hi": { - "english_name": "Hindi", - "name": "\u0939\u093f\u0928\u094d\u0926\u0940" - }, - "hif": { - "english_name": "Fiji Hindi", - "name": "Fiji Hindi" - }, - "hr": { - "english_name": "Croatian", - "name": "Hrvatski" - }, - "hsb": { - "english_name": "Upper Sorbian", - "name": "Hornjoserbsce" - }, - "ht": { - "english_name": "Haitian", - 
"name": "Kr\u00e8yol ayisyen" - }, - "hu": { - "english_name": "Hungarian", - "name": "Magyar" - }, - "hy": { - "english_name": "Armenian", - "name": "\u0540\u0561\u0575\u0565\u0580\u0565\u0576" - }, - "hyw": { - "english_name": "Western Armenian", - "name": "\u0531\u0580\u0565\u0582\u0574\u057f\u0561\u0570\u0561\u0575\u0565\u0580\u0567\u0576" - }, - "ia": { - "english_name": "Interlingua", - "name": "Interlingua" - }, - "id": { - "english_name": "Indonesian", - "name": "Bahasa Indonesia" - }, - "ie": { - "english_name": "Interlingue", - "name": "Interlingue" - }, - "ig": { - "english_name": "Igbo", - "name": "\u00ccgb\u00f2" - }, - "ik": { - "english_name": "Inupiak", - "name": "I\u00f1upiatun" - }, - "ilo": { - "english_name": "Ilokano", - "name": "Ilokano" - }, - "inh": { - "english_name": "Ingush", - "name": "\u0413\u04c0\u0430\u043b\u0433\u04c0\u0430\u0439" - }, - "io": { - "english_name": "Ido", - "name": "Ido" - }, - "is": { - "english_name": "Icelandic", - "name": "\u00cdslenska" - }, - "it": { - "english_name": "Italian", - "name": "Italiano" - }, - "iu": { - "english_name": "Inuktitut", - "name": "\u1403\u14c4\u1483\u144e\u1450\u1466" - }, - "ja": { - "english_name": "Japanese", - "name": "\u65e5\u672c\u8a9e" - }, - "jam": { - "english_name": "Jamaican Patois", - "name": "Jumiekan Kryuol" - }, - "jbo": { - "english_name": "Lojban", - "name": "Lojban" - }, - "jv": { - "english_name": "Javanese", - "name": "Basa Jawa" - }, - "ka": { - "english_name": "Georgian", - "name": "\u10e5\u10d0\u10e0\u10d7\u10e3\u10da\u10d8" - }, - "kaa": { - "english_name": "Karakalpak", - "name": "Qaraqalpaqsha" - }, - "kab": { - "english_name": "Kabyle", - "name": "Taqbaylit" - }, - "kbd": { - "english_name": "Kabardian Circassian", - "name": "\u0410\u0434\u044b\u0433\u044d\u0431\u0437\u044d (Adighabze)" - }, - "kbp": { - "english_name": "Kabiye", - "name": "Kab\u0269y\u025b" - }, - "kcg": { - "english_name": "Tyap", - "name": "Tyap" - }, - "kg": { - "english_name": "Kongo", - 
"name": "Kik\u00f4ngo" - }, - "ki": { - "english_name": "Kikuyu", - "name": "G\u0129k\u0169y\u0169" - }, - "kk": { - "english_name": "Kazakh", - "name": "\u049a\u0430\u0437\u0430\u049b\u0448\u0430" - }, - "kl": { - "english_name": "Greenlandic", - "name": "Kalaallisut" - }, - "km": { - "english_name": "Khmer", - "name": "\u1797\u17b6\u179f\u17b6\u1781\u17d2\u1798\u17c2\u179a" - }, - "kn": { - "english_name": "Kannada", - "name": "\u0c95\u0ca8\u0ccd\u0ca8\u0ca1" - }, - "ko": { - "english_name": "Korean", - "name": "\ud55c\uad6d\uc5b4" - }, - "koi": { - "english_name": "Komi-Permyak", - "name": "\u041f\u0435\u0440\u0435\u043c \u041a\u043e\u043c\u0438 (Perem Komi)" - }, - "krc": { - "english_name": "Karachay-Balkar", - "name": "\u041a\u044a\u0430\u0440\u0430\u0447\u0430\u0439-\u041c\u0430\u043b\u043a\u044a\u0430\u0440 (Qarachay-Malqar)" - }, - "ks": { - "english_name": "Kashmiri", - "name": "\u0915\u0936\u094d\u092e\u0940\u0930\u0940 / \u0643\u0634\u0645\u064a\u0631\u064a" - }, - "ksh": { - "english_name": "Ripuarian", - "name": "Ripoarisch" - }, - "ku": { - "english_name": "Kurdish", - "name": "Kurd\u00ee / \u0643\u0648\u0631\u062f\u06cc" - }, - "kv": { - "english_name": "Komi", - "name": "\u041a\u043e\u043c\u0438" - }, - "kw": { - "english_name": "Cornish", - "name": "Kernowek/Karnuack" - }, - "ky": { - "english_name": "Kyrgyz", - "name": "\u041a\u044b\u0440\u0433\u044b\u0437\u0447\u0430" - }, - "la": { - "english_name": "Latin", - "name": "Latina" - }, - "lad": { - "english_name": "Ladino", - "name": "Dzhudezmo" - }, - "lb": { - "english_name": "Luxembourgish", - "name": "L\u00ebtzebuergesch" - }, - "lbe": { - "english_name": "Lak", - "name": "\u041b\u0430\u043a\u043a\u0443" - }, - "lez": { - "english_name": "Lezgian", - "name": "\u041b\u0435\u0437\u0433\u0438 \u0447\u0406\u0430\u043b (Lezgi \u010d\u2019al)" - }, - "lfn": { - "english_name": "Lingua Franca Nova", - "name": "Lingua franca nova" - }, - "lg": { - "english_name": "Luganda", - "name": "Luganda" - }, - 
"li": { - "english_name": "Limburgish", - "name": "Limburgs" - }, - "lij": { - "english_name": "Ligurian", - "name": "L\u00ecgure" - }, - "lld": { - "english_name": "Ladin", - "name": "Lingaz" - }, - "lmo": { - "english_name": "Lombard", - "name": "Lumbaart" - }, - "ln": { - "english_name": "Lingala", - "name": "Lingala" - }, - "lo": { - "english_name": "Lao", - "name": "\u0ea5\u0eb2\u0ea7" - }, - "lt": { - "english_name": "Lithuanian", - "name": "Lietuvi\u0173" - }, - "ltg": { - "english_name": "Latgalian", - "name": "Latga\u013cu" - }, - "lv": { - "english_name": "Latvian", - "name": "Latvie\u0161u" - }, - "mad": { - "english_name": "Madurese", - "name": "Madhur\u00e2" - }, - "mai": { - "english_name": "Maithili", - "name": "\u092e\u0948\u0925\u093f\u0932\u0940" - }, - "map-bms": { - "english_name": "Banyumasan", - "name": "Basa Banyumasan" - }, - "mdf": { - "english_name": "Moksha", - "name": "\u041c\u043e\u043a\u0448\u0435\u043d\u044c (Mokshanj K\u00e4lj)" - }, - "mg": { - "english_name": "Malagasy", - "name": "Malagasy" - }, - "mhr": { - "english_name": "Meadow Mari", - "name": "\u041e\u043b\u044b\u043a \u041c\u0430\u0440\u0438\u0439 (Olyk Marij)" - }, - "mi": { - "english_name": "Maori", - "name": "M\u0101ori" - }, - "min": { - "english_name": "Minangkabau", - "name": "Minangkabau" - }, - "mk": { - "english_name": "Macedonian", - "name": "\u041c\u0430\u043a\u0435\u0434\u043e\u043d\u0441\u043a\u0438" - }, - "ml": { - "english_name": "Malayalam", - "name": "\u0d2e\u0d32\u0d2f\u0d3e\u0d33\u0d02" - }, - "mn": { - "english_name": "Mongolian", - "name": "\u041c\u043e\u043d\u0433\u043e\u043b" - }, - "mni": { - "english_name": "Meitei", - "name": "\uabc3\uabe4\uabc7\uabe9\uabc2\uabe3\uabdf" - }, - "mnw": { - "english_name": "Mon", - "name": "\u1019\u1014\u103a" - }, - "mr": { - "english_name": "Marathi", - "name": "\u092e\u0930\u093e\u0920\u0940" - }, - "mrj": { - "english_name": "Hill Mari", - "name": "\u041a\u044b\u0440\u044b\u043a \u041c\u0430\u0440\u044b (Kyryk 
Mary)" - }, - "ms": { - "english_name": "Malay", - "name": "Bahasa Melayu" - }, - "mt": { - "english_name": "Maltese", - "name": "Malti" - }, - "mwl": { - "english_name": "Mirandese", - "name": "Mirand\u00e9s" - }, - "my": { - "english_name": "Burmese", - "name": "\u1019\u103c\u1014\u103a\u1019\u102c\u1018\u102c\u101e\u102c" - }, - "myv": { - "english_name": "Erzya", - "name": "\u042d\u0440\u0437\u044f\u043d\u044c (Erzjanj Kelj)" - }, - "mzn": { - "english_name": "Mazandarani", - "name": "\u0645\u064e\u0632\u0650\u0631\u0648\u0646\u064a" - }, - "na": { - "english_name": "Nauruan", - "name": "dorerin Naoero" - }, - "nah": { - "english_name": "Nahuatl", - "name": "N\u0101huatl" - }, - "nap": { - "english_name": "Neapolitan", - "name": "Nnapulitano" - }, - "nds": { - "english_name": "Low Saxon", - "name": "Plattd\u00fc\u00fctsch" - }, - "nds-nl": { - "english_name": "Dutch Low Saxon", - "name": "Nedersaksisch" - }, - "ne": { - "english_name": "Nepali", - "name": "\u0928\u0947\u092a\u093e\u0932\u0940" - }, - "new": { - "english_name": "Newar", - "name": "\u0928\u0947\u092a\u093e\u0932 \u092d\u093e\u0937\u093e" - }, - "nia": { - "english_name": "Nias", - "name": "Li Niha" - }, - "nl": { - "english_name": "Dutch", - "name": "Nederlands" - }, - "nn": { - "english_name": "Norwegian (Nynorsk)", - "name": "Nynorsk" - }, - "no": { - "english_name": "Norwegian (Bokm\u00e5l)", - "name": "Norsk (Bokm\u00e5l)" - }, - "nov": { - "english_name": "Novial", - "name": "Novial" - }, - "nqo": { - "english_name": "N\u2019Ko", - "name": "\u07d2\u07de\u07cf" - }, - "nrm": { - "english_name": "Norman", - "name": "Nouormand/Normaund" - }, - "nso": { - "english_name": "Northern Sotho", - "name": "Sepedi" - }, - "nv": { - "english_name": "Navajo", - "name": "Din\u00e9 bizaad" - }, - "ny": { - "english_name": "Chichewa", - "name": "Chichewa" - }, - "oc": { - "english_name": "Occitan", - "name": "Occitan" - }, - "olo": { - "english_name": "Livvi-Karelian", - "name": "Karjalan" - }, - "om": { - 
"english_name": "Oromo", - "name": "Oromoo" - }, - "or": { - "english_name": "Oriya", - "name": "\u0b13\u0b21\u0b3c\u0b3f\u0b06" - }, - "os": { - "english_name": "Ossetian", - "name": "\u0418\u0440\u043e\u043d\u0430\u0443" - }, - "pa": { - "english_name": "Punjabi", - "name": "\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40" - }, - "pag": { - "english_name": "Pangasinan", - "name": "Pangasinan" - }, - "pam": { - "english_name": "Kapampangan", - "name": "Kapampangan" - }, - "pap": { - "english_name": "Papiamentu", - "name": "Papiamentu" - }, - "pcd": { - "english_name": "Picard", - "name": "Picard" - }, - "pcm": { - "english_name": "Nigerian Pidgin", - "name": "Naij\u00e1" - }, - "pdc": { - "english_name": "Pennsylvania German", - "name": "Deitsch" - }, - "pfl": { - "english_name": "Palatinate German", - "name": "P\u00e4lzisch" - }, - "pi": { - "english_name": "Pali", - "name": "\u092a\u093e\u0934\u093f" - }, - "pih": { - "english_name": "Norfolk", - "name": "Norfuk" - }, - "pl": { - "english_name": "Polish", - "name": "Polski" - }, - "pms": { - "english_name": "Piedmontese", - "name": "Piemont\u00e8is" - }, - "pnb": { - "english_name": "Western Punjabi", - "name": "\u0634\u0627\u06c1 \u0645\u06a9\u06be\u06cc \u067e\u0646\u062c\u0627\u0628\u06cc (Sh\u0101hmukh\u012b Pa\u00f1j\u0101b\u012b)" - }, - "pnt": { - "english_name": "Pontic", - "name": "\u03a0\u03bf\u03bd\u03c4\u03b9\u03b1\u03ba\u03ac" - }, - "ps": { - "english_name": "Pashto", - "name": "\u067e\u069a\u062a\u0648" - }, - "pt": { - "english_name": "Portuguese", - "name": "Portugu\u00eas" - }, - "pwn": { - "english_name": "Paiwan", - "name": "Paiwan" - }, - "qu": { - "english_name": "Quechua", - "name": "Qichwa simi" - }, - "rm": { - "english_name": "Romansh", - "name": "Rumantsch" - }, - "rmy": { - "english_name": "Romani", - "name": "romani - \u0930\u094b\u092e\u093e\u0928\u0940" - }, - "rn": { - "english_name": "Kirundi", - "name": "Ikirundi" - }, - "ro": { - "english_name": "Romanian", - "name": "Rom\u00e2n\u0103" - 
}, - "roa-rup": { - "english_name": "Aromanian", - "name": "Arm\u00e3neashce" - }, - "roa-tara": { - "english_name": "Tarantino", - "name": "Tarand\u00edne" - }, - "ru": { - "english_name": "Russian", - "name": "\u0420\u0443\u0441\u0441\u043a\u0438\u0439" - }, - "rue": { - "english_name": "Rusyn", - "name": "\u0420\u0443\u0441\u0438\u043d\u044c\u0441\u043a\u044b\u0439" - }, - "rw": { - "english_name": "Kinyarwanda", - "name": "Ikinyarwanda" - }, - "sa": { - "english_name": "Sanskrit", - "name": "\u0938\u0902\u0938\u094d\u0915\u0943\u0924\u092e\u094d" - }, - "sah": { - "english_name": "Sakha", - "name": "\u0421\u0430\u0445\u0430 \u0442\u044b\u043b\u0430 (Saxa Tyla)" - }, - "sat": { - "english_name": "Santali", - "name": "\u1c65\u1c5f\u1c71\u1c5b\u1c5f\u1c72\u1c64" - }, - "sc": { - "english_name": "Sardinian", - "name": "Sardu" - }, - "scn": { - "english_name": "Sicilian", - "name": "Sicilianu" - }, - "sco": { - "english_name": "Scots", - "name": "Scots" - }, - "sd": { - "english_name": "Sindhi", - "name": "\u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc \u060c \u0938\u093f\u0928\u094d\u0927" - }, - "se": { - "english_name": "Northern Sami", - "name": "S\u00e1megiella" - }, - "sg": { - "english_name": "Sango", - "name": "S\u00e4ng\u00f6" - }, - "sh": { - "english_name": "Serbo-Croatian", - "name": "Srpskohrvatski / \u0421\u0440\u043f\u0441\u043a\u043e\u0445\u0440\u0432\u0430\u0442\u0441\u043a\u0438" - }, - "shi": { - "english_name": "Tachelhit", - "name": "Tacl\u1e25it" - }, - "shn": { - "english_name": "Shan", - "name": "\u101c\u102d\u1075\u103a\u1088\u1010\u1086\u1038" - }, - "si": { - "english_name": "Sinhalese", - "name": "\u0dc3\u0dd2\u0d82\u0dc4\u0dbd" - }, - "simple": { - "english_name": "Simple English", - "name": "Simple English" - }, - "sk": { - "english_name": "Slovak", - "name": "Sloven\u010dina" - }, - "skr": { - "english_name": "Saraiki", - "name": "\u0633\u0631\u0627\u0626\u06cc\u06a9\u06cc" - }, - "sl": { - "english_name": "Slovenian", - 
"name": "Sloven\u0161\u010dina" - }, - "sm": { - "english_name": "Samoan", - "name": "Gagana Samoa" - }, - "smn": { - "english_name": "Inari Sami", - "name": "Anar\u00e2\u0161kiel\u00e2" - }, - "sn": { - "english_name": "Shona", - "name": "chiShona" - }, - "so": { - "english_name": "Somali", - "name": "Soomaali" - }, - "sq": { - "english_name": "Albanian", - "name": "Shqip" - }, - "sr": { - "english_name": "Serbian", - "name": "\u0421\u0440\u043f\u0441\u043a\u0438 / Srpski" - }, - "srn": { - "english_name": "Sranan", - "name": "Sranantongo" - }, - "ss": { - "english_name": "Swati", - "name": "SiSwati" - }, - "st": { - "english_name": "Sesotho", - "name": "Sesotho" - }, - "stq": { - "english_name": "Saterland Frisian", - "name": "Seeltersk" - }, - "su": { - "english_name": "Sundanese", - "name": "Basa Sunda" - }, - "sv": { - "english_name": "Swedish", - "name": "Svenska" - }, - "sw": { - "english_name": "Swahili", - "name": "Kiswahili" - }, - "szl": { - "english_name": "Silesian", - "name": "\u015al\u016fnski" - }, - "szy": { - "english_name": "Sakizaya", - "name": "Sakizaya" - }, - "ta": { - "english_name": "Tamil", - "name": "\u0ba4\u0bae\u0bbf\u0bb4\u0bcd" - }, - "tay": { - "english_name": "Atayal", - "name": "Tayal" - }, - "tcy": { - "english_name": "Tulu", - "name": "\u0ca4\u0cc1\u0cb3\u0cc1" - }, - "te": { - "english_name": "Telugu", - "name": "\u0c24\u0c46\u0c32\u0c41\u0c17\u0c41" - }, - "tet": { - "english_name": "Tetum", - "name": "Tetun" - }, - "tg": { - "english_name": "Tajik", - "name": "\u0422\u043e\u04b7\u0438\u043a\u04e3" - }, - "th": { - "english_name": "Thai", - "name": "\u0e44\u0e17\u0e22" - }, - "ti": { - "english_name": "Tigrinya", - "name": "\u1275\u130d\u122d\u129b" - }, - "tk": { - "english_name": "Turkmen", - "name": "T\u00fcrkmen" - }, - "tl": { - "english_name": "Tagalog", - "name": "Tagalog" - }, - "tn": { - "english_name": "Tswana", - "name": "Setswana" - }, - "to": { - "english_name": "Tongan", - "name": "faka Tonga" - }, - "tpi": { - 
"english_name": "Tok Pisin", - "name": "Tok Pisin" - }, - "tr": { - "english_name": "Turkish", - "name": "T\u00fcrk\u00e7e" - }, - "trv": { - "english_name": "Seediq", - "name": "Taroko" - }, - "ts": { - "english_name": "Tsonga", - "name": "Xitsonga" - }, - "tt": { - "english_name": "Tatar", - "name": "Tatar\u00e7a / \u0422\u0430\u0442\u0430\u0440\u0447\u0430" - }, - "tum": { - "english_name": "Tumbuka", - "name": "chiTumbuka" - }, - "tw": { - "english_name": "Twi", - "name": "Twi" - }, - "ty": { - "english_name": "Tahitian", - "name": "Reo M\u0101`ohi" - }, - "tyv": { - "english_name": "Tuvan", - "name": "\u0422\u044b\u0432\u0430" - }, - "udm": { - "english_name": "Udmurt", - "name": "\u0423\u0434\u043c\u0443\u0440\u0442 \u043a\u044b\u043b" - }, - "ug": { - "english_name": "Uyghur", - "name": "\u0626\u06c7\u064a\u063a\u06c7\u0631 \u062a\u0649\u0644\u0649" - }, - "uk": { - "english_name": "Ukrainian", - "name": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430" - }, - "ur": { - "english_name": "Urdu", - "name": "\u0627\u0631\u062f\u0648" - }, - "uz": { - "english_name": "Uzbek", - "name": "O\u2018zbek" - }, - "ve": { - "english_name": "Venda", - "name": "Tshivenda" - }, - "vec": { - "english_name": "Venetian", - "name": "V\u00e8neto" - }, - "vep": { - "english_name": "Vepsian", - "name": "Veps\u00e4n" - }, - "vi": { - "english_name": "Vietnamese", - "name": "Ti\u1ebfng Vi\u1ec7t" - }, - "vls": { - "english_name": "West Flemish", - "name": "West-Vlams" - }, - "vo": { - "english_name": "Volap\u00fck", - "name": "Volap\u00fck" - }, - "wa": { - "english_name": "Walloon", - "name": "Walon" - }, - "war": { - "english_name": "Waray-Waray", - "name": "Winaray" - }, - "wo": { - "english_name": "Wolof", - "name": "Wolof" - }, - "wuu": { - "english_name": "Wu", - "name": "\u5434\u8bed" - }, - "xal": { - "english_name": "Kalmyk", - "name": "\u0425\u0430\u043b\u044c\u043c\u0433" - }, - "xh": { - "english_name": "Xhosa", - "name": "isiXhosa" - }, - "xmf": { - 
"english_name": "Mingrelian", - "name": "\u10db\u10d0\u10e0\u10d2\u10d0\u10da\u10e3\u10e0\u10d8 (Margaluri)" - }, - "yi": { - "english_name": "Yiddish", - "name": "\u05d9\u05d9\u05b4\u05d3\u05d9\u05e9" - }, - "yo": { - "english_name": "Yoruba", - "name": "Yor\u00f9b\u00e1" - }, - "za": { - "english_name": "Zhuang", - "name": "Cuengh" - }, - "zea": { - "english_name": "Zeelandic", - "name": "Ze\u00eauws" - }, - "zh": { - "english_name": "Chinese", - "name": "\u4e2d\u6587" - }, - "zh-classical": { - "english_name": "Classical Chinese", - "name": "\u53e4\u6587 / \u6587\u8a00\u6587" - }, - "zh-min-nan": { - "english_name": "Min Nan", - "name": "B\u00e2n-l\u00e2m-g\u00fa" - }, - "zh-yue": { - "english_name": "Cantonese", - "name": "\u7cb5\u8a9e" - }, - "zu": { - "english_name": "Zulu", - "name": "isiZulu" - } - } + "supported_languages": {} }, "yahoo": { "all_locale": "any", + "custom": {}, "data_type": "traits_v1", "languages": { "ar": "ar", @@ -6285,4 +3764,4 @@ "regions": {}, "supported_languages": {} } -} +} \ No newline at end of file diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index a38600978..6ea77f092 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Wikidata +"""This module implements the Wikidata engine. Some implementations are shared +from :ref:`wikipedia engine`. 
+ """ # pylint: disable=missing-class-docstring +from typing import TYPE_CHECKING from hashlib import md5 from urllib.parse import urlencode, unquote from json import loads @@ -13,13 +16,17 @@ from babel.dates import format_datetime, format_date, format_time, get_datetime_ from searx.data import WIKIDATA_UNITS from searx.network import post, get -from searx.utils import match_language, searx_useragent, get_string_replaces_function +from searx.utils import searx_useragent, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom -from searx.engines.wikipedia import ( # pylint: disable=unused-import - fetch_traits, - _fetch_supported_languages, - supported_languages_url, -) +from searx.engines.wikipedia import fetch_traits as _fetch_traits +from searx.enginelib.traits import EngineTraits + +if TYPE_CHECKING: + import logging + + logger: logging.Logger + +traits: EngineTraits # about about = { @@ -155,33 +162,35 @@ def send_wikidata_query(query, method='GET'): def request(query, params): - language = params['language'].split('-')[0] - if language == 'all': - language = 'en' - else: - language = match_language(params['language'], supported_languages, language_aliases).split('-')[0] + + # wikidata does not support zh-classical (zh_Hans) / zh-TW, zh-HK and zh-CN + # mapped to zh + sxng_lang = params['searxng_locale'].split('-')[0] + language = traits.get_language(sxng_lang, 'en') query, attributes = get_query(query, language) + logger.debug("request --> language %s // len(attributes): %s", language, len(attributes)) params['method'] = 'POST' params['url'] = SPARQL_ENDPOINT_URL params['data'] = {'query': query} params['headers'] = get_headers() - params['language'] = language params['attributes'] = attributes + return params def response(resp): + results = [] jsonresponse = loads(resp.content.decode()) - language = resp.search_params['language'].lower() + language = resp.search_params['language'] attributes 
= resp.search_params['attributes'] + logger.debug("request --> language %s // len(attributes): %s", language, len(attributes)) seen_entities = set() - for result in jsonresponse.get('results', {}).get('bindings', []): attribute_result = {key: value['value'] for key, value in result.items()} entity_url = attribute_result['item'] @@ -757,3 +766,15 @@ def init(engine_settings=None): # pylint: disable=unused-argument lang = result['name']['xml:lang'] entity_id = result['item']['value'].replace('http://www.wikidata.org/entity/', '') WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize() + + +def fetch_traits(engine_traits: EngineTraits): + """Use languages evaluated from :py:obj:`wikipedia.fetch_traits + <searx.engines.wikipedia.fetch_traits>` except zh-classical (zh_Hans) which + is not supported by wikidata.""" + + _fetch_traits(engine_traits) + # wikidata does not support zh-classical (zh_Hans) + engine_traits.languages.pop('zh_Hans') + # wikidata does not have net-locations for the languages + engine_traits.custom['wiki_netloc'] = {} diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 4d5474e17..9d2d30afa 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -1,16 +1,26 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -""" - Wikipedia (Web) +# lint: pylint +"""This module implements the Wikipedia engine. Some of these implementations +are shared by other engines: + +- :ref:`wikidata engine` + +The list of supported languages is fetched from the article linked by +:py:obj:`wikipedia_article_depth`. Unlike traditional search engines, wikipedia +does not support one Wikipedia for all the languages, but there is one Wikipedia +for every language (:py:obj:`fetch_traits`). 
""" -from urllib.parse import quote -from json import loads +import urllib.parse +import babel + from lxml import html -from searx.utils import match_language, searx_useragent + from searx import network +from searx.locales import language_tag from searx.enginelib.traits import EngineTraits -engine_traits: EngineTraits +traits: EngineTraits # about about = { @@ -22,32 +32,40 @@ about = { "results": 'JSON', } - send_accept_language_header = True -# search-url -search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' -supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' -language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")} +wikipedia_article_depth = 'https://meta.wikimedia.org/wiki/Wikipedia_article_depth' +"""The *editing depth* of Wikipedia is one of several possible rough indicators +of the encyclopedia's collaborative quality, showing how frequently its articles +are updated. The measurement of depth was introduced after some limitations of +the classic measurement of article count were realized. +""" + +# example: https://zh-classical.wikipedia.org/api/rest_v1/page/summary/日 +rest_v1_summary_url = 'https://{wiki_netloc}/api/rest_v1/page/summary/{title}' +"""`wikipedia rest_v1 summary API`_: The summary response includes an extract of +the first paragraph of the page in plain text and HTML as well as the type of +page. This is useful for page previews (fka. Hovercards, aka. Popups) on the web +and link previews in the apps. + +.. 
_wikipedia rest_v1 summary API: https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_ + +""" -# set language in base_url -def url_lang(lang): - lang_pre = lang.split('-')[0] - if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases: - return 'en' - return match_language(lang, supported_languages, language_aliases).split('-')[0] - - -# do search-request def request(query, params): + """Assemble a request (`wikipedia rest_v1 summary API`_).""" if query.islower(): query = query.title() - language = url_lang(params['language']) - params['url'] = search_url.format(title=quote(query), language=language) + engine_language = traits.get_language(params['searxng_locale'], 'en') + wiki_netloc = traits.custom['wiki_netloc'].get(engine_language, 'https://en.wikipedia.org/wiki/') + title = urllib.parse.quote(query) + + # '!wikipedia 日 :zh-TW' --> https://zh-classical.wikipedia.org/ + # '!wikipedia 日 :zh' --> https://zh.wikipedia.org/ + params['url'] = rest_v1_summary_url.format(wiki_netloc=wiki_netloc, title=title) - params['headers']['User-Agent'] = searx_useragent() params['raise_for_httperror'] = False params['soft_max_redirects'] = 2 @@ -56,13 +74,14 @@ def request(query, params): # get response from search-request def response(resp): + + results = [] if resp.status_code == 404: return [] - if resp.status_code == 400: try: - api_result = loads(resp.text) - except: + api_result = resp.json() + except Exception: # pylint: disable=broad-except pass else: if ( @@ -73,52 +92,25 @@ def response(resp): network.raise_for_httperror(resp) - results = [] - api_result = loads(resp.text) - - # skip disambiguation pages - if api_result.get('type') != 'standard': - return [] - + api_result = resp.json() title = api_result['title'] wikipedia_link = api_result['content_urls']['desktop']['page'] + results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')}) - results.append({'url': 
wikipedia_link, 'title': title}) - - results.append( - { - 'infobox': title, - 'id': wikipedia_link, - 'content': api_result.get('extract', ''), - 'img_src': api_result.get('thumbnail', {}).get('source'), - 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], - } - ) + if api_result.get('type') == 'standard': + results.append( + { + 'infobox': title, + 'id': wikipedia_link, + 'content': api_result.get('extract', ''), + 'img_src': api_result.get('thumbnail', {}).get('source'), + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], + } + ) return results -# get supported languages from their site -def _fetch_supported_languages(resp): - supported_languages = {} - dom = html.fromstring(resp.text) - tables = dom.xpath('//table[contains(@class,"sortable")]') - for table in tables: - # exclude header row - trs = table.xpath('.//tr')[1:] - for tr in trs: - td = tr.xpath('./td') - code = td[3].xpath('./a')[0].text - name = td[1].xpath('./a')[0].text - english_name = td[1].xpath('./a')[0].text - articles = int(td[4].xpath('./a')[0].text.replace(',', '')) - # exclude languages with too few articles - if articles >= 100: - supported_languages[code] = {"name": name, "english_name": english_name} - - return supported_languages - - # Nonstandard language codes # # These Wikipedias use language codes that do not conform to the ISO 639 @@ -135,104 +127,57 @@ lang_map = { 'nrm': 'nrf', 'roa-rup': 'rup', 'nds-nl': 'nds', - #'roa-tara: – invented code used for the Tarantino Wikipedia (again, roa is the standard code for the large family of Romance languages that the Tarantino dialect falls within) #'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple) - 'zh-classical': 'zh_Hant', 'zh-min-nan': 'nan', 'zh-yue': 'yue', 'an': 'arg', + 'zh-classical': 'zh-Hant', # babel maps classical to zh-Hans (for whatever reason) } unknown_langs = [ - 'ab', # Abkhazian - 'alt', # Southern Altai 'an', # Aragonese - 'ang', # Anglo-Saxon - 'arc', # 
Aramaic - 'ary', # Moroccan Arabic - 'av', # Avar 'ba', # Bashkir - 'be-tarask', 'bar', # Bavarian 'bcl', # Central Bicolano - 'bh', # Bhojpuri - 'bi', # Bislama - 'bjn', # Banjar - 'blk', # Pa'O - 'bpy', # Bishnupriya Manipuri - 'bxr', # Buryat - 'cbk-zam', # Zamboanga Chavacano - 'co', # Corsican - 'cu', # Old Church Slavonic - 'dty', # Doteli - 'dv', # Divehi - 'ext', # Extremaduran - 'fj', # Fijian - 'frp', # Franco-Provençal - 'gan', # Gan - 'gom', # Goan Konkani + 'be-tarask', # Belarusian variant / Belarusian is already covered by 'be' + 'bpy', # Bishnupriya Manipuri is unknown by babel 'hif', # Fiji Hindi 'ilo', # Ilokano - 'inh', # Ingush - 'jbo', # Lojban - 'kaa', # Karakalpak - 'kbd', # Kabardian Circassian - 'kg', # Kongo - 'koi', # Komi-Permyak - 'krc', # Karachay-Balkar - 'kv', # Komi - 'lad', # Ladino - 'lbe', # Lak - 'lez', # Lezgian 'li', # Limburgish - 'ltg', # Latgalian - 'mdf', # Moksha - 'mnw', # Mon - 'mwl', # Mirandese - 'myv', # Erzya - 'na', # Nauruan - 'nah', # Nahuatl - 'nov', # Novial - 'nrm', # Norman - 'pag', # Pangasinan - 'pam', # Kapampangan - 'pap', # Papiamentu - 'pdc', # Pennsylvania German - 'pfl', # Palatinate German - 'roa-rup', # Aromanian - 'sco', # Scots - 'sco', # Scots (https://sco.wikipedia.org) is not known by babel, Scottish Gaelic (https://gd.wikipedia.org) is known by babel + 'sco', # Scots (sco) is not known by babel, Scottish Gaelic (gd) is known by babel 'sh', # Serbo-Croatian 'simple', # simple english is not know as a natural language different to english (babel) - 'sm', # Samoan - 'srn', # Sranan - 'stq', # Saterland Frisian - 'szy', # Sakizaya - 'tcy', # Tulu - 'tet', # Tetum - 'tpi', # Tok Pisin - 'trv', # Seediq - 'ty', # Tahitian - 'tyv', # Tuvan - 'udm', # Udmurt - 'vep', # Vepsian - 'vls', # West Flemish 'vo', # Volapük 'wa', # Walloon - 'xal', # Kalmyk ] def fetch_traits(engine_traits: EngineTraits): - """Fetch languages from Wikipedia""" - # pylint: disable=import-outside-toplevel + """Fetch languages 
from Wikipedia. - engine_traits.data_type = 'supported_languages' # deprecated + The location of the Wikipedia address of a language is mapped in a + :py:obj:`custom field <searx.enginelib.traits.EngineTraits.custom>` + (``wiki_netloc``). Here is a reduced example: - import babel - from searx.locales import language_tag + .. code:: python - resp = network.get('https://meta.wikimedia.org/wiki/List_of_Wikipedias') + traits.custom['wiki_netloc'] = { + "en": "en.wikipedia.org", + .. + "gsw": "als.wikipedia.org", + .. + "zh": "zh.wikipedia.org", + "zh-classical": "zh-classical.wikipedia.org" + } + + """ + + engine_traits.custom['wiki_netloc'] = {} + + # insert alias to map from a region like zh-CN to a language zh_Hans + engine_traits.languages['zh_Hans'] = 'zh' + + resp = network.get(wikipedia_article_depth) if not resp.ok: print("ERROR: response from Wikipedia is not OK.") @@ -242,34 +187,31 @@ def fetch_traits(engine_traits: EngineTraits): cols = row.xpath('./td') if not cols: continue - cols = [c.text_content().strip() for c in cols] - articles = int(cols[4].replace(',', '').replace('-', '0')) - users = int(cols[8].replace(',', '').replace('-', '0')) - depth = cols[11].strip('-') - if articles < 1000: + depth = float(cols[3].replace('-', '0').replace(',', '')) + articles = int(cols[4].replace(',', '').replace(',', '')) + + if articles < 10000: # exclude languages with too few articles continue - # depth: rough indicator of a Wikipedia’s quality, showing how - # frequently its articles are updated. - if depth == '': - if users < 1000: - # depth is not calculated --> at least 1000 user should registered - continue - elif int(depth) < 20: + if int(depth) < 20: + # Rough indicator of a Wikipedia’s quality, showing how frequently + # its articles are updated.
continue - eng_tag = cols[3] + eng_tag = cols[2] + wiki_url = row.xpath('./td[3]/a/@href')[0] + wiki_url = urllib.parse.urlparse(wiki_url) if eng_tag in unknown_langs: continue try: - sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag))) + sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag), sep='-')) except babel.UnknownLocaleError: - print("ERROR: %s -> %s is unknown by babel" % (cols[1], eng_tag)) + print("ERROR: %s [%s] is unknown by babel" % (cols[0], eng_tag)) continue conflict = engine_traits.languages.get(sxng_tag) @@ -277,6 +219,6 @@ def fetch_traits(engine_traits: EngineTraits): if conflict != eng_tag: print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) continue - engine_traits.languages[sxng_tag] = eng_tag - engine_traits.languages['zh_Hans'] = 'zh' + engine_traits.languages[sxng_tag] = eng_tag + engine_traits.custom['wiki_netloc'][eng_tag] = wiki_url.netloc