mirror of
https://github.com/searxng/searxng.git
synced 2024-12-27 11:40:29 +00:00
Add tests for _fetch_supported_languages in engines
and refactor the method so it can be tested without making requests
This commit is contained in:
parent
e0c270bd72
commit
af35eee10b
27 changed files with 387 additions and 3388 deletions
File diff suppressed because one or more lines are too long
Binary file not shown.
|
@ -21,6 +21,7 @@ import sys
|
|||
from flask_babel import gettext
|
||||
from operator import itemgetter
|
||||
from json import loads
|
||||
from requests import get
|
||||
from searx import settings
|
||||
from searx import logger
|
||||
from searx.utils import load_module
|
||||
|
@ -79,9 +80,6 @@ def load_engine(engine_data):
|
|||
if not hasattr(engine, arg_name):
|
||||
setattr(engine, arg_name, arg_value)
|
||||
|
||||
if engine_data['name'] in languages:
|
||||
setattr(engine, 'supported_languages', languages[engine_data['name']])
|
||||
|
||||
# checking required variables
|
||||
for engine_attr in dir(engine):
|
||||
if engine_attr.startswith('_'):
|
||||
|
@ -91,6 +89,15 @@ def load_engine(engine_data):
|
|||
.format(engine.name, engine_attr))
|
||||
sys.exit(1)
|
||||
|
||||
# assign supported languages from json file
|
||||
if engine_data['name'] in languages:
|
||||
setattr(engine, 'supported_languages', languages[engine_data['name']])
|
||||
|
||||
# assign language fetching method if auxiliary method exists
|
||||
if hasattr(engine, '_fetch_supported_languages'):
|
||||
setattr(engine, 'fetch_supported_languages',
|
||||
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
|
||||
|
||||
engine.stats = {
|
||||
'result_count': 0,
|
||||
'search_count': 0,
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
|
||||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from requests import get
|
||||
from searx.engines.xpath import extract_text
|
||||
|
||||
# engine dependent config
|
||||
|
@ -86,10 +85,9 @@ def response(resp):
|
|||
|
||||
|
||||
# get supported languages from their site
|
||||
def fetch_supported_languages():
|
||||
def _fetch_supported_languages(resp):
|
||||
supported_languages = []
|
||||
response = get(supported_languages_url)
|
||||
dom = html.fromstring(response.text)
|
||||
dom = html.fromstring(resp.text)
|
||||
options = dom.xpath('//div[@id="limit-languages"]//input')
|
||||
for option in options:
|
||||
code = option.xpath('./@id')[0].replace('_', '-')
|
||||
|
|
|
@ -19,7 +19,7 @@ from urllib import urlencode
|
|||
from lxml import html
|
||||
from json import loads
|
||||
import re
|
||||
from searx.engines.bing import fetch_supported_languages
|
||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
|
||||
|
||||
# engine dependent config
|
||||
categories = ['images']
|
||||
|
|
|
@ -17,7 +17,7 @@ from datetime import datetime
|
|||
from dateutil import parser
|
||||
from lxml import etree
|
||||
from searx.utils import list_get
|
||||
from searx.engines.bing import fetch_supported_languages
|
||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
|
||||
|
||||
# engine dependent config
|
||||
categories = ['news']
|
||||
|
|
|
@ -80,11 +80,10 @@ def response(resp):
|
|||
|
||||
|
||||
# get supported languages from their site
|
||||
def fetch_supported_languages():
|
||||
def _fetch_supported_languages(resp):
|
||||
supported_languages = {}
|
||||
|
||||
response = get(supported_languages_url)
|
||||
response_json = loads(response.text)
|
||||
response_json = loads(resp.text)
|
||||
|
||||
for language in response_json['list']:
|
||||
supported_languages[language['code']] = {}
|
||||
|
|
|
@ -119,11 +119,10 @@ def response(resp):
|
|||
|
||||
|
||||
# get supported languages from their site
|
||||
def fetch_supported_languages():
|
||||
response = get(supported_languages_url)
|
||||
def _fetch_supported_languages(resp):
|
||||
|
||||
# response is a js file with regions as an embedded object
|
||||
response_page = response.text
|
||||
response_page = resp.text
|
||||
response_page = response_page[response_page.find('regions:{') + 8:]
|
||||
response_page = response_page[:response_page.find('}') + 1]
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ from re import compile, sub
|
|||
from lxml import html
|
||||
from searx.utils import html_to_text
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.engines.duckduckgo import fetch_supported_languages
|
||||
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
|
||||
|
||||
url = 'https://api.duckduckgo.com/'\
|
||||
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
||||
|
|
|
@ -14,7 +14,6 @@ from json import loads
|
|||
from random import randint
|
||||
from time import time
|
||||
from urllib import urlencode
|
||||
from requests import get
|
||||
from lxml.html import fromstring
|
||||
|
||||
# engine dependent config
|
||||
|
@ -91,10 +90,9 @@ def response(resp):
|
|||
|
||||
|
||||
# get supported languages from their site
|
||||
def fetch_supported_languages():
|
||||
def _fetch_supported_languages(resp):
|
||||
supported_languages = []
|
||||
response = get(supported_languages_url)
|
||||
dom = fromstring(response.text)
|
||||
dom = fromstring(resp.text)
|
||||
links = dom.xpath('//span[@id="menu2"]/a')
|
||||
for link in links:
|
||||
code = link.xpath('./@href')[0][-2:]
|
||||
|
|
|
@ -12,7 +12,6 @@ import re
|
|||
from urllib import urlencode
|
||||
from urlparse import urlparse, parse_qsl
|
||||
from lxml import html, etree
|
||||
from requests import get
|
||||
from searx.engines.xpath import extract_text, extract_url
|
||||
from searx.search import logger
|
||||
|
||||
|
@ -364,14 +363,13 @@ def attributes_to_html(attributes):
|
|||
|
||||
|
||||
# get supported languages from their site
|
||||
def fetch_supported_languages():
|
||||
def _fetch_supported_languages(resp):
|
||||
supported_languages = {}
|
||||
response = get(supported_languages_url)
|
||||
dom = html.fromstring(response.text)
|
||||
options = dom.xpath('//select[@name="hl"]/option')
|
||||
dom = html.fromstring(resp.text)
|
||||
options = dom.xpath('//table//td/font/label/span')
|
||||
for option in options:
|
||||
code = option.xpath('./@value')[0].split('-')[0]
|
||||
name = option.text[:-1].title()
|
||||
code = option.xpath('./@id')[0][1:]
|
||||
name = option.text.title()
|
||||
supported_languages[code] = {"name": name}
|
||||
|
||||
return supported_languages
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
from lxml import html
|
||||
from urllib import urlencode
|
||||
from json import loads
|
||||
from searx.engines.google import fetch_supported_languages
|
||||
from searx.engines.google import _fetch_supported_languages, supported_languages_url
|
||||
|
||||
# search-url
|
||||
categories = ['news']
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
from json import loads
|
||||
from urllib import urlencode, unquote
|
||||
import re
|
||||
from requests import get
|
||||
from lxml.html import fromstring
|
||||
|
||||
# engine dependent config
|
||||
|
@ -25,6 +24,8 @@ language_support = True
|
|||
base_url = 'https://swisscows.ch/'
|
||||
search_string = '?{query}&page={page}'
|
||||
|
||||
supported_languages_url = base_url
|
||||
|
||||
# regex
|
||||
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
|
||||
regex_json_remove_start = re.compile(r'^initialData:\s*')
|
||||
|
@ -113,10 +114,9 @@ def response(resp):
|
|||
|
||||
|
||||
# get supported languages from their site
|
||||
def fetch_supported_languages():
|
||||
def _fetch_supported_languages(resp):
|
||||
supported_languages = []
|
||||
response = get(base_url)
|
||||
dom = fromstring(response.text)
|
||||
dom = fromstring(resp.text)
|
||||
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
|
||||
for option in options:
|
||||
code = option.xpath('./@data-val')[0]
|
||||
|
|
|
@ -15,7 +15,7 @@ from searx import logger
|
|||
from searx.poolrequests import get
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.utils import format_date_by_locale
|
||||
from searx.engines.wikipedia import fetch_supported_languages
|
||||
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
|
||||
|
||||
from json import loads
|
||||
from lxml.html import fromstring
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
|
||||
from json import loads
|
||||
from urllib import urlencode, quote
|
||||
from requests import get
|
||||
from lxml.html import fromstring
|
||||
|
||||
|
||||
|
@ -119,10 +118,9 @@ def response(resp):
|
|||
|
||||
|
||||
# get supported languages from their site
|
||||
def fetch_supported_languages():
|
||||
def _fetch_supported_languages(resp):
|
||||
supported_languages = {}
|
||||
response = get(supported_languages_url)
|
||||
dom = fromstring(response.text)
|
||||
dom = fromstring(resp.text)
|
||||
tables = dom.xpath('//table[contains(@class,"sortable")]')
|
||||
for table in tables:
|
||||
# exclude header row
|
||||
|
|
|
@ -14,7 +14,6 @@
|
|||
from urllib import urlencode
|
||||
from urlparse import unquote
|
||||
from lxml import html
|
||||
from requests import get
|
||||
from searx.engines.xpath import extract_text, extract_url
|
||||
|
||||
# engine dependent config
|
||||
|
@ -144,13 +143,12 @@ def response(resp):
|
|||
|
||||
|
||||
# get supported languages from their site
|
||||
def fetch_supported_languages():
|
||||
def _fetch_supported_languages(resp):
|
||||
supported_languages = []
|
||||
response = get(supported_languages_url)
|
||||
dom = html.fromstring(response.text)
|
||||
dom = html.fromstring(resp.text)
|
||||
options = dom.xpath('//div[@id="yschlang"]/span/label/input')
|
||||
for option in options:
|
||||
code = option.xpath('./@value')[0][5:]
|
||||
code = option.xpath('./@value')[0][5:].replace('_', '-')
|
||||
supported_languages.append(code)
|
||||
|
||||
return supported_languages
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
from urllib import urlencode
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text, extract_url
|
||||
from searx.engines.yahoo import parse_url, fetch_supported_languages
|
||||
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
from dateutil import parser
|
||||
|
|
|
@ -3,36 +3,27 @@
|
|||
# this file is generated automatically by utils/update_search_languages.py
|
||||
|
||||
language_codes = (
|
||||
(u"ach", u"Acoli", u"", u""),
|
||||
(u"af", u"Afrikaans", u"", u""),
|
||||
(u"ak", u"Akan", u"", u""),
|
||||
(u"am", u"አማርኛ", u"", u""),
|
||||
(u"am", u"አማርኛ", u"", u"Amharic"),
|
||||
(u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
|
||||
(u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
|
||||
(u"ban", u"Balinese", u"", u""),
|
||||
(u"be", u"Беларуская", u"", u"Belarusian"),
|
||||
(u"bem", u"Ichibemba", u"", u""),
|
||||
(u"bg-BG", u"Български", u"България", u"Bulgarian"),
|
||||
(u"bn", u"বাংলা", u"", u""),
|
||||
(u"br", u"Brezhoneg", u"", u""),
|
||||
(u"bs", u"Bosanski", u"", u""),
|
||||
(u"bn", u"বাংলা", u"", u"Bengali"),
|
||||
(u"br", u"Brezhoneg", u"", u"Breton"),
|
||||
(u"bs", u"Bosnian", u"", u"Bosnian"),
|
||||
(u"ca", u"Català", u"", u"Catalan"),
|
||||
(u"ca-CT", u"Català", u"", u"Catalan"),
|
||||
(u"ca-ES", u"Català", u"Espanya", u"Catalan"),
|
||||
(u"ce", u"Нохчийн", u"", u"Chechen"),
|
||||
(u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
|
||||
(u"chr", u"ᏣᎳᎩ", u"", u""),
|
||||
(u"ckb", u"Central Kurdish", u"", u""),
|
||||
(u"co", u"Corsican", u"", u""),
|
||||
(u"crs", u"Seychellois Creole", u"", u""),
|
||||
(u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
|
||||
(u"cy", u"Cymraeg", u"", u""),
|
||||
(u"cy", u"Cymraeg", u"", u"Welsh"),
|
||||
(u"da-DK", u"Dansk", u"Danmark", u"Danish"),
|
||||
(u"de", u"Deutsch", u"", u"German"),
|
||||
(u"de-AT", u"Deutsch", u"Österreich", u"German"),
|
||||
(u"de-CH", u"Deutsch", u"Schweiz", u"German"),
|
||||
(u"de-DE", u"Deutsch", u"Deutschland", u"German"),
|
||||
(u"ee", u"Eʋegbe", u"", u""),
|
||||
(u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
|
||||
(u"en", u"English", u"", u"English"),
|
||||
(u"en-AU", u"English", u"Australia", u"English"),
|
||||
|
@ -60,30 +51,20 @@ language_codes = (
|
|||
(u"eu", u"Euskara", u"", u"Basque"),
|
||||
(u"fa", u"فارسی", u"", u"Persian"),
|
||||
(u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
|
||||
(u"fo", u"Føroyskt", u"", u""),
|
||||
(u"fr", u"Français", u"", u"French"),
|
||||
(u"fr-BE", u"Français", u"Belgique", u"French"),
|
||||
(u"fr-CA", u"Français", u"Canada", u"French"),
|
||||
(u"fr-CH", u"Français", u"Suisse", u"French"),
|
||||
(u"fr-FR", u"Français", u"France", u"French"),
|
||||
(u"fy", u"West-Frysk", u"", u""),
|
||||
(u"ga", u"Gaeilge", u"", u""),
|
||||
(u"gaa", u"Ga", u"", u""),
|
||||
(u"gd", u"Gàidhlig", u"", u""),
|
||||
(u"ga", u"Gaeilge", u"", u"Irish"),
|
||||
(u"gl", u"Galego", u"", u"Galician"),
|
||||
(u"gn", u"Guarani", u"", u""),
|
||||
(u"gu", u"ગુજરાતી", u"", u""),
|
||||
(u"ha", u"Hausa", u"", u""),
|
||||
(u"haw", u"ʻŌlelo HawaiʻI", u"", u""),
|
||||
(u"gu", u"ગુજરાતી", u"", u"Gujarati"),
|
||||
(u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
|
||||
(u"hi", u"हिन्दी", u"", u"Hindi"),
|
||||
(u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
|
||||
(u"ht", u"Haitian Creole", u"", u""),
|
||||
(u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
|
||||
(u"hy", u"Հայերեն", u"", u"Armenian"),
|
||||
(u"ia", u"Interlingua", u"", u""),
|
||||
(u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
|
||||
(u"ig", u"Igbo", u"", u""),
|
||||
(u"is", u"Íslenska", u"", u""),
|
||||
(u"it", u"Italiano", u"", u"Italian"),
|
||||
(u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
|
||||
|
@ -91,86 +72,48 @@ language_codes = (
|
|||
(u"iw", u"עברית", u"", u""),
|
||||
(u"ja-JP", u"日本語", u"日本", u"Japanese"),
|
||||
(u"ka", u"ქართული", u"", u"Georgian"),
|
||||
(u"kg", u"Kongo", u"", u""),
|
||||
(u"kk", u"Қазақша", u"", u"Kazakh"),
|
||||
(u"km", u"ខ្មែរ", u"", u""),
|
||||
(u"kn", u"ಕನ್ನಡ", u"", u""),
|
||||
(u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
|
||||
(u"ko-KR", u"한국어", u"대한민국", u"Korean"),
|
||||
(u"kri", u"Krio", u"", u""),
|
||||
(u"ky", u"Кыргызча", u"", u""),
|
||||
(u"la", u"Latina", u"", u"Latin"),
|
||||
(u"lg", u"Luganda", u"", u""),
|
||||
(u"ln", u"Lingála", u"", u""),
|
||||
(u"lo", u"ລາວ", u"", u""),
|
||||
(u"loz", u"Lozi", u"", u""),
|
||||
(u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
|
||||
(u"lua", u"Luba-Lulua", u"", u""),
|
||||
(u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
|
||||
(u"mfe", u"Kreol Morisien", u"", u""),
|
||||
(u"mg", u"Malagasy", u"", u""),
|
||||
(u"mi", u"Maori", u"", u""),
|
||||
(u"mi", u"Reo Māori", u"", u"Maori"),
|
||||
(u"min", u"Minangkabau", u"", u"Minangkabau"),
|
||||
(u"mk", u"Македонски", u"", u""),
|
||||
(u"ml", u"മലയാളം", u"", u""),
|
||||
(u"mn", u"Монгол", u"", u""),
|
||||
(u"mr", u"मराठी", u"", u""),
|
||||
(u"mk", u"Македонски", u"", u"Macedonian"),
|
||||
(u"mn", u"Монгол", u"", u"Mongolian"),
|
||||
(u"mr", u"मराठी", u"", u"Marathi"),
|
||||
(u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
|
||||
(u"mt", u"Malti", u"", u""),
|
||||
(u"my", u"ဗမာ", u"", u""),
|
||||
(u"mt", u"Malti", u"", u"Maltese"),
|
||||
(u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
|
||||
(u"ne", u"नेपाली", u"", u""),
|
||||
(u"nl", u"Nederlands", u"", u"Dutch"),
|
||||
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
|
||||
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
|
||||
(u"nn", u"Nynorsk", u"", u"Norwegian"),
|
||||
(u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
|
||||
(u"nso", u"Northern Sotho", u"", u""),
|
||||
(u"ny", u"Nyanja", u"", u""),
|
||||
(u"nyn", u"Runyankore", u"", u""),
|
||||
(u"oc", u"Occitan", u"", u""),
|
||||
(u"om", u"Oromoo", u"", u""),
|
||||
(u"or", u"ଓଡ଼ିଆ", u"", u""),
|
||||
(u"pa", u"ਪੰਜਾਬੀ", u"", u""),
|
||||
(u"pcm", u"Nigerian Pidgin", u"", u""),
|
||||
(u"oc", u"Occitan", u"", u"Occitan"),
|
||||
(u"or", u"Oriya", u"", u"Oriya"),
|
||||
(u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
|
||||
(u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
|
||||
(u"ps", u"پښتو", u"", u""),
|
||||
(u"ps", u"Pushto", u"", u"Pushto"),
|
||||
(u"pt", u"Português", u"", u"Portuguese"),
|
||||
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
|
||||
(u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
|
||||
(u"qu", u"Runasimi", u"", u""),
|
||||
(u"rm", u"Rumantsch", u"", u""),
|
||||
(u"rn", u"Ikirundi", u"", u""),
|
||||
(u"ro-RO", u"Română", u"România", u"Romanian"),
|
||||
(u"ru-RU", u"Русский", u"Россия", u"Russian"),
|
||||
(u"rw", u"Kinyarwanda", u"", u""),
|
||||
(u"sd", u"Sindhi", u"", u""),
|
||||
(u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
|
||||
(u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
|
||||
(u"si", u"සිංහල", u"", u""),
|
||||
(u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
|
||||
(u"sl", u"Slovenščina", u"", u"Slovenian"),
|
||||
(u"sn", u"Chishona", u"", u""),
|
||||
(u"so", u"Soomaali", u"", u""),
|
||||
(u"sq", u"Shqip", u"", u""),
|
||||
(u"sr", u"Српски / Srpski", u"", u"Serbian"),
|
||||
(u"st", u"Southern Sotho", u"", u""),
|
||||
(u"su", u"Sundanese", u"", u""),
|
||||
(u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
|
||||
(u"sw", u"Kiswahili", u"", u""),
|
||||
(u"ta", u"தமிழ்", u"", u""),
|
||||
(u"te", u"తెలుగు", u"", u""),
|
||||
(u"tg", u"Tajik", u"", u""),
|
||||
(u"ta", u"தமிழ்", u"", u"Tamil"),
|
||||
(u"th-TH", u"ไทย", u"ไทย", u"Thai"),
|
||||
(u"ti", u"ትግርኛ", u"", u""),
|
||||
(u"tk", u"Turkmen", u"", u""),
|
||||
(u"ti", u"ትግርኛ", u"", u"Tigrinya"),
|
||||
(u"tl-PH", u"Filipino", u"Pilipinas", u""),
|
||||
(u"tlh", u"Klingon", u"", u""),
|
||||
(u"tn", u"Tswana", u"", u""),
|
||||
(u"to", u"Lea Fakatonga", u"", u""),
|
||||
(u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
|
||||
(u"tt", u"Tatar", u"", u""),
|
||||
(u"tum", u"Tumbuka", u"", u""),
|
||||
(u"tw", u"Twi", u"", u""),
|
||||
(u"ug", u"ئۇيغۇرچە", u"", u""),
|
||||
(u"tt", u"Татарча", u"", u"Tatar"),
|
||||
(u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
|
||||
(u"ur", u"اردو", u"", u"Urdu"),
|
||||
(u"uz", u"O‘zbek", u"", u"Uzbek"),
|
||||
|
@ -179,13 +122,10 @@ language_codes = (
|
|||
(u"vo", u"Volapük", u"", u"Volapük"),
|
||||
(u"wa", u"Walon", u"", u"Walloon"),
|
||||
(u"war", u"Winaray", u"", u"Waray-Waray"),
|
||||
(u"wo", u"Wolof", u"", u""),
|
||||
(u"xh", u"Xhosa", u"", u""),
|
||||
(u"yi", u"ייִדיש", u"", u""),
|
||||
(u"yo", u"Èdè Yorùbá", u"", u""),
|
||||
(u"xh", u"Xhosa", u"", u"Xhosa"),
|
||||
(u"zh", u"中文", u"", u"Chinese"),
|
||||
(u"zh-CN", u"中文", u"中国", u"Chinese"),
|
||||
(u"zh-CN", u"中文", u"中国", u""),
|
||||
(u"zh-HK", u"中文", u"香港", u"Chinese"),
|
||||
(u"zh-TW", u"中文", u"台湾", u"Chinese"),
|
||||
(u"zu", u"Isizulu", u"", u"")
|
||||
(u"zh-TW", u"中文", u"台湾", u""),
|
||||
(u"zu", u"Isi-Zulu", u"", u"Zulu")
|
||||
)
|
||||
|
|
|
@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
|
|||
self.assertEqual(results[0]['title'], 'This should be the title')
|
||||
self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
|
||||
self.assertEqual(results[0]['content'], 'This should be the content.')
|
||||
|
||||
def test_fetch_supported_languages(self):
    """Check the language list bing parses out of a mocked response."""
    # a page without the language options must yield an empty list
    empty_page = mock.Mock(text="""<html></html>""")
    parsed = bing._fetch_supported_languages(empty_page)
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 0)

    # option ids written with '_' are expected back with '-'
    markup = """
    <html>
        <body>
            <form>
                <div id="limit-languages">
                    <div>
                        <div><input id="es" value="es"></input></div>
                    </div>
                    <div>
                        <div><input id="pt_BR" value="pt_BR"></input></div>
                        <div><input id="pt_PT" value="pt_PT"></input></div>
                    </div>
                </div>
            </form>
        </body>
    </html>
    """
    parsed = bing._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 3)
    for code in ('es', 'pt-BR', 'pt-PT'):
        self.assertIn(code, parsed)
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from collections import defaultdict
|
||||
import mock
|
||||
from searx.engines import dailymotion
|
||||
|
@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
|
|||
results = dailymotion.response(response)
|
||||
self.assertEqual(type(results), list)
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
def test_fetch_supported_languages(self):
    """Check the language dict dailymotion builds from a mocked JSON reply."""
    # raw string: the \uXXXX escapes must reach the JSON parser untouched
    payload = r"""
    {"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
              "localized_name":"Afrikaans","display_name":"Afrikaans"},
             {"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
              "localized_name":"Arabic","display_name":"Arabic"},
             {"code":"la","name":"Latin","native_name":null,
              "localized_name":"Latin","display_name":"Latin"}
    ]}
    """
    parsed = dailymotion._fetch_supported_languages(mock.Mock(text=payload))
    codes = ('af', 'ar', 'la')

    self.assertEqual(type(parsed), dict)
    self.assertEqual(len(parsed), 3)
    for code in codes:
        self.assertIn(code, parsed)

    for code in codes:
        self.assertEqual(type(parsed[code]), dict)

    # 'la' has a null native_name, so no 'name' entry is expected for it
    self.assertIn('name', parsed['af'])
    self.assertIn('name', parsed['ar'])
    self.assertNotIn('name', parsed['la'])

    for code in codes:
        self.assertIn('english_name', parsed[code])

    self.assertEqual(parsed['af']['name'], 'Afrikaans')
    self.assertEqual(parsed['af']['english_name'], 'Afrikaans')
    self.assertEqual(parsed['ar']['name'], u'العربية')
    self.assertEqual(parsed['ar']['english_name'], 'Arabic')
    self.assertEqual(parsed['la']['english_name'], 'Latin')
|
||||
|
|
|
@ -84,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
|
|||
self.assertEqual(results[0]['title'], 'This is the title')
|
||||
self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
|
||||
self.assertEqual(results[0]['content'], 'This should be the content.')
|
||||
|
||||
def test_fetch_supported_languages(self):
    """Check the locale list duckduckgo extracts from a mocked js file."""
    # region keys look like 'ar-es'; the expected codes below have the two
    # parts swapped and the second part upper-cased ('es-AR')
    script = """some code...regions:{
    "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
    }some more code..."""
    parsed = duckduckgo._fetch_supported_languages(mock.Mock(text=script))
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 5)
    for code in ('wt-WT', 'es-AR', 'en-AU', 'de-AT', 'fr-BE'):
        self.assertIn(code, parsed)
|
||||
|
|
|
@ -89,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
|
|||
self.assertEqual(results[0]['title'], 'South by Southwest 2016')
|
||||
self.assertEqual(results[0]['url'], 'www.sxsw.com')
|
||||
self.assertEqual(results[0]['content'], 'This should be the content.')
|
||||
|
||||
def test_fetch_supported_languages(self):
    """Check the language list gigablast parses out of a mocked response."""
    # a page without the language menu must yield an empty list
    empty_page = mock.Mock(text="""<html></html>""")
    parsed = gigablast._fetch_supported_languages(empty_page)
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 0)

    # three links are offered, but only 'en' and 'fr' are expected back
    markup = """
    <html>
        <body>
            <span id="menu2">
                <a href="/search?&rxikd=1&qlang=xx"></a>
                <a href="/search?&rxikd=1&qlang=en"></a>
                <a href="/search?&rxikd=1&qlang=fr"></a>
            </span>
        </body>
    </html>
    """
    parsed = gigablast._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 2)
    for code in ('en', 'fr'):
        self.assertIn(code, parsed)
|
||||
|
|
|
@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
|
|||
self.assertEqual(results[0]['title'], '')
|
||||
self.assertEqual(results[0]['content'], '')
|
||||
self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
|
||||
|
||||
def test_fetch_supported_languages(self):
    """Check the language dict google builds from a mocked response.

    Uses assertEqual throughout; the assertEquals alias is deprecated.
    """
    # a page without any language labels must yield an empty dict
    html = """<html></html>"""
    response = mock.Mock(text=html)
    languages = google._fetch_supported_languages(response)
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 0)

    # each span id is the language code behind a one-character prefix
    # ('ten' -> 'en'); the span text is the expected language name
    html = u"""
    <html>
        <body>
            <table>
                <tbody>
                    <tr>
                        <td>
                            <font>
                                <label>
                                    <span id="ten">English</span>
                                </label>
                            </font>
                        </td>
                        <td>
                            <font>
                                <label>
                                    <span id="tzh-CN">中文 (简体)</span>
                                </label>
                                <label>
                                    <span id="tzh-TW">中文 (繁體)</span>
                                </label>
                            </font>
                        </td>
                    </tr>
                </tbody>
            </table>
        </body>
    </html>
    """
    response = mock.Mock(text=html)
    languages = google._fetch_supported_languages(response)
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 3)

    self.assertIn('en', languages)
    self.assertIn('zh-CN', languages)
    self.assertIn('zh-TW', languages)

    self.assertEqual(type(languages['en']), dict)
    self.assertEqual(type(languages['zh-CN']), dict)
    self.assertEqual(type(languages['zh-TW']), dict)

    self.assertIn('name', languages['en'])
    self.assertIn('name', languages['zh-CN'])
    self.assertIn('name', languages['zh-TW'])

    self.assertEqual(languages['en']['name'], 'English')
    self.assertEqual(languages['zh-CN']['name'], u'中文 (简体)')
    self.assertEqual(languages['zh-TW']['name'], u'中文 (繁體)')
|
||||
|
|
|
@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
|
|||
self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
|
||||
self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
|
||||
self.assertEqual(results[2]['template'], 'images.html')
|
||||
|
||||
def test_fetch_supported_languages(self):
    """Check the region list swisscows parses out of a mocked response."""
    # a page without the regions popup must yield an empty list
    empty_page = mock.Mock(text="""<html></html>""")
    parsed = swisscows._fetch_supported_languages(empty_page)
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 0)

    # all three data-val entries are counted, 'browser' included
    markup = """
    <html>
        <div id="regions-popup">
            <div>
                <ul>
                    <li><a data-val="browser"></a></li>
                    <li><a data-val="de-CH"></a></li>
                    <li><a data-val="fr-CH"></a></li>
                </ul>
            </div>
        </div>
    </html>
    """
    parsed = swisscows._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 3)
    for code in ('de-CH', 'fr-CH'):
        self.assertIn(code, parsed)
|
||||
|
|
|
@ -164,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
|
|||
self.assertEqual(len(results), 2)
|
||||
self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
|
||||
self.assertIn(u'披头士乐队...', results[1]['content'])
|
||||
|
||||
def test_fetch_supported_languages(self):
    """Check the language dict wikipedia builds from mocked sortable tables."""
    # a page without any table must yield an empty dict
    empty_page = mock.Mock(text=u"""<html></html>""")
    parsed = wikipedia._fetch_supported_languages(empty_page)
    self.assertEqual(type(parsed), dict)
    self.assertEqual(len(parsed), 0)

    # two sortable tables; rows from both are expected in the result
    markup = u"""
    <html>
        <body>
            <div>
                <div>
                    <h3>Table header</h3>
                    <table class="sortable jquery-tablesorter">
                        <thead>
                            <tr>
                                <th>N</th>
                                <th>Language</th>
                                <th>Language (local)</th>
                                <th>Wiki</th>
                                <th>Articles</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td>2</td>
                                <td><a>Swedish</a></td>
                                <td><a>Svenska</a></td>
                                <td><a>sv</a></td>
                                <td><a><b>3000000</b></a></td>
                            </tr>
                            <tr>
                                <td>3</td>
                                <td><a>Cebuano</a></td>
                                <td><a>Sinugboanong Binisaya</a></td>
                                <td><a>ceb</a></td>
                                <td><a><b>3000000</b></a></td>
                            </tr>
                        </tbody>
                    </table>
                    <h3>Table header</h3>
                    <table class="sortable jquery-tablesorter">
                        <thead>
                            <tr>
                                <th>N</th>
                                <th>Language</th>
                                <th>Language (local)</th>
                                <th>Wiki</th>
                                <th>Articles</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td>2</td>
                                <td><a>Norwegian (Bokmål)</a></td>
                                <td><a>Norsk (Bokmål)</a></td>
                                <td><a>no</a></td>
                                <td><a><b>100000</b></a></td>
                            </tr>
                        </tbody>
                    </table>
                </div>
            </div>
        </body>
    </html>
    """
    parsed = wikipedia._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(parsed), dict)
    self.assertEqual(len(parsed), 3)

    # (code, local name, english name, article count) per fixture row
    expected_rows = (
        ('sv', 'Svenska', 'Swedish', 3000000),
        ('ceb', 'Sinugboanong Binisaya', 'Cebuano', 3000000),
        ('no', u'Norsk (Bokmål)', u'Norwegian (Bokmål)', 100000),
    )

    for code, _, _, _ in expected_rows:
        self.assertIn(code, parsed)

    for code, _, _, _ in expected_rows:
        self.assertEqual(type(parsed[code]), dict)

    for key in ('name', 'english_name', 'articles'):
        self.assertIn(key, parsed['sv'])

    for code, local_name, english_name, article_count in expected_rows:
        self.assertEqual(parsed[code]['name'], local_name)
        self.assertEqual(parsed[code]['english_name'], english_name)
        self.assertEqual(parsed[code]['articles'], article_count)
|
||||
|
|
|
@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
|
|||
results = yahoo.response(response)
|
||||
self.assertEqual(type(results), list)
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
def test_fetch_supported_languages(self):
    """Check the language list yahoo parses out of a mocked response."""
    # a page without the language inputs must yield an empty list
    empty_page = mock.Mock(text="""<html></html>""")
    parsed = yahoo._fetch_supported_languages(empty_page)
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 0)

    # values carry a 'lang_' prefix and '_' separators; the expected codes
    # have the prefix removed and '-' in place of '_'
    markup = """
    <html>
        <div>
            <div id="yschlang">
                <span>
                    <label><input value="lang_ar"></input></label>
                </span>
                <span>
                    <label><input value="lang_zh_chs"></input></label>
                    <label><input value="lang_zh_cht"></input></label>
                </span>
            </div>
        </div>
    </html>
    """
    parsed = yahoo._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(parsed), list)
    self.assertEqual(len(parsed), 3)
    for code in ('ar', 'zh-chs', 'zh-cht'):
        self.assertIn(code, parsed)
|
||||
|
|
|
@ -84,7 +84,7 @@ def fetch_supported_languages():
|
|||
|
||||
# write json file
|
||||
f = io.open(engines_languages_file, "w", encoding="utf-8")
|
||||
f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8")))
|
||||
f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
|
||||
f.close()
|
||||
|
||||
|
||||
|
@ -110,18 +110,22 @@ def join_language_lists():
|
|||
else:
|
||||
languages[locale] = {}
|
||||
|
||||
# get locales that have no name yet
|
||||
# get locales that have no name or country yet
|
||||
for locale in languages.keys():
|
||||
if not languages[locale].get('name'):
|
||||
# try to get language and country names
|
||||
# try to get language names
|
||||
name = languages.get(locale.split('-')[0], {}).get('name', None)
|
||||
if name:
|
||||
languages[locale]['name'] = name
|
||||
languages[locale]['country'] = get_country_name(locale) or ''
|
||||
languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
|
||||
else:
|
||||
# filter out locales with no name
|
||||
del languages[locale]
|
||||
continue
|
||||
|
||||
# try to get country name
|
||||
if locale.find('-') > 0 and not languages[locale].get('country'):
|
||||
languages[locale]['country'] = get_country_name(locale) or ''
|
||||
|
||||
|
||||
# Remove countryless language if language is featured in only one country.
|
||||
|
|
Loading…
Reference in a new issue