mirror of
https://github.com/searxng/searxng.git
synced 2025-01-11 02:45:31 +00:00
commit
4cffd78650
115 changed files with 517 additions and 513 deletions
|
@ -9,6 +9,7 @@ addons:
|
||||||
language: python
|
language: python
|
||||||
python:
|
python:
|
||||||
- "2.7"
|
- "2.7"
|
||||||
|
- "3.6"
|
||||||
before_install:
|
before_install:
|
||||||
- "export DISPLAY=:99.0"
|
- "export DISPLAY=:99.0"
|
||||||
- "sh -e /etc/init.d/xvfb start"
|
- "sh -e /etc/init.d/xvfb start"
|
||||||
|
@ -24,9 +25,9 @@ script:
|
||||||
- ./manage.sh styles
|
- ./manage.sh styles
|
||||||
- ./manage.sh grunt_build
|
- ./manage.sh grunt_build
|
||||||
- ./manage.sh tests
|
- ./manage.sh tests
|
||||||
- ./manage.sh py_test_coverage
|
|
||||||
after_success:
|
after_success:
|
||||||
coveralls
|
- ./manage.sh py_test_coverage
|
||||||
|
- coveralls
|
||||||
notifications:
|
notifications:
|
||||||
irc:
|
irc:
|
||||||
channels:
|
channels:
|
||||||
|
|
|
@ -3,8 +3,7 @@ mock==2.0.0
|
||||||
nose2[coverage-plugin]
|
nose2[coverage-plugin]
|
||||||
pep8==1.7.0
|
pep8==1.7.0
|
||||||
plone.testing==5.0.0
|
plone.testing==5.0.0
|
||||||
robotframework-selenium2library==1.8.0
|
splinter==0.7.5
|
||||||
robotsuite==1.7.0
|
|
||||||
transifex-client==0.12.2
|
transifex-client==0.12.2
|
||||||
unittest2==1.1.0
|
unittest2==1.1.0
|
||||||
zope.testrunner==4.5.1
|
zope.testrunner==4.5.1
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
from os import listdir
|
from os import listdir
|
||||||
from os.path import realpath, dirname, join, isdir
|
from os.path import realpath, dirname, join, isdir
|
||||||
|
from sys import version_info
|
||||||
from searx.utils import load_module
|
from searx.utils import load_module
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
if version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
|
|
||||||
answerers_dir = dirname(realpath(__file__))
|
answerers_dir = dirname(realpath(__file__))
|
||||||
|
|
||||||
|
@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__))
|
||||||
def load_answerers():
|
def load_answerers():
|
||||||
answerers = []
|
answerers = []
|
||||||
for filename in listdir(answerers_dir):
|
for filename in listdir(answerers_dir):
|
||||||
if not isdir(join(answerers_dir, filename)):
|
if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
|
||||||
continue
|
continue
|
||||||
module = load_module('answerer.py', join(answerers_dir, filename))
|
module = load_module('answerer.py', join(answerers_dir, filename))
|
||||||
if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
|
if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
|
||||||
|
@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers):
|
||||||
|
|
||||||
def ask(query):
|
def ask(query):
|
||||||
results = []
|
results = []
|
||||||
query_parts = filter(None, query.query.split())
|
query_parts = list(filter(None, query.query.split()))
|
||||||
|
|
||||||
if query_parts[0] not in answerers_by_keywords:
|
if query_parts[0].decode('utf-8') not in answerers_by_keywords:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
for answerer in answerers_by_keywords[query_parts[0]]:
|
for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
|
||||||
result = answerer(query)
|
result = answerer(query)
|
||||||
if result:
|
if result:
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
|
import sys
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
|
|
||||||
# required answerer attribute
|
# required answerer attribute
|
||||||
|
@ -8,7 +9,11 @@ keywords = ('random',)
|
||||||
|
|
||||||
random_int_max = 2**31
|
random_int_max = 2**31
|
||||||
|
|
||||||
random_string_letters = string.lowercase + string.digits + string.uppercase
|
if sys.version_info[0] == 2:
|
||||||
|
random_string_letters = string.lowercase + string.digits + string.uppercase
|
||||||
|
else:
|
||||||
|
unicode = str
|
||||||
|
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
|
||||||
|
|
||||||
|
|
||||||
def random_string():
|
def random_string():
|
||||||
|
@ -24,9 +29,9 @@ def random_int():
|
||||||
return unicode(random.randint(-random_int_max, random_int_max))
|
return unicode(random.randint(-random_int_max, random_int_max))
|
||||||
|
|
||||||
|
|
||||||
random_types = {u'string': random_string,
|
random_types = {b'string': random_string,
|
||||||
u'int': random_int,
|
b'int': random_int,
|
||||||
u'float': random_float}
|
b'float': random_float}
|
||||||
|
|
||||||
|
|
||||||
# required answerer function
|
# required answerer function
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
|
from sys import version_info
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
from operator import mul
|
from operator import mul
|
||||||
|
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
|
|
||||||
|
if version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
keywords = ('min',
|
keywords = ('min',
|
||||||
'max',
|
'max',
|
||||||
'avg',
|
'avg',
|
||||||
|
@ -19,22 +23,22 @@ def answer(query):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
args = map(float, parts[1:])
|
args = list(map(float, parts[1:]))
|
||||||
except:
|
except:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
func = parts[0]
|
func = parts[0]
|
||||||
answer = None
|
answer = None
|
||||||
|
|
||||||
if func == 'min':
|
if func == b'min':
|
||||||
answer = min(args)
|
answer = min(args)
|
||||||
elif func == 'max':
|
elif func == b'max':
|
||||||
answer = max(args)
|
answer = max(args)
|
||||||
elif func == 'avg':
|
elif func == b'avg':
|
||||||
answer = sum(args) / len(args)
|
answer = sum(args) / len(args)
|
||||||
elif func == 'sum':
|
elif func == b'sum':
|
||||||
answer = sum(args)
|
answer = sum(args)
|
||||||
elif func == 'prod':
|
elif func == b'prod':
|
||||||
answer = reduce(mul, args, 1)
|
answer = reduce(mul, args, 1)
|
||||||
|
|
||||||
if answer is None:
|
if answer is None:
|
||||||
|
|
|
@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
|
||||||
from searx import settings
|
from searx import settings
|
||||||
from searx.languages import language_codes
|
from searx.languages import language_codes
|
||||||
from searx.engines import (
|
from searx.engines import (
|
||||||
|
@ -26,6 +25,11 @@ from searx.engines import (
|
||||||
)
|
)
|
||||||
from searx.poolrequests import get as http_get
|
from searx.poolrequests import get as http_get
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urllib import urlencode
|
||||||
|
except:
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
|
||||||
def get(*args, **kwargs):
|
def get(*args, **kwargs):
|
||||||
if 'timeout' not in kwargs:
|
if 'timeout' not in kwargs:
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
from urllib import quote
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
from searx.utils import get_torrent_size
|
from searx.utils import get_torrent_size
|
||||||
from urlparse import urljoin
|
from searx.url_utils import quote, urljoin
|
||||||
|
|
||||||
url = 'https://1337x.to/'
|
url = 'https://1337x.to/'
|
||||||
search_url = url + 'search/{search_term}/{pageno}/'
|
search_url = url + 'search/{search_term}/{pageno}/'
|
||||||
|
|
|
@ -72,12 +72,11 @@ def load_engine(engine_data):
|
||||||
if engine_data['categories'] == 'none':
|
if engine_data['categories'] == 'none':
|
||||||
engine.categories = []
|
engine.categories = []
|
||||||
else:
|
else:
|
||||||
engine.categories = map(
|
engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
|
||||||
str.strip, engine_data['categories'].split(','))
|
|
||||||
continue
|
continue
|
||||||
setattr(engine, param_name, engine_data[param_name])
|
setattr(engine, param_name, engine_data[param_name])
|
||||||
|
|
||||||
for arg_name, arg_value in engine_default_args.iteritems():
|
for arg_name, arg_value in engine_default_args.items():
|
||||||
if not hasattr(engine, arg_name):
|
if not hasattr(engine, arg_name):
|
||||||
setattr(engine, arg_name, arg_value)
|
setattr(engine, arg_name, arg_value)
|
||||||
|
|
||||||
|
|
|
@ -11,10 +11,9 @@
|
||||||
@parse url, title
|
@parse url, title
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode, urljoin
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['it']
|
categories = ['it']
|
||||||
|
|
|
@ -14,10 +14,10 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from urllib import urlencode
|
|
||||||
from searx.utils import searx_useragent
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import re
|
import re
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
from searx.utils import searx_useragent
|
||||||
|
|
||||||
|
|
||||||
categories = ['science']
|
categories = ['science']
|
||||||
|
@ -73,7 +73,7 @@ def request(query, params):
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
search_results = etree.XML(resp.content)
|
search_results = etree.XML(resp.text)
|
||||||
|
|
||||||
for entry in search_results.xpath('./result/doc'):
|
for entry in search_results.xpath('./result/doc'):
|
||||||
content = "No description available"
|
content = "No description available"
|
||||||
|
|
|
@ -13,9 +13,9 @@
|
||||||
@todo publishedDate
|
@todo publishedDate
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
|
|
|
@ -15,11 +15,11 @@
|
||||||
limited response to 10 images
|
limited response to 10 images
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from json import loads
|
from json import loads
|
||||||
import re
|
import re
|
||||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
|
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
|
|
|
@ -11,13 +11,12 @@
|
||||||
@parse url, title, content, publishedDate, thumbnail
|
@parse url, title, content, publishedDate, thumbnail
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urlparse, parse_qsl
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from searx.utils import list_get
|
from searx.utils import list_get
|
||||||
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
|
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
|
||||||
|
from searx.url_utils import urlencode, urlparse, parse_qsl
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['news']
|
categories = ['news']
|
||||||
|
@ -86,7 +85,7 @@ def request(query, params):
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
rss = etree.fromstring(resp.content)
|
rss = etree.fromstring(resp.text)
|
||||||
|
|
||||||
ns = rss.nsmap
|
ns = rss.nsmap
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
|
|
|
@ -10,11 +10,10 @@
|
||||||
@parse url, title, content, seed, leech, magnetlink
|
@parse url, title, content, seed, leech, magnetlink
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from urllib import quote
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import quote, urljoin
|
||||||
from searx.utils import get_torrent_size
|
from searx.utils import get_torrent_size
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
|
@ -38,7 +37,7 @@ def request(query, params):
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
dom = html.fromstring(resp.content)
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
search_res = dom.xpath('//div[@id="search_res"]/table/tr')
|
search_res = dom.xpath('//div[@id="search_res"]/table/tr')
|
||||||
|
|
||||||
|
|
|
@ -1,21 +1,25 @@
|
||||||
from datetime import datetime
|
import json
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import json
|
import sys
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
if sys.version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
categories = []
|
categories = []
|
||||||
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
|
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
|
||||||
weight = 100
|
weight = 100
|
||||||
|
|
||||||
parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa
|
parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
|
||||||
|
|
||||||
db = 1
|
db = 1
|
||||||
|
|
||||||
|
|
||||||
def normalize_name(name):
|
def normalize_name(name):
|
||||||
name = name.lower().replace('-', ' ').rstrip('s')
|
name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
|
||||||
name = re.sub(' +', ' ', name)
|
name = re.sub(' +', ' ', name)
|
||||||
return unicodedata.normalize('NFKD', name).lower()
|
return unicodedata.normalize('NFKD', name).lower()
|
||||||
|
|
||||||
|
@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
m = parser_re.match(unicode(query, 'utf8'))
|
m = parser_re.match(query)
|
||||||
if not m:
|
if not m:
|
||||||
# wrong query
|
# wrong query
|
||||||
return params
|
return params
|
||||||
|
|
|
@ -12,10 +12,9 @@
|
||||||
@todo set content-parameter with correct data
|
@todo set content-parameter with correct data
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from requests import get
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos']
|
categories = ['videos']
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['music']
|
categories = ['music']
|
||||||
|
@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true"
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
offset = (params['pageno'] - 1) * 25
|
offset = (params['pageno'] - 1) * 25
|
||||||
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
@ -12,10 +12,10 @@
|
||||||
@todo rewrite to api
|
@todo rewrite to api
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import re
|
import re
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
|
|
|
@ -10,20 +10,20 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from urlparse import urljoin
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.utils import is_valid_lang
|
from searx.utils import is_valid_lang
|
||||||
|
from searx.url_utils import urljoin
|
||||||
|
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
|
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
|
||||||
weight = 100
|
weight = 100
|
||||||
|
|
||||||
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
|
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
|
||||||
results_xpath = './/table[@id="r"]/tr'
|
results_xpath = './/table[@id="r"]/tr'
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
m = parser_re.match(unicode(query, 'utf8'))
|
m = parser_re.match(query)
|
||||||
if not m:
|
if not m:
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
@ -10,10 +10,14 @@
|
||||||
@parse url, title, content, magnetlink
|
@parse url, title, content, magnetlink
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urlparse import urljoin
|
from sys import version_info
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
from searx.utils import get_torrent_size
|
from searx.utils import get_torrent_size
|
||||||
|
from searx.url_utils import urljoin
|
||||||
|
|
||||||
|
if version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
categories = ['videos', 'music', 'files']
|
categories = ['videos', 'music', 'files']
|
||||||
paging = True
|
paging = True
|
||||||
|
@ -31,7 +35,7 @@ def request(query, params):
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
dom = html.fromstring(resp.content)
|
dom = html.fromstring(resp.text)
|
||||||
search_res = dom.xpath('.//td[@class="x-item"]')
|
search_res = dom.xpath('.//td[@class="x-item"]')
|
||||||
|
|
||||||
if not search_res:
|
if not search_res:
|
||||||
|
|
|
@ -10,10 +10,10 @@
|
||||||
@parse url, title, content, publishedDate, thumbnail
|
@parse url, title, content, publishedDate, thumbnail
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import quote_plus
|
from dateutil import parser
|
||||||
from json import loads
|
from json import loads
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from dateutil import parser
|
from searx.url_utils import quote_plus
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['news', 'social media']
|
categories = ['news', 'social media']
|
||||||
|
|
|
@ -9,9 +9,9 @@
|
||||||
# @stable yes
|
# @stable yes
|
||||||
# @parse (general) url, title, content
|
# @parse (general) url, title, content
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
|
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
|
||||||
|
|
|
@ -13,11 +13,11 @@
|
||||||
@todo rewrite to api
|
@todo rewrite to api
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
from requests import get
|
from requests import get
|
||||||
from json import loads
|
from json import loads
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
import json
|
import json
|
||||||
from urllib import urlencode
|
|
||||||
from re import compile, sub
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.utils import html_to_text
|
from re import compile
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
|
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
from searx.utils import html_to_text
|
||||||
|
|
||||||
url = 'https://api.duckduckgo.com/'\
|
url = 'https://api.duckduckgo.com/'\
|
||||||
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
||||||
|
|
|
@ -10,10 +10,10 @@
|
||||||
@parse url, title, content, publishedDate, img_src
|
@parse url, title, content, publishedDate, img_src
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
import datetime
|
import datetime
|
||||||
from searx.utils import searx_useragent
|
from searx.utils import searx_useragent
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general', 'news']
|
categories = ['general', 'news']
|
||||||
|
|
|
@ -9,9 +9,9 @@
|
||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['files']
|
categories = ['files']
|
||||||
|
@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}'
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
query = urlencode({'fdfilter': query,
|
query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
|
||||||
'fdpage': params['pageno']})
|
|
||||||
params['url'] = search_url.format(query=query)
|
params['url'] = search_url.format(query=query)
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
from urllib import urlencode
|
from searx.url_utils import urlencode
|
||||||
from HTMLParser import HTMLParser
|
|
||||||
|
try:
|
||||||
|
from HTMLParser import HTMLParser
|
||||||
|
except:
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
url = 'http://www.filecrop.com/'
|
url = 'http://www.filecrop.com/'
|
||||||
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
|
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
|
||||||
|
@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser):
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
index = 1 + (params['pageno'] - 1) * 30
|
index = 1 + (params['pageno'] - 1) * 30
|
||||||
params['url'] = search_url.format(query=urlencode({'w': query}),
|
params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
|
||||||
index=index)
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -13,8 +13,8 @@
|
||||||
More info on api-key : https://www.flickr.com/services/apps/create/
|
More info on api-key : https://www.flickr.com/services/apps/create/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
|
|
||||||
|
|
|
@ -12,11 +12,11 @@
|
||||||
@parse url, title, thumbnail, img_src
|
@parse url, title, thumbnail, img_src
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from time import time
|
from time import time
|
||||||
import re
|
import re
|
||||||
from searx.engines import logger
|
from searx.engines import logger
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('flickr-noapi')
|
logger = logger.getChild('flickr-noapi')
|
||||||
|
|
|
@ -10,12 +10,10 @@
|
||||||
@parse url, title, content, thumbnail, img_src
|
@parse url, title, content, thumbnail, img_src
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from cgi import escape
|
from cgi import escape
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
from dateutil import parser
|
from searx.url_utils import urljoin, urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['it']
|
categories = ['it']
|
||||||
|
|
|
@ -10,7 +10,7 @@ Frinkiac (Images)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
|
|
||||||
|
|
|
@ -11,10 +11,9 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from random import randint
|
|
||||||
from time import time
|
from time import time
|
||||||
from urllib import urlencode
|
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
|
|
|
@ -10,8 +10,8 @@
|
||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['it']
|
categories = ['it']
|
||||||
|
|
|
@ -9,11 +9,10 @@
|
||||||
# @parse url, title, content, suggestion
|
# @parse url, title, content, suggestion
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urlparse, parse_qsl
|
|
||||||
from lxml import html, etree
|
from lxml import html, etree
|
||||||
from searx.engines.xpath import extract_text, extract_url
|
from searx.engines.xpath import extract_text, extract_url
|
||||||
from searx.search import logger
|
from searx import logger
|
||||||
|
from searx.url_utils import urlencode, urlparse, parse_qsl
|
||||||
|
|
||||||
logger = logger.getChild('google engine')
|
logger = logger.getChild('google engine')
|
||||||
|
|
||||||
|
|
|
@ -11,9 +11,9 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from datetime import date, timedelta
|
from datetime import date, timedelta
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
|
|
|
@ -11,9 +11,8 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
from searx.engines.google import _fetch_supported_languages, supported_languages_url
|
from searx.engines.google import _fetch_supported_languages, supported_languages_url
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
categories = ['news']
|
categories = ['news']
|
||||||
|
|
|
@ -12,11 +12,15 @@
|
||||||
# @todo embedded (needs some md5 from video page)
|
# @todo embedded (needs some md5 from video page)
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from HTMLParser import HTMLParser
|
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
|
try:
|
||||||
|
from HTMLParser import HTMLParser
|
||||||
|
except:
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos']
|
categories = ['videos']
|
||||||
|
|
|
@ -1,11 +1,16 @@
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
|
||||||
from collections import Iterable
|
from collections import Iterable
|
||||||
|
from json import loads
|
||||||
|
from sys import version_info
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
|
if version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
search_url = None
|
search_url = None
|
||||||
url_query = None
|
url_query = None
|
||||||
content_query = None
|
content_query = None
|
||||||
title_query = None
|
title_query = None
|
||||||
|
paging = False
|
||||||
suggestion_query = ''
|
suggestion_query = ''
|
||||||
results_query = ''
|
results_query = ''
|
||||||
|
|
||||||
|
@ -20,7 +25,7 @@ first_page_num = 1
|
||||||
|
|
||||||
def iterate(iterable):
|
def iterate(iterable):
|
||||||
if type(iterable) == dict:
|
if type(iterable) == dict:
|
||||||
it = iterable.iteritems()
|
it = iterable.items()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
it = enumerate(iterable)
|
it = enumerate(iterable)
|
||||||
|
|
|
@ -10,12 +10,11 @@
|
||||||
@parse url, title, content, seed, leech, magnetlink
|
@parse url, title, content, seed, leech, magnetlink
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from urllib import quote
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
from searx.utils import get_torrent_size, convert_str_to_int
|
from searx.utils import get_torrent_size, convert_str_to_int
|
||||||
|
from searx.url_utils import quote, urljoin
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos', 'music', 'files']
|
categories = ['videos', 'music', 'files']
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from string import Formatter
|
from string import Formatter
|
||||||
from urllib import urlencode, quote
|
from searx.url_utils import urlencode, quote
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
|
|
|
@ -11,8 +11,8 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['music']
|
categories = ['music']
|
||||||
|
|
|
@ -9,9 +9,9 @@
|
||||||
@parse url, title, content, seed, leech, torrentfile
|
@parse url, title, content, seed, leech, torrentfile
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['files', 'images', 'videos', 'music']
|
categories = ['files', 'images', 'videos', 'music']
|
||||||
|
|
|
@ -11,7 +11,6 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from searx.utils import searx_useragent
|
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['map']
|
categories = ['map']
|
||||||
|
@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = base_url + search_string.format(query=query)
|
params['url'] = base_url + search_string.format(query=query)
|
||||||
|
|
||||||
# using searx User-Agent
|
|
||||||
params['headers']['User-Agent'] = searx_useragent()
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -10,9 +10,9 @@
|
||||||
@parse url, title
|
@parse url, title
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from searx.utils import searx_useragent
|
from searx.utils import searx_useragent
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['map']
|
categories = ['map']
|
||||||
|
|
|
@ -8,11 +8,10 @@
|
||||||
# @stable yes (HTML can change)
|
# @stable yes (HTML can change)
|
||||||
# @parse url, title, content, seed, leech, magnetlink
|
# @parse url, title, content, seed, leech, magnetlink
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from urllib import quote
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import quote, urljoin
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos', 'music', 'files']
|
categories = ['videos', 'music', 'files']
|
||||||
|
|
|
@ -12,9 +12,8 @@
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
|
||||||
|
|
||||||
from searx.utils import html_to_text
|
from searx.utils import html_to_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = None
|
categories = None
|
||||||
|
|
|
@ -11,9 +11,8 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urlparse, urljoin
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from searx.url_utils import urlencode, urljoin, urlparse
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general', 'images', 'news', 'social media']
|
categories = ['general', 'images', 'news', 'social media']
|
||||||
|
@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}'
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
query = urlencode({'q': query,
|
query = urlencode({'q': query, 'limit': page_size})
|
||||||
'limit': page_size})
|
|
||||||
params['url'] = search_url.format(query=query)
|
params['url'] = search_url.format(query=query)
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
|
@ -10,9 +10,7 @@
|
||||||
@parse url, title, content, img_src
|
@parse url, title, content, img_src
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads, dumps
|
from json import loads, dumps
|
||||||
from dateutil import parser
|
|
||||||
from searx.utils import html_to_text
|
from searx.utils import html_to_text
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
|
@ -48,7 +46,7 @@ def response(resp):
|
||||||
search_res = loads(resp.text)
|
search_res = loads(resp.text)
|
||||||
|
|
||||||
# return empty array if there are no results
|
# return empty array if there are no results
|
||||||
if search_res.get('total') < 1:
|
if search_res.get('total', 0) < 1:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# parse results
|
# parse results
|
||||||
|
|
|
@ -10,8 +10,8 @@
|
||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
|
@ -31,8 +31,7 @@ code_endings = {'cs': 'c#',
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
|
||||||
pageno=params['pageno'] - 1)
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
@ -10,8 +10,8 @@
|
||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from json import loads
|
from json import loads
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['it']
|
categories = ['it']
|
||||||
|
@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}'
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
|
||||||
pageno=params['pageno'] - 1)
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
@ -8,11 +8,9 @@
|
||||||
# @stable yes (HTML can change)
|
# @stable yes (HTML can change)
|
||||||
# @parse url, title, content, seed, leech, magnetlink
|
# @parse url, title, content, seed, leech, magnetlink
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from urllib import quote
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from searx.engines.xpath import extract_text
|
from searx.url_utils import quote, urljoin
|
||||||
|
|
||||||
|
|
||||||
url = 'http://www.seedpeer.eu/'
|
url = 'http://www.seedpeer.eu/'
|
||||||
|
|
|
@ -11,13 +11,17 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from StringIO import StringIO
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from lxml import etree
|
from lxml import html
|
||||||
from urllib import urlencode, quote_plus
|
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
from searx import logger
|
from searx import logger
|
||||||
from searx.poolrequests import get as http_get
|
from searx.poolrequests import get as http_get
|
||||||
|
from searx.url_utils import quote_plus, urlencode
|
||||||
|
|
||||||
|
try:
|
||||||
|
from cStringIO import StringIO
|
||||||
|
except:
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['music']
|
categories = ['music']
|
||||||
|
@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
|
||||||
'scrolling="no" frameborder="no" ' +\
|
'scrolling="no" frameborder="no" ' +\
|
||||||
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
|
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
|
||||||
|
|
||||||
|
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
|
||||||
|
|
||||||
|
|
||||||
def get_client_id():
|
def get_client_id():
|
||||||
response = http_get("https://soundcloud.com")
|
response = http_get("https://soundcloud.com")
|
||||||
rx_namespace = {"re": "http://exslt.org/regular-expressions"}
|
|
||||||
|
|
||||||
if response.ok:
|
if response.ok:
|
||||||
tree = etree.parse(StringIO(response.content), etree.HTMLParser())
|
tree = html.fromstring(response.content)
|
||||||
script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
|
script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
|
||||||
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
|
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
|
||||||
|
|
||||||
# extracts valid app_js urls from soundcloud.com content
|
# extracts valid app_js urls from soundcloud.com content
|
||||||
|
@ -51,7 +56,7 @@ def get_client_id():
|
||||||
# gets app_js and searches for the clientid
|
# gets app_js and searches for the clientid
|
||||||
response = http_get(app_js_url)
|
response = http_get(app_js_url)
|
||||||
if response.ok:
|
if response.ok:
|
||||||
cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
|
cids = cid_re.search(response.text)
|
||||||
if cids is not None and len(cids.groups()):
|
if cids is not None and len(cids.groups()):
|
||||||
return cids.groups()[0]
|
return cids.groups()[0]
|
||||||
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
|
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['music']
|
categories = ['music']
|
||||||
|
@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
offset = (params['pageno'] - 1) * 20
|
offset = (params['pageno'] - 1) * 20
|
||||||
|
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
|
||||||
offset=offset)
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
@ -10,10 +10,9 @@
|
||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode, urljoin
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['it']
|
categories = ['it']
|
||||||
|
@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]'
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(query=urlencode({'q': query}),
|
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
|
||||||
pageno=params['pageno'])
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ def request(query, params):
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
dom = html.fromstring(resp.content)
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
# parse results
|
# parse results
|
||||||
for result in dom.xpath(results_xpath):
|
for result in dom.xpath(results_xpath):
|
||||||
|
|
|
@ -10,10 +10,10 @@
|
||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import quote_plus
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.languages import language_codes
|
from searx.languages import language_codes
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import quote_plus
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos']
|
categories = ['videos']
|
||||||
|
|
|
@ -11,9 +11,9 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode, unquote
|
|
||||||
import re
|
import re
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
|
from searx.url_utils import unquote, urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general', 'images']
|
categories = ['general', 'images']
|
||||||
|
@ -27,10 +27,10 @@ search_string = '?{query}&page={page}'
|
||||||
supported_languages_url = base_url
|
supported_languages_url = base_url
|
||||||
|
|
||||||
# regex
|
# regex
|
||||||
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
|
regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
|
||||||
regex_json_remove_start = re.compile(r'^initialData:\s*')
|
regex_json_remove_start = re.compile(b'^initialData:\s*')
|
||||||
regex_json_remove_end = re.compile(r',\s*environment$')
|
regex_json_remove_end = re.compile(b',\s*environment$')
|
||||||
regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
|
regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
|
@ -45,10 +45,9 @@ def request(query, params):
|
||||||
ui_language = params['language'].split('-')[0]
|
ui_language = params['language'].split('-')[0]
|
||||||
|
|
||||||
search_path = search_string.format(
|
search_path = search_string.format(
|
||||||
query=urlencode({'query': query,
|
query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
|
||||||
'uiLanguage': ui_language,
|
page=params['pageno']
|
||||||
'region': region}),
|
)
|
||||||
page=params['pageno'])
|
|
||||||
|
|
||||||
# image search query is something like 'image?{query}&page={page}'
|
# image search query is something like 'image?{query}&page={page}'
|
||||||
if params['category'] == 'images':
|
if params['category'] == 'images':
|
||||||
|
@ -63,14 +62,14 @@ def request(query, params):
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
json_regex = regex_json.search(resp.content)
|
json_regex = regex_json.search(resp.text)
|
||||||
|
|
||||||
# check if results are returned
|
# check if results are returned
|
||||||
if not json_regex:
|
if not json_regex:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
|
json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
|
||||||
json = loads(json_raw)
|
json = loads(json_raw.decode('utf-8'))
|
||||||
|
|
||||||
# parse results
|
# parse results
|
||||||
for result in json['Results'].get('items', []):
|
for result in json['Results'].get('items', []):
|
||||||
|
@ -78,7 +77,7 @@ def response(resp):
|
||||||
|
|
||||||
# parse image results
|
# parse image results
|
||||||
if result.get('ContentType', '').startswith('image'):
|
if result.get('ContentType', '').startswith('image'):
|
||||||
img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
|
img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
results.append({'url': result['SourceUrl'],
|
results.append({'url': result['SourceUrl'],
|
||||||
|
@ -100,7 +99,7 @@ def response(resp):
|
||||||
# parse images
|
# parse images
|
||||||
for result in json.get('Images', []):
|
for result in json.get('Images', []):
|
||||||
# decode image url
|
# decode image url
|
||||||
img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
|
img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
results.append({'url': result['SourceUrl'],
|
results.append({'url': result['SourceUrl'],
|
||||||
|
|
|
@ -11,11 +11,11 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from searx.engines.nyaa import int_or_zero, get_filesize_mul
|
from searx.engines.nyaa import int_or_zero, get_filesize_mul
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['files', 'videos', 'music']
|
categories = ['files', 'videos', 'music']
|
||||||
|
@ -28,8 +28,7 @@ search_url = base_url + 'search.php?{query}'
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
query = urlencode({'page': params['pageno'],
|
query = urlencode({'page': params['pageno'], 'terms': query})
|
||||||
'terms': query})
|
|
||||||
params['url'] = search_url.format(query=query)
|
params['url'] = search_url.format(query=query)
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -50,7 +49,7 @@ def response(resp):
|
||||||
size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
|
size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
|
||||||
|
|
||||||
# processing the results, two rows at a time
|
# processing the results, two rows at a time
|
||||||
for i in xrange(0, len(rows), 2):
|
for i in range(0, len(rows), 2):
|
||||||
# parse the first row
|
# parse the first row
|
||||||
name_row = rows[i]
|
name_row = rows[i]
|
||||||
|
|
||||||
|
@ -79,14 +78,14 @@ def response(resp):
|
||||||
groups = size_re.match(item).groups()
|
groups = size_re.match(item).groups()
|
||||||
multiplier = get_filesize_mul(groups[1])
|
multiplier = get_filesize_mul(groups[1])
|
||||||
params['filesize'] = int(multiplier * float(groups[0]))
|
params['filesize'] = int(multiplier * float(groups[0]))
|
||||||
except Exception as e:
|
except:
|
||||||
pass
|
pass
|
||||||
elif item.startswith('Date:'):
|
elif item.startswith('Date:'):
|
||||||
try:
|
try:
|
||||||
# Date: 2016-02-21 21:44 UTC
|
# Date: 2016-02-21 21:44 UTC
|
||||||
date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
|
date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
|
||||||
params['publishedDate'] = date
|
params['publishedDate'] = date
|
||||||
except Exception as e:
|
except:
|
||||||
pass
|
pass
|
||||||
elif item.startswith('Comment:'):
|
elif item.startswith('Comment:'):
|
||||||
params['content'] = item
|
params['content'] = item
|
||||||
|
|
|
@ -12,11 +12,11 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from searx.engines.nyaa import int_or_zero, get_filesize_mul
|
from searx.engines.nyaa import int_or_zero, get_filesize_mul
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['files', 'videos', 'music']
|
categories = ['files', 'videos', 'music']
|
||||||
|
@ -70,7 +70,7 @@ def response(resp):
|
||||||
size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
|
size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
|
||||||
size, suffix = size_str.split()
|
size, suffix = size_str.split()
|
||||||
params['filesize'] = int(size) * get_filesize_mul(suffix)
|
params['filesize'] = int(size) * get_filesize_mul(suffix)
|
||||||
except Exception as e:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# does our link contain a valid SHA1 sum?
|
# does our link contain a valid SHA1 sum?
|
||||||
|
@ -84,7 +84,7 @@ def response(resp):
|
||||||
# Fri, 25 Mar 2016 16:29:01
|
# Fri, 25 Mar 2016 16:29:01
|
||||||
date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
|
date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
|
||||||
params['publishedDate'] = date
|
params['publishedDate'] = date
|
||||||
except Exception as e:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
results.append(params)
|
results.append(params)
|
||||||
|
|
|
@ -9,8 +9,12 @@
|
||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
from sys import version_info
|
||||||
from searx.utils import is_valid_lang
|
from searx.utils import is_valid_lang
|
||||||
|
|
||||||
|
if version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
|
url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
|
||||||
web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
|
web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
|
||||||
|
|
|
@ -12,11 +12,10 @@
|
||||||
@todo publishedDate
|
@todo publishedDate
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urlparse import urljoin
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.url_utils import urlencode, urljoin
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['social media']
|
categories = ['social media']
|
||||||
|
|
|
@ -13,8 +13,8 @@
|
||||||
# @todo set content-parameter with correct data
|
# @todo set content-parameter with correct data
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos']
|
categories = ['videos']
|
||||||
|
|
|
@ -14,12 +14,11 @@
|
||||||
from searx import logger
|
from searx import logger
|
||||||
from searx.poolrequests import get
|
from searx.poolrequests import get
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
from searx.utils import format_date_by_locale
|
|
||||||
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
|
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
from urllib import urlencode
|
|
||||||
|
|
||||||
logger = logger.getChild('wikidata')
|
logger = logger.getChild('wikidata')
|
||||||
result_count = 1
|
result_count = 1
|
||||||
|
@ -62,14 +61,13 @@ def request(query, params):
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
params['url'] = url_search.format(
|
params['url'] = url_search.format(
|
||||||
query=urlencode({'label': query,
|
query=urlencode({'label': query, 'language': language}))
|
||||||
'language': language}))
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
html = fromstring(resp.content)
|
html = fromstring(resp.text)
|
||||||
wikidata_ids = html.xpath(wikidata_ids_xpath)
|
wikidata_ids = html.xpath(wikidata_ids_xpath)
|
||||||
|
|
||||||
language = resp.search_params['language'].split('-')[0]
|
language = resp.search_params['language'].split('-')[0]
|
||||||
|
@ -78,10 +76,9 @@ def response(resp):
|
||||||
|
|
||||||
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
||||||
for wikidata_id in wikidata_ids[:result_count]:
|
for wikidata_id in wikidata_ids[:result_count]:
|
||||||
url = url_detail.format(query=urlencode({'page': wikidata_id,
|
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
|
||||||
'uselang': language}))
|
|
||||||
htmlresponse = get(url)
|
htmlresponse = get(url)
|
||||||
jsonresponse = loads(htmlresponse.content)
|
jsonresponse = loads(htmlresponse.text)
|
||||||
results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
|
results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -11,13 +11,12 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode, quote
|
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
|
from searx.url_utils import quote, urlencode
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://{language}.wikipedia.org/'
|
base_url = u'https://{language}.wikipedia.org/'
|
||||||
search_postfix = 'w/api.php?'\
|
search_url = base_url + u'w/api.php?'\
|
||||||
'action=query'\
|
'action=query'\
|
||||||
'&format=json'\
|
'&format=json'\
|
||||||
'&{query}'\
|
'&{query}'\
|
||||||
|
@ -37,16 +36,16 @@ def url_lang(lang):
|
||||||
else:
|
else:
|
||||||
language = lang
|
language = lang
|
||||||
|
|
||||||
return base_url.format(language=language)
|
return language
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
if query.islower():
|
if query.islower():
|
||||||
query += '|' + query.title()
|
query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
|
||||||
|
|
||||||
params['url'] = url_lang(params['language']) \
|
params['url'] = search_url.format(query=urlencode({'titles': query}),
|
||||||
+ search_postfix.format(query=urlencode({'titles': query}))
|
language=url_lang(params['language']))
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -78,7 +77,7 @@ def extract_first_paragraph(content, title, image):
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
search_result = loads(resp.content)
|
search_result = loads(resp.text)
|
||||||
|
|
||||||
# wikipedia article's unique id
|
# wikipedia article's unique id
|
||||||
# first valid id is assumed to be the requested article
|
# first valid id is assumed to be the requested article
|
||||||
|
@ -99,11 +98,9 @@ def response(resp):
|
||||||
extract = page.get('extract')
|
extract = page.get('extract')
|
||||||
|
|
||||||
summary = extract_first_paragraph(extract, title, image)
|
summary = extract_first_paragraph(extract, title, image)
|
||||||
if not summary:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# link to wikipedia article
|
# link to wikipedia article
|
||||||
wikipedia_link = url_lang(resp.search_params['language']) \
|
wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
|
||||||
+ 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))
|
+ 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))
|
||||||
|
|
||||||
results.append({'url': wikipedia_link, 'title': title})
|
results.append({'url': wikipedia_link, 'title': title})
|
||||||
|
|
|
@ -8,8 +8,8 @@
|
||||||
# @stable yes
|
# @stable yes
|
||||||
# @parse url, infobox
|
# @parse url, infobox
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
|
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
|
||||||
|
@ -37,8 +37,7 @@ image_pods = {'VisualRepresentation',
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
params['url'] = search_url.format(query=urlencode({'input': query}),
|
params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
|
||||||
api_key=api_key)
|
|
||||||
params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
|
params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
@ -56,7 +55,7 @@ def replace_pua_chars(text):
|
||||||
u'\uf74e': 'i', # imaginary number
|
u'\uf74e': 'i', # imaginary number
|
||||||
u'\uf7d9': '='} # equals sign
|
u'\uf7d9': '='} # equals sign
|
||||||
|
|
||||||
for k, v in pua_chars.iteritems():
|
for k, v in pua_chars.items():
|
||||||
text = text.replace(k, v)
|
text = text.replace(k, v)
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
@ -66,7 +65,7 @@ def replace_pua_chars(text):
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
search_results = etree.XML(resp.content)
|
search_results = etree.XML(resp.text)
|
||||||
|
|
||||||
# return empty array if there are no results
|
# return empty array if there are no results
|
||||||
if search_results.xpath(failure_xpath):
|
if search_results.xpath(failure_xpath):
|
||||||
|
@ -120,10 +119,10 @@ def response(resp):
|
||||||
# append infobox
|
# append infobox
|
||||||
results.append({'infobox': infobox_title,
|
results.append({'infobox': infobox_title,
|
||||||
'attributes': result_chunks,
|
'attributes': result_chunks,
|
||||||
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
|
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
|
||||||
|
|
||||||
# append link to site
|
# append link to site
|
||||||
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
|
results.append({'url': resp.request.headers['Referer'],
|
||||||
'title': title,
|
'title': title,
|
||||||
'content': result_content})
|
'content': result_content})
|
||||||
|
|
||||||
|
|
|
@ -10,10 +10,9 @@
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from time import time
|
from time import time
|
||||||
from urllib import urlencode
|
|
||||||
from lxml.etree import XML
|
|
||||||
|
|
||||||
from searx.poolrequests import get as http_get
|
from searx.poolrequests import get as http_get
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
url = 'https://www.wolframalpha.com/'
|
url = 'https://www.wolframalpha.com/'
|
||||||
|
@ -62,7 +61,7 @@ obtain_token()
|
||||||
# do search-request
|
# do search-request
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
# obtain token if last update was more than an hour
|
# obtain token if last update was more than an hour
|
||||||
if time() - token['last_updated'] > 3600:
|
if time() - (token['last_updated'] or 0) > 3600:
|
||||||
obtain_token()
|
obtain_token()
|
||||||
params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
|
params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
|
||||||
params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
|
params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
|
||||||
|
@ -112,9 +111,9 @@ def response(resp):
|
||||||
|
|
||||||
results.append({'infobox': infobox_title,
|
results.append({'infobox': infobox_title,
|
||||||
'attributes': result_chunks,
|
'attributes': result_chunks,
|
||||||
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
|
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
|
||||||
|
|
||||||
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
|
results.append({'url': resp.request.headers['Referer'],
|
||||||
'title': 'Wolfram|Alpha (' + infobox_title + ')',
|
'title': 'Wolfram|Alpha (' + infobox_title + ')',
|
||||||
'content': result_content})
|
'content': result_content})
|
||||||
|
|
||||||
|
|
|
@ -10,11 +10,9 @@
|
||||||
@parse url, title, thumbnail, img_src, content
|
@parse url, title, thumbnail, img_src, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urljoin
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import string
|
|
||||||
import re
|
import re
|
||||||
|
from searx.url_utils import urlencode, urljoin
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
|
@ -55,7 +53,7 @@ def response(resp):
|
||||||
cur_element += result_part
|
cur_element += result_part
|
||||||
|
|
||||||
# fix xml-error
|
# fix xml-error
|
||||||
cur_element = string.replace(cur_element, '"></a>', '"/></a>')
|
cur_element = cur_element.replace('"></a>', '"/></a>')
|
||||||
|
|
||||||
dom = html.fromstring(cur_element)
|
dom = html.fromstring(cur_element)
|
||||||
link = dom.xpath('//a')[0]
|
link = dom.xpath('//a')[0]
|
||||||
|
|
|
@ -13,8 +13,7 @@
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
from searx.url_utils import urlencode, urljoin
|
||||||
from urlparse import urljoin
|
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from urllib import urlencode, unquote
|
|
||||||
from urlparse import urlparse, urljoin
|
|
||||||
from lxml.etree import _ElementStringResult, _ElementUnicodeResult
|
from lxml.etree import _ElementStringResult, _ElementUnicodeResult
|
||||||
from searx.utils import html_to_text
|
from searx.utils import html_to_text
|
||||||
|
from searx.url_utils import unquote, urlencode, urljoin, urlparse
|
||||||
|
|
||||||
search_url = None
|
search_url = None
|
||||||
url_xpath = None
|
url_xpath = None
|
||||||
content_xpath = None
|
content_xpath = None
|
||||||
title_xpath = None
|
title_xpath = None
|
||||||
|
paging = False
|
||||||
suggestion_xpath = ''
|
suggestion_xpath = ''
|
||||||
results_xpath = ''
|
results_xpath = ''
|
||||||
|
|
||||||
|
|
|
@ -13,8 +13,8 @@
|
||||||
# @todo parse video, audio and file results
|
# @todo parse video, audio and file results
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
from searx.utils import html_to_text
|
from searx.utils import html_to_text
|
||||||
|
|
||||||
|
|
|
@ -11,10 +11,9 @@
|
||||||
@parse url, title, content, suggestion
|
@parse url, title, content, suggestion
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import unquote
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text, extract_url
|
from searx.engines.xpath import extract_text, extract_url
|
||||||
|
from searx.url_utils import unquote, urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['general']
|
categories = ['general']
|
||||||
|
|
|
@ -9,13 +9,13 @@
|
||||||
# @stable no (HTML can change)
|
# @stable no (HTML can change)
|
||||||
# @parse url, title, content, publishedDate
|
# @parse url, title, content, publishedDate
|
||||||
|
|
||||||
from urllib import urlencode
|
import re
|
||||||
|
from datetime import datetime, timedelta
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text, extract_url
|
from searx.engines.xpath import extract_text, extract_url
|
||||||
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
|
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
|
||||||
from datetime import datetime, timedelta
|
|
||||||
import re
|
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['news']
|
categories = ['news']
|
||||||
|
|
|
@ -9,9 +9,9 @@
|
||||||
@parse url, title, content
|
@parse url, title, content
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib import urlencode
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.search import logger
|
from searx import logger
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
logger = logger.getChild('yandex engine')
|
logger = logger.getChild('yandex engine')
|
||||||
|
|
||||||
|
|
|
@ -9,8 +9,8 @@
|
||||||
# @parse url, title, content, publishedDate, thumbnail, embedded
|
# @parse url, title, content, publishedDate, thumbnail, embedded
|
||||||
|
|
||||||
from json import loads
|
from json import loads
|
||||||
from urllib import urlencode
|
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
|
from searx.url_utils import urlencode
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos', 'music']
|
categories = ['videos', 'music']
|
||||||
|
|
|
@ -8,10 +8,10 @@
|
||||||
# @stable no
|
# @stable no
|
||||||
# @parse url, title, content, publishedDate, thumbnail, embedded
|
# @parse url, title, content, publishedDate, thumbnail, embedded
|
||||||
|
|
||||||
from urllib import quote_plus
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
from searx.utils import list_get
|
from searx.utils import list_get
|
||||||
|
from searx.url_utils import quote_plus
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos', 'music']
|
categories = ['videos', 'music']
|
||||||
|
|
|
@ -14,9 +14,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
|
|
||||||
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
|
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
|
||||||
'''
|
'''
|
||||||
from sys import exit
|
from sys import exit, version_info
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
|
||||||
|
if version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
logger = logger.getChild('plugins')
|
logger = logger.getChild('plugins')
|
||||||
|
|
||||||
from searx.plugins import (doai_rewrite,
|
from searx.plugins import (doai_rewrite,
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
import re
|
import re
|
||||||
from urlparse import urlparse, parse_qsl
|
from searx.url_utils import urlparse, parse_qsl
|
||||||
|
|
||||||
regex = re.compile(r'10\.\d{4,9}/[^\s]+')
|
regex = re.compile(r'10\.\d{4,9}/[^\s]+')
|
||||||
|
|
||||||
|
|
|
@ -16,14 +16,17 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from urlparse import urlparse
|
import sys
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from os import listdir, environ
|
from os import listdir, environ
|
||||||
from os.path import isfile, isdir, join
|
from os.path import isfile, isdir, join
|
||||||
from searx.plugins import logger
|
from searx.plugins import logger
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
from searx import searx_dir
|
from searx import searx_dir
|
||||||
|
from searx.url_utils import urlparse
|
||||||
|
|
||||||
|
if sys.version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
name = "HTTPS rewrite"
|
name = "HTTPS rewrite"
|
||||||
description = gettext('Rewrite HTTP links to HTTPS if possible')
|
description = gettext('Rewrite HTTP links to HTTPS if possible')
|
||||||
|
|
|
@ -22,7 +22,7 @@ default_on = True
|
||||||
|
|
||||||
|
|
||||||
# Self User Agent regex
|
# Self User Agent regex
|
||||||
p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
|
p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
# attach callback to the post search hook
|
# attach callback to the post search hook
|
||||||
|
@ -31,7 +31,7 @@ p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
|
||||||
def post_search(request, search):
|
def post_search(request, search):
|
||||||
if search.search_query.pageno > 1:
|
if search.search_query.pageno > 1:
|
||||||
return True
|
return True
|
||||||
if search.search_query.query == 'ip':
|
if search.search_query.query == b'ip':
|
||||||
x_forwarded_for = request.headers.getlist("X-Forwarded-For")
|
x_forwarded_for = request.headers.getlist("X-Forwarded-For")
|
||||||
if x_forwarded_for:
|
if x_forwarded_for:
|
||||||
ip = x_forwarded_for[0]
|
ip = x_forwarded_for[0]
|
||||||
|
|
|
@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
|
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext
|
||||||
import re
|
import re
|
||||||
from urlparse import urlunparse
|
from searx.url_utils import urlunparse
|
||||||
|
|
||||||
regexes = {re.compile(r'utm_[^&]+&?'),
|
regexes = {re.compile(r'utm_[^&]+&?'),
|
||||||
re.compile(r'(wkey|wemail)[^&]+&?'),
|
re.compile(r'(wkey|wemail)[^&]+&?'),
|
||||||
|
|
|
@ -23,7 +23,7 @@ class Setting(object):
|
||||||
def __init__(self, default_value, **kwargs):
|
def __init__(self, default_value, **kwargs):
|
||||||
super(Setting, self).__init__()
|
super(Setting, self).__init__()
|
||||||
self.value = default_value
|
self.value = default_value
|
||||||
for key, value in kwargs.iteritems():
|
for key, value in kwargs.items():
|
||||||
setattr(self, key, value)
|
setattr(self, key, value)
|
||||||
|
|
||||||
self._post_init()
|
self._post_init()
|
||||||
|
@ -38,7 +38,7 @@ class Setting(object):
|
||||||
return self.value
|
return self.value
|
||||||
|
|
||||||
def save(self, name, resp):
|
def save(self, name, resp):
|
||||||
resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE)
|
resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)
|
||||||
|
|
||||||
|
|
||||||
class StringSetting(Setting):
|
class StringSetting(Setting):
|
||||||
|
@ -133,7 +133,7 @@ class MapSetting(Setting):
|
||||||
|
|
||||||
def save(self, name, resp):
|
def save(self, name, resp):
|
||||||
if hasattr(self, 'key'):
|
if hasattr(self, 'key'):
|
||||||
resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE)
|
resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)
|
||||||
|
|
||||||
|
|
||||||
class SwitchableSetting(Setting):
|
class SwitchableSetting(Setting):
|
||||||
|
@ -194,7 +194,7 @@ class EnginesSetting(SwitchableSetting):
|
||||||
def _post_init(self):
|
def _post_init(self):
|
||||||
super(EnginesSetting, self)._post_init()
|
super(EnginesSetting, self)._post_init()
|
||||||
transformed_choices = []
|
transformed_choices = []
|
||||||
for engine_name, engine in self.choices.iteritems():
|
for engine_name, engine in self.choices.items():
|
||||||
for category in engine.categories:
|
for category in engine.categories:
|
||||||
transformed_choice = dict()
|
transformed_choice = dict()
|
||||||
transformed_choice['default_on'] = not engine.disabled
|
transformed_choice['default_on'] = not engine.disabled
|
||||||
|
@ -241,9 +241,9 @@ class Preferences(object):
|
||||||
'language': SearchLanguageSetting(settings['search']['language'],
|
'language': SearchLanguageSetting(settings['search']['language'],
|
||||||
choices=LANGUAGE_CODES),
|
choices=LANGUAGE_CODES),
|
||||||
'locale': EnumStringSetting(settings['ui']['default_locale'],
|
'locale': EnumStringSetting(settings['ui']['default_locale'],
|
||||||
choices=settings['locales'].keys() + ['']),
|
choices=list(settings['locales'].keys()) + ['']),
|
||||||
'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
|
'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
|
||||||
choices=autocomplete.backends.keys() + ['']),
|
choices=list(autocomplete.backends.keys()) + ['']),
|
||||||
'image_proxy': MapSetting(settings['server']['image_proxy'],
|
'image_proxy': MapSetting(settings['server']['image_proxy'],
|
||||||
map={'': settings['server']['image_proxy'],
|
map={'': settings['server']['image_proxy'],
|
||||||
'0': False,
|
'0': False,
|
||||||
|
@ -260,7 +260,7 @@ class Preferences(object):
|
||||||
self.unknown_params = {}
|
self.unknown_params = {}
|
||||||
|
|
||||||
def parse_cookies(self, input_data):
|
def parse_cookies(self, input_data):
|
||||||
for user_setting_name, user_setting in input_data.iteritems():
|
for user_setting_name, user_setting in input_data.items():
|
||||||
if user_setting_name in self.key_value_settings:
|
if user_setting_name in self.key_value_settings:
|
||||||
self.key_value_settings[user_setting_name].parse(user_setting)
|
self.key_value_settings[user_setting_name].parse(user_setting)
|
||||||
elif user_setting_name == 'disabled_engines':
|
elif user_setting_name == 'disabled_engines':
|
||||||
|
@ -274,7 +274,7 @@ class Preferences(object):
|
||||||
disabled_engines = []
|
disabled_engines = []
|
||||||
enabled_categories = []
|
enabled_categories = []
|
||||||
disabled_plugins = []
|
disabled_plugins = []
|
||||||
for user_setting_name, user_setting in input_data.iteritems():
|
for user_setting_name, user_setting in input_data.items():
|
||||||
if user_setting_name in self.key_value_settings:
|
if user_setting_name in self.key_value_settings:
|
||||||
self.key_value_settings[user_setting_name].parse(user_setting)
|
self.key_value_settings[user_setting_name].parse(user_setting)
|
||||||
elif user_setting_name.startswith('engine_'):
|
elif user_setting_name.startswith('engine_'):
|
||||||
|
@ -295,7 +295,7 @@ class Preferences(object):
|
||||||
return self.key_value_settings[user_setting_name].get_value()
|
return self.key_value_settings[user_setting_name].get_value()
|
||||||
|
|
||||||
def save(self, resp):
|
def save(self, resp):
|
||||||
for user_setting_name, user_setting in self.key_value_settings.iteritems():
|
for user_setting_name, user_setting in self.key_value_settings.items():
|
||||||
user_setting.save(user_setting_name, resp)
|
user_setting.save(user_setting_name, resp)
|
||||||
self.engines.save(resp)
|
self.engines.save(resp)
|
||||||
self.plugins.save(resp)
|
self.plugins.save(resp)
|
||||||
|
|
|
@ -21,8 +21,12 @@ from searx.languages import language_codes
|
||||||
from searx.engines import (
|
from searx.engines import (
|
||||||
categories, engines, engine_shortcuts
|
categories, engines, engine_shortcuts
|
||||||
)
|
)
|
||||||
import string
|
|
||||||
import re
|
import re
|
||||||
|
import string
|
||||||
|
import sys
|
||||||
|
|
||||||
|
if sys.version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
|
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
|
||||||
|
|
||||||
|
@ -146,7 +150,7 @@ class SearchQuery(object):
|
||||||
"""container for all the search parameters (query, language, etc...)"""
|
"""container for all the search parameters (query, language, etc...)"""
|
||||||
|
|
||||||
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
|
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
|
||||||
self.query = query
|
self.query = query.encode('utf-8')
|
||||||
self.engines = engines
|
self.engines = engines
|
||||||
self.categories = categories
|
self.categories = categories
|
||||||
self.lang = lang
|
self.lang = lang
|
||||||
|
|
|
@ -1,9 +1,13 @@
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from threading import RLock
|
from threading import RLock
|
||||||
from urlparse import urlparse, unquote
|
|
||||||
from searx.engines import engines
|
from searx.engines import engines
|
||||||
|
from searx.url_utils import urlparse, unquote
|
||||||
|
|
||||||
|
if sys.version_info[0] == 3:
|
||||||
|
basestring = str
|
||||||
|
|
||||||
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
|
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
|
||||||
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
||||||
|
|
|
@ -16,8 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import gc
|
import gc
|
||||||
|
import sys
|
||||||
import threading
|
import threading
|
||||||
from thread import start_new_thread
|
|
||||||
from time import time
|
from time import time
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
import requests.exceptions
|
import requests.exceptions
|
||||||
|
@ -33,6 +33,14 @@ from searx import logger
|
||||||
from searx.plugins import plugins
|
from searx.plugins import plugins
|
||||||
from searx.exceptions import SearxParameterException
|
from searx.exceptions import SearxParameterException
|
||||||
|
|
||||||
|
# Python 2 exposes the low-level thread API as ``thread``; Python 3 renamed
# the module to ``_thread``.  Only a *missing module* should trigger the
# fallback, so catch ImportError instead of using a bare ``except:`` (which
# would also swallow KeyboardInterrupt/SystemExit and unrelated errors).
try:
    from thread import start_new_thread
except ImportError:
    from _thread import start_new_thread

# Python 3 has no separate ``unicode`` type; alias it to ``str`` so code in
# this module that refers to ``unicode`` keeps working on both major versions.
if sys.version_info[0] == 3:
    unicode = str
|
||||||
|
|
||||||
logger = logger.getChild('search')
|
logger = logger.getChild('search')
|
||||||
|
|
||||||
number_of_searches = 0
|
number_of_searches = 0
|
||||||
|
@ -387,7 +395,7 @@ class Search(object):
|
||||||
request_params['time_range'] = search_query.time_range
|
request_params['time_range'] = search_query.time_range
|
||||||
|
|
||||||
# append request to list
|
# append request to list
|
||||||
requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params))
|
requests.append((selected_engine['name'], search_query.query, request_params))
|
||||||
|
|
||||||
# update timeout_limit
|
# update timeout_limit
|
||||||
timeout_limit = max(timeout_limit, engine.timeout)
|
timeout_limit = max(timeout_limit, engine.timeout)
|
||||||
|
|
|
@ -17,7 +17,7 @@ server:
|
||||||
|
|
||||||
ui:
|
ui:
|
||||||
themes_path : ""
|
themes_path : ""
|
||||||
default_theme : legacy
|
default_theme : oscar
|
||||||
default_locale : ""
|
default_locale : ""
|
||||||
|
|
||||||
outgoing:
|
outgoing:
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<div class="center">
|
<div class="center">
|
||||||
<h1>{{ _('Page not found') }}</h1>
|
<h1>{{ _('Page not found') }}</h1>
|
||||||
{% autoescape false %}
|
{% autoescape false %}
|
||||||
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
|
<p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
|
||||||
{% endautoescape %}
|
{% endautoescape %}
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<div class="center">
|
<div class="center">
|
||||||
<h1>{{ _('Page not found') }}</h1>
|
<h1>{{ _('Page not found') }}</h1>
|
||||||
{% autoescape false %}
|
{% autoescape false %}
|
||||||
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
|
<p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
|
||||||
{% endautoescape %}
|
{% endautoescape %}
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<div class="text-center">
|
<div class="text-center">
|
||||||
<h1>{{ _('Page not found') }}</h1>
|
<h1>{{ _('Page not found') }}</h1>
|
||||||
{% autoescape false %}
|
{% autoescape false %}
|
||||||
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
|
<p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
|
||||||
{% endautoescape %}
|
{% endautoescape %}
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<div class="center">
|
<div class="center">
|
||||||
<h1>{{ _('Page not found') }}</h1>
|
<h1>{{ _('Page not found') }}</h1>
|
||||||
{% autoescape false %}
|
{% autoescape false %}
|
||||||
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
|
<p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
|
||||||
{% endautoescape %}
|
{% endautoescape %}
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
|
@ -1,13 +1,16 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""Shared testing code."""
|
"""Shared testing code."""
|
||||||
|
|
||||||
from plone.testing import Layer
|
|
||||||
from unittest2 import TestCase
|
|
||||||
from os.path import dirname, join, abspath
|
|
||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
|
||||||
|
from os.path import dirname, join, abspath
|
||||||
|
|
||||||
|
from splinter import Browser
|
||||||
|
from unittest2 import TestCase
|
||||||
|
|
||||||
|
|
||||||
class SearxTestLayer:
|
class SearxTestLayer:
|
||||||
|
@ -32,7 +35,7 @@ class SearxTestLayer:
|
||||||
testTearDown = classmethod(testTearDown)
|
testTearDown = classmethod(testTearDown)
|
||||||
|
|
||||||
|
|
||||||
class SearxRobotLayer(Layer):
|
class SearxRobotLayer():
|
||||||
"""Searx Robot Test Layer"""
|
"""Searx Robot Test Layer"""
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
@ -62,7 +65,12 @@ class SearxRobotLayer(Layer):
|
||||||
del os.environ['SEARX_SETTINGS_PATH']
|
del os.environ['SEARX_SETTINGS_PATH']
|
||||||
|
|
||||||
|
|
||||||
SEARXROBOTLAYER = SearxRobotLayer()
|
# SEARXROBOTLAYER = SearxRobotLayer()
|
||||||
|
def run_robot_tests(tests):
|
||||||
|
print('Running {0} tests'.format(len(tests)))
|
||||||
|
for test in tests:
|
||||||
|
with Browser() as browser:
|
||||||
|
test(browser)
|
||||||
|
|
||||||
|
|
||||||
class SearxTestCase(TestCase):
|
class SearxTestCase(TestCase):
|
||||||
|
@ -72,17 +80,19 @@ class SearxTestCase(TestCase):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from tests.test_robot import test_suite
|
|
||||||
import sys
|
import sys
|
||||||
from zope.testrunner.runner import Runner
|
# test cases
|
||||||
|
from tests import robot
|
||||||
|
|
||||||
base_dir = abspath(join(dirname(__file__), '../tests'))
|
base_dir = abspath(join(dirname(__file__), '../tests'))
|
||||||
if sys.argv[1] == 'robot':
|
if sys.argv[1] == 'robot':
|
||||||
r = Runner(['--color',
|
test_layer = SearxRobotLayer()
|
||||||
'--auto-progress',
|
errors = False
|
||||||
'--stop-on-error',
|
try:
|
||||||
'--path',
|
test_layer.setUp()
|
||||||
base_dir],
|
run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')])
|
||||||
found_suites=[test_suite()])
|
except Exception:
|
||||||
r.run()
|
errors = True
|
||||||
sys.exit(int(r.failed))
|
print('Error occured: {0}'.format(traceback.format_exc()))
|
||||||
|
test_layer.tearDown()
|
||||||
|
sys.exit(1 if errors else 0)
|
||||||
|
|
28
searx/url_utils.py
Normal file
28
searx/url_utils.py
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
"""Python 2/3 compatibility shim for URL helper functions.

Import the URL helpers from this module instead of ``urllib`` / ``urlparse``
(Python 2) or ``urllib.parse`` (Python 3) so that callers do not need to know
which interpreter major version is running.
"""
from sys import version_info

if version_info[0] == 2:
    from urllib import quote, quote_plus, unquote, urlencode
    from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult
else:
    from urllib.parse import (
        parse_qsl,
        quote,
        quote_plus,
        unquote,
        urlencode,
        urljoin,
        urlparse,
        urlunparse,
        ParseResult
    )

# Names re-exported by this module.  ``__all__`` is the attribute Python
# consults for ``from searx.url_utils import *``; without it the star-import
# would also leak ``version_info``.
__all__ = [
    'parse_qsl',
    'quote',
    'quote_plus',
    'unquote',
    'urlencode',
    'urljoin',
    'urlparse',
    'urlunparse',
    'ParseResult',
]

# Retained unchanged for backward compatibility.  NOTE(review): presumably this
# tuple exists so the re-exported imports count as "used" for linters -- it has
# no effect on what Python itself exports.
__export__ = (parse_qsl,
              quote,
              quote_plus,
              unquote,
              urlencode,
              urljoin,
              urlparse,
              urlunparse,
              ParseResult)
|
|
@ -1,11 +1,9 @@
|
||||||
import cStringIO
|
|
||||||
import csv
|
import csv
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from babel.dates import format_date
|
from babel.dates import format_date
|
||||||
from codecs import getincrementalencoder
|
from codecs import getincrementalencoder
|
||||||
from HTMLParser import HTMLParser
|
|
||||||
from imp import load_source
|
from imp import load_source
|
||||||
from os.path import splitext, join
|
from os.path import splitext, join
|
||||||
from random import choice
|
from random import choice
|
||||||
|
@ -16,6 +14,19 @@ from searx.languages import language_codes
|
||||||
from searx import settings
|
from searx import settings
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
|
||||||
|
# ``cStringIO`` only exists on Python 2; fall back to ``io.StringIO`` on
# Python 3.  Catch ImportError specifically -- a bare ``except:`` would also
# swallow KeyboardInterrupt/SystemExit and mask unrelated failures.
try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO

# ``HTMLParser`` moved from the ``HTMLParser`` module (Python 2) to
# ``html.parser`` (Python 3).
try:
    from HTMLParser import HTMLParser
except ImportError:
    from html.parser import HTMLParser

# Python 3 dropped ``unichr`` and ``unicode``; alias them to their Python 3
# equivalents so the rest of the module can keep using the Python 2 names.
if sys.version_info[0] == 3:
    unichr = chr
    unicode = str
|
||||||
|
|
||||||
logger = logger.getChild('utils')
|
logger = logger.getChild('utils')
|
||||||
|
|
||||||
|
@ -140,7 +151,7 @@ class UnicodeWriter:
|
||||||
|
|
||||||
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
|
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
|
||||||
# Redirect output to a queue
|
# Redirect output to a queue
|
||||||
self.queue = cStringIO.StringIO()
|
self.queue = StringIO()
|
||||||
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
|
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
|
||||||
self.stream = f
|
self.stream = f
|
||||||
self.encoder = getincrementalencoder(encoding)()
|
self.encoder = getincrementalencoder(encoding)()
|
||||||
|
@ -152,14 +163,13 @@ class UnicodeWriter:
|
||||||
unicode_row.append(col.encode('utf-8').strip())
|
unicode_row.append(col.encode('utf-8').strip())
|
||||||
else:
|
else:
|
||||||
unicode_row.append(col)
|
unicode_row.append(col)
|
||||||
self.writer.writerow(unicode_row)
|
self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row])
|
||||||
# Fetch UTF-8 output from the queue ...
|
# Fetch UTF-8 output from the queue ...
|
||||||
data = self.queue.getvalue()
|
data = self.queue.getvalue().strip('\x00')
|
||||||
data = data.decode("utf-8")
|
|
||||||
# ... and reencode it into the target encoding
|
# ... and reencode it into the target encoding
|
||||||
data = self.encoder.encode(data)
|
data = self.encoder.encode(data)
|
||||||
# write to the target stream
|
# write to the target stream
|
||||||
self.stream.write(data)
|
self.stream.write(data.decode('utf-8'))
|
||||||
# empty queue
|
# empty queue
|
||||||
self.queue.truncate(0)
|
self.queue.truncate(0)
|
||||||
|
|
||||||
|
@ -231,7 +241,7 @@ def dict_subset(d, properties):
|
||||||
|
|
||||||
def prettify_url(url, max_length=74):
|
def prettify_url(url, max_length=74):
|
||||||
if len(url) > max_length:
|
if len(url) > max_length:
|
||||||
chunk_len = max_length / 2 + 1
|
chunk_len = int(max_length / 2 + 1)
|
||||||
return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
|
return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
|
||||||
else:
|
else:
|
||||||
return url
|
return url
|
||||||
|
|
|
@ -22,11 +22,12 @@ if __name__ == '__main__':
|
||||||
from os.path import realpath, dirname
|
from os.path import realpath, dirname
|
||||||
path.append(realpath(dirname(realpath(__file__)) + '/../'))
|
path.append(realpath(dirname(realpath(__file__)) + '/../'))
|
||||||
|
|
||||||
import cStringIO
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
@ -42,8 +43,6 @@ except:
|
||||||
exit(1)
|
exit(1)
|
||||||
from cgi import escape
|
from cgi import escape
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from urllib import urlencode
|
|
||||||
from urlparse import urlparse, urljoin
|
|
||||||
from werkzeug.contrib.fixers import ProxyFix
|
from werkzeug.contrib.fixers import ProxyFix
|
||||||
from flask import (
|
from flask import (
|
||||||
Flask, request, render_template, url_for, Response, make_response,
|
Flask, request, render_template, url_for, Response, make_response,
|
||||||
|
@ -52,7 +51,7 @@ from flask import (
|
||||||
from flask_babel import Babel, gettext, format_date, format_decimal
|
from flask_babel import Babel, gettext, format_date, format_decimal
|
||||||
from flask.json import jsonify
|
from flask.json import jsonify
|
||||||
from searx import settings, searx_dir, searx_debug
|
from searx import settings, searx_dir, searx_debug
|
||||||
from searx.exceptions import SearxException, SearxParameterException
|
from searx.exceptions import SearxParameterException
|
||||||
from searx.engines import (
|
from searx.engines import (
|
||||||
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
|
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
|
||||||
)
|
)
|
||||||
|
@ -69,6 +68,7 @@ from searx.autocomplete import searx_bang, backends as autocomplete_backends
|
||||||
from searx.plugins import plugins
|
from searx.plugins import plugins
|
||||||
from searx.preferences import Preferences, ValidationException
|
from searx.preferences import Preferences, ValidationException
|
||||||
from searx.answerers import answerers
|
from searx.answerers import answerers
|
||||||
|
from searx.url_utils import urlencode, urlparse, urljoin
|
||||||
|
|
||||||
# check if the pyopenssl package is installed.
|
# check if the pyopenssl package is installed.
|
||||||
# It is needed for SSL connection without trouble, see #298
|
# It is needed for SSL connection without trouble, see #298
|
||||||
|
@ -78,6 +78,15 @@ except ImportError:
|
||||||
logger.critical("The pyopenssl package has to be installed.\n"
|
logger.critical("The pyopenssl package has to be installed.\n"
|
||||||
"Some HTTPS connections will fail")
|
"Some HTTPS connections will fail")
|
||||||
|
|
||||||
|
try:
|
||||||
|
from cStringIO import StringIO
|
||||||
|
except:
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info[0] == 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
# serve pages with HTTP/1.1
|
# serve pages with HTTP/1.1
|
||||||
from werkzeug.serving import WSGIRequestHandler
|
from werkzeug.serving import WSGIRequestHandler
|
||||||
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
|
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
|
||||||
|
@ -357,6 +366,8 @@ def render(template_name, override_theme=None, **kwargs):
|
||||||
|
|
||||||
kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
|
kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
|
||||||
|
|
||||||
|
kwargs['unicode'] = unicode
|
||||||
|
|
||||||
kwargs['scripts'] = set()
|
kwargs['scripts'] = set()
|
||||||
for plugin in request.user_plugins:
|
for plugin in request.user_plugins:
|
||||||
for script in plugin.js_dependencies:
|
for script in plugin.js_dependencies:
|
||||||
|
@ -375,7 +386,7 @@ def render(template_name, override_theme=None, **kwargs):
|
||||||
def pre_request():
|
def pre_request():
|
||||||
request.errors = []
|
request.errors = []
|
||||||
|
|
||||||
preferences = Preferences(themes, categories.keys(), engines, plugins)
|
preferences = Preferences(themes, list(categories.keys()), engines, plugins)
|
||||||
request.preferences = preferences
|
request.preferences = preferences
|
||||||
try:
|
try:
|
||||||
preferences.parse_cookies(request.cookies)
|
preferences.parse_cookies(request.cookies)
|
||||||
|
@ -479,10 +490,8 @@ def index():
|
||||||
for result in results:
|
for result in results:
|
||||||
if output_format == 'html':
|
if output_format == 'html':
|
||||||
if 'content' in result and result['content']:
|
if 'content' in result and result['content']:
|
||||||
result['content'] = highlight_content(escape(result['content'][:1024]),
|
result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
|
||||||
search_query.query.encode('utf-8'))
|
result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
|
||||||
result['title'] = highlight_content(escape(result['title'] or u''),
|
|
||||||
search_query.query.encode('utf-8'))
|
|
||||||
else:
|
else:
|
||||||
if result.get('content'):
|
if result.get('content'):
|
||||||
result['content'] = html_to_text(result['content']).strip()
|
result['content'] = html_to_text(result['content']).strip()
|
||||||
|
@ -510,7 +519,7 @@ def index():
|
||||||
result['publishedDate'] = format_date(result['publishedDate'])
|
result['publishedDate'] = format_date(result['publishedDate'])
|
||||||
|
|
||||||
if output_format == 'json':
|
if output_format == 'json':
|
||||||
return Response(json.dumps({'query': search_query.query,
|
return Response(json.dumps({'query': search_query.query.decode('utf-8'),
|
||||||
'number_of_results': number_of_results,
|
'number_of_results': number_of_results,
|
||||||
'results': results,
|
'results': results,
|
||||||
'answers': list(result_container.answers),
|
'answers': list(result_container.answers),
|
||||||
|
@ -519,7 +528,7 @@ def index():
|
||||||
'suggestions': list(result_container.suggestions)}),
|
'suggestions': list(result_container.suggestions)}),
|
||||||
mimetype='application/json')
|
mimetype='application/json')
|
||||||
elif output_format == 'csv':
|
elif output_format == 'csv':
|
||||||
csv = UnicodeWriter(cStringIO.StringIO())
|
csv = UnicodeWriter(StringIO())
|
||||||
keys = ('title', 'url', 'content', 'host', 'engine', 'score')
|
keys = ('title', 'url', 'content', 'host', 'engine', 'score')
|
||||||
csv.writerow(keys)
|
csv.writerow(keys)
|
||||||
for row in results:
|
for row in results:
|
||||||
|
@ -527,7 +536,7 @@ def index():
|
||||||
csv.writerow([row.get(key, '') for key in keys])
|
csv.writerow([row.get(key, '') for key in keys])
|
||||||
csv.stream.seek(0)
|
csv.stream.seek(0)
|
||||||
response = Response(csv.stream.read(), mimetype='application/csv')
|
response = Response(csv.stream.read(), mimetype='application/csv')
|
||||||
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
|
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
|
||||||
response.headers.add('Content-Disposition', cont_disp)
|
response.headers.add('Content-Disposition', cont_disp)
|
||||||
return response
|
return response
|
||||||
elif output_format == 'rss':
|
elif output_format == 'rss':
|
||||||
|
@ -578,7 +587,7 @@ def autocompleter():
|
||||||
disabled_engines = request.preferences.engines.get_disabled()
|
disabled_engines = request.preferences.engines.get_disabled()
|
||||||
|
|
||||||
# parse query
|
# parse query
|
||||||
raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines)
|
raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
|
||||||
raw_text_query.parse_query()
|
raw_text_query.parse_query()
|
||||||
|
|
||||||
# check if search query is set
|
# check if search query is set
|
||||||
|
@ -820,6 +829,7 @@ def page_not_found(e):
|
||||||
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
|
logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
|
||||||
app.run(
|
app.run(
|
||||||
debug=searx_debug,
|
debug=searx_debug,
|
||||||
use_debugger=searx_debug,
|
use_debugger=searx_debug,
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
|
url = "http://localhost:11111/"
|
||||||
|
|
||||||
|
|
||||||
|
def test_index(browser):
|
||||||
|
# Visit URL
|
||||||
|
browser.visit(url)
|
||||||
|
assert browser.is_text_present('about')
|
||||||
|
|
||||||
|
|
||||||
|
def test_404(browser):
|
||||||
|
# Visit URL
|
||||||
|
browser.visit(url + 'missing_link')
|
||||||
|
assert browser.is_text_present('Page not found')
|
||||||
|
|
||||||
|
|
||||||
|
def test_about(browser):
|
||||||
|
browser.visit(url)
|
||||||
|
browser.click_link_by_text('about')
|
||||||
|
assert browser.is_text_present('Why use searx?')
|
||||||
|
|
||||||
|
|
||||||
|
def test_preferences(browser):
|
||||||
|
browser.visit(url)
|
||||||
|
browser.click_link_by_text('preferences')
|
||||||
|
assert browser.is_text_present('Preferences')
|
||||||
|
assert browser.is_text_present('Cookies')
|
||||||
|
|
||||||
|
assert browser.is_element_present_by_xpath('//label[@for="checkbox_dummy"]')
|
||||||
|
|
||||||
|
|
||||||
|
def test_preferences_engine_select(browser):
|
||||||
|
browser.visit(url)
|
||||||
|
browser.click_link_by_text('preferences')
|
||||||
|
|
||||||
|
assert browser.is_element_present_by_xpath('//a[@href="#tab_engine"]')
|
||||||
|
browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
|
||||||
|
|
||||||
|
assert not browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
|
||||||
|
browser.find_by_xpath('//label[@for="engine_general_dummy__general"]').first.check()
|
||||||
|
browser.find_by_xpath('//input[@value="save"]').first.click()
|
||||||
|
|
||||||
|
# waiting for the redirect - without this the test is flaky..
|
||||||
|
sleep(1)
|
||||||
|
|
||||||
|
browser.visit(url)
|
||||||
|
browser.click_link_by_text('preferences')
|
||||||
|
browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
|
||||||
|
|
||||||
|
assert browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
|
||||||
|
|
||||||
|
|
||||||
|
def test_preferences_locale(browser):
|
||||||
|
browser.visit(url)
|
||||||
|
browser.click_link_by_text('preferences')
|
||||||
|
|
||||||
|
browser.select('locale', 'hu')
|
||||||
|
browser.find_by_xpath('//input[@value="save"]').first.click()
|
||||||
|
|
||||||
|
# waiting for the redirect - without this the test is flaky..
|
||||||
|
sleep(1)
|
||||||
|
|
||||||
|
browser.visit(url)
|
||||||
|
browser.click_link_by_text('beállítások')
|
||||||
|
browser.is_text_present('Beállítások')
|
||||||
|
|
||||||
|
|
||||||
|
def test_search(browser):
|
||||||
|
browser.visit(url)
|
||||||
|
browser.fill('q', 'test search query')
|
||||||
|
browser.find_by_xpath('//button[@type="submit"]').first.click()
|
||||||
|
assert browser.is_text_present('didn\'t find any results')
|
|
@ -1,153 +0,0 @@
|
||||||
*** Settings ***
|
|
||||||
Library Selenium2Library timeout=10 implicit_wait=0.5
|
|
||||||
Test Setup Open Browser http://localhost:11111/
|
|
||||||
Test Teardown Close All Browsers
|
|
||||||
|
|
||||||
|
|
||||||
*** Keywords ***
|
|
||||||
Submit Preferences
|
|
||||||
Set Selenium Speed 2 seconds
|
|
||||||
Submit Form id=search_form
|
|
||||||
Location Should Be http://localhost:11111/
|
|
||||||
Set Selenium Speed 0 seconds
|
|
||||||
|
|
||||||
|
|
||||||
*** Test Cases ***
|
|
||||||
Front page
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
|
|
||||||
404 page
|
|
||||||
Go To http://localhost:11111/no-such-page
|
|
||||||
Page Should Contain Page not found
|
|
||||||
Page Should Contain Go to search page
|
|
||||||
|
|
||||||
About page
|
|
||||||
Click Element link=about
|
|
||||||
Page Should Contain Why use searx?
|
|
||||||
Page Should Contain Element link=search engines
|
|
||||||
|
|
||||||
Preferences page
|
|
||||||
Click Element link=preferences
|
|
||||||
Page Should Contain Preferences
|
|
||||||
Page Should Contain Default categories
|
|
||||||
Page Should Contain Currently used search engines
|
|
||||||
Page Should Contain dummy dummy
|
|
||||||
Page Should Contain general dummy
|
|
||||||
|
|
||||||
Switch category
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
Page Should Contain Checkbox category_general
|
|
||||||
Page Should Contain Checkbox category_dummy
|
|
||||||
Click Element xpath=//*[.="general"]
|
|
||||||
Click Element xpath=//*[.="dummy"]
|
|
||||||
Submit Preferences
|
|
||||||
Checkbox Should Not Be Selected category_general
|
|
||||||
Checkbox Should Be Selected category_dummy
|
|
||||||
|
|
||||||
Change language
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
Select From List locale hu
|
|
||||||
Submit Preferences
|
|
||||||
Page Should Contain rólunk
|
|
||||||
Page Should Contain beállítások
|
|
||||||
|
|
||||||
Change method
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
Select From List method GET
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be method GET
|
|
||||||
Select From List method POST
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be method POST
|
|
||||||
|
|
||||||
Change theme
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be theme legacy
|
|
||||||
Select From List theme oscar
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be theme oscar
|
|
||||||
|
|
||||||
Change safesearch
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be safesearch None
|
|
||||||
Select From List safesearch Strict
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be safesearch Strict
|
|
||||||
|
|
||||||
Change image proxy
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be image_proxy Disabled
|
|
||||||
Select From List image_proxy Enabled
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be image_proxy Enabled
|
|
||||||
|
|
||||||
Change search language
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be language Default language
|
|
||||||
Select From List language Türkçe - tr-TR
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be language Türkçe - tr-TR
|
|
||||||
|
|
||||||
Change autocomplete
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be autocomplete -
|
|
||||||
Select From List autocomplete google
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be autocomplete google
|
|
||||||
|
|
||||||
Change allowed/disabled engines
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
Page Should Contain Engine name
|
|
||||||
Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block
|
|
||||||
Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] Block
|
|
||||||
Click Element xpath=//label[@class="deny"][@for='engine_general_general_dummy']
|
|
||||||
Submit Preferences
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
Page Should Contain Engine name
|
|
||||||
Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block
|
|
||||||
Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] \
|
|
||||||
|
|
||||||
Block a plugin
|
|
||||||
Page Should Contain about
|
|
||||||
Page Should Contain preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be theme legacy
|
|
||||||
Select From List theme oscar
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
List Selection Should Be theme oscar
|
|
||||||
Page Should Contain Plugins
|
|
||||||
Click Link Plugins
|
|
||||||
Checkbox Should Not Be Selected id=plugin_HTTPS_rewrite
|
|
||||||
Click Element xpath=//label[@for='plugin_HTTPS_rewrite']
|
|
||||||
Submit Preferences
|
|
||||||
Go To http://localhost:11111/preferences
|
|
||||||
Page Should Contain Plugins
|
|
||||||
Click Link Plugins
|
|
||||||
Checkbox Should Be Selected id=plugin_HTTPS_rewrite
|
|
|
@ -25,7 +25,7 @@ class TestArchLinuxEngine(SearxTestCase):
|
||||||
self.assertTrue(query in params['url'])
|
self.assertTrue(query in params['url'])
|
||||||
self.assertTrue('wiki.archlinux.org' in params['url'])
|
self.assertTrue('wiki.archlinux.org' in params['url'])
|
||||||
|
|
||||||
for lang, domain in domains.iteritems():
|
for lang, domain in domains.items():
|
||||||
dic['language'] = lang
|
dic['language'] = lang
|
||||||
params = archlinux.request(query, dic)
|
params = archlinux.request(query, dic)
|
||||||
self.assertTrue(domain in params['url'])
|
self.assertTrue(domain in params['url'])
|
||||||
|
@ -102,5 +102,5 @@ class TestArchLinuxEngine(SearxTestCase):
|
||||||
for exp in expected:
|
for exp in expected:
|
||||||
res = results[i]
|
res = results[i]
|
||||||
i += 1
|
i += 1
|
||||||
for key, value in exp.iteritems():
|
for key, value in exp.items():
|
||||||
self.assertEqual(res[key], value)
|
self.assertEqual(res[key], value)
|
||||||
|
|
|
@ -7,18 +7,18 @@ from searx.testing import SearxTestCase
|
||||||
class TestBingEngine(SearxTestCase):
|
class TestBingEngine(SearxTestCase):
|
||||||
|
|
||||||
def test_request(self):
|
def test_request(self):
|
||||||
query = 'test_query'
|
query = u'test_query'
|
||||||
dicto = defaultdict(dict)
|
dicto = defaultdict(dict)
|
||||||
dicto['pageno'] = 0
|
dicto['pageno'] = 0
|
||||||
dicto['language'] = 'fr_FR'
|
dicto['language'] = 'fr_FR'
|
||||||
params = bing.request(query, dicto)
|
params = bing.request(query.encode('utf-8'), dicto)
|
||||||
self.assertTrue('url' in params)
|
self.assertTrue('url' in params)
|
||||||
self.assertTrue(query in params['url'])
|
self.assertTrue(query in params['url'])
|
||||||
self.assertTrue('language%3AFR' in params['url'])
|
self.assertTrue('language%3AFR' in params['url'])
|
||||||
self.assertTrue('bing.com' in params['url'])
|
self.assertTrue('bing.com' in params['url'])
|
||||||
|
|
||||||
dicto['language'] = 'all'
|
dicto['language'] = 'all'
|
||||||
params = bing.request(query, dicto)
|
params = bing.request(query.encode('utf-8'), dicto)
|
||||||
self.assertTrue('language' in params['url'])
|
self.assertTrue('language' in params['url'])
|
||||||
|
|
||||||
def test_response(self):
|
def test_response(self):
|
||||||
|
|
|
@ -36,10 +36,10 @@ class TestBingNewsEngine(SearxTestCase):
|
||||||
self.assertRaises(AttributeError, bing_news.response, '')
|
self.assertRaises(AttributeError, bing_news.response, '')
|
||||||
self.assertRaises(AttributeError, bing_news.response, '[]')
|
self.assertRaises(AttributeError, bing_news.response, '[]')
|
||||||
|
|
||||||
response = mock.Mock(content='<html></html>')
|
response = mock.Mock(text='<html></html>')
|
||||||
self.assertEqual(bing_news.response(response), [])
|
self.assertEqual(bing_news.response(response), [])
|
||||||
|
|
||||||
response = mock.Mock(content='<html></html>')
|
response = mock.Mock(text='<html></html>')
|
||||||
self.assertEqual(bing_news.response(response), [])
|
self.assertEqual(bing_news.response(response), [])
|
||||||
|
|
||||||
html = """<?xml version="1.0" encoding="utf-8" ?>
|
html = """<?xml version="1.0" encoding="utf-8" ?>
|
||||||
|
@ -74,7 +74,7 @@ class TestBingNewsEngine(SearxTestCase):
|
||||||
</item>
|
</item>
|
||||||
</channel>
|
</channel>
|
||||||
</rss>""" # noqa
|
</rss>""" # noqa
|
||||||
response = mock.Mock(content=html)
|
response = mock.Mock(text=html.encode('utf-8'))
|
||||||
results = bing_news.response(response)
|
results = bing_news.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 2)
|
self.assertEqual(len(results), 2)
|
||||||
|
@ -113,7 +113,7 @@ class TestBingNewsEngine(SearxTestCase):
|
||||||
</item>
|
</item>
|
||||||
</channel>
|
</channel>
|
||||||
</rss>""" # noqa
|
</rss>""" # noqa
|
||||||
response = mock.Mock(content=html)
|
response = mock.Mock(text=html.encode('utf-8'))
|
||||||
results = bing_news.response(response)
|
results = bing_news.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 1)
|
self.assertEqual(len(results), 1)
|
||||||
|
@ -136,11 +136,11 @@ class TestBingNewsEngine(SearxTestCase):
|
||||||
</channel>
|
</channel>
|
||||||
</rss>""" # noqa
|
</rss>""" # noqa
|
||||||
|
|
||||||
response = mock.Mock(content=html)
|
response = mock.Mock(text=html.encode('utf-8'))
|
||||||
results = bing_news.response(response)
|
results = bing_news.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 0)
|
self.assertEqual(len(results), 0)
|
||||||
|
|
||||||
html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
|
html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
|
||||||
response = mock.Mock(content=html)
|
response = mock.Mock(text=html.encode('utf-8'))
|
||||||
self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)
|
self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)
|
||||||
|
|
|
@ -22,10 +22,10 @@ class TestBtdiggEngine(SearxTestCase):
|
||||||
self.assertRaises(AttributeError, btdigg.response, '')
|
self.assertRaises(AttributeError, btdigg.response, '')
|
||||||
self.assertRaises(AttributeError, btdigg.response, '[]')
|
self.assertRaises(AttributeError, btdigg.response, '[]')
|
||||||
|
|
||||||
response = mock.Mock(content='<html></html>')
|
response = mock.Mock(text='<html></html>')
|
||||||
self.assertEqual(btdigg.response(response), [])
|
self.assertEqual(btdigg.response(response), [])
|
||||||
|
|
||||||
html = """
|
html = u"""
|
||||||
<div id="search_res">
|
<div id="search_res">
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
|
@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase):
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
response = mock.Mock(content=html)
|
response = mock.Mock(text=html.encode('utf-8'))
|
||||||
results = btdigg.response(response)
|
results = btdigg.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 1)
|
self.assertEqual(len(results), 1)
|
||||||
|
@ -101,12 +101,12 @@ class TestBtdiggEngine(SearxTestCase):
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
response = mock.Mock(content=html)
|
response = mock.Mock(text=html.encode('utf-8'))
|
||||||
results = btdigg.response(response)
|
results = btdigg.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 0)
|
self.assertEqual(len(results), 0)
|
||||||
|
|
||||||
html = """
|
html = u"""
|
||||||
<div id="search_res">
|
<div id="search_res">
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
|
@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase):
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
response = mock.Mock(content=html)
|
response = mock.Mock(text=html.encode('utf-8'))
|
||||||
results = btdigg.response(response)
|
results = btdigg.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 5)
|
self.assertEqual(len(results), 5)
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue