commit 132681b3aa by pw3t, 2014-01-23 22:11:36 +01:00
52 changed files with 889 additions and 412 deletions

.gitignore
View file

@ -1,20 +1,24 @@
env
engines.cfg
.installed.cfg
.coverage
covearge/
.installed.cfg
engines.cfg
env
robot_log.html
robot_output.xml
robot_report.html
setup.cfg
*.pyc
*/*.pyc
bin/
build/
covearge/
develop-eggs/
dist/
eggs/
include/
lib/
build/
develop-eggs/
eggs/
local/
searx.egg-info/
parts/
searx.egg-info/
var/

View file

@ -21,11 +21,7 @@ $(python):
tests: .installed.cfg
@bin/test
enginescfg:
@test -f ./engines.cfg || echo "Copying engines.cfg ..."
@cp --no-clobber engines.cfg_sample engines.cfg
robot: .installed.cfg enginescfg
robot: .installed.cfg
@bin/robot
flake8: .installed.cfg
@ -37,18 +33,21 @@ coverage: .installed.cfg
@bin/coverage report --show-missing
@bin/coverage html --directory ./coverage
production: bin/buildout production.cfg setup.py enginescfg
production: bin/buildout production.cfg setup.py
bin/buildout -c production.cfg $(options)
@echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`"
@echo "* Hint 1: on production, disable debug mode and change secret_key"
@echo "* Hint 2: searx will be executed at server startup by crontab"
@echo "* Hint 3: to run immediatley, execute 'bin/supervisord'"
minimal: bin/buildout minimal.cfg setup.py enginescfg
minimal: bin/buildout minimal.cfg setup.py
bin/buildout -c minimal.cfg $(options)
locales:
@pybabel compile -d searx/translations
clean:
@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
searx.egg-info lib include .coverage coverage
.PHONY: all tests enginescfg robot flake8 coverage production minimal clean
.PHONY: all tests robot flake8 coverage production minimal locales clean

README.md
View file

@ -1,122 +0,0 @@
searx
=====
A privacy-respecting, hackable [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engine).
List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instances).
[![Flattr searx](http://api.flattr.com/button/flattr-badge-large.png)](https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software)
### Features
* Tracking free
* Modular (see [examples](https://github.com/asciimoo/searx/blob/master/examples))
* Parallel queries
* Supports multiple output formats
* json `curl https://searx.0x2a.tk/?format=json&q=[query]`
* csv `curl https://searx.0x2a.tk/?format=csv&q=[query]`
* opensearch/rss `curl https://searx.0x2a.tk/?format=rss&q=[query]`
* Opensearch support (you can set as default search engine)
* Configurable search engines/categories
### Installation
* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
* install dependencies: `pip install -r requirements.txt`
* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
* run `python searx/webapp.py` to start the application
For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)
### Alternative (Recommended) Installation
* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
* build in current folder: `make minimal`
* run `bin/searx-run` to start the application
### Development
Just run `make`. Versions of dependencies are pinned down inside `versions.cfg` to produce the most stable build. Also remember: NO make command should be run as root, not even `make production`.
### Deployment
* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
* build in current folder: `make production`
* run `bin/supervisord` to start the application
### Upgrading
* inside previously cloned searx directory run: `git stash` to temporarily save any changes you have made
* pull source: `git pull origin master`
* re-build in current folder: `make production`
* run `bin/supervisorctl stop searx` to stop searx; if it does not stop, then run `fuser -k 8888/tcp`
* run `bin/supervisorctl reload` to re-read supervisor config and start searx
### Command make
##### `make`
Builds development environment with testing support.
##### `make tests`
Runs tests. You can write tests [here](https://github.com/asciimoo/searx/tree/master/searx/tests) and remember 'untested code is broken code'.
##### `make robot`
Runs robot (Selenium) tests. You must have `firefox` installed because these functional tests actually run the browser and perform operations on it. Also, searx is executed with [settings_robot](https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py).
##### `make flake8`
'pep8 is a tool to check your Python code against some of the style conventions in [PEP 8](http://www.python.org/dev/peps/pep-0008/).'
##### `make coverage`
Checks test coverage; after running this, open the report with `firefox ./coverage/index.html`.
##### `make production`
Used to make the so-called production environment - without tests (you should run tests before deploying searx on the server). This installs supervisord, so if searx crashes, it will try to pick itself up again. A crontab entry is also added to start supervisord at server boot.
##### `make minimal`
Minimal build - without test frameworks, the quickest build option.
##### `make clean`
Deletes several folders and files (see `Makefile` for more), so that next time you run any other `make` command it will rebuild everything.
### TODO
* Moar engines
* Better ui
* Language support
* Documentation
* Pagination
* Fix `flake8` errors; `make flake8` will be merged into `make tests` once it no longer fails
* Tests
* When we have more tests, we can integrate Travis-CI
### Bugs
Bugs or suggestions? Visit the [issue tracker](https://github.com/asciimoo/searx/issues).
### [License](https://github.com/asciimoo/searx/blob/master/LICENSE)
### More about searx
* [ohloh](https://www.ohloh.net/p/searx/)
* [twitter](https://twitter.com/Searx_engine)
* IRC: #searx @ freenode

README.rst (new file)
View file

@ -0,0 +1,159 @@
searx
=====
A privacy-respecting, hackable `metasearch
engine <https://en.wikipedia.org/wiki/Metasearch_engine>`__.
List of `running
instances <https://github.com/asciimoo/searx/wiki/Searx-instances>`__.
|Flattr searx|
Features
~~~~~~~~
- Tracking free
- Modular (see
`examples <https://github.com/asciimoo/searx/blob/master/examples>`__)
- Parallel queries
- Supports multiple output formats (see the sketch after this list)
- json ``curl https://searx.0x2a.tk/?format=json&q=[query]``
- csv ``curl https://searx.0x2a.tk/?format=csv&q=[query]``
- opensearch/rss ``curl https://searx.0x2a.tk/?format=rss&q=[query]``
- Opensearch support (you can set as default search engine)
- Configurable search engines/categories
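The output formats above can also be consumed programmatically; a minimal
sketch (illustrative only, assuming the public instance at searx.0x2a.tk,
the ``requests`` library, and result fields named as in the result
templates of this commit)::

    import requests

    resp = requests.get('https://searx.0x2a.tk/',
                        params={'format': 'json', 'q': 'metasearch'})
    for result in resp.json().get('results', []):
        print result['title'], result['url']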
Installation
~~~~~~~~~~~~
- clone source:
``git clone git@github.com:asciimoo/searx.git && cd searx``
- install dependencies: ``pip install -r requirements.txt``
- edit your
`settings.yml <https://github.com/asciimoo/searx/blob/master/settings.yml>`__
(set your ``secret_key``!)
- run ``python searx/webapp.py`` to start the application
For all the details, follow this `step by step
installation <https://github.com/asciimoo/searx/wiki/Installation>`__
Alternative (Recommended) Installation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- clone source:
``git clone git@github.com:asciimoo/searx.git && cd searx``
- build in current folder: ``make minimal``
- run ``bin/searx-run`` to start the application
Development
~~~~~~~~~~~
Just run ``make``. Versions of dependencies are pinned down inside
``versions.cfg`` to produce the most stable build. Also remember: NO make
command should be run as root, not even ``make production``.
Deployment
~~~~~~~~~~
- clone source:
``git clone git@github.com:asciimoo/searx.git && cd searx``
- build in current folder: ``make production``
- run ``bin/supervisord`` to start the application
Upgrading
~~~~~~~~~
- inside previously cloned searx directory run: ``git stash`` to
temporarily save any changes you have made
- pull source: ``git pull origin master``
- re-build in current folder: ``make production``
- run ``bin/supervisorctl stop searx`` to stop searx; if it does not stop,
then run ``fuser -k 8888/tcp``
- run ``bin/supervisorctl reload`` to re-read supervisor config and
start searx
Command make
~~~~~~~~~~~~
``make``
''''''''
Builds development environment with testing support.
``make tests``
''''''''''''''
Runs tests. You can write tests
`here <https://github.com/asciimoo/searx/tree/master/searx/tests>`__ and
remember 'untested code is broken code'.
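A test is an ordinary ``unittest2`` case; a minimal sketch (illustrative
only, exercising the ``html_to_text`` helper from ``searx/utils.py`` that
appears further down in this commit)::

    from unittest2 import TestCase

    from searx.utils import html_to_text


    class TestHtmlToText(TestCase):

        def test_strips_markup(self):
            # html_to_text feeds the markup through HTMLTextExtractor
            self.assertEqual(html_to_text(u'<b>searx</b>'), u'searx')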
``make robot``
''''''''''''''
Runs robot (Selenium) tests. You must have ``firefox`` installed because
these functional tests actually run the browser and perform operations on
it. Also, searx is executed with
`settings\_robot <https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py>`__.
``make flake8``
'''''''''''''''
'pep8 is a tool to check your Python code against some of the style
conventions in `PEP 8 <http://www.python.org/dev/peps/pep-0008/>`__.'
``make coverage``
'''''''''''''''''
Checks test coverage; after running this, open the report with
``firefox ./coverage/index.html``
``make production``
'''''''''''''''''''
Used to make the so-called production environment - without tests (you
should run tests before deploying searx on the server). This installs
supervisord, so if searx crashes, it will try to pick itself up again.
A crontab entry is also added to start supervisord at server boot.
``make minimal``
''''''''''''''''
Minimal build - without test frameworks, the quickest build option.
``make clean``
''''''''''''''
Deletes several folders and files (see ``Makefile`` for more), so that
next time you run any other ``make`` command it will rebuild everything.
TODO
~~~~
- Moar engines
- Better ui
- Language support
- Documentation
- Pagination
- Fix ``flake8`` errors; ``make flake8`` will be merged into
``make tests`` once it no longer fails
- Tests
- When we have more tests, we can integrate Travis-CI
Bugs
~~~~
Bugs or suggestions? Visit the `issue
tracker <https://github.com/asciimoo/searx/issues>`__.
`License <https://github.com/asciimoo/searx/blob/master/LICENSE>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
More about searx
~~~~~~~~~~~~~~~~
- `ohloh <https://www.ohloh.net/p/searx/>`__
- `twitter <https://twitter.com/Searx_engine>`__
- IRC: #searx @ freenode
.. |Flattr searx| image:: http://api.flattr.com/button/flattr-badge-large.png
:target: https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software

babel.cfg (new file)
View file

@ -0,0 +1,3 @@
[python: **.py]
[jinja2: **/templates/**.html]
extensions=jinja2.ext.autoescape,jinja2.ext.with_
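This mapping tells pybabel where to collect translatable strings from: Python modules and the Jinja2 templates (with the autoescape and with_ extensions enabled for parsing). A minimal sketch of driving the extract and compile steps from Python instead of the shell (illustrative only; the Makefile's locales target runs just the compile step):

    # illustrative only: invoke Babel's message commands programmatically
    from babel.messages.frontend import CommandLineInterface

    cli = CommandLineInterface()
    # collect strings marked with _() / gettext() into a catalog template
    cli.run(['pybabel', 'extract', '-F', 'babel.cfg',
             '-o', 'messages.pot', 'searx'])
    # compile the per-language .po catalogs (e.g. the Hungarian one further
    # down in this commit) into .mo files
    cli.run(['pybabel', 'compile', '-d', 'searx/translations'])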

View file

@ -16,8 +16,6 @@ recipe = zc.recipe.egg:script
eggs = ${buildout:eggs}
interpreter = py
dependent-scripts = true
entry-points =
searx-run=searx.webapp:run
[robot]

View file

@ -13,5 +13,3 @@ parts +=
recipe = zc.recipe.egg:script
eggs = ${buildout:eggs}
interpreter = py
entry-points =
searx-run=searx.webapp:run

View file

@ -15,8 +15,6 @@ parts +=
recipe = zc.recipe.egg:script
eggs = ${buildout:eggs}
interpreter = py
entry-points =
searx-run=searx.webapp:run
[supervisor]

View file

@ -1,4 +1,5 @@
flask
flask-babel
grequests
lxml
pyyaml

View file

@ -1,5 +1,5 @@
from os import environ
from os.path import realpath, dirname, join
from os.path import realpath, dirname, join, abspath
try:
from yaml import load
except:
@ -7,8 +7,7 @@ except:
stderr.write('[E] install pyyaml\n')
exit(2)
searx_dir = realpath(dirname(realpath(__file__))+'/../')
searx_dir = abspath(dirname(__file__))
engine_dir = dirname(realpath(__file__))
if 'SEARX_SETTINGS_PATH' in environ:
@ -19,4 +18,3 @@ else:
with open(settings_path) as settings_yaml:
settings = load(settings_yaml)

View file

@ -26,6 +26,7 @@ from searx import settings
from searx.utils import gen_useragent
import sys
from datetime import datetime
from flask.ext.babel import gettext
engine_dir = dirname(realpath(__file__))
@ -35,6 +36,7 @@ engines = {}
categories = {'general': []}
def load_module(filename):
modname = splitext(filename)[0]
if modname in sys.modules:
@ -58,38 +60,50 @@ for engine_data in settings['engines']:
if engine_data['categories'] == 'none':
engine.categories = []
else:
engine.categories = map(str.strip, engine_data['categories'].split(','))
engine.categories = map(
str.strip, engine_data['categories'].split(','))
continue
setattr(engine, param_name, engine_data[param_name])
for engine_attr in dir(engine):
if engine_attr.startswith('_'):
continue
if getattr(engine, engine_attr) == None:
print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)
if getattr(engine, engine_attr) is None:
print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr) # noqa
sys.exit(1)
engines[engine.name] = engine
engine.stats = {'result_count': 0, 'search_count': 0, 'page_load_time': 0, 'score_count': 0, 'errors': 0}
engine.stats = {
'result_count': 0,
'search_count': 0,
'page_load_time': 0,
'score_count': 0,
'errors': 0
}
if hasattr(engine, 'categories'):
for category_name in engine.categories:
categories.setdefault(category_name, []).append(engine)
else:
categories['general'].append(engine)
def default_request_params():
return {'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
return {
'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
def make_callback(engine_name, results, suggestions, callback, params):
# creating a callback wrapper for the search engine results
def process_callback(response, **kwargs):
cb_res = []
response.search_params = params
engines[engine_name].stats['page_load_time'] += (datetime.now() - params['started']).total_seconds()
engines[engine_name].stats['page_load_time'] += \
(datetime.now() - params['started']).total_seconds()
try:
search_results = callback(response)
except Exception, e:
engines[engine_name].stats['errors'] += 1
results[engine_name] = cb_res
print '[E] Error with engine "{0}":\n\t{1}'.format(engine_name, str(e))
print '[E] Error with engine "{0}":\n\t{1}'.format(
engine_name, str(e))
return
for result in search_results:
result['engine'] = engine_name
@ -101,8 +115,10 @@ def make_callback(engine_name, results, suggestions, callback, params):
results[engine_name] = cb_res
return process_callback
def score_results(results):
flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
flat_res = filter(
None, chain.from_iterable(izip_longest(*results.values())))
flat_len = len(flat_res)
engines_len = len(results)
results = []
@ -116,8 +132,8 @@ def score_results(results):
score = int((flat_len - i) / engines_len) * weight + 1
duplicated = False
for new_res in results:
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path
p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
p1 == p2 and\
res['parsed_url'].query == new_res['parsed_url'].query and\
@ -125,7 +141,7 @@ def score_results(results):
duplicated = new_res
break
if duplicated:
if len(res.get('content', '')) > len(duplicated.get('content', '')):
if len(res.get('content', '')) > len(duplicated.get('content', '')): # noqa
duplicated['content'] = res['content']
duplicated['score'] += score
duplicated['engines'].append(res['engine'])
@ -139,6 +155,7 @@ def score_results(results):
results.append(res)
return sorted(results, key=itemgetter('score'), reverse=True)
def search(query, request, selected_engines):
global engines, categories, number_of_searches
requests = []
@ -160,12 +177,19 @@ def search(query, request, selected_engines):
request_params['started'] = datetime.now()
request_params = engine.request(query, request_params)
callback = make_callback(selected_engine['name'], results, suggestions, engine.response, request_params)
callback = make_callback(
selected_engine['name'],
results,
suggestions,
engine.response,
request_params
)
request_args = dict(headers = request_params['headers']
,hooks = dict(response=callback)
,cookies = request_params['cookies']
,timeout = settings['server']['request_timeout']
request_args = dict(
headers=request_params['headers'],
hooks=dict(response=callback),
cookies=request_params['cookies'],
timeout=settings['server']['request_timeout']
)
if request_params['method'] == 'GET':
@ -192,6 +216,7 @@ def search(query, request, selected_engines):
return results, suggestions
def get_engines_stats():
# TODO refactor
pageloads = []
@ -200,14 +225,15 @@ def get_engines_stats():
errors = []
scores_per_result = []
max_pageload = max_results = max_score = max_errors = max_score_per_result = 0
max_pageload = max_results = max_score = max_errors = max_score_per_result = 0 # noqa
for engine in engines.values():
if engine.stats['search_count'] == 0:
continue
results_num = engine.stats['result_count']/float(engine.stats['search_count'])
load_times = engine.stats['page_load_time']/float(engine.stats['search_count'])
results_num = \
engine.stats['result_count'] / float(engine.stats['search_count'])
load_times = engine.stats['page_load_time'] / float(engine.stats['search_count']) # noqa
if results_num:
score = engine.stats['score_count'] / float(engine.stats['search_count'])
score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa
score_per_result = score / results_num
else:
score = score_per_result = 0.0
@ -220,7 +246,10 @@ def get_engines_stats():
results.append({'avg': results_num, 'name': engine.name})
scores.append({'avg': score, 'name': engine.name})
errors.append({'avg': engine.stats['errors'], 'name': engine.name})
scores_per_result.append({'avg': score_per_result, 'name': engine.name})
scores_per_result.append({
'avg': score_per_result,
'name': engine.name
})
for engine in pageloads:
engine['percentage'] = int(engine['avg'] / max_pageload * 100)
@ -240,10 +269,25 @@ def get_engines_stats():
else:
engine['percentage'] = 0
return [('Page loads (sec)', sorted(pageloads, key=itemgetter('avg')))
,('Number of results', sorted(results, key=itemgetter('avg'), reverse=True))
,('Scores', sorted(scores, key=itemgetter('avg'), reverse=True))
,('Scores per result', sorted(scores_per_result, key=itemgetter('avg'), reverse=True))
,('Errors', sorted(errors, key=itemgetter('avg'), reverse=True))
return [
(
gettext('Page loads (sec)'),
sorted(pageloads, key=itemgetter('avg'))
),
(
gettext('Number of results'),
sorted(results, key=itemgetter('avg'), reverse=True)
),
(
gettext('Scores'),
sorted(scores, key=itemgetter('avg'), reverse=True)
),
(
gettext('Scores per result'),
sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
),
(
gettext('Errors'),
sorted(errors, key=itemgetter('avg'), reverse=True)
),
]

View file

@ -8,7 +8,8 @@ locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
def request(query, params):
search_path = search_string.format(query=urlencode({'q': query, 'setmkt': locale}))
search_path = search_string.format(
query=urlencode({'q': query, 'setmkt': locale}))
#if params['category'] == 'images':
# params['url'] = base_url + 'images/' + search_path
params['url'] = base_url + search_path

View file

@ -5,7 +5,8 @@ categories = []
url = 'http://finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
weight = 100
parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)
parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I) # noqa
def request(query, params):
m = parser_re.match(query)
@ -38,19 +39,23 @@ def response(resp):
except:
return results
title = '{0} {1} in {2} is {3}'.format(resp.search_params['ammount']
,resp.search_params['from']
,resp.search_params['to']
,resp.search_params['ammount']*conversion_rate
title = '{0} {1} in {2} is {3}'.format(
resp.search_params['ammount'],
resp.search_params['from'],
resp.search_params['to'],
resp.search_params['ammount'] * conversion_rate
)
content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to'])
content = '1 {0} is {1} {2}'.format(resp.search_params['from'],
conversion_rate,
resp.search_params['to'])
now_date = datetime.now().strftime('%Y%m%d')
url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'
url = url.format(now_date
,resp.search_params['ammount']
,resp.search_params['from'].lower()
,resp.search_params['to'].lower()
url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' # noqa
url = url.format(
now_date,
resp.search_params['ammount'],
resp.search_params['from'].lower(),
resp.search_params['to'].lower()
)
results.append({'title': title, 'content': content, 'url': url})

View file

@ -1,17 +1,21 @@
from urllib import urlencode
from lxml import html
from json import loads
from cgi import escape
categories = ['videos']
locale = 'en_US'
# see http://www.dailymotion.com/doc/api/obj-video.html
search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}' # noqa
# TODO use video result template
content_tpl = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'
def request(query, params):
global search_url
params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
params['url'] = search_url.format(
query=urlencode({'search': query, 'localization': locale}))
return params
@ -24,7 +28,7 @@ def response(resp):
title = res['title']
url = res['url']
if res['thumbnail_360_url']:
content = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'.format(url, res['thumbnail_360_url'])
content = content_tpl.format(url, res['thumbnail_360_url'])
else:
content = ''
if res['description']:
@ -33,6 +37,7 @@ def response(resp):
results.append({'url': url, 'title': title, 'content': content})
return results
def text_content_from_html(html_string):
desc_html = html.fragment_fromstring(html_string, create_parent=True)
return desc_html.text_content()

View file

@ -7,6 +7,7 @@ categories = ['images']
base_url = 'https://www.deviantart.com/'
search_url = base_url+'search?'
def request(query, params):
global search_url
params['url'] = search_url + urlencode({'q': query})
@ -22,8 +23,11 @@ def response(resp):
for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
link = result.xpath('.//a[contains(@class, "thumb")]')[0]
url = urljoin(base_url, link.attrib.get('href'))
title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') # noqa
title = ''.join(title_links[0].xpath('.//text()'))
img_src = link.xpath('.//img')[0].attrib['src']
results.append({'url': url, 'title': title, 'img_src': img_src, 'template': 'images.html'})
results.append({'url': url,
'title': title,
'img_src': img_src,
'template': 'images.html'})
return results

View file

@ -6,8 +6,11 @@ url = 'https://duckduckgo.com/'
search_url = url + 'd.js?{query}&p=1&s=0'
locale = 'us-en'
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query, 'l': locale}))
q = urlencode({'q': query,
'l': locale})
params['url'] = search_url.format(query=q)
return params
@ -17,8 +20,7 @@ def response(resp):
for r in search_res:
if not r.get('t'):
continue
results.append({'title': r['t']
,'content': html_to_text(r['a'])
,'url': r['u']
})
results.append({'title': r['t'],
'content': html_to_text(r['a']),
'url': r['u']})
return results

View file

@ -3,6 +3,7 @@ from urllib import urlencode
url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1'
def request(query, params):
params['url'] = url.format(query=urlencode({'q': query}))
return params
@ -13,11 +14,10 @@ def response(resp):
results = []
if 'Definition' in search_res:
if search_res.get('AbstractURL'):
res = {'title' : search_res.get('Heading', '')
,'content' : search_res.get('Definition', '')
,'url' : search_res.get('AbstractURL', '')
,'class' : 'definition_result'
}
res = {'title': search_res.get('Heading', ''),
'content': search_res.get('Definition', ''),
'url': search_res.get('AbstractURL', ''),
'class': 'definition_result'}
results.append(res)
return results

View file

@ -2,7 +2,8 @@ from urllib import urlencode
from HTMLParser import HTMLParser
url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1' # noqa
class FilecropResultParser(HTMLParser):
def __init__(self):
@ -18,22 +19,28 @@ class FilecropResultParser(HTMLParser):
def handle_starttag(self, tag, attrs):
if tag == 'tr':
if ('bgcolor', '#edeff5') in attrs or ('bgcolor', '#ffffff') in attrs:
if ('bgcolor', '#edeff5') in attrs or\
('bgcolor', '#ffffff') in attrs:
self.__start_processing = True
if not self.__start_processing:
return
if tag == 'label':
self.result['title'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
elif tag == 'a' and ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs:
self.result['title'] = [attr[1] for attr in attrs
if attr[0] == 'title'][0]
elif tag == 'a' and ('rel', 'nofollow') in attrs\
and ('class', 'sourcelink') in attrs:
if 'content' in self.result:
self.result['content'] += [attr[1] for attr in attrs if attr[0] == 'title'][0]
self.result['content'] += [attr[1] for attr in attrs
if attr[0] == 'title'][0]
else:
self.result['content'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
self.result['content'] = [attr[1] for attr in attrs
if attr[0] == 'title'][0]
self.result['content'] += ' '
elif tag == 'a':
self.result['url'] = url + [attr[1] for attr in attrs if attr[0] == 'href'][0]
self.result['url'] = url + [attr[1] for attr in attrs
if attr[0] == 'href'][0]
def handle_endtag(self, tag):
if self.__start_processing is False:
@ -60,10 +67,12 @@ class FilecropResultParser(HTMLParser):
self.data_counter += 1
def request(query, params):
params['url'] = search_url.format(query=urlencode({'w': query}))
return params
def response(resp):
parser = FilecropResultParser()
parser.feed(resp.text)

View file

@ -8,21 +8,27 @@ categories = ['images']
url = 'https://secure.flickr.com/'
search_url = url+'search/?{query}'
results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]' # noqa
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
def response(resp):
global base_url
results = []
dom = html.fromstring(resp.text)
for result in dom.xpath('//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'):
for result in dom.xpath(results_xpath):
href = urljoin(url, result.attrib.get('href'))
img = result.xpath('.//img')[0]
title = img.attrib.get('alt', '')
img_src = img.attrib.get('data-defer-src')
if not img_src:
continue
results.append({'url': href, 'title': title, 'img_src': img_src, 'template': 'images.html'})
results.append({'url': href,
'title': title,
'img_src': img_src,
'template': 'images.html'})
return results

View file

@ -4,12 +4,15 @@ from cgi import escape
categories = ['it']
search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'
search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}' # noqa
accept_header = 'application/vnd.github.preview.text-match+json'
def request(query, params):
global search_url
params['url'] = search_url.format(query=urlencode({'q': query}))
params['headers']['Accept'] = 'application/vnd.github.preview.text-match+json'
params['headers']['Accept'] = accept_header
return params

View file

@ -6,12 +6,14 @@ from json import loads
categories = ['images']
url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}'
search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}' # noqa
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
def response(resp):
results = []
search_res = loads(resp.text)
@ -24,5 +26,9 @@ def response(resp):
title = result['title']
if not result['url']:
continue
results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'})
results.append({'url': href,
'title': title,
'content': '',
'img_src': result['url'],
'template': 'images.html'})
return results

View file

@ -8,6 +8,7 @@ content_query = None
title_query = None
#suggestion_xpath = ''
def iterate(iterable):
if type(iterable) == dict:
it = iterable.iteritems()
@ -17,11 +18,15 @@ def iterate(iterable):
for index, value in it:
yield str(index), value
def is_iterable(obj):
if type(obj) == str: return False
if type(obj) == unicode: return False
if type(obj) == str:
return False
if type(obj) == unicode:
return False
return isinstance(obj, Iterable)
def parse(query):
q = []
for part in query.split('/'):
@ -31,6 +36,7 @@ def parse(query):
q.append(part)
return q
def do_query(data, q):
ret = []
if not len(q):
@ -54,11 +60,13 @@ def do_query(data, q):
ret.extend(do_query(value, q))
return ret
def query(data, query_string):
q = parse(query_string)
return do_query(data, q)
def request(query, params):
query = urlencode({'q': query})[2:]
params['url'] = search_url.format(query=query)

View file

@ -3,10 +3,12 @@ from urllib import urlencode, quote
url = 'https://en.wikipedia.org/'
search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json' # noqa
number_of_results = 10
def request(query, params):
search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json'
params['url'] = search_url.format(query=urlencode({'srsearch': query}))
return params
@ -14,7 +16,5 @@ def request(query, params):
def response(resp):
search_results = loads(resp.text)
res = search_results.get('query', {}).get('search', [])
return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')),
return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')), # noqa
'title': result['title']} for result in res[:int(number_of_results)]]

View file

@ -7,13 +7,18 @@ categories = ['videos', 'music']
url = 'https://thepiratebay.se/'
search_url = url + 'search/{search_term}/0/99/{search_type}'
search_types = {'videos': '200'
,'music' : '100'
,'files' : '0'
}
search_types = {'videos': '200',
'music': '100',
'files': '0'}
magnet_xpath = './/a[@title="Download this torrent using magnet"]'
content_xpath = './/font[@class="detDesc"]//text()'
def request(query, params):
params['url'] = search_url.format(search_term=quote(query), search_type=search_types.get(params['category']))
search_type = search_types.get(params['category'])
params['url'] = search_url.format(search_term=quote(query),
search_type=search_type)
return params
@ -27,10 +32,14 @@ def response(resp):
link = result.xpath('.//div[@class="detName"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
title = ' '.join(link.xpath('.//text()'))
content = escape(' '.join(result.xpath('.//font[@class="detDesc"]//text()')))
content = escape(' '.join(result.xpath(content_xpath)))
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
magnetlink = result.xpath('.//a[@title="Download this torrent using magnet"]')[0]
results.append({'url': href, 'title': title, 'content': content,
'seed': seed, 'leech': leech, 'magnetlink': magnetlink.attrib['href'],
magnetlink = result.xpath(magnet_xpath)[0]
results.append({'url': href,
'title': title,
'content': content,
'seed': seed,
'leech': leech,
'magnetlink': magnetlink.attrib['href'],
'template': 'torrent.html'})
return results

View file

@ -5,7 +5,8 @@ categories = ['music']
guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
url = 'https://api.soundcloud.com/'
search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id
search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id # noqa
def request(query, params):
global search_url
@ -21,5 +22,7 @@ def response(resp):
if result['kind'] in ('track', 'playlist'):
title = result['title']
content = result['description']
results.append({'url': result['permalink_url'], 'title': title, 'content': content})
results.append({'url': result['permalink_url'],
'title': title,
'content': content})
return results

View file

@ -7,6 +7,8 @@ categories = ['it']
url = 'http://stackoverflow.com/'
search_url = url+'search?'
result_xpath = './/div[@class="excerpt"]//text()'
def request(query, params):
params['url'] = search_url + urlencode({'q': query})
@ -20,6 +22,6 @@ def response(resp):
link = result.xpath('.//div[@class="result-link"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
title = escape(' '.join(link.xpath('.//text()')))
content = escape(' '.join(result.xpath('.//div[@class="excerpt"]//text()')))
content = escape(' '.join(result.xpath(result_xpath)))
results.append({'url': href, 'title': title, 'content': content})
return results

View file

@ -1,11 +1,10 @@
from urllib import urlencode
from lxml import html
from urlparse import urlparse
from cgi import escape
base_url = 'https://startpage.com/'
search_url = base_url+'do/search'
def request(query, params):
global search_url
query = urlencode({'q': query})[2:]
@ -24,7 +23,6 @@ def response(resp):
for result in dom.xpath('//div[@id="results"]/div[@class="result"]'):
link = result.xpath('.//h3/a')[0]
url = link.attrib.get('href')
parsed_url = urlparse(url)
title = link.text_content()
content = result.xpath('./p[@class="desc"]')[0].text_content()
results.append({'url': url, 'title': title, 'content': content})

View file

@ -7,6 +7,9 @@ categories = ['social media']
base_url = 'https://twitter.com/'
search_url = base_url+'search?'
title_xpath = './/span[@class="username js-action-profile-name"]//text()'
content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
def request(query, params):
global search_url
@ -21,7 +24,9 @@ def response(resp):
for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
link = tweet.xpath('.//small[@class="time"]//a')[0]
url = urljoin(base_url, link.attrib.get('href'))
title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()'))
content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()')))
results.append({'url': url, 'title': title, 'content': content})
title = ''.join(tweet.xpath(title_xpath))
content = escape(''.join(tweet.xpath(content_xpath)))
results.append({'url': url,
'title': title,
'content': content})
return results

View file

@ -9,23 +9,27 @@ url_xpath = None
content_xpath = None
title_xpath = None
results_xpath = ''
content_tpl = '<a href="{0}"> <img src="{2}"/> </a>'
# the cookie set by vimeo contains all the following values, but only __utma seems to be requiered
# the cookie set by vimeo contains all the following values,
# but only __utma seems to be required
cookie = {
#'vuid':'918282893.1027205400'
# 'ab_bs':'%7B%223%22%3A279%7D'
'__utma': '00000000.000#0000000.0000000000.0000000000.0000000000.0'
# '__utmb':'18302654.1.10.1388942090'
#, '__utmc':'18302654'
#, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
#, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' # noqa
#, '__utml':'search'
}
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
params['cookies'] = cookie
return params
def response(resp):
results = []
dom = html.fromstring(resp.text)
@ -36,10 +40,9 @@ def response(resp):
url = base_url + result.xpath(url_xpath)[0]
title = p.unescape(extract_text(result.xpath(title_xpath)))
thumbnail = extract_text(result.xpath(content_xpath)[0])
content = '<a href="{0}"> <img src="{2}"/> </a>'.format(url, title, thumbnail)
results.append({'url': url
, 'title': title
, 'content': content
, 'template':'videos.html'
, 'thumbnail': thumbnail})
results.append({'url': url,
'title': title,
'content': content_tpl.format(url, title, thumbnail),
'template': 'videos.html',
'thumbnail': thumbnail})
return results

View file

@ -1,8 +1,8 @@
from lxml import html
from urllib import urlencode, unquote
from urlparse import urlparse, urljoin
from cgi import escape
from lxml.etree import _ElementStringResult
from searx.utils import html_to_text
search_url = None
url_xpath = None
@ -11,11 +11,15 @@ title_xpath = None
suggestion_xpath = ''
results_xpath = ''
'''
if xpath_results is list, extract the text from each result and concat the list
if xpath_results is a xml element, extract all the text node from it ( text_content() method from lxml )
if xpath_results is a xml element, extract all the text node from it
( text_content() method from lxml )
if xpath_results is a string element, then it's already done
'''
def extract_text(xpath_results):
if type(xpath_results) == list:
# it's list of result : concat everything using recursive call
@ -30,7 +34,7 @@ def extract_text(xpath_results):
return ''.join(xpath_results)
else:
# it's a element
return xpath_results.text_content()
return html_to_text(xpath_results.text_content())
def extract_url(xpath_results):
@ -60,7 +64,8 @@ def normalize_url(url):
url += '/'
# FIXME : hack for yahoo
if parsed_url.hostname == 'search.yahoo.com' and parsed_url.path.startswith('/r'):
if parsed_url.hostname == 'search.yahoo.com'\
and parsed_url.path.startswith('/r'):
p = parsed_url.path
mark = p.find('/**')
if mark != -1:
@ -87,9 +92,9 @@ def response(resp):
results.append({'url': url, 'title': title, 'content': content})
else:
for url, title, content in zip(
map(extract_url, dom.xpath(url_xpath)), \
map(extract_text, dom.xpath(title_xpath)), \
map(extract_text, dom.xpath(content_xpath)), \
map(extract_url, dom.xpath(url_xpath)),
map(extract_text, dom.xpath(title_xpath)),
map(extract_text, dom.xpath(content_xpath))
):
results.append({'url': url, 'title': title, 'content': content})

View file

@ -4,10 +4,12 @@ from urllib import urlencode
url = 'http://localhost:8090'
search_url = '/yacysearch.json?{query}&maximumRecords=10'
def request(query, params):
params['url'] = url + search_url.format(query=urlencode({'query': query}))
return params
def response(resp):
raw_search_results = loads(resp.text)

View file

@ -5,6 +5,7 @@ categories = ['videos']
search_url = 'https://gdata.youtube.com/feeds/api/videos?alt=json&{query}'
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
@ -30,17 +31,16 @@ def response(resp):
thumbnail = ''
if len(result['media$group']['media$thumbnail']):
thumbnail = result['media$group']['media$thumbnail'][0]['url']
content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail)
content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail) # noqa
if len(content):
content += '<br />' + result['content']['$t']
else:
content = result['content']['$t']
results.append({'url': url
, 'title': title
, 'content': content
, 'template':'videos.html'
, 'thumbnail':thumbnail})
results.append({'url': url,
'title': title,
'content': content,
'template': 'videos.html',
'thumbnail': thumbnail})
return results

View file

@ -105,3 +105,7 @@ engines:
url_xpath : ./a/@href
title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
content_xpath : ./a/img/@src
locales:
en : English
hu : Magyar

View file

@ -1,16 +0,0 @@
port = 11111
secret_key = "ultrasecretkey" # change this!
debug = False
request_timeout = 5.0 # seconds
weights = {} # 'search_engine_name': float(weight) | default is 1.0
blacklist = [] # search engine blacklist
categories = {} # custom search engine categories
base_url = None # "https://your.domain.tld/" or None (to use request parameters)

searx/settings_robot.yml (new file)
View file

@ -0,0 +1,107 @@
server:
port : 11111
secret_key : "ultrasecretkey" # change this!
debug : False
request_timeout : 3.0 # seconds
base_url: False
engines:
- name : wikipedia
engine : mediawiki
url : https://en.wikipedia.org/
number_of_results : 1
- name : bing
engine : bing
locale : en-US
- name : currency
engine : currency_convert
categories : general
- name : deviantart
engine : deviantart
categories : images
- name : ddg definitions
engine : duckduckgo_definitions
- name : duckduckgo
engine : duckduckgo
locale : en-us
- name : filecrop
engine : filecrop
categories : files
- name : flickr
engine : flickr
categories : images
- name : github
engine : github
categories : it
- name : google
engine : json_engine
search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
categories : general
url_query : /responseData/results/unescapedUrl
content_query : /responseData/results/content
title_query : /responseData/results/titleNoFormatting
- name : google images
engine : google_images
categories : images
- name : piratebay
engine : piratebay
categories : videos, music, files
- name : soundcloud
engine : soundcloud
categories : music
- name : stackoverflow
engine : stackoverflow
categories : it
- name : startpage
engine : startpage
- name : twitter
engine : twitter
categories : social media
- name : urbandictionary
engine : xpath
search_url : http://www.urbandictionary.com/define.php?term={query}
url_xpath : //div[@class="word"]//a/@href
title_xpath : //div[@class="word"]//a
content_xpath : //div[@class="definition"]
- name : yahoo
engine : xpath
search_url : http://search.yahoo.com/search?p={query}
results_xpath : //div[@class="res"]
url_xpath : .//h3/a/@href
title_xpath : .//h3/a
content_xpath : .//div[@class="abstr"]
suggestion_xpath : //div[@id="satat"]//a
- name : youtube
engine : youtube
categories : videos
- name : dailymotion
engine : dailymotion
locale : en_US
categories : videos
- name : vimeo
engine : vimeo
categories : videos
results_xpath : //div[@id="browse_content"]/ol/li
url_xpath : ./a/@href
title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
content_xpath : ./a/img/@src

View file

@ -49,6 +49,8 @@ input[type="submit"] { border: 1px solid #666666; color: #444444; padding: 4px;
input[type="checkbox"] { visibility: hidden; }
fieldset { margin: 8px; }
#categories { margin: 0 10px; }
.checkbox_container { display: inline-block; position: relative; margin: 0 3px; padding: 0px; }
@ -79,7 +81,6 @@ a { text-decoration: none; color: #1a11be; }
a:visited { color: #7b11be; }
.result { margin: 19px 0 18px 0; padding: 0; max-width: 55em; clear: both; }
.result:hover { background: #e8e7e6; }
.result_title { margin-bottom: 0; }
.result h3 { font-size: 1em; word-wrap:break-word; margin: 5px 0 1px 0; padding: 0 }
.result .content { font-size: 0.8em; margin: 0; padding: 0; max-width: 54em; word-wrap:break-word; line-height: 1.24; }
@ -201,3 +202,5 @@ tr:hover td { background: #DDDDDD; }
.result img { max-width: 90%; width: auto; height: auto }
}
.favicon { float: left; margin-right: 4px; }

View file

@ -8,25 +8,25 @@
</p>
<h2>Why use Searx?</h2>
<ul>
<li>Maybe Searx wont offer you as personalised results as Google, but it doesn't make a profile about you</li>
<li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
<li>Searx is a free software, the code is 100% open and you can help to make it better. See more on <a href="https://gmail.com/asciimoo/searx">github</a></li>
<li>Searx may not offer you results as personalised as Google's, but it doesn't generate a profile about you</li>
<li>Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li>
<li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
</ul>
<p>If you do care about privacy, want to be a conscious user, moreover believe
<p>If you do care about privacy, want to be a conscious user, or otherwise believe
in digital freedom, make Searx your default search engine or run it on your own server</p>
<h2>Technical details - How does it work?</h2>
<p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>,
inspired by the <a href="http://seeks-project.info/">seeks project</a>.<br />
It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they don't show up in our logs, neither in your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.<br />
Searx can be added to your browser's search bar, moreover it can be set as the default search engine.
It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except Chrome*). Therefore they show up in neither our logs nor your URL history. For Chrome* users there is an exception: Searx uses the search bar to perform GET requests.<br />
Searx can be added to your browser's search bar; moreover, it can be set as the default search engine.
</p>
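The POST-based querying described above can be reproduced outside the browser as well; an illustrative sketch (not part of this template), using the requests library and the form field names used by the templates in this commit (q and category_&lt;name&gt;):

    # illustrative sketch of a plain POST query, as the about page describes
    import requests

    resp = requests.post('https://searx.0x2a.tk/',
                         data={'q': 'privacy', 'category_general': '1'})
    print resp.status_code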
<h2>How can I have my own?</h2>
<h2>How can I make it my own?</h2>
<p>Searx appreciates your suspicion regarding logs, so take the <a href="https://github.com/asciimoo/searx">code</a> and run it yourself! <br />Add your Searx to this <a href="https://github.com/asciimoo/searx/wiki/Searx-instances">list</a> to help other people to have privacy and make the Internet freer!
<br />The more decentralized the Internet is the more freedom we have!</p>
<p>Searx appreciates your concern regarding logs, so take the <a href="https://github.com/asciimoo/searx">code</a> and run it yourself! <br />Add your Searx to this <a href="https://github.com/asciimoo/searx/wiki/Searx-instances">list</a> to help other people reclaim their privacy and make the Internet freer!
<br />The more decentralized the Internet is, the more freedom we have!</p>
<hr />
@ -39,7 +39,7 @@ Searx can be added to your browser's search bar, moreover it can be set as the d
<h3>New engines?</h3>
<ul>
<li>Edit your engines.cfg, see <a href="https://raw.github.com/asciimoo/searx/master/engines.cfg_sample">sample config</a></li>
<li>Edit your <a href="https://raw.github.com/asciimoo/searx/master/searx/settings.yml">settings.yml</a></li>
<li>Create your custom engine module, check the <a href="https://github.com/asciimoo/searx/blob/master/examples/basic_engine.py">example engine</a></li>
</ul>
<p>Don't forget to restart searx after config edit!</p>
@ -48,7 +48,7 @@ Searx can be added to your browser's search bar, moreover it can be set as the d
<p>See the <a href="https://github.com/asciimoo/searx/wiki/Installation">installation and setup</a> wiki page</p>
<h3>How to debug engines?</h3>
<p><a href="/stats">Stats page</a> contains some useful data about the used engines.</p>
<p><a href="/stats">Stats page</a> contains some useful data about the engines used.</p>
</div>
{% endblock %}

View file

@ -1,7 +1,7 @@
<div id="categories">
{% for category in categories %}
<div class="checkbox_container">
<input type="checkbox" id="checkbox_{{ category|replace(' ', '_') }}" name="category_{{ category }}" {% if category in selected_categories %}checked="checked"{% endif %} /><label for="checkbox_{{ category|replace(' ', '_') }}">{{ category }}</label>
<input type="checkbox" id="checkbox_{{ category|replace(' ', '_') }}" name="category_{{ category }}" {% if category in selected_categories %}checked="checked"{% endif %} /><label for="checkbox_{{ category|replace(' ', '_') }}">{{ _(category) }}</label>
</div>
{% endfor %}
</div>

View file

@ -1,12 +1,12 @@
{% extends 'base.html' %}
{% block content %}
<div class="row">
<h2>Currently used search engines</h2>
<h2>{{ _('Currently used search engines') }}</h2>
<table style="width: 80%;">
<tr>
<th>Engine name</th>
<th>Category</th>
<th>{{ _('Engine name') }}</th>
<th>{{ _('Category') }}</th>
</tr>
{% for (categ,search_engines) in categs %}
{% for search_engine in search_engines %}
@ -20,7 +20,6 @@
{% endfor %}
{% endfor %}
</table>
<p>Please add more engines to this list, pull requests are welcome!</p>
<p class="right"><a href="/">back</a></p>
<p class="right"><a href="/">{{ _('back') }}</a></p>
</div>
{% endblock %}

View file

@ -4,8 +4,8 @@
<div class="title"><h1>searx</h1></div>
{% include 'search.html' %}
<p class="top_margin">
<a href="/about" class="hmarg">about</a>
<a href="/preferences" class="hmarg">preferences</a>
<a href="/about" class="hmarg">{{ _('about') }}</a>
<a href="/preferences" class="hmarg">{{ _('preferences') }}</a>
</p>
</div>
{% endblock %}

View file

@ -2,18 +2,28 @@
{% block head %} {% endblock %}
{% block content %}
<div class="row">
<h2>Preferences</h2>
<h2>{{ _('Preferences') }}</h2>
<fieldset>
<legend>Default categories</legend>
<form method="post" action="/preferences" id="search_form">
<fieldset>
<legend>{{ _('Default categories') }}</legend>
<p>
{% include 'categories.html' %}
</p>
<input type="submit" value="save" />
</form>
</fieldset>
<div class="right"><a href="/">back</a></div>
<fieldset>
<legend>{{ _('Interface language') }}</legend>
<p>
<select name='locale'>
{% for locale_id,locale_name in locales.items() %}
<option value={{ locale_id }} {% if locale_id == current_locale %}selected="selected"{% endif %}>{{ locale_name}}</option>
{% endfor %}
</select>
</p>
</fieldset>
<input type="submit" value="{{ _('save') }}" />
</form>
<div class="right"><a href="/">{{ _('back') }}</a></div>
</div>
{% endblock %}

View file

@ -1,13 +1,11 @@
<div class="result {{ result.class }}">
{% if result['favicon'] %}
<div style="float:left; margin:2px;">
<img width="18" height="18" src="static/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}.ico" title="{{result['favicon']}}.ico" />
</div>
<img width="14" height="14" class="favicon" src="static/img/icon_{{result['favicon']}}.ico" />
{% endif %}
<div>
<h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3></br>
<h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
<p class="content">{% if result.content %}{{ result.content|safe }}<br />{% endif %}</p>
<p class="url">{{ result.pretty_url }}</p>
</div>

View file

@ -1,13 +1,11 @@
<div class="result">
{% if result['favicon'] %}
<div style="float:left; margin:2px;">
<img width="18" height="18" src="static/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}.ico" title="{{result['favicon']}}.ico" />
</div>
<img width="14" height="14" class="favicon" src="static/img/icon_{{result['favicon']}}.ico" />
{% endif %}
<p>
<h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
<a href="{{ result.url }}"><img width="300" height="170" src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a>
<a href="{{ result.url }}"><img width="400px" src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a>
<p class="url">{{ result.url }}</p>
</p>
</div>

View file

@ -7,12 +7,12 @@
</div>
<div id="results">
{% if suggestions %}
<div id="suggestions"><span>Suggestions: </span>{% for suggestion in suggestions %}<form method="post" action="/"><input type="hidden" name="q" value="{{suggestion}}"><input type="submit" value="{{ suggestion }}" /></form>{% endfor %}</div>
<div id="suggestions"><span>{{ _('Suggestions') }}:</span>{% for suggestion in suggestions %}<form method="post" action="/"><input type="hidden" name="q" value="{{suggestion}}"><input type="submit" value="{{ suggestion }}" /></form>{% endfor %}</div>
{% endif %}
<div id ="result_count">
Number of results: {{ number_of_results }}
{{ _('Number of results') }}: {{ number_of_results }}
</div>
{% for result in results %}
@ -23,7 +23,7 @@
{% endif %}
{% endfor %}
<div id="apis">
Download results
{{ _('Download results') }}
<form method="post" action="/">
<div class="left">
<input type="hidden" name="q" value="{{ q }}" />

View file

@ -1,7 +1,7 @@
{% extends "base.html" %}
{% block head %} {% endblock %}
{% block content %}
<h2>Engine stats</h2>
<h2>{{ _('Engine stats') }}</h2>
{% for stat_name,stat_category in stats %}
<div class="left">

View file

@ -7,10 +7,10 @@ from unittest2 import TestCase
import os
import subprocess
import sys
class SearxTestLayer:
"""Base layer for non-robot tests."""
__name__ = u'SearxTestLayer'
@ -36,24 +36,37 @@ class SearxRobotLayer(Layer):
def setUp(self):
os.setpgrp() # create new process group, become its leader
# get program paths
webapp = os.path.join(
os.path.abspath(os.path.dirname(os.path.realpath(__file__))),
'webapp.py'
)
exe = os.path.abspath(os.path.dirname(__file__) + '/../bin/py')
# set robot settings path
os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath(
os.path.dirname(__file__) + '/settings_robot.yml')
# run the server
self.server = subprocess.Popen(
[exe, webapp, 'settings_robot'],
[exe, webapp],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT
)
def tearDown(self):
# TERM all processes in my group
# send TERM signal to all processes in my group, to stop subprocesses
os.killpg(os.getpgid(self.server.pid), 15)
# remove previously set environment variable
del os.environ['SEARX_SETTINGS_PATH']
SEARXROBOTLAYER = SearxRobotLayer()
class SearxTestCase(TestCase):
"""Base test case for non-robot tests."""
layer = SearxTestLayer

Binary file not shown.

View file

@ -0,0 +1,115 @@
# Hungarian translations for PROJECT.
# Copyright (C) 2014 ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2014.
#
msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2014-01-22 00:55+0100\n"
"PO-Revision-Date: 2014-01-21 23:33+0100\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: hu <LL@li.org>\n"
"Plural-Forms: nplurals=1; plural=0\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 1.3\n"
#: searx/engines/__init__.py:274
msgid "Page loads (sec)"
msgstr "Válaszidők (sec)"
#: searx/engines/__init__.py:278 searx/templates/results.html:15
msgid "Number of results"
msgstr "Találatok száma"
#: searx/engines/__init__.py:282
msgid "Scores"
msgstr "Pontszámok"
#: searx/engines/__init__.py:286
msgid "Scores per result"
msgstr "Pontszámok találatonként"
#: searx/engines/__init__.py:290
msgid "Errors"
msgstr "Hibák"
#: searx/templates/engines.html:4
msgid "Currently used search engines"
msgstr "Jelenleg használt keresők"
#: searx/templates/engines.html:8
msgid "Engine name"
msgstr "Kereső neve"
#: searx/templates/engines.html:9
msgid "Category"
msgstr "Kategória"
#: searx/templates/engines.html:23 searx/templates/preferences.html:27
msgid "back"
msgstr "vissza"
#: searx/templates/index.html:7
msgid "about"
msgstr "rólunk"
#: searx/templates/index.html:8
msgid "preferences"
msgstr "beállítások"
#: searx/templates/preferences.html:5
msgid "Preferences"
msgstr "Beállítások"
#: searx/templates/preferences.html:10
msgid "Default categories"
msgstr "Alapértelmezett kategóriák"
#: searx/templates/preferences.html:16
msgid "Interface language"
msgstr "Nyelv"
#: searx/templates/preferences.html:25
msgid "save"
msgstr "mentés"
#: searx/templates/results.html:10
msgid "Suggestions"
msgstr "Javaslatok"
#: searx/templates/results.html:26
msgid "Download results"
msgstr "Találatok letöltése"
#: searx/templates/stats.html:4
msgid "Engine stats"
msgstr "Kereső statisztikák"
# categories - manually added
# TODO - automatically add
msgid "files"
msgstr "fájlok"
msgid "general"
msgstr "általános"
msgid "music"
msgstr "zene"
msgid "social media"
msgstr "közösségi média"
msgid "images"
msgstr "képek"
msgid "videos"
msgstr "videók"
msgid "it"
msgstr "it"

View file

@ -1,13 +1,16 @@
from HTMLParser import HTMLParser
#import htmlentitydefs
import csv
import codecs
from codecs import getincrementalencoder
import cStringIO
import re
def gen_useragent():
# TODO
return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
return ua
def highlight_content(content, query):
@ -34,6 +37,7 @@ def highlight_content(content, query):
return content
class HTMLTextExtractor(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
@ -43,7 +47,10 @@ class HTMLTextExtractor(HTMLParser):
self.result.append(d)
def handle_charref(self, number):
codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number)
if number[0] in (u'x', u'X'):
codepoint = int(number[1:], 16)
else:
codepoint = int(number)
self.result.append(unichr(codepoint))
def handle_entityref(self, name):
@ -54,6 +61,7 @@ class HTMLTextExtractor(HTMLParser):
def get_text(self):
return u''.join(self.result)
def html_to_text(html):
s = HTMLTextExtractor()
s.feed(html)
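
Note (not part of this commit): the reworked handle_charref() above decodes both decimal (&#38;) and hexadecimal (&#x26;) character references, so html_to_text() should turn markup into plain text. A small expected-behaviour sketch, assuming Python 2:

```python
# Sketch only: tags are stripped and numeric character references decoded.
from searx.utils import html_to_text

print html_to_text(u'<b>Tom &#38; Jerry &#x26; friends</b>')
# expected: Tom & Jerry & friends
```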
@ -71,10 +79,16 @@ class UnicodeWriter:
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
self.encoder = getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row])
unicode_row = []
for col in row:
if type(col) == str or type(col) == unicode:
unicode_row.append(col.encode('utf-8').strip())
else:
unicode_row.append(col)
self.writer.writerow(unicode_row)
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
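
Note (not part of this commit): a short usage sketch for UnicodeWriter, mirroring how the CSV export in webapp.py drives it (Python 2 assumed):

```python
# Sketch only: unicode cells are encoded to UTF-8 before reaching the csv
# module; the encoded CSV is read back from the underlying stream.
import cStringIO
from searx.utils import UnicodeWriter

writer = UnicodeWriter(cStringIO.StringIO())
writer.writerow([u'title', u'url', u'content'])
writer.writerow([u'Example', u'https://example.com', u'Some example text'])
writer.stream.seek(0)
print writer.stream.read()
```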

View file

@ -17,26 +17,36 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
import os
import sys
if __name__ == "__main__":
sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
from searx import settings
from flask import Flask, request, render_template, url_for, Response, make_response, redirect
from searx.engines import search, categories, engines, get_engines_stats
import json
import cStringIO
from searx.utils import UnicodeWriter
import os
from flask import Flask, request, render_template
from flask import url_for, Response, make_response, redirect
from flask import send_from_directory
from searx import settings
from searx.engines import search, categories, engines, get_engines_stats
from searx.utils import UnicodeWriter
from searx.utils import highlight_content, html_to_text
from flask.ext.babel import Babel
app = Flask(__name__)
app = Flask(
__name__,
static_folder=os.path.join(os.path.dirname(__file__), 'static'),
template_folder=os.path.join(os.path.dirname(__file__), 'templates')
)
app.secret_key = settings['server']['secret_key']
babel = Babel(app)
#TODO configurable via settings.yml
favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
'twitter', 'stackoverflow', 'github']
opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
@ -51,6 +61,24 @@ opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
'''
@babel.localeselector
def get_locale():
locale = request.accept_languages.best_match(settings['locales'].keys())
if request.cookies.get('locale', '') in settings['locales']:
locale = request.cookies.get('locale', '')
if 'locale' in request.args\
and request.args['locale'] in settings['locales']:
locale = request.args['locale']
if 'locale' in request.form\
and request.form['locale'] in settings['locales']:
locale = request.form['locale']
return locale
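
Note (not part of this commit): the selector above starts from the best Accept-Language match and then lets a locale cookie, the locale URL argument and finally a posted locale form field override it, each only if the value is listed in settings['locales'], so form data wins. A hedged sketch, assuming the searx app and its settings are importable and 'hu' is a configured locale:

```python
# Sketch only: the locale URL argument overrides the Accept-Language header.
from searx.webapp import app, get_locale

with app.test_request_context('/?locale=hu',
                              headers={'Accept-Language': 'en'}):
    print get_locale()   # expected: hu
```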
def get_base_url():
if settings['server']['base_url']:
hostname = settings['server']['base_url']
@ -65,7 +93,8 @@ def get_base_url():
def render(template_name, **kwargs):
global categories
kwargs['categories'] = ['general']
kwargs['categories'].extend(x for x in sorted(categories.keys()) if x != 'general')
kwargs['categories'].extend(x for x in
sorted(categories.keys()) if x != 'general')
if not 'selected_categories' in kwargs:
kwargs['selected_categories'] = []
cookie_categories = request.cookies.get('categories', '').split(',')
@ -76,6 +105,7 @@ def render(template_name, **kwargs):
kwargs['selected_categories'] = ['general']
return render_template(template_name, **kwargs)
def parse_query(query):
query_engines = []
query_parts = query.split()
@ -108,7 +138,8 @@ def index():
continue
selected_categories.append(category)
if not len(selected_categories):
cookie_categories = request.cookies.get('categories', '').split(',')
cookie_categories = request.cookies.get('categories', '')
cookie_categories = cookie_categories.split(',')
for ccateg in cookie_categories:
if ccateg in categories:
selected_categories.append(ccateg)
@ -116,7 +147,9 @@ def index():
selected_categories = ['general']
for categ in selected_categories:
selected_engines.extend({'category': categ, 'name': x.name} for x in categories[categ])
selected_engines.extend({'category': categ,
'name': x.name}
for x in categories[categ])
results, suggestions = search(query, request, selected_engines)
@ -131,16 +164,18 @@ def index():
result['content'] = html_to_text(result['content']).strip()
result['title'] = html_to_text(result['title']).strip()
if len(result['url']) > 74:
result['pretty_url'] = result['url'][:35] + '[..]' + result['url'][-35:]
url_parts = result['url'][:35], result['url'][-35:]
result['pretty_url'] = '{0}[...]{1}'.format(*url_parts)
else:
result['pretty_url'] = result['url']
for engine in result['engines']:
if engine in ['wikipedia', 'youtube', 'vimeo', 'soundcloud', 'twitter', 'stackoverflow', 'github']:
if engine in favicons:
result['favicon'] = engine
if request_data.get('format') == 'json':
return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json')
return Response(json.dumps({'query': query, 'results': results}),
mimetype='application/json')
elif request_data.get('format') == 'csv':
csv = UnicodeWriter(cStringIO.StringIO())
keys = ('title', 'url', 'content', 'host', 'engine', 'score')
@ -151,25 +186,27 @@ def index():
csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0)
response = Response(csv.stream.read(), mimetype='application/csv')
response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split())))
content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
response.headers.add('Content-Disposition', content_disp)
return response
elif request_data.get('format') == 'rss':
response_rss = render('opensearch_response_rss.xml'
,results=results
,q=request_data['q']
,number_of_results=len(results)
,base_url=get_base_url()
response_rss = render(
'opensearch_response_rss.xml',
results=results,
q=request_data['q'],
number_of_results=len(results),
base_url=get_base_url()
)
return Response(response_rss, mimetype='text/xml')
return render('results.html'
,results=results
,q=request_data['q']
,selected_categories=selected_categories
,number_of_results=len(results)+len(featured_results)
,featured_results=featured_results
,suggestions=suggestions
return render(
'results.html',
results=results,
q=request_data['q'],
selected_categories=selected_categories,
number_of_results=len(results) + len(featured_results),
featured_results=featured_results,
suggestions=suggestions
)
@ -189,18 +226,35 @@ def preferences():
if request.method == 'POST':
selected_categories = []
locale = None
for pd_name, pd in request.form.items():
if pd_name.startswith('category_'):
category = pd_name[9:]
if not category in categories:
continue
selected_categories.append(category)
if selected_categories:
elif pd_name == 'locale' and pd in settings['locales']:
locale = pd
resp = make_response(redirect('/'))
if locale:
# cookie max age: 4 weeks
resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4)
resp.set_cookie(
'locale', locale,
max_age=60 * 60 * 24 * 7 * 4
)
if selected_categories:
# cookie max age: 4 weeks
resp.set_cookie(
'categories', ','.join(selected_categories),
max_age=60 * 60 * 24 * 7 * 4
)
return resp
return render('preferences.html')
return render('preferences.html',
locales=settings['locales'],
current_locale=get_locale())
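
Note (not part of this commit): a POST to the preferences view stores the selected locale and categories in cookies for four weeks (60 * 60 * 24 * 7 * 4 = 2419200 seconds). A sketch of exercising it with the Flask test client; the /preferences route path is an assumption, not shown in this hunk:

```python
# Sketch only: submit the preferences form and inspect the cookies it sets.
from searx.webapp import app

with app.test_client() as client:
    resp = client.post('/preferences',
                       data={'locale': 'hu', 'category_general': '1'})
    print resp.headers.getlist('Set-Cookie')
```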
@app.route('/stats', methods=['GET'])
@ -216,6 +270,7 @@ def robots():
Allow: /
Allow: /about
Disallow: /stats
Disallow: /engines
""", mimetype='text/plain')
@ -233,19 +288,22 @@ def opensearch():
mimetype="application/xml")
return resp
@app.route('/favicon.ico')
def favicon():
return send_from_directory(os.path.join(app.root_path, 'static/img'),
'favicon.png', mimetype='image/vnd.microsoft.icon')
'favicon.png',
mimetype='image/vnd.microsoft.icon')
def run():
from gevent import monkey
monkey.patch_all()
app.run(debug = settings['server']['debug']
,use_debugger = settings['server']['debug']
,port = settings['server']['port']
app.run(
debug=settings['server']['debug'],
use_debugger=settings['server']['debug'],
port=settings['server']['port']
)

View file

@ -11,12 +11,12 @@ def read(*rnames):
return open(os.path.join(os.path.dirname(__file__), *rnames)).read()
long_description = read('README.md')
long_description = read('README.rst')
setup(
name='searx',
version="0.1",
description="",
version="0.1.2",
description="A privacy-respecting, hackable metasearch engine",
long_description=long_description,
classifiers=[
"Programming Language :: Python",
@ -30,6 +30,7 @@ setup(
zip_safe=False,
install_requires=[
'flask',
'flask-babel',
'grequests',
'lxml',
'pyyaml',
@ -49,4 +50,20 @@ setup(
'zope.testrunner',
]
},
entry_points={
'console_scripts': [
'searx-run = searx.webapp:run'
]
},
package_data={
'searx': [
'settings.yml',
'../README.rst',
'static/*/*',
'translations/*/*',
'templates/*.html',
'templates/result_templates/*.html',
],
},
)
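
Note (not part of this commit): the new entry_points block means that installing the package generates a searx-run console script wired to searx.webapp:run. The generated wrapper behaves roughly like this illustration:

```python
# Illustration only: what the searx-run console script boils down to.
from searx.webapp import run

if __name__ == '__main__':
    run()
```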

View file

@ -1,5 +1,6 @@
[versions]
Flask = 0.10.1
Flask-Babel = 0.9
Jinja2 = 2.7.2
MarkupSafe = 0.18
WebOb = 1.3.1