Mirror of https://github.com/searxng/searxng.git
add Ahmia filter plugin for onion results
This commit is contained in:
parent c3daa08537
commit 32957cdf49
5 changed files with 16253 additions and 0 deletions
16177  searx/data/ahmia_blacklist.txt  Normal file
File diff suppressed because it is too large
5  searx/plugins/__init__.py
@@ -28,6 +28,7 @@ from searx import logger, settings, static_path
 logger = logger.getChild('plugins')
 
 from searx.plugins import (oa_doi_rewrite,
+                           ahmia_filter,
                            hash_plugin,
                            https_rewrite,
                            infinite_scroll,
@@ -181,3 +182,7 @@ if 'enabled_plugins' in settings:
             plugin.default_on = True
         else:
             plugin.default_on = False
+
+# load tor specific plugins
+if settings['outgoing'].get('using_tor_proxy'):
+    plugins.register(ahmia_filter)
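
The second hunk only wires the plugin in when the instance sends its outgoing requests through Tor, which is when onion engines can return results at all. As a rough sketch (the settings dict below is a hand-written stand-in mirroring the "outgoing:" section of settings.yml, not the real loader), the guard amounts to:

# Minimal sketch of the registration guard above; the settings layout here is
# an assumption, not taken from the commit.
settings = {'outgoing': {'using_tor_proxy': True}}

if settings['outgoing'].get('using_tor_proxy'):
    print("using_tor_proxy is set: ahmia_filter would be registered")
else:
    print("no Tor proxy: onion results are not expected, plugin stays unregistered")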
36  searx/plugins/ahmia_filter.py  Normal file
@@ -0,0 +1,36 @@
+'''
+SPDX-License-Identifier: AGPL-3.0-or-later
+'''
+
+from hashlib import md5
+from os.path import join
+from urllib.parse import urlparse
+from searx import searx_dir
+
+name = "Ahmia blacklist"
+description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
+default_on = True
+preference_section = 'onions'
+
+ahmia_blacklist = None
+
+
+def get_ahmia_blacklist():
+    global ahmia_blacklist
+    if not ahmia_blacklist:
+        with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f:
+            ahmia_blacklist = f.read().split()
+    return ahmia_blacklist
+
+
+def not_blacklisted(result):
+    if not result.get('is_onion'):
+        return True
+    result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest()
+    return result_hash not in get_ahmia_blacklist()
+
+
+def post_search(request, search):
+    filtered_results = list(filter(not_blacklisted, search.result_container._merged_results))
+    search.result_container._merged_results = filtered_results
+    return True
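
Ahmia distributes its blacklist as MD5 digests of banned .onion hostnames, which is why not_blacklisted() hashes the hostname of every onion result before looking it up. A self-contained sketch of that check, using a made-up hostname and a one-entry blacklist rather than real data:

from hashlib import md5
from urllib.parse import urlparse

# Hypothetical data: one "banned" hostname and the digest Ahmia would list for it.
banned_host = 'examplebannedonionxyz.onion'
blacklist = {md5(banned_host.encode()).hexdigest()}

def is_blocked(url):
    # Same idea as not_blacklisted(): hash the hostname, test membership.
    host = urlparse(url).hostname
    return md5(host.encode()).hexdigest() in blacklist

print(is_blocked('http://examplebannedonionxyz.onion/page'))  # True
print(is_blocked('http://someotheraddress.onion/'))           # False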
2  searx/templates/oscar/preferences.html
@@ -258,6 +258,7 @@
         <fieldset>
             <div class="container-fluid">
             {% for plugin in plugins %}
+            {% if plugin.preference_section != 'onions' %}
             <div class="panel panel-default">
                 <div class="panel-heading">
                     <h3 class="panel-title">{{ _(plugin.name) }}</h3>
@@ -271,6 +272,7 @@
                     </div>
                 </div>
             </div>
+            {% endif %}
             {% endfor %}
             </div>
         </fieldset>
33  utils/fetch_ahmia_blacklist.py  Executable file
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+# This script saves Ahmia's blacklist for onion sites.
+# More info in https://ahmia.fi/blacklist/
+
+# set path
+from sys import path
+from os.path import realpath, dirname, join
+path.append(realpath(dirname(realpath(__file__)) + '/../'))
+
+#
+import requests
+from searx import searx_dir
+
+URL = 'https://ahmia.fi/blacklist/banned/'
+
+
+def fetch_ahmia_blacklist():
+    resp = requests.get(URL, timeout=3.0)
+    if resp.status_code != 200:
+        raise Exception("Error fetching Ahmia blacklist, HTTP code " + str(resp.status_code))
+    else:
+        blacklist = resp.text.split()
+        return blacklist
+
+
+def get_ahmia_blacklist_filename():
+    return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
+
+
+blacklist = fetch_ahmia_blacklist()
+with open(get_ahmia_blacklist_filename(), "w") as f:
+    f.write('\n'.join(blacklist))
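
The script is meant to be run by hand from a repository checkout and simply overwrites searx/data/ahmia_blacklist.txt with one digest per line. A quick sanity check one could run afterwards, assuming the searx package is importable, to confirm every entry looks like a 32-character MD5 hex string (this helper is illustrative, not part of the commit):

import re
from os.path import join

from searx import searx_dir

# Same path that get_ahmia_blacklist_filename() writes to.
blacklist_path = join(searx_dir, "data", "ahmia_blacklist.txt")

with open(blacklist_path) as f:
    entries = f.read().split()

md5_hex = re.compile(r'^[0-9a-f]{32}$')
bad = [e for e in entries if not md5_hex.match(e)]
print(f"{len(entries)} entries, {len(bad)} not shaped like an MD5 digest")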