mirror of
https://github.com/searxng/searxng.git
synced 2024-12-01 23:21:09 +00:00
[mod] ahmia_filter.py: minor changes
- use result['parsed_url']
- load ahmia_blacklist.txt in searx.data
This commit is contained in:
parent
db703a0283
commit
5e7060053c
2 changed files with 10 additions and 8 deletions
|
@ -2,7 +2,7 @@ import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']
|
__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader', 'ahmia_blacklist_loader']
|
||||||
data_dir = Path(__file__).parent
|
data_dir = Path(__file__).parent
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,6 +16,11 @@ def bangs_loader():
|
||||||
return load('bangs.json')
|
return load('bangs.json')
|
||||||
|
|
||||||
|
|
||||||
|
def ahmia_blacklist_loader():
|
||||||
|
with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as fd:
|
||||||
|
return fd.read().split()
|
||||||
|
|
||||||
|
|
||||||
ENGINES_LANGUAGES = load('engines_languages.json')
|
ENGINES_LANGUAGES = load('engines_languages.json')
|
||||||
CURRENCIES = load('currencies.json')
|
CURRENCIES = load('currencies.json')
|
||||||
USER_AGENTS = load('useragents.json')
|
USER_AGENTS = load('useragents.json')
|
||||||
|
|
|
@ -3,9 +3,7 @@
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from os.path import join
|
from searx.data import ahmia_blacklist_loader
|
||||||
from urllib.parse import urlparse
|
|
||||||
from searx import searx_dir
|
|
||||||
|
|
||||||
name = "Ahmia blacklist"
|
name = "Ahmia blacklist"
|
||||||
description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
|
description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
|
||||||
|
@ -18,15 +16,14 @@ ahmia_blacklist = None
|
||||||
def get_ahmia_blacklist():
|
def get_ahmia_blacklist():
|
||||||
global ahmia_blacklist
|
global ahmia_blacklist
|
||||||
if not ahmia_blacklist:
|
if not ahmia_blacklist:
|
||||||
with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f:
|
ahmia_blacklist = ahmia_blacklist_loader()
|
||||||
ahmia_blacklist = f.read().split()
|
|
||||||
return ahmia_blacklist
|
return ahmia_blacklist
|
||||||
|
|
||||||
|
|
||||||
def not_blacklisted(result):
|
def not_blacklisted(result):
|
||||||
if not result.get('is_onion'):
|
if not result.get('is_onion') or not result.get('parsed_url'):
|
||||||
return True
|
return True
|
||||||
result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest()
|
result_hash = md5(result['parsed_url'].hostname.encode()).hexdigest()
|
||||||
return result_hash not in get_ahmia_blacklist()
|
return result_hash not in get_ahmia_blacklist()
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue