add digbt engine

Unfortunately, it is quite slow, so it is disabled.
Furthermore, the number of files shown on digbt.org is wrong,
so it is not displayed in searx.
This commit is contained in:
Noemi Vanyi 2016-08-13 14:55:47 +02:00
parent 104cdb7d03
commit 3a1c5876b1
4 changed files with 84 additions and 15 deletions

View file

@ -16,6 +16,7 @@ from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
# engine dependent config
categories = ['videos', 'music', 'files']
@ -68,20 +69,7 @@ def response(resp):
leech = 0
# convert filesize to byte if possible
try:
filesize = float(filesize)
# convert filesize to byte
if filesize_multiplier == 'TB':
filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
elif filesize_multiplier == 'GB':
filesize = int(filesize * 1024 * 1024 * 1024)
elif filesize_multiplier == 'MB':
filesize = int(filesize * 1024 * 1024)
elif filesize_multiplier == 'KB':
filesize = int(filesize * 1024)
except:
filesize = None
filesize = get_torrent_size(filesize, filesize_multiplier)
# convert files to int if possible
if files.isdigit():

58
searx/engines/digbt.py Normal file
View file

@ -0,0 +1,58 @@
"""
DigBT (Videos, Music, Files)
@website https://digbt.org
@provide-api no
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content, magnetlink
"""
from urlparse import urljoin
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
# engine dependent config
# NOTE(review): presumably read by the searx engine loader — confirm there.
categories = ['videos', 'music', 'files']
paging = True

# search-url; {query} and {pageno} are filled in by request()
URL = 'https://digbt.org'
SEARCH_URL = URL + '/search/{query}-time-{pageno}'

# positions of the size value and its unit in the whitespace-split text
# of a result's "tail" div, as indexed in response()
FILESIZE = 3
FILESIZE_MULTIPLIER = 4
def request(query, params):
    """Set ``params['url']`` to the DigBT search URL and return ``params``.

    The URL is built from ``SEARCH_URL`` using ``query`` and
    ``params['pageno']``.
    """
    # NOTE(review): query is inserted into the URL path as-is; confirm the
    # caller has already escaped it.
    page = params['pageno']
    params['url'] = SEARCH_URL.format(query=query, pageno=page)
    return params
def response(resp):
    """Build the list of torrent results from a DigBT search page.

    Each ``td`` element with class ``x-item`` yields one result dict using
    the ``torrent.html`` template.  Seed and leech counts are not parsed;
    both are reported as ``'N/A'``.

    An ``IndexError`` can propagate when a row has no titled link, no magnet
    link, or too few whitespace-separated tokens in its "tail" text.
    """
    dom = html.fromstring(resp.content)
    results = []

    # With no matching rows the loop body never runs and the list stays empty.
    for row in dom.xpath('.//td[@class="x-item"]'):
        href = row.xpath('.//a[@title]/@href')[0]
        link = urljoin(URL, href)
        name = row.xpath('.//a[@title]/text()')[0]
        description = extract_text(row.xpath('.//div[@class="files"]'))

        # The size value and its unit sit at fixed positions in the
        # whitespace-split text of the "tail" div.
        tail_tokens = extract_text(row.xpath('.//div[@class="tail"]')).split()
        size = get_torrent_size(tail_tokens[FILESIZE],
                                tail_tokens[FILESIZE_MULTIPLIER])

        magnet = row.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

        entry = {
            'url': link,
            'title': name,
            'content': description,
            'filesize': size,
            'magnetlink': magnet,
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html',
        }
        results.append(entry)

    return results

View file

@ -87,7 +87,7 @@ engines:
- name : btdigg
engine : btdigg
shortcut : bt
- name : crossref
engine : json_engine
paging : True
@ -118,6 +118,11 @@ engines:
weight : 2
disabled : True
- name : digbt
engine : digbt
shortcut : dbt
timeout : 6.0
- name : digg
engine : digg
shortcut : dg

View file

@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
return a_list[index]
else:
return default
def get_torrent_size(filesize, filesize_multiplier):
    """Convert a torrent size and its unit into a number of bytes.

    :param filesize: numeric part of the size; anything ``float()`` accepts.
    :param filesize_multiplier: unit string; 'TB', 'GB', 'MB' and 'KB' are
        recognised (powers of 1024).
    :returns: the size in bytes as an integer for a recognised unit, the
        parsed float unchanged for any other unit, or ``None`` when the
        value cannot be converted.
    """
    units = (
        ('TB', 1024 ** 4),
        ('GB', 1024 ** 3),
        ('MB', 1024 ** 2),
        ('KB', 1024),
    )

    # Only conversion failures mean "size unknown".  A bare ``except`` would
    # also swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
    try:
        size = float(filesize)
        for unit, factor in units:
            if filesize_multiplier == unit:
                # int() raises for inf (OverflowError) and nan (ValueError).
                return int(size * factor)
    except (TypeError, ValueError, OverflowError):
        return None

    # Unrecognised unit: hand back the parsed number as-is.
    return size