Added seepeer to config and added seepeer search parser

This commit is contained in:
Pydo 2016-09-05 14:51:02 -04:00
parent aa85045a7d
commit 2c2123b2e8
2 changed files with 82 additions and 0 deletions

78
searx/engines/seedpeer.py Normal file
View file

@ -0,0 +1,78 @@
# Seedpeer (Videos, Music, Files)
#
# @website http://seedpeer.eu
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML (using search portal)
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from cgi import escape
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
url = 'http://www.seedpeer.eu/'
search_url = url + 'search/{search_term}/7/{page_no}.html'
# specific xpath variables
torrent_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a'
alternative_torrent_xpath = '//*[@id="body"]/center/center/table[1]/tr/td/a'
title_xpath = '//*[@id="body"]/center/center/table[2]/tr/td/a/text()'
alternative_title_xpath = '//*[@id="body"]/center/center/table/tr/td/a'
seeds_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[4]/font/text()'
alternative_seeds_xpath = '//*[@id="body"]/center/center/table/tr/td[4]/font/text()'
peers_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[5]/font/text()'
alternative_peers_xpath = '//*[@id="body"]/center/center/table/tr/td[5]/font/text()'
age_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[2]/text()'
alternative_age_xpath = '//*[@id="body"]/center/center/table/tr/td[2]/text()'
size_xpath = '//*[@id="body"]/center/center/table[2]/tr/td[3]/text()'
alternative_size_xpath = '//*[@id="body"]/center/center/table/tr/td[3]/text()'
# do search-request
def request(query, params):
params['url'] = search_url.format(search_term=quote(query),
page_no=params['pageno'] - 1)
return params
# get response from search-request
def response(resp):
results = []
dom = html.fromstring(resp.text)
torrent_links = dom.xpath(torrent_xpath)
if len(torrent_links) > 0:
seeds = dom.xpath(seeds_xpath)
peers = dom.xpath(peers_xpath)
titles = dom.xpath(title_xpath)
sizes = dom.xpath(size_xpath)
ages = dom.xpath(age_xpath)
else: # under ~5 results uses a different xpath
torrent_links = dom.xpath(alternative_torrent_xpath)
seeds = dom.xpath(alternative_seeds_xpath)
peers = dom.xpath(alternative_peers_xpath)
titles = dom.xpath(alternative_title_xpath)
sizes = dom.xpath(alternative_size_xpath)
ages = dom.xpath(alternative_age_xpath)
# return empty array if nothing is found
if not torrent_links:
return []
# parse results
for index, result in enumerate(torrent_links):
link = result.attrib.get('href')
href = urljoin(url, link)
results.append({'url': href,
'title': titles[index].text_content(),
'content': '{}, {}'.format(sizes[index], ages[index]),
'seed': seeds[index],
'leech': peers[index],
'template': 'torrent.html'})
# return results sorted by seeder
return sorted(results, key=itemgetter('seed'), reverse=True)

View file

@ -495,6 +495,10 @@ engines:
timeout: 6.0
categories : science
- name : seedpeer
engine : seedpeer
shortcut: speu
#The blekko technology and team have joined IBM Watson! -> https://blekko.com/
# - name : blekko images
# engine : blekko_images