Digg + Twitter corrections

Add Digg engine, with thumbnails
Add publication date for Twitter results
This commit is contained in:
Cqoicebordel 2014-12-28 22:57:59 +01:00
parent 011c43b485
commit e7e2981536
3 changed files with 86 additions and 6 deletions

66
searx/engines/digg.py Normal file
View file

@ -0,0 +1,66 @@
## Digg (News, Social media)
#
# @website https://digg.com/
# @provide-api no
#
# @using-api no
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content, publishedDate, thumbnail
from urllib import quote_plus
from json import loads
from lxml import html
from cgi import escape
from dateutil import parser
# engine dependent config
categories = ['news', 'social media']
# request() reads params['pageno'], so more than one page can be requested
paging = True
# search-url
base_url = 'https://digg.com/'
# {query} receives the URL-quoted search terms, {position} the result offset
search_url = base_url+'api/search/{query}.json?position={position}&format=html'
# specific xpath variables
# response() turns every node matched by results_xpath into one result
results_xpath = '//article'
# NOTE(review): link_xpath is not referenced by request()/response() as shown
# here; the result URL is read from the article's data-contenturl attribute
link_xpath = './/small[@class="time"]//a'
# text nodes of the links inside the article's <h2>, joined into the title
title_xpath = './/h2//a//text()'
# text nodes of the article's <p> elements, joined into the content
content_xpath = './/p//text()'
# <time> element whose "datetime" attribute is parsed into publishedDate
pubdate_xpath = './/time'
# do search-request
def request(query, params):
    """Store the Digg search URL for ``query`` in ``params['url']``.

    :param query: raw search terms; they are URL-quoted before being
        inserted into the URL.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['url']`` is written.
    :returns: the same ``params`` dict, with ``'url'`` filled in.
    """
    # the position advances by 10 for every page: page 1 -> 0, page 2 -> 10
    page_index = params['pageno'] - 1

    params['url'] = search_url.format(query=quote_plus(query),
                                      position=page_index * 10)

    return params
# get response from search-request
def response(resp):
    """Parse a Digg search response into a list of result dicts.

    The response body is JSON whose ``'html'`` member holds the result
    markup; every node matched by ``results_xpath`` becomes one result.

    :param resp: response object; only its ``text`` attribute is read.
    :returns: list of dicts with the keys ``'url'``, ``'title'``,
        ``'content'``, ``'template'`` and ``'thumbnail'`` (``None`` when the
        article has no image), plus ``'publishedDate'`` when the article
        carries a parsable ``<time datetime="...">`` element.
    """
    results = []

    search_result = loads(resp.text)

    # lxml refuses to parse an empty document, so a page without any
    # markup simply yields no results
    markup = search_result.get('html')
    if not markup or not markup.strip():
        return results

    dom = html.fromstring(markup)

    # parse results
    for result in dom.xpath(results_xpath):
        url = result.attrib.get('data-contenturl')
        title = ''.join(result.xpath(title_xpath))
        content = escape(''.join(result.xpath(content_xpath)))

        # one article without an <img> must not discard the whole page
        images = result.xpath('.//img')
        thumbnail = images[0].attrib.get('src') if images else None

        item = {'url': url,
                'title': title,
                'content': content,
                'template': 'videos.html',
                'thumbnail': thumbnail}

        # the publication date is optional: leave the key out when the
        # <time> element or its datetime attribute is missing or unparsable
        time_nodes = result.xpath(pubdate_xpath)
        raw_date = time_nodes[0].attrib.get('datetime') if time_nodes else None
        if raw_date:
            try:
                item['publishedDate'] = parser.parse(raw_date)
            except (ValueError, OverflowError):
                # best effort: keep the result, just without a date
                pass

        # append result
        results.append(item)

    # return results
    return results

View file

@ -1,6 +1,6 @@
## Twitter (Social media)
#
# @website https://www.bing.com/news
# @website https://twitter.com/
# @provide-api yes (https://dev.twitter.com/docs/using-search)
#
# @using-api no
@ -14,6 +14,7 @@ from urlparse import urljoin
from urllib import urlencode
from lxml import html
from cgi import escape
from datetime import datetime
# engine dependent config
categories = ['social media']
@ -28,6 +29,7 @@ results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/span[@class="username js-action-profile-name"]//text()'
content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
# do search-request
@ -53,7 +55,15 @@ def response(resp):
url = urljoin(base_url, link.attrib.get('href'))
title = ''.join(tweet.xpath(title_xpath))
content = escape(''.join(tweet.xpath(content_xpath)))
pubdate = tweet.xpath(timestamp_xpath)
if len(pubdate) > 0:
publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
# append result
results.append({'url': url,
'title': title,
'content': content,
'publishedDate': publishedDate})
else:
# append result
results.append({'url': url,
'title': title,

View file

@ -45,6 +45,10 @@ engines:
engine : duckduckgo_definitions
shortcut : ddd
- name : digg
engine : digg
shortcut : dg
- name : wikidata
engine : wikidata
shortcut : wd