searxng/searx/engines/wolframalpha_api.py

78 lines
2.2 KiB
Python
Raw Normal View History

2015-12-23 06:01:00 +00:00
# Wolfram Alpha (Maths)
#
# @website http://www.wolframalpha.com
# @provide-api yes (http://api.wolframalpha.com/v2/)
#
# @using-api yes
# @results XML
# @stable yes
# @parse result
from urllib import urlencode
from lxml import etree
from re import search
2015-12-23 06:01:00 +00:00
# search-url
# base_url is the API query endpoint. search_url extends it with the app id,
# an already URL-encoded query string ({query}) and format=plaintext.
# site_url is the template for the link to the Wolfram|Alpha web page;
# its {query} placeholder is likewise filled with URL-encoded parameters.
base_url = 'http://api.wolframalpha.com/v2/query'
search_url = base_url + '?appid={api_key}&{query}&format=plaintext'
site_url = 'http://www.wolframalpha.com/input/?{query}'
2016-01-02 06:41:14 +00:00
# app id substituted into search_url as the "appid" parameter; empty by default
api_key = '' # defined in settings.yml
2015-12-28 07:17:42 +00:00
2016-01-02 04:02:10 +00:00
# xpath variables
# failure_xpath: matches the root <queryresult> when its success attribute is "false"
# answer_xpath:  <plaintext> nodes of subpods inside pods marked primary="true"
# input_xpath:   <plaintext> nodes of subpods inside pods whose title starts with "Input"
failure_xpath = '/queryresult[attribute::success="false"]'
answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
input_xpath = '//pod[starts-with(attribute::title, "Input")]/subpod/plaintext'
2016-01-02 04:02:10 +00:00
2015-12-23 06:01:00 +00:00
# do search-request
def request(query, params):
    """Store the Wolfram|Alpha API URL for ``query`` in ``params``.

    The query is URL-encoded as the ``input`` parameter and combined with the
    module-level ``api_key``. The resulting URL is written to
    ``params['url']`` and the same ``params`` dict is returned.
    """
    encoded_query = urlencode({'input': query})
    params['url'] = search_url.format(api_key=api_key, query=encoded_query)
    return params
2015-12-28 07:17:42 +00:00
2015-12-23 06:01:00 +00:00
# Unicode Private Use Area code points found in the API's plaintext output,
# mapped to legible replacements.
_PUA_REPLACEMENTS = {u'\uf74c': 'd',
                     u'\uf74d': u'\u212f',
                     u'\uf74e': 'i',
                     u'\uf7d9': '='}


# replace Private Use Area characters to make text legible
def replace_pua_chars(text):
    """Return ``text`` with known Private Use Area characters substituted.

    Each key of ``_PUA_REPLACEMENTS`` occurring in ``text`` is replaced by its
    mapped value; all other characters are left untouched.
    """
    # .items() exists on both Python 2 and 3, unlike the Python 2-only
    # .iteritems(), so this helper keeps working after a port.
    for pua_char, replacement in _PUA_REPLACEMENTS.items():
        text = text.replace(pua_char, replacement)

    return text
2015-12-28 07:17:42 +00:00
2015-12-23 06:01:00 +00:00
# get response from search-request
def response(resp):
    """Parse the XML API response into a list of result dicts.

    Returns an empty list when the query result is flagged as failed.
    Otherwise returns one ``{'answer': ...}`` entry per non-empty primary
    answer, followed by a ``{'url': ..., 'title': ...}`` entry linking to the
    Wolfram|Alpha site whenever the interpreted input can be determined.
    """
    results = []

    search_results = etree.XML(resp.content)

    # return empty array if there are no results
    if search_results.xpath(failure_xpath):
        return []

    # parse answers; a <plaintext> element without content has .text of None,
    # which must be skipped rather than passed to replace_pua_chars
    answer_texts = [node.text
                    for node in search_results.xpath(answer_xpath)
                    if node.text]
    for answer_text in answer_texts:
        results.append({'answer': replace_pua_chars(answer_text)})

    # prefer the "Input" pod; if it is missing or empty, check whether the
    # first answer has the input embedded (before the PUA "=" sign)
    query_input = None
    input_nodes = search_results.xpath(input_xpath)
    if input_nodes and input_nodes[0].text:
        query_input = input_nodes[0].text
    elif answer_texts:
        embedded = search(u'([^\uf7d9]+)', answer_texts[0])
        if embedded:
            query_input = embedded.group(1)

    # without a usable input there is nothing to link to
    if query_input is None:
        return results

    # append link to site
    result_url = site_url.format(query=urlencode({'i': query_input.encode('utf-8')}))
    results.append({'url': result_url,
                    'title': query_input + " - Wolfram|Alpha"})

    return results