Merge pull request #486 from a01200356/master

[enh] WolframAlpha no API engine (and tests for both)
This commit is contained in:
Adam Tauber 2016-01-19 17:02:14 +01:00
commit b5a3dfca60
5 changed files with 621 additions and 16 deletions

View file

@ -10,11 +10,18 @@
from urllib import urlencode
from lxml import etree
from re import search
# search-url
base_url = 'http://api.wolframalpha.com/v2/query'
search_url = base_url + '?appid={api_key}&{query}&format=plaintext'
api_key = ''
site_url = 'http://www.wolframalpha.com/input/?{query}'
api_key = '' # defined in settings.yml
# xpath variables
failure_xpath = '/queryresult[attribute::success="false"]'
answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
input_xpath = '//pod[starts-with(attribute::title, "Input")]/subpod/plaintext'
# do search-request
@ -45,16 +52,26 @@ def response(resp):
search_results = etree.XML(resp.content)
# return empty array if there are no results
if search_results.xpath('/queryresult[attribute::success="false"]'):
if search_results.xpath(failure_xpath):
return []
# parse result
result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text
result = replace_pua_chars(result)
# parse answers
answers = search_results.xpath(answer_xpath)
if answers:
for answer in answers:
answer = replace_pua_chars(answer.text)
# append result
# TODO: shouldn't it bind the source too?
results.append({'answer': result})
results.append({'answer': answer})
# if there's no input section in search_results, check if answer has the input embedded (before their "=" sign)
try:
query_input = search_results.xpath(input_xpath)[0].text
except IndexError:
query_input = search(u'([^\uf7d9]+)', answers[0].text).group(1)
# append link to site
result_url = site_url.format(query=urlencode({'i': query_input.encode('utf-8')}))
results.append({'url': result_url,
'title': query_input + " - Wolfram|Alpha"})
# return results
return results

View file

@ -0,0 +1,86 @@
# WolframAlpha (Maths)
#
# @website http://www.wolframalpha.com/
# @provide-api yes (http://api.wolframalpha.com/v2/)
#
# @using-api no
# @results HTML
# @stable no
# @parse answer
from re import search, sub
from json import loads
from urllib import urlencode
from lxml import html
import HTMLParser
# search-url
# base address of the site; search_url is the query page template whose
# {query} placeholder is filled by request() with the urlencoded input
url = 'http://www.wolframalpha.com/'
search_url = url + 'input/?{query}'
# xpath variables
# all <script> elements; response() scans their text for the answer
scripts_xpath = '//script'
# page title; response() builds the result link and its title from it
title_xpath = '//title'
# paragraph whose presence makes response() return no results
failure_xpath = '//p[attribute::class="pfail"]'
# do search-request
def request(query, params):
    """Set the URL of the Wolfram|Alpha page to fetch for ``query``.

    The query is urlencoded as the ``i`` parameter and substituted into
    ``search_url``; the result is stored under ``params['url']``.

    :param query: the user's search query
    :param params: request parameters mapping, updated in place
    :returns: the same ``params`` object
    """
    encoded_query = urlencode({'i': query})
    params['url'] = search_url.format(query=encoded_query)
    return params
# get response from search-request
def response(resp):
    """Turn a Wolfram|Alpha result page into a list of searx results.

    The plaintext answer is not part of the visible markup: it is embedded
    as a JSON object passed to a ``pod_XXXX.push(...)`` call inside one of
    the page's <script> elements.

    :param resp: response object; only its ``text`` attribute (the page
                 HTML) is read
    :returns: a list holding ``{'answer': ...}`` when an answer could be
              extracted, followed by ``{'url': ..., 'title': ...}`` built
              from the page title. The list is empty when the page carries
              the failure marker, and lacks the link entry when the page
              has no usable <title>.
    """
    results = []

    dom = html.fromstring(resp.text)

    # failed result
    if dom.xpath(failure_xpath):
        return results

    # the answer is inside a js function
    # answer can be located in different 'pods', although by default it
    # should be in pod_0200, which is why that pattern is tried first
    possible_locations = [r'pod_0200\.push\((.*)',
                          r'pod_0100\.push\((.*)']

    # get line that matches the pattern
    line = None
    scripts = dom.xpath(scripts_xpath)
    for pattern in possible_locations:
        for script in scripts:
            match = search(pattern, script.text_content())
            if match:
                line = match.group(1)
                break
        if line:
            break

    if line:
        # extract answer from json: keep only the outermost {...} of the
        # matched text, dropping the trailing ");" of the js call
        answer = line[line.find('{'):line.rfind('}') + 1]
        try:
            answer = loads(answer)
        except ValueError:
            # not valid JSON as-is; retry on the text escaped with the
            # unicode-escape codec
            answer = loads(answer.encode('unicode-escape'))
        answer = answer['stringified']

        # clean plaintext answer
        # NOTE(review): on Python 2, decoding a unicode object implicitly
        # encodes it as ASCII first, so a non-ASCII answer that parsed on
        # the first attempt would presumably raise here -- confirm
        h = HTMLParser.HTMLParser()
        answer = h.unescape(answer.decode('unicode-escape'))
        answer = sub(r'\\', '', answer)

        results.append({'answer': answer})

    # user input is in first part of title
    titles = dom.xpath(title_xpath)
    if not titles or titles[0].text is None:
        # no usable <title>: keep whatever was extracted instead of raising
        return results
    title = titles[0].text.encode('utf-8')
    # the last 16 characters are the " - Wolfram|Alpha" suffix
    result_url = request(title[:-16], {})['url']

    # append result
    results.append({'url': result_url,
                    'title': title.decode('utf-8')})

    return results

View file

@ -300,13 +300,15 @@ engines:
engine : vimeo
shortcut : vm
# You can use the engine using the official stable API, but you need an API key
# See : http://products.wolframalpha.com/api/
# - name : wolframalpha
# shortcut : wa
# engine : wolframalpha_api
# api_key: 'apikey' # required!
# timeout: 6.0
- name : wolframalpha
shortcut : wa
# You can use the engine using the official stable API, but you need an API key
# See : http://products.wolframalpha.com/api/
# engine : wolframalpha_api
# api_key: 'apikey' # required!
engine : wolframalpha_noapi
timeout: 6.0
disabled : True
#The blekko technology and team have joined IBM Watson! -> https://blekko.com/
# - name : blekko images

View file

@ -0,0 +1,307 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import wolframalpha_api
from searx.testing import SearxTestCase
# Unit tests for the wolframalpha_api engine: URL construction in request()
# and parsing of the API's XML answer in response().
class TestWolframAlphaAPIEngine(SearxTestCase):
# request() must build a wolframalpha.com URL containing the query and leave
# the api key available in the returned params
def test_request(self):
query = 'test_query'
api_key = 'XXXXXX-XXXXXXXXXX'
dicto = defaultdict(dict)
dicto['api_key'] = api_key
params = wolframalpha_api.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('wolframalpha.com', params['url'])
self.assertIn('api_key', params)
self.assertIn(api_key, params['api_key'])
# response() is fed mocked responses whose `content` is an XML fixture; each
# fixture is followed by the assertions on the parsed results
def test_response(self):
# objects that are not responses must raise AttributeError
self.assertRaises(AttributeError, wolframalpha_api.response, None)
self.assertRaises(AttributeError, wolframalpha_api.response, [])
self.assertRaises(AttributeError, wolframalpha_api.response, '')
self.assertRaises(AttributeError, wolframalpha_api.response, '[]')
# fixture: unsuccessful query (success='false')
xml = '''<?xml version='1.0' encoding='UTF-8'?>
<queryresult success='false' error='false' />
'''
# test failure
response = mock.Mock(content=xml)
self.assertEqual(wolframalpha_api.response(response), [])
# fixture: successful query with an 'Input' pod and a primary 'Result' pod
xml = """<?xml version='1.0' encoding='UTF-8'?>
<queryresult success='true'
error='false'
numpods='6'
datatypes=''
timedout=''
timedoutpods=''
timing='0.684'
parsetiming='0.138'
parsetimedout='false'
recalculate=''
id='MSPa416020a7966dachc463600000f9c66cc21444cfg'
host='http://www3.wolframalpha.com'
server='6'
related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?...'
version='2.6'>
<pod title='Input'
scanner='Identity'
id='Input'
position='100'
error='false'
numsubpods='1'>
<subpod title=''>
<plaintext>sqrt(-1)</plaintext>
</subpod>
</pod>
<pod title='Result'
scanner='Simplification'
id='Result'
position='200'
error='false'
numsubpods='1'
primary='true'>
<subpod title=''>
<plaintext></plaintext>
</subpod>
<states count='1'>
<state name='Step-by-step solution'
input='Result__Step-by-step solution' />
</states>
</pod>
<pod title='Polar coordinates'
scanner='Numeric'
id='PolarCoordinates'
position='300'
error='false'
numsubpods='1'>
<subpod title=''>
<plaintext>r1 (radius), θ90° (angle)</plaintext>
</subpod>
</pod>
<pod title='Position in the complex plane'
scanner='Numeric'
id='PositionInTheComplexPlane'
position='400'
error='false'
numsubpods='1'>
<subpod title=''>
<plaintext></plaintext>
</subpod>
</pod>
<pod title='All 2nd roots of -1'
scanner='RootsOfUnity'
id=''
position='500'
error='false'
numsubpods='2'>
<subpod title=''>
<plaintext> (principal root)</plaintext>
</subpod>
<subpod title=''>
<plaintext>-</plaintext>
</subpod>
</pod>
<pod title='Plot of all roots in the complex plane'
scanner='RootsOfUnity'
id='PlotOfAllRootsInTheComplexPlane'
position='600'
error='false'
numsubpods='1'>
<subpod title=''>
<plaintext></plaintext>
</subpod>
</pod>
</queryresult>
"""
# test private use area char in response
response = mock.Mock(content=xml)
results = wolframalpha_api.response(response)
self.assertEqual(type(results), list)
# one answer plus the link to the site
self.assertEqual(len(results), 2)
self.assertIn('i', results[0]['answer'])
self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title'])
self.assertEquals('http://www.wolframalpha.com/input/?i=sqrt%28-1%29', results[1]['url'])
# fixture: successful query without an 'Input' pod
xml = """<?xml version='1.0' encoding='UTF-8'?>
<queryresult success='true'
error='false'
numpods='2'
datatypes=''
timedout=''
timedoutpods=''
timing='1.286'
parsetiming='0.255'
parsetimedout='false'
recalculate=''
id='MSPa195222ad740ede5214h30000480ca61h003d3gd6'
host='http://www3.wolframalpha.com'
server='20'
related='http://www3.wolframalpha.com/api/v2/relatedQueries.jsp?id=...'
version='2.6'>
<pod title='Indefinite integral'
scanner='Integral'
id='IndefiniteIntegral'
position='100'
error='false'
numsubpods='1'
primary='true'>
<subpod title=''>
<plaintext>1/xxlog(x)+constant</plaintext>
</subpod>
<states count='1'>
<state name='Step-by-step solution'
input='IndefiniteIntegral__Step-by-step solution' />
</states>
<infos count='1'>
<info text='log(x) is the natural logarithm'>
<link url='http://reference.wolfram.com/mathematica/ref/Log.html'
text='Documentation'
title='Mathematica' />
<link url='http://functions.wolfram.com/ElementaryFunctions/Log'
text='Properties'
title='Wolfram Functions Site' />
<link url='http://mathworld.wolfram.com/NaturalLogarithm.html'
text='Definition'
title='MathWorld' />
</info>
</infos>
</pod>
<pod title='Plots of the integral'
scanner='Integral'
id='Plot'
position='200'
error='false'
numsubpods='2'>
<subpod title=''>
<plaintext></plaintext>
<states count='1'>
<statelist count='2'
value='Complex-valued plot'
delimiters=''>
<state name='Complex-valued plot'
input='Plot__1_Complex-valued plot' />
<state name='Real-valued plot'
input='Plot__1_Real-valued plot' />
</statelist>
</states>
</subpod>
<subpod title=''>
<plaintext></plaintext>
<states count='1'>
<statelist count='2'
value='Complex-valued plot'
delimiters=''>
<state name='Complex-valued plot'
input='Plot__2_Complex-valued plot' />
<state name='Real-valued plot'
input='Plot__2_Real-valued plot' />
</statelist>
</states>
</subpod>
</pod>
<assumptions count='1'>
<assumption type='Clash'
word='integral'
template='Assuming &quot;${word}&quot; is ${desc1}. Use as ${desc2} instead'
count='2'>
<value name='IntegralsWord'
desc='an integral'
input='*C.integral-_*IntegralsWord-' />
<value name='MathematicalFunctionIdentityPropertyClass'
desc='a function property'
input='*C.integral-_*MathematicalFunctionIdentityPropertyClass-' />
</assumption>
</assumptions>
</queryresult>
"""
# test integral
response = mock.Mock(content=xml)
results = wolframalpha_api.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
self.assertIn('log(x)+c', results[0]['answer'])
self.assertIn('∫1/xx - Wolfram|Alpha'.decode('utf-8'), results[1]['title'])
self.assertEquals('http://www.wolframalpha.com/input/?i=%E2%88%AB1%2Fx%EF%9D%8Cx', results[1]['url'])
# fixture: primary pod with two subpods, i.e. two answers
xml = """<?xml version='1.0' encoding='UTF-8'?>
<queryresult success='true'
error='false'
numpods='4'
datatypes='Solve'
timedout=''
timedoutpods=''
timing='0.79'
parsetiming='0.338'
parsetimedout='false'
recalculate=''
id='MSPa7481f7i06d25h3deh2900004810i3a78d9b4fdc'
host='http://www5b.wolframalpha.com'
server='23'
related='http://www5b.wolframalpha.com/api/v2/relatedQueries.jsp?id=...'
version='2.6'>
<pod title='Input interpretation'
scanner='Identity'
id='Input'
position='100'
error='false'
numsubpods='1'>
<subpod title=''>
<plaintext>solve x^2+x0</plaintext>
</subpod>
</pod>
<pod title='Results'
scanner='Solve'
id='Result'
position='200'
error='false'
numsubpods='2'
primary='true'>
<subpod title=''>
<plaintext>x-1</plaintext>
</subpod>
<subpod title=''>
<plaintext>x0</plaintext>
</subpod>
<states count='1'>
<state name='Step-by-step solution'
input='Result__Step-by-step solution' />
</states>
</pod>
<pod title='Root plot'
scanner='Solve'
id='RootPlot'
position='300'
error='false'
numsubpods='1'>
<subpod title=''>
<plaintext></plaintext>
</subpod>
</pod>
<pod title='Number line'
scanner='Solve'
id='NumberLine'
position='400'
error='false'
numsubpods='1'>
<subpod title=''>
<plaintext></plaintext>
</subpod>
</pod>
</queryresult>
"""
# test equation with multiple answers
response = mock.Mock(content=xml)
results = wolframalpha_api.response(response)
self.assertEqual(type(results), list)
# two answers plus the link to the site
self.assertEqual(len(results), 3)
self.assertIn('x=-1', results[0]['answer'])
self.assertIn('x=0', results[1]['answer'])
self.assertIn('solve x^2+x0 - Wolfram|Alpha'.decode('utf-8'), results[2]['title'])
self.assertEquals('http://www.wolframalpha.com/input/?i=solve+x%5E2%2Bx%EF%9F%990', results[2]['url'])

View file

@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import wolframalpha_noapi
from searx.testing import SearxTestCase
# Unit tests for the wolframalpha_noapi engine: URL construction in request()
# and extraction of the answer from the result page's HTML in response().
class TestWolframAlphaNoAPIEngine(SearxTestCase):
# request() must build a wolframalpha.com URL containing the query
def test_request(self):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
params = wolframalpha_noapi.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('wolframalpha.com', params['url'])
# response() is fed mocked responses whose `text` is an HTML fixture; each
# fixture is followed by the assertions on the parsed results
def test_response(self):
# objects that are not responses must raise AttributeError
self.assertRaises(AttributeError, wolframalpha_noapi.response, None)
self.assertRaises(AttributeError, wolframalpha_noapi.response, [])
self.assertRaises(AttributeError, wolframalpha_noapi.response, '')
self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]')
# fixture: page carrying the <p class="pfail"> failure marker
html = """
<!DOCTYPE html>
<title> Parangaricutirimícuaro - Wolfram|Alpha</title>
<meta charset="utf-8" />
<body>
<div id="closest">
<p class="pfail">Wolfram|Alpha doesn't know how to interpret your input.</p>
<div id="dtips">
<div class="tip">
<span class="tip-title">Tip:&nbsp;</span>
Check your spelling, and use English
<span class="tip-extra"></span>
</div>
</div>
</div>
</body>
</html>
"""
# test failed query
response = mock.Mock(text=html)
self.assertEqual(wolframalpha_noapi.response(response), [])
# fixture: both pod_0100 and pod_0200 present; the answer asserted below
# is the one pushed to pod_0200
html = """
<!DOCTYPE html>
<title> sqrt(-1) - Wolfram|Alpha</title>
<meta charset="utf-8" />
<body>
<script type="text/javascript">
try {
if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
context.jsonArray.popups.pod_0100 = [];
}
context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": ""});
} catch(e) { }
try {
if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
context.jsonArray.popups.pod_0200 = [];
}
context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": ""});
} catch(e) { }
</script>
</body>
</html>
"""
# test plaintext
response = mock.Mock(text=html)
results = wolframalpha_noapi.response(response)
self.assertEqual(type(results), list)
# one answer plus the link to the site
self.assertEqual(len(results), 2)
self.assertEquals('i', results[0]['answer'])
self.assertIn('sqrt(-1) - Wolfram|Alpha', results[1]['title'])
# the leading space of the page title ends up in the link as '+'
self.assertEquals('http://www.wolframalpha.com/input/?i=+sqrt%28-1%29', results[1]['url'])
# fixture: only pod_0100 present
html = """
<!DOCTYPE html>
<title> integral 1/x - Wolfram|Alpha</title>
<meta charset="utf-8" />
<body>
<script type="text/javascript">
try {
if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
context.jsonArray.popups.pod_0100 = [];
}
context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"});
} catch(e) { }
</script>
</body>
</html>
"""
# test integral
response = mock.Mock(text=html)
results = wolframalpha_noapi.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
self.assertIn('log(x)+c', results[0]['answer'])
self.assertIn('integral 1/x - Wolfram|Alpha', results[1]['title'])
self.assertEquals('http://www.wolframalpha.com/input/?i=+integral+1%2Fx', results[1]['url'])
# fixture: page title written with HTML entities
html = """
<!DOCTYPE html>
<title> &int;1&#x2f;x &#xf74c;x - Wolfram|Alpha</title>
<meta charset="utf-8" />
<body>
<script type="text/javascript">
try {
if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
context.jsonArray.popups.pod_0100 = [];
}
context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"});
} catch(e) { }
</script>
</body>
</html>
"""
# test input in mathematical notation
response = mock.Mock(text=html)
results = wolframalpha_noapi.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
self.assertIn('log(x)+c', results[0]['answer'])
self.assertIn('∫1/x x - Wolfram|Alpha'.decode('utf-8'), results[1]['title'])
self.assertEquals('http://www.wolframalpha.com/input/?i=+%E2%88%AB1%2Fx+%EF%9D%8Cx', results[1]['url'])
# fixture: answer containing an HTML entity (&yen;)
html = """
<!DOCTYPE html>
<title> 1 euro to yen - Wolfram|Alpha</title>
<meta charset="utf-8" />
<body>
<script type="text/javascript">
try {
if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
context.jsonArray.popups.pod_0100 = [];
}
context.jsonArray.popups.pod_0100.push( {"stringified": "convert euro1 (euro) to Japanese yen"});
} catch(e) { }
try {
if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
context.jsonArray.popups.pod_0200 = [];
}
context.jsonArray.popups.pod_0200.push( {"stringified": "&yen;130.5 (Japanese yen)"});
} catch(e) { }
</script>
</body>
</html>
"""
# test output with htmlentity
response = mock.Mock(text=html)
results = wolframalpha_noapi.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
self.assertIn('¥'.decode('utf-8'), results[0]['answer'])
self.assertIn('1 euro to yen - Wolfram|Alpha', results[1]['title'])
self.assertEquals('http://www.wolframalpha.com/input/?i=+1+euro+to+yen', results[1]['url'])
# fixture: pod_0200 JSON containing a literal non-ASCII character in "mOutput"
html = """
<!DOCTYPE html>
<title> distance from nairobi to kyoto in inches - Wolfram|Alpha</title>
<meta charset="utf-8" />
<body>
<script type="text/javascript">
try {
if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) {
context.jsonArray.popups.pod_0100 = [];
}
[...].pod_0100.push( {"stringified": "convert distance | from | Nairobi, Kenya\nto | Kyoto, Japan to inches"});
} catch(e) { }
try {
if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) {
context.jsonArray.popups.pod_0200 = [];
}
pod_0200.push({"stringified": "4.295&times;10^8 inches","mOutput": "Quantity[4.295×10^8,&amp;quot;Inches&amp;quot;]"});
} catch(e) { }
</script>
</body>
</html>
"""
# test output with utf-8 character
response = mock.Mock(text=html)
results = wolframalpha_noapi.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
self.assertIn('4.295×10^8 inches'.decode('utf-8'), results[0]['answer'])
self.assertIn('distance from nairobi to kyoto in inches - Wolfram|Alpha', results[1]['title'])
self.assertEquals('http://www.wolframalpha.com/input/?i=+distance+from+nairobi+to+kyoto+in+inches',
results[1]['url'])