mirror of
https://github.com/searxng/searxng.git
synced 2024-12-22 01:06:29 +00:00
Merge pull request #609 from LuccoJ/betterwolfram
Improving Wolfram Alpha search hit content
This commit is contained in:
commit
8f48c518aa
4 changed files with 32 additions and 12 deletions
|
@ -18,10 +18,10 @@ api_key = '' # defined in settings.yml
|
||||||
|
|
||||||
# xpath variables
|
# xpath variables
|
||||||
failure_xpath = '/queryresult[attribute::success="false"]'
|
failure_xpath = '/queryresult[attribute::success="false"]'
|
||||||
answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
|
|
||||||
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
|
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
|
||||||
pods_xpath = '//pod'
|
pods_xpath = '//pod'
|
||||||
subpods_xpath = './subpod'
|
subpods_xpath = './subpod'
|
||||||
|
pod_primary_xpath = './@primary'
|
||||||
pod_id_xpath = './@id'
|
pod_id_xpath = './@id'
|
||||||
pod_title_xpath = './@title'
|
pod_title_xpath = './@title'
|
||||||
plaintext_xpath = './plaintext'
|
plaintext_xpath = './plaintext'
|
||||||
|
@ -75,13 +75,15 @@ def response(resp):
|
||||||
try:
|
try:
|
||||||
infobox_title = search_results.xpath(input_xpath)[0].text
|
infobox_title = search_results.xpath(input_xpath)[0].text
|
||||||
except:
|
except:
|
||||||
infobox_title = None
|
infobox_title = ""
|
||||||
|
|
||||||
pods = search_results.xpath(pods_xpath)
|
pods = search_results.xpath(pods_xpath)
|
||||||
result_chunks = []
|
result_chunks = []
|
||||||
|
result_content = ""
|
||||||
for pod in pods:
|
for pod in pods:
|
||||||
pod_id = pod.xpath(pod_id_xpath)[0]
|
pod_id = pod.xpath(pod_id_xpath)[0]
|
||||||
pod_title = pod.xpath(pod_title_xpath)[0]
|
pod_title = pod.xpath(pod_title_xpath)[0]
|
||||||
|
pod_is_result = pod.xpath(pod_primary_xpath)
|
||||||
|
|
||||||
subpods = pod.xpath(subpods_xpath)
|
subpods = pod.xpath(subpods_xpath)
|
||||||
if not subpods:
|
if not subpods:
|
||||||
|
@ -94,6 +96,10 @@ def response(resp):
|
||||||
|
|
||||||
if content and pod_id not in image_pods:
|
if content and pod_id not in image_pods:
|
||||||
|
|
||||||
|
if pod_is_result or not result_content:
|
||||||
|
if pod_id != "Input":
|
||||||
|
result_content = "%s: %s" % (pod_title, content)
|
||||||
|
|
||||||
# if no input pod was found, title is first plaintext pod
|
# if no input pod was found, title is first plaintext pod
|
||||||
if not infobox_title:
|
if not infobox_title:
|
||||||
infobox_title = content
|
infobox_title = content
|
||||||
|
@ -109,6 +115,8 @@ def response(resp):
|
||||||
if not result_chunks:
|
if not result_chunks:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
title = "Wolfram|Alpha (%s)" % infobox_title
|
||||||
|
|
||||||
# append infobox
|
# append infobox
|
||||||
results.append({'infobox': infobox_title,
|
results.append({'infobox': infobox_title,
|
||||||
'attributes': result_chunks,
|
'attributes': result_chunks,
|
||||||
|
@ -116,7 +124,7 @@ def response(resp):
|
||||||
|
|
||||||
# append link to site
|
# append link to site
|
||||||
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
|
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
|
||||||
'title': 'Wolfram|Alpha',
|
'title': title,
|
||||||
'content': infobox_title})
|
'content': result_content})
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -8,9 +8,11 @@
|
||||||
# @stable no
|
# @stable no
|
||||||
# @parse url, infobox
|
# @parse url, infobox
|
||||||
|
|
||||||
|
from cgi import escape
|
||||||
from json import loads
|
from json import loads
|
||||||
from time import time
|
from time import time
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
|
from lxml.etree import XML
|
||||||
|
|
||||||
from searx.poolrequests import get as http_get
|
from searx.poolrequests import get as http_get
|
||||||
|
|
||||||
|
@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\
|
||||||
referer_url = url + 'input/?{query}'
|
referer_url = url + 'input/?{query}'
|
||||||
|
|
||||||
token = {'value': '',
|
token = {'value': '',
|
||||||
'last_updated': 0}
|
'last_updated': None}
|
||||||
|
|
||||||
# pods to display as image in infobox
|
# pods to display as image in infobox
|
||||||
# these pods do return plaintext, but they look better and are more useful as images
|
# these pods do return plaintext, but they look better and are more useful as images
|
||||||
|
@ -80,10 +82,12 @@ def response(resp):
|
||||||
|
|
||||||
# TODO handle resp_json['queryresult']['assumptions']
|
# TODO handle resp_json['queryresult']['assumptions']
|
||||||
result_chunks = []
|
result_chunks = []
|
||||||
infobox_title = None
|
infobox_title = ""
|
||||||
|
result_content = ""
|
||||||
for pod in resp_json['queryresult']['pods']:
|
for pod in resp_json['queryresult']['pods']:
|
||||||
pod_id = pod.get('id', '')
|
pod_id = pod.get('id', '')
|
||||||
pod_title = pod.get('title', '')
|
pod_title = pod.get('title', '')
|
||||||
|
pod_is_result = pod.get('primary', None)
|
||||||
|
|
||||||
if 'subpods' not in pod:
|
if 'subpods' not in pod:
|
||||||
continue
|
continue
|
||||||
|
@ -97,6 +101,10 @@ def response(resp):
|
||||||
if subpod['plaintext'] != '(requires interactivity)':
|
if subpod['plaintext'] != '(requires interactivity)':
|
||||||
result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
|
result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
|
||||||
|
|
||||||
|
if pod_is_result or not result_content:
|
||||||
|
if pod_id != "Input":
|
||||||
|
result_content = pod_title + ': ' + subpod['plaintext']
|
||||||
|
|
||||||
elif 'img' in subpod:
|
elif 'img' in subpod:
|
||||||
result_chunks.append({'label': pod_title, 'image': subpod['img']})
|
result_chunks.append({'label': pod_title, 'image': subpod['img']})
|
||||||
|
|
||||||
|
@ -108,7 +116,7 @@ def response(resp):
|
||||||
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
|
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
|
||||||
|
|
||||||
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
|
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
|
||||||
'title': 'Wolfram|Alpha',
|
'title': 'Wolfram|Alpha (' + infobox_title + ')',
|
||||||
'content': infobox_title})
|
'content': result_content})
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -103,7 +103,8 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
|
||||||
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
|
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
|
||||||
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
|
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
|
||||||
self.assertEqual(referer_url, results[1]['url'])
|
self.assertEqual(referer_url, results[1]['url'])
|
||||||
self.assertEqual('Wolfram|Alpha', results[1]['title'])
|
self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
|
||||||
|
self.assertIn('result_plaintext', results[1]['content'])
|
||||||
|
|
||||||
# test calc
|
# test calc
|
||||||
xml = """<?xml version='1.0' encoding='UTF-8'?>
|
xml = """<?xml version='1.0' encoding='UTF-8'?>
|
||||||
|
@ -161,4 +162,5 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
|
||||||
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
|
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
|
||||||
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
|
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
|
||||||
self.assertEqual(referer_url, results[1]['url'])
|
self.assertEqual(referer_url, results[1]['url'])
|
||||||
self.assertEqual('Wolfram|Alpha', results[1]['title'])
|
self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
|
||||||
|
self.assertIn('integral_plaintext', results[1]['content'])
|
||||||
|
|
|
@ -140,7 +140,8 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
|
||||||
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
|
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
|
||||||
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
|
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
|
||||||
self.assertEqual(referer_url, results[1]['url'])
|
self.assertEqual(referer_url, results[1]['url'])
|
||||||
self.assertEqual('Wolfram|Alpha', results[1]['title'])
|
self.assertEqual('Wolfram|Alpha (input_plaintext)', results[1]['title'])
|
||||||
|
self.assertIn('result_plaintext', results[1]['content'])
|
||||||
|
|
||||||
# test calc
|
# test calc
|
||||||
json = r"""
|
json = r"""
|
||||||
|
@ -219,4 +220,5 @@ class TestWolframAlphaNoAPIEngine(SearxTestCase):
|
||||||
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
|
self.assertEqual(referer_url, results[0]['urls'][0]['url'])
|
||||||
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
|
self.assertEqual('Wolfram|Alpha', results[0]['urls'][0]['title'])
|
||||||
self.assertEqual(referer_url, results[1]['url'])
|
self.assertEqual(referer_url, results[1]['url'])
|
||||||
self.assertEqual('Wolfram|Alpha', results[1]['title'])
|
self.assertEqual('Wolfram|Alpha (integral_plaintext)', results[1]['title'])
|
||||||
|
self.assertIn('integral_plaintext', results[1]['content'])
|
||||||
|
|
Loading…
Reference in a new issue