mirror of
https://github.com/searxng/searxng.git
synced 2024-12-03 16:06:28 +00:00
Merge pull request #1260 from MarcAbonce/engine-fixes
[fix] Engine fixes
This commit is contained in:
commit
e5def5b019
6 changed files with 13 additions and 17 deletions
|
@ -68,8 +68,8 @@ def response(resp):
|
||||||
for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
|
for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
|
||||||
try:
|
try:
|
||||||
r = {
|
r = {
|
||||||
'url': result.xpath('.//a[@class="l _PMs"]')[0].attrib.get("href"),
|
'url': result.xpath('.//a[@class="l lLrAF"]')[0].attrib.get("href"),
|
||||||
'title': ''.join(result.xpath('.//a[@class="l _PMs"]//text()')),
|
'title': ''.join(result.xpath('.//a[@class="l lLrAF"]//text()')),
|
||||||
'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
|
'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
|
||||||
}
|
}
|
||||||
except:
|
except:
|
||||||
|
|
|
@ -27,7 +27,7 @@ result_count = 1
|
||||||
# urls
|
# urls
|
||||||
wikidata_host = 'https://www.wikidata.org'
|
wikidata_host = 'https://www.wikidata.org'
|
||||||
url_search = wikidata_host \
|
url_search = wikidata_host \
|
||||||
+ '/wiki/Special:ItemDisambiguation?{query}'
|
+ '/w/index.php?{query}'
|
||||||
|
|
||||||
wikidata_api = wikidata_host + '/w/api.php'
|
wikidata_api = wikidata_host + '/w/api.php'
|
||||||
url_detail = wikidata_api\
|
url_detail = wikidata_api\
|
||||||
|
@ -40,7 +40,7 @@ url_map = 'https://www.openstreetmap.org/'\
|
||||||
url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
|
url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
|
||||||
|
|
||||||
# xpaths
|
# xpaths
|
||||||
wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
|
wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
|
||||||
title_xpath = '//*[contains(@class,"wikibase-title-label")]'
|
title_xpath = '//*[contains(@class,"wikibase-title-label")]'
|
||||||
description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
|
description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
|
||||||
property_xpath = '//div[@id="{propertyid}"]'
|
property_xpath = '//div[@id="{propertyid}"]'
|
||||||
|
@ -57,22 +57,21 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
language = match_language(params['language'], supported_languages).split('-')[0]
|
|
||||||
|
|
||||||
params['url'] = url_search.format(
|
params['url'] = url_search.format(
|
||||||
query=urlencode({'label': query, 'language': language}))
|
query=urlencode({'search': query}))
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
html = fromstring(resp.text)
|
html = fromstring(resp.text)
|
||||||
wikidata_ids = html.xpath(wikidata_ids_xpath)
|
search_results = html.xpath(wikidata_ids_xpath)
|
||||||
|
|
||||||
language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
|
language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
|
||||||
|
|
||||||
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
||||||
for wikidata_id in wikidata_ids[:result_count]:
|
for search_result in search_results[:result_count]:
|
||||||
|
wikidata_id = search_result.split('/')[-1]
|
||||||
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
|
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
|
||||||
htmlresponse = get(url)
|
htmlresponse = get(url)
|
||||||
jsonresponse = loads(htmlresponse.text)
|
jsonresponse = loads(htmlresponse.text)
|
||||||
|
|
|
@ -53,7 +53,7 @@ def extract_url(xpath_results, search_url):
|
||||||
if url.startswith('//'):
|
if url.startswith('//'):
|
||||||
# add http or https to this kind of url //example.com/
|
# add http or https to this kind of url //example.com/
|
||||||
parsed_search_url = urlparse(search_url)
|
parsed_search_url = urlparse(search_url)
|
||||||
url = u'{0}:{1}'.format(parsed_search_url.scheme, url)
|
url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
|
||||||
elif url.startswith('/'):
|
elif url.startswith('/'):
|
||||||
# fix relative url to the search engine
|
# fix relative url to the search engine
|
||||||
url = urljoin(search_url, url)
|
url = urljoin(search_url, url)
|
||||||
|
|
|
@ -174,6 +174,7 @@ engines:
|
||||||
- name : wikidata
|
- name : wikidata
|
||||||
engine : wikidata
|
engine : wikidata
|
||||||
shortcut : wd
|
shortcut : wd
|
||||||
|
timeout : 3.0
|
||||||
weight : 2
|
weight : 2
|
||||||
|
|
||||||
- name : duckduckgo
|
- name : duckduckgo
|
||||||
|
|
|
@ -42,7 +42,7 @@ class TestGoogleNewsEngine(SearxTestCase):
|
||||||
<div class="ts _JGs _JHs _tJs _KGs _jHs">
|
<div class="ts _JGs _JHs _tJs _KGs _jHs">
|
||||||
<div class="_hJs">
|
<div class="_hJs">
|
||||||
<h3 class="r _gJs">
|
<h3 class="r _gJs">
|
||||||
<a class="l _PMs" href="https://example.com/" onmousedown="return rwt(this,'','','','11','AFQjCNEyehpzD5cJK1KUfXBx9RmsbqqG9g','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAggiKAAwAA','','',event)">Example title</a>
|
<a class="l lLrAF" href="https://example.com/" onmousedown="return rwt(this,'','','','11','AFQjCNEyehpzD5cJK1KUfXBx9RmsbqqG9g','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAggiKAAwAA','','',event)">Example title</a>
|
||||||
</h3>
|
</h3>
|
||||||
<div class="slp">
|
<div class="slp">
|
||||||
<span class="_OHs _PHs">
|
<span class="_OHs _PHs">
|
||||||
|
@ -63,7 +63,7 @@ class TestGoogleNewsEngine(SearxTestCase):
|
||||||
</a>
|
</a>
|
||||||
<div class="_hJs">
|
<div class="_hJs">
|
||||||
<h3 class="r _gJs">
|
<h3 class="r _gJs">
|
||||||
<a class="l _PMs" href="https://example2.com/" onmousedown="return rwt(this,'','','','12','AFQjCNHObfH7sYmLWI1SC-YhWXKZFRzRjw','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAgglKAAwAQ','','',event)">Example title 2</a>
|
<a class="l lLrAF" href="https://example2.com/" onmousedown="return rwt(this,'','','','12','AFQjCNHObfH7sYmLWI1SC-YhWXKZFRzRjw','','0ahUKEwjB58OR54HWAhWnKJoKHSQhAMY4ChCpAgglKAAwAQ','','',event)">Example title 2</a>
|
||||||
</h3>
|
</h3>
|
||||||
<div class="slp">
|
<div class="slp">
|
||||||
<span class="_OHs _PHs">
|
<span class="_OHs _PHs">
|
||||||
|
|
|
@ -9,20 +9,15 @@ from searx.testing import SearxTestCase
|
||||||
class TestWikidataEngine(SearxTestCase):
|
class TestWikidataEngine(SearxTestCase):
|
||||||
|
|
||||||
def test_request(self):
|
def test_request(self):
|
||||||
wikidata.supported_languages = ['en', 'es']
|
|
||||||
query = 'test_query'
|
query = 'test_query'
|
||||||
dicto = defaultdict(dict)
|
dicto = defaultdict(dict)
|
||||||
dicto['language'] = 'en-US'
|
|
||||||
params = wikidata.request(query, dicto)
|
params = wikidata.request(query, dicto)
|
||||||
self.assertIn('url', params)
|
self.assertIn('url', params)
|
||||||
self.assertIn(query, params['url'])
|
self.assertIn(query, params['url'])
|
||||||
self.assertIn('wikidata.org', params['url'])
|
self.assertIn('wikidata.org', params['url'])
|
||||||
self.assertIn('en', params['url'])
|
|
||||||
|
|
||||||
dicto['language'] = 'es-ES'
|
|
||||||
params = wikidata.request(query, dicto)
|
params = wikidata.request(query, dicto)
|
||||||
self.assertIn(query, params['url'])
|
self.assertIn(query, params['url'])
|
||||||
self.assertIn('es', params['url'])
|
|
||||||
|
|
||||||
# successful cases are not tested here to avoid sending additional requests
|
# successful cases are not tested here to avoid sending additional requests
|
||||||
def test_response(self):
|
def test_response(self):
|
||||||
|
@ -31,6 +26,7 @@ class TestWikidataEngine(SearxTestCase):
|
||||||
self.assertRaises(AttributeError, wikidata.response, '')
|
self.assertRaises(AttributeError, wikidata.response, '')
|
||||||
self.assertRaises(AttributeError, wikidata.response, '[]')
|
self.assertRaises(AttributeError, wikidata.response, '[]')
|
||||||
|
|
||||||
|
wikidata.supported_languages = ['en', 'es']
|
||||||
response = mock.Mock(text='<html></html>', search_params={"language": "en"})
|
response = mock.Mock(text='<html></html>', search_params={"language": "en"})
|
||||||
self.assertEqual(wikidata.response(response), [])
|
self.assertEqual(wikidata.response(response), [])
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue