Use only one engine for the four search from Qwant
This commit is contained in:
Cqoicebordel 2015-06-02 20:36:58 +02:00
parent 884eeb8541
commit f05087b93a
10 changed files with 217 additions and 644 deletions

View file

@ -1,5 +1,5 @@
"""
Qwant (Web)
Qwant (Web, Images, News, Social)
@website https://qwant.com/
@provide-api not officially (https://api.qwant.com/api/search/)
@ -12,21 +12,25 @@
from urllib import urlencode
from json import loads
from datetime import datetime
# engine dependent config
categories = ['general']
categories = None
paging = True
language_support = True
search_url_keyword = None
# search-url
url = 'https://api.qwant.com/api/search/web?count=10&offset={offset}&f=&{query}'
url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
params['url'] = url.format(query=urlencode({'q': query}),
params['url'] = url.format(keyword=search_url_keyword,
query=urlencode({'q': query}),
offset=offset)
# add language tag if specified
@ -57,10 +61,28 @@ def response(resp):
res_url = result['url']
content = result['desc']
# append result
results.append({'title': title,
'content': content,
'url': res_url})
if search_url_keyword == 'web':
results.append({'title': title,
'content': content,
'url': res_url})
elif search_url_keyword == 'images':
thumbnail_src = result['thumbnail']
img_src = result['media']
results.append({'template': 'images.html',
'url': res_url,
'title': title,
'content': '',
'thumbnail_src': thumbnail_src,
'img_src': img_src})
elif search_url_keyword == 'news' or search_url_keyword == 'social':
published_date = datetime.fromtimestamp(result['date'], None)
results.append({'url': res_url,
'title': title,
'publishedDate': published_date,
'content': content})
# return results
return results

View file

@ -1,70 +0,0 @@
"""
Qwant (Images)
@website https://qwant.com/
@provide-api not officially (https://api.qwant.com/api/search/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content
"""
from urllib import urlencode
from json import loads
# engine dependent config
categories = ['images']
paging = True
language_support = True
# search-url
url = 'https://api.qwant.com/api/search/images?count=10&offset={offset}&f=&{query}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
params['url'] = url.format(query=urlencode({'q': query}),
offset=offset)
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&locale=' + params['language'].lower()
return params
# get response from search-request
def response(resp):
results = []
search_results = loads(resp.text)
# return empty array if there are no results
if 'data' not in search_results:
return []
data = search_results.get('data', {})
res = data.get('result', {})
# parse results
for result in res.get('items', {}):
title = result['title']
res_url = result['url']
thumbnail_src = result['thumbnail']
img_src = result['media']
# append result
results.append({'template': 'images.html',
'url': res_url,
'title': title,
'content': '',
'thumbnail_src': thumbnail_src,
'img_src': img_src})
# return results
return results

View file

@ -1,69 +0,0 @@
"""
Qwant (News)
@website https://qwant.com/
@provide-api not officially (https://api.qwant.com/api/search/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content
"""
from urllib import urlencode
from json import loads
from datetime import datetime
# engine dependent config
categories = ['news']
paging = True
language_support = True
# search-url
url = 'https://api.qwant.com/api/search/news?count=10&offset={offset}&f=&{query}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
params['url'] = url.format(query=urlencode({'q': query}),
offset=offset)
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&locale=' + params['language'].lower()
return params
# get response from search-request
def response(resp):
results = []
search_results = loads(resp.text)
# return empty array if there are no results
if 'data' not in search_results:
return []
data = search_results.get('data', {})
res = data.get('result', {})
# parse results
for result in res.get('items', {}):
title = result['title']
res_url = result['url']
content = result['desc']
published_date = datetime.fromtimestamp(result['date'], None)
# append result
results.append({'url': res_url,
'title': title,
'publishedDate': published_date,
'content': content})
# return results
return results

View file

@ -1,69 +0,0 @@
"""
Qwant (social media)
@website https://qwant.com/
@provide-api not officially (https://api.qwant.com/api/search/)
@using-api yes
@results JSON
@stable yes
@parse url, title, content
"""
from urllib import urlencode
from json import loads
from datetime import datetime
# engine dependent config
categories = ['social media']
paging = True
language_support = True
# search-url
url = 'https://api.qwant.com/api/search/social?count=10&offset={offset}&f=&{query}'
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * 10
params['url'] = url.format(query=urlencode({'q': query}),
offset=offset)
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&locale=' + params['language'].lower()
return params
# get response from search-request
def response(resp):
results = []
search_results = loads(resp.text)
# return empty array if there are no results
if 'data' not in search_results:
return []
data = search_results.get('data', {})
res = data.get('result', {})
# parse results
for result in res.get('items', {}):
title = result['title']
res_url = result['url']
content = result['desc']
published_date = datetime.fromtimestamp(result['date'], None)
# append result
results.append({'url': res_url,
'title': title,
'content': content,
'publishedDate': published_date})
# return results
return results

View file

@ -171,18 +171,26 @@ engines:
- name : qwant
engine : qwant
shortcut : qw
search_url_keyword : web
categories : general
- name : qwant images
engine : qwant_images
engine : qwant
shortcut : qwi
search_url_keyword : images
categories : images
- name : qwant news
engine : qwant_news
engine : qwant
shortcut : qwn
search_url_keyword : news
categories : news
- name : qwant social
engine : qwant_social
engine : qwant
shortcut : qws
search_url_keyword : social
categories : social media
- name : kickass
engine : kickass

View file

@ -68,6 +68,7 @@ class TestQwantEngine(SearxTestCase):
}
"""
response = mock.Mock(text=json)
qwant.search_url_keyword = 'web'
results = qwant.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
@ -75,6 +76,181 @@ class TestQwantEngine(SearxTestCase):
self.assertEqual(results[0]['url'], 'http://www.url.xyz')
self.assertEqual(results[0]['content'], 'Description')
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"items": [
{
"title": "Title",
"score": 9999,
"url": "http://www.url.xyz",
"source": "...",
"media": "http://image.jpg",
"desc": "",
"thumbnail": "http://thumbnail.jpg",
"date": "",
"_id": "db0aadd62c2a8565567ffc382f5c61fa",
"favicon": "https://s.qwant.com/fav.ico"
}
],
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
qwant.search_url_keyword = 'images'
results = qwant.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'Title')
self.assertEqual(results[0]['url'], 'http://www.url.xyz')
self.assertEqual(results[0]['content'], '')
self.assertEqual(results[0]['thumbnail_src'], 'http://thumbnail.jpg')
self.assertEqual(results[0]['img_src'], 'http://image.jpg')
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"items": [
{
"title": "Title",
"score": 9999,
"url": "http://www.url.xyz",
"source": "...",
"desc": "Description",
"date": 1433260920,
"_id": "db0aadd62c2a8565567ffc382f5c61fa",
"favicon": "https://s.qwant.com/fav.ico"
}
],
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
qwant.search_url_keyword = 'news'
results = qwant.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'Title')
self.assertEqual(results[0]['url'], 'http://www.url.xyz')
self.assertEqual(results[0]['content'], 'Description')
self.assertIn('publishedDate', results[0])
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"items": [
{
"title": "Title",
"score": 9999,
"url": "http://www.url.xyz",
"source": "...",
"desc": "Description",
"date": 1433260920,
"_id": "db0aadd62c2a8565567ffc382f5c61fa",
"favicon": "https://s.qwant.com/fav.ico"
}
],
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
qwant.search_url_keyword = 'social'
results = qwant.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'Title')
self.assertEqual(results[0]['url'], 'http://www.url.xyz')
self.assertEqual(results[0]['content'], 'Description')
self.assertIn('publishedDate', results[0])
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"items": [
{
"title": "Title",
"score": 9999,
"url": "http://www.url.xyz",
"source": "...",
"desc": "Description",
"date": 1433260920,
"_id": "db0aadd62c2a8565567ffc382f5c61fa",
"favicon": "https://s.qwant.com/fav.ico"
}
],
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
qwant.search_url_keyword = ''
results = qwant.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
json = """
{
"status": "success",

View file

@ -1,145 +0,0 @@
from collections import defaultdict
import mock
from searx.engines import qwant_images
from searx.testing import SearxTestCase
class TestQwantImagesEngine(SearxTestCase):
def test_request(self):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
dicto['language'] = 'fr_FR'
params = qwant_images.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('qwant.com', params['url'])
self.assertIn('fr_fr', params['url'])
dicto['language'] = 'all'
params = qwant_images.request(query, dicto)
self.assertFalse('fr' in params['url'])
def test_response(self):
self.assertRaises(AttributeError, qwant_images.response, None)
self.assertRaises(AttributeError, qwant_images.response, [])
self.assertRaises(AttributeError, qwant_images.response, '')
self.assertRaises(AttributeError, qwant_images.response, '[]')
response = mock.Mock(text='{}')
self.assertEqual(qwant_images.response(response), [])
response = mock.Mock(text='{"data": {}}')
self.assertEqual(qwant_images.response(response), [])
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"items": [
{
"title": "Title",
"type": "image",
"media": "http://www.url.xyz/fullimage.jpg",
"desc": "",
"thumbnail": "http://www.url.xyz/thumbnail.jpg",
"thumb_width": 365,
"thumb_height": 230,
"width": "365",
"height": "230",
"size": "187.7KB",
"url": "http://www.url.xyz",
"_id": "0ffd93fb26f3e192a6020af8fc16fbb1",
"media_fullsize": "http://www.proxy/fullimage.jpg",
"count": 0
}
],
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_images.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'Title')
self.assertEqual(results[0]['url'], 'http://www.url.xyz')
self.assertEqual(results[0]['content'], '')
self.assertEqual(results[0]['thumbnail_src'], 'http://www.url.xyz/thumbnail.jpg')
self.assertEqual(results[0]['img_src'], 'http://www.url.xyz/fullimage.jpg')
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_images.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_images.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
json = """
{
"status": "success"
}
"""
response = mock.Mock(text=json)
results = qwant_images.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)

View file

@ -1,137 +0,0 @@
from collections import defaultdict
import mock
from searx.engines import qwant_news
from searx.testing import SearxTestCase
class TestQwantNewsEngine(SearxTestCase):
def test_request(self):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
dicto['language'] = 'fr_FR'
params = qwant_news.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('qwant.com', params['url'])
self.assertIn('fr_fr', params['url'])
dicto['language'] = 'all'
params = qwant_news.request(query, dicto)
self.assertFalse('fr' in params['url'])
def test_response(self):
self.assertRaises(AttributeError, qwant_news.response, None)
self.assertRaises(AttributeError, qwant_news.response, [])
self.assertRaises(AttributeError, qwant_news.response, '')
self.assertRaises(AttributeError, qwant_news.response, '[]')
response = mock.Mock(text='{}')
self.assertEqual(qwant_news.response(response), [])
response = mock.Mock(text='{"data": {}}')
self.assertEqual(qwant_news.response(response), [])
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"items": [
{
"title": "Title",
"score": 9999,
"url": "http://www.url.xyz",
"source": "...",
"desc": "Description",
"date": 1433065411,
"_id": "db0aadd62c2a8565567ffc382f5c61fa",
"favicon": "https://s.qwant.com/fav.ico"
}
],
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'Title')
self.assertEqual(results[0]['url'], 'http://www.url.xyz')
self.assertEqual(results[0]['content'], 'Description')
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
json = """
{
"status": "success"
}
"""
response = mock.Mock(text=json)
results = qwant_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)

View file

@ -1,140 +0,0 @@
from collections import defaultdict
import mock
from searx.engines import qwant_social
from searx.testing import SearxTestCase
class TestQwantSocialEngine(SearxTestCase):
def test_request(self):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
dicto['language'] = 'fr_FR'
params = qwant_social.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('qwant.com', params['url'])
self.assertIn('fr_fr', params['url'])
dicto['language'] = 'all'
params = qwant_social.request(query, dicto)
self.assertFalse('fr' in params['url'])
def test_response(self):
self.assertRaises(AttributeError, qwant_social.response, None)
self.assertRaises(AttributeError, qwant_social.response, [])
self.assertRaises(AttributeError, qwant_social.response, '')
self.assertRaises(AttributeError, qwant_social.response, '[]')
response = mock.Mock(text='{}')
self.assertEqual(qwant_social.response(response), [])
response = mock.Mock(text='{"data": {}}')
self.assertEqual(qwant_social.response(response), [])
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"items": [
{
"_id": "dc0b3f24c93684c7d7f1b0a4c2d9f1b0",
"__index": 32,
"title": "Title",
"img": "img",
"desc": "Description",
"date": 1432643480,
"type": "twitter",
"card": "XXX",
"post": "603176590856556545",
"url": "http://www.url.xyz",
"userUrl": "https://twitter.com/XXX"
}
],
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_social.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'Title')
self.assertEqual(results[0]['url'], 'http://www.url.xyz')
self.assertEqual(results[0]['content'], 'Description')
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"result": {
"filters": []
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_social.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
json = """
{
"status": "success",
"data": {
"query": {
"locale": "en_us",
"query": "Test",
"offset": 10
},
"cache": {
"key": "e66aa864c00147a0e3a16ff7a5efafde",
"created": 1433092754,
"expiration": 259200,
"status": "miss",
"age": 0
}
}
}
"""
response = mock.Mock(text=json)
results = qwant_social.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
json = """
{
"status": "success"
}
"""
response = mock.Mock(text=json)
results = qwant_social.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)

View file

@ -26,9 +26,6 @@ from searx.tests.engines.test_openstreetmap import * # noqa
from searx.tests.engines.test_photon import * # noqa
from searx.tests.engines.test_piratebay import * # noqa
from searx.tests.engines.test_qwant import * # noqa
from searx.tests.engines.test_qwant_images import * # noqa
from searx.tests.engines.test_qwant_news import * # noqa
from searx.tests.engines.test_qwant_social import * # noqa
from searx.tests.engines.test_searchcode_code import * # noqa
from searx.tests.engines.test_searchcode_doc import * # noqa
from searx.tests.engines.test_soundcloud import * # noqa