mirror of
https://github.com/searxng/searxng.git
synced 2024-12-21 00:36:29 +00:00
Add ScanR structures search engine
In theory ScanR should also search for projects but the API is different, so we'd need another engine.
This commit is contained in:
parent
678b87f9d5
commit
1dba6dcbac
3 changed files with 258 additions and 0 deletions
78
searx/engines/scanr_structures.py
Normal file
78
searx/engines/scanr_structures.py
Normal file
|
@ -0,0 +1,78 @@
|
|||
"""
|
||||
ScanR Structures (Science)
|
||||
|
||||
@website https://scanr.enseignementsup-recherche.gouv.fr
|
||||
@provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html)
|
||||
|
||||
@using-api yes
|
||||
@results JSON
|
||||
@stable yes
|
||||
@parse url, title, content, img_src
|
||||
"""
|
||||
|
||||
from urllib import urlencode
|
||||
from json import loads, dumps
|
||||
from dateutil import parser
|
||||
from searx.utils import html_to_text
|
||||
|
||||
# --- engine configuration ---------------------------------------------------
# Categories this engine is listed under.
categories = ['science']
# Paging flag; `request` forwards params['pageno'] to the API as "page".
paging = True
# Number of hits asked for per page (sent to the API as "pageSize").
page_size = 20

# --- endpoints --------------------------------------------------------------
# Site root (with trailing slash); also the prefix for result links and for
# site-relative logo paths built in `response`.
url = 'https://scanr.enseignementsup-recherche.gouv.fr/'
# Structures search endpoint targeted by `request`.
search_url = '{0}api/structures/search'.format(url)
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
    """Fill *params* with the POST request for a ScanR structures search.

    Sets the target URL, the HTTP method, the JSON content-type header and
    the JSON-encoded body on *params*.

    :param query: search terms, sent as the "query" field of the payload
    :param params: request parameters; ``params['pageno']`` is read and
                   ``params['headers']`` must be a mutable mapping
    :returns: the same *params* object, updated in place
    """
    params.update(url=search_url, method='POST')
    params['headers']['Content-type'] = "application/json"

    # Search every field, best match first, one page of `page_size` hits.
    payload = {
        "query": query,
        "searchField": "ALL",
        "sortDirection": "ASC",
        "sortOrder": "RELEVANCY",
        "page": params['pageno'],
        "pageSize": page_size,
    }
    params['data'] = dumps(payload)

    return params
|
||||
|
||||
|
||||
# get response from search-request
|
||||
def response(resp):
    """Turn a ScanR structures search reply into a list of result dicts.

    :param resp: response object; only its ``text`` attribute (the JSON body)
                 is read
    :returns: list of dicts with the keys ``url``, ``title``, ``img_src`` and
              ``content``; empty when the reply reports fewer than one hit
    :raises AttributeError: if *resp* has no ``text`` attribute or the decoded
                            JSON is not an object
    """
    search_res = loads(resp.text)

    # A missing or null "total" is treated as zero hits instead of being
    # compared against an int (which is a TypeError on Python 3).
    if (search_res.get('total') or 0) < 1:
        return []

    results = []
    for result in search_res.get('results') or []:
        # entries without an id cannot be linked to a structure page
        if 'id' not in result:
            continue

        # NOTE: the logo is exposed as 'img_src'; the original author left
        # open whether 'thumbnail' would be the better key.
        thumbnail = result.get('logo') or None
        if thumbnail and thumbnail.startswith('/'):
            # site-relative logo path: prefix it with the site root
            thumbnail = url + thumbnail

        # Only the first highlight is used as the snippet. Entries without a
        # usable highlight get an empty content string rather than handing
        # None to html_to_text.
        highlights = result.get('highlights') or []
        snippet = highlights[0].get('value') if highlights else None
        content = html_to_text(snippet) if snippet else ''

        results.append({'url': url + 'structure/' + result['id'],
                        'title': result['label'],
                        'img_src': thumbnail,
                        'content': content})

    return results
|
|
@ -314,6 +314,11 @@ engines:
|
|||
engine : kickass
|
||||
shortcut : ka
|
||||
|
||||
- name : scanr_structures
|
||||
shortcut: scs
|
||||
engine : scanr_structures
|
||||
disabled : True
|
||||
|
||||
- name : soundcloud
|
||||
engine : soundcloud
|
||||
shortcut : sc
|
||||
|
|
175
tests/unit/engines/test_scanr_structures.py
Normal file
175
tests/unit/engines/test_scanr_structures.py
Normal file
|
@ -0,0 +1,175 @@
|
|||
from collections import defaultdict
|
||||
import mock
|
||||
from searx.engines import scanr_structures
|
||||
from searx.testing import SearxTestCase
|
||||
|
||||
|
||||
class TestScanrStructuresEngine(SearxTestCase):
    """Unit tests for the request/response hooks of the scanr_structures engine."""

    def test_request(self):
        """`request` must produce a JSON POST aimed at the ScanR API."""
        import json

        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        params = scanr_structures.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['data'])
        self.assertIn('scanr.enseignementsup-recherche.gouv.fr', params['url'])
        self.assertEqual(params['method'], 'POST')
        self.assertEqual(params['headers']['Content-type'], 'application/json')

        # the body must be valid JSON carrying the query and paging fields
        payload = json.loads(params['data'])
        self.assertEqual(payload['query'], query)
        self.assertEqual(payload['page'], 1)
        self.assertEqual(payload['pageSize'], scanr_structures.page_size)

    def test_response(self):
        """`response` must reject non-responses and parse a real-looking reply."""
        # objects without a `.text` attribute are not valid responses
        self.assertRaises(AttributeError, scanr_structures.response, None)
        self.assertRaises(AttributeError, scanr_structures.response, [])
        self.assertRaises(AttributeError, scanr_structures.response, '')
        self.assertRaises(AttributeError, scanr_structures.response, '[]')

        # replies that report no hits yield an empty result list
        response = mock.Mock(text='{}')
        self.assertEqual(scanr_structures.response(response), [])

        response = mock.Mock(text='{"data": []}')
        self.assertEqual(scanr_structures.response(response), [])

        # Backslash-continued lines inside the JSON string values below start
        # at column 0 on purpose: whatever precedes the text on those lines
        # becomes part of the value, so only the single separating space that
        # the expected snippets rely on is kept.
        body = u"""
        {
            "request":
            {
                "query":"test_query",
                "page":1,
                "pageSize":20,
                "sortOrder":"RELEVANCY",
                "sortDirection":"ASC",
                "searchField":"ALL",
                "from":0
            },
            "total":2471,
            "results":[
                {
                    "id":"200711886U",
                    "label":"Laboratoire d'Informatique de Grenoble",
                    "kind":"RNSR",
                    "publicEntity":true,
                    "address":{"city":"Grenoble","departement":"38"},
                    "logo":"/static/logos/200711886U.png",
                    "acronym":"LIG",
                    "type":{"code":"UR","label":"Unit\xe9 de recherche"},
                    "level":2,
                    "institutions":[
                        {
                            "id":"193819125",
                            "label":"Grenoble INP",
                            "acronym":"IPG",
                            "code":"UMR 5217"
                        },
                        {
                            "id":"130021397",
                            "label":"Universit\xe9 de Grenoble Alpes",
                            "acronym":"UGA",
                            "code":"UMR 5217"
                        },
                        {
                            "id":"180089013",
                            "label":"Centre national de la recherche scientifique",
                            "acronym":"CNRS",
                            "code":"UMR 5217"
                        },
                        {
                            "id":"180089047",
                            "label":"Institut national de recherche en informatique et en automatique",
                            "acronym":"Inria",
                            "code":"UMR 5217"
                        }
                    ],
                    "highlights":[
                        {
                            "type":"projects",
                            "value":"linguicielles d\xe9velopp\xe9s jusqu'ici par le GETALP\
 du <strong>LIG</strong> en tant que prototypes op\xe9rationnels.\
\\r\\nDans le contexte"
                        },
                        {
                            "type":"acronym",
                            "value":"<strong>LIG</strong>"
                        },
                        {
                            "type":"websiteContents",
                            "value":"S\xe9lection\\nListe structures\\nD\xe9tail\\n\
Accueil\\n200711886U : <strong>LIG</strong>\
 Laboratoire d'Informatique de Grenoble Unit\xe9 de recherche"
                        },
                        {
                            "type":"publications",
                            "value":"de noms. Nous avons d'abord d\xe9velopp\xe9 LOOV \
(pour <strong>Lig</strong> Overlaid OCR in Vid\xe9o), \
un outil d'extraction des"
                        }
                    ]
                },
                {
                    "id":"199511665F",
                    "label":"Laboratoire Bordelais de Recherche en Informatique",
                    "kind":"RNSR",
                    "publicEntity":true,
                    "address":{"city":"Talence","departement":"33"},
                    "logo":"/static/logos/199511665F.png",
                    "acronym":"LaBRI",
                    "type":{"code":"UR","label":"Unit\xe9 de recherche"},
                    "level":2,
                    "institutions":[
                        {
                            "id":"130006356",
                            "label":"Institut polytechnique de Bordeaux",
                            "acronym":"IPB",
                            "code":"UMR 5800"
                        },
                        {
                            "id":"130018351",
                            "label":"Universit\xe9 de Bordeaux",
                            "acronym":null,
                            "code":"UMR 5800"
                        },
                        {
                            "id":"180089013",
                            "label":"Centre national de la recherche scientifique",
                            "acronym":"CNRS",
                            "code":"UMR 5800"
                        },
                        {
                            "id":"180089047",
                            "label":"Institut national de recherche en informatique et en automatique",
                            "acronym":"Inria",
                            "code":"UMR 5800"
                        }
                    ],
                    "highlights":[
                        {
                            "type":"websiteContents",
                            "value":"Samia Kerdjoudj\\n2016-07-05\\nDouble-exponential\
 and <strong>triple</strong>-exponential bounds for\
 choosability problems parameterized"
                        },
                        {
                            "type":"publications",
                            "value":"de cam\xe9ras install\xe9es dans les lieux publiques \
a <strong>tripl\xe9</strong> en 2009, passant de 20 000 \
\xe0 60 000. Malgr\xe9 le"
                        }
                    ]
                }
            ]
        }
        """
        response = mock.Mock(text=body)
        results = scanr_structures.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], u"Laboratoire d'Informatique de Grenoble")
        self.assertEqual(results[0]['url'], 'https://scanr.enseignementsup-recherche.gouv.fr/structure/200711886U')
        self.assertEqual(results[0]['content'],
                         u"linguicielles d\xe9velopp\xe9s jusqu'ici par le GETALP "
                         u"du LIG en tant que prototypes "
                         u"op\xe9rationnels. Dans le contexte")
        self.assertEqual(results[1]['img_src'],
                         'https://scanr.enseignementsup-recherche.gouv.fr//static/logos/199511665F.png')
        self.assertEqual(results[1]['content'],
                         "Samia Kerdjoudj 2016-07-05 Double-exponential and"
                         " triple-exponential bounds for "
                         "choosability problems parameterized")
        self.assertEqual(results[1]['url'], 'https://scanr.enseignementsup-recherche.gouv.fr/structure/199511665F')
        self.assertEqual(results[1]['title'], u"Laboratoire Bordelais de Recherche en Informatique")
|
Loading…
Reference in a new issue