mirror of
https://github.com/searxng/searxng.git
synced 2025-01-11 02:45:31 +00:00
Add support for dokuwiki engine
This commit is contained in:
parent
5cbe4c5332
commit
cf09b500f3
2 changed files with 169 additions and 0 deletions
83
searx/engines/doku.py
Normal file
83
searx/engines/doku.py
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
# Doku Wiki
|
||||||
|
#
|
||||||
|
# @website https://www.dokuwiki.org/
|
||||||
|
# @provide-api yes
|
||||||
|
# (https://www.dokuwiki.org/devel:xmlrpc)
|
||||||
|
#
|
||||||
|
# @using-api no
|
||||||
|
# @results HTML
|
||||||
|
# @stable yes
|
||||||
|
# @parse (general) url, title, content
|
||||||
|
|
||||||
|
from urllib import urlencode
|
||||||
|
from lxml.html import fromstring
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
|
||||||
|
# engine dependent config
# TODO: further candidate categories: 'images', 'music', 'videos', 'files'
categories = ['general']
paging = False
language_support = False
number_of_results = 5
|
||||||
|
|
||||||
|
# search-url
# Doku is OpenSearch compatible
base_url = 'http://localhost:8090'
# `{query}` receives the complete, already URL-encoded `id=<terms>` pair, so
# the parameter name is written exactly once (by urlencode in request()).
search_url = '/?do=search'\
             '&{query}'
# TODO             '&startRecord={offset}'\
# TODO             '&maximumRecords={limit}'\


# do search-request
def request(query, params):
    """Set ``params['url']`` to the wiki search URL for *query*.

    The search terms are URL-encoded and sent as the ``id`` parameter of the
    ``do=search`` action below ``base_url``.

    :param query: search terms entered by the user.
    :param params: request parameter mapping; its ``'url'`` key is overwritten.
    :returns: the same *params* object that was passed in.
    """
    # urlencode() yields 'id=<escaped terms>'; the template must not add a
    # second parameter name in front of it.
    encoded_query = urlencode({'id': query})
    params['url'] = base_url + search_url.format(query=encoded_query)

    return params
|
||||||
|
|
||||||
|
|
||||||
|
# locate the result link inside one node of the result page
def _wikilink(node):
    """Return ``(href, title)`` for the ``wikilink1`` anchor(s) below *node*.

    Returns ``None`` when there is no such anchor or its ``href`` is empty.
    When several anchors match, the last ``href`` is used, and all ``title``
    attributes are handed to ``extract_text`` together.
    """
    hrefs = node.xpath('.//a[@class="wikilink1"]/@href')
    if not hrefs or not hrefs[-1]:
        return None

    titles = node.xpath('.//a[@class="wikilink1"]/@title')
    # Only hand extract_text a non-empty list: what it does with an empty one
    # is not visible from this module.
    title = extract_text(titles) if titles else ''

    return hrefs[-1], title


# get response from search-request
def response(resp):
    """Scrape a wiki search result page into a list of result dicts.

    Two parts of the page are read:

    * the quick hits (``div.search_quickresult``), one result per ``<li>``,
      always with empty content;
    * the ``dl.search_results`` list, where a ``<dt>`` supplies link and title
      and each following ``<dd>`` supplies the content of one result.

    :param resp: response object; only its ``text`` attribute is read.
    :returns: list of dicts with the keys ``title``, ``content`` and ``url``;
        ``url`` is ``base_url`` followed by the link's ``href``.
    :raises AttributeError: if *resp* has no ``text`` attribute.
    """
    results = []

    doc = fromstring(resp.text)

    # parse results
    # Quickhits
    for item in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
        link = _wikilink(item)
        if link is None:
            continue

        res_url, title = link

        # append result
        results.append({'title': title,
                        'content': "",
                        'url': base_url + res_url})

    # Search results
    # `link` is the heading the next <dd> belongs to. It is recomputed on
    # every <dt>, so a snippet is never paired with a stale title/url from an
    # earlier entry or from the quick hits above.
    link = None
    for node in doc.xpath('//dl[@class="search_results"]/*'):
        if node.tag == "dt":
            link = _wikilink(node)
        elif node.tag == "dd" and link is not None:
            res_url, title = link
            content = extract_text(node.xpath('.'))

            # append result
            results.append({'title': title,
                            'content': content,
                            'url': base_url + res_url})

    # return results
    return results
|
86
tests/unit/engines/test_doku.py
Normal file
86
tests/unit/engines/test_doku.py
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from collections import defaultdict
|
||||||
|
import mock
|
||||||
|
from searx.engines import doku
|
||||||
|
from searx.testing import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestDokuEngine(SearxTestCase):
    """Unit tests for the ``request`` and ``response`` hooks of the doku engine."""

    def test_request(self):
        """The request hook sets a URL that contains the search terms."""
        query = 'test_query'
        dicto = defaultdict(dict)
        params = doku.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])

    def test_response(self):
        """The response hook rejects non-responses and parses both result lists."""
        # anything without a `text` attribute is rejected
        self.assertRaises(AttributeError, doku.response, None)
        self.assertRaises(AttributeError, doku.response, [])
        self.assertRaises(AttributeError, doku.response, '')
        self.assertRaises(AttributeError, doku.response, '[]')

        # a page without any result markup yields no results
        response = mock.Mock(text='<html></html>')
        self.assertEqual(doku.response(response), [])

        # quick hits: link and title only, content stays empty
        html = u"""
<div class="search_quickresult">
<h3>Pages trouvées :</h3>
<ul class="search_quickhits">
<li> <a href="/xfconf-query" class="wikilink1" title="xfconf-query">xfconf-query</a></li>
</ul>
<div class="clearer"></div>
</div>
"""
        response = mock.Mock(text=html)
        results = doku.response(response)
        expected = [{'content': '',
                     'title': 'xfconf-query',
                     'url': 'http://localhost:8090/xfconf-query'}]
        self.assertEqual(results, expected)

        # full-text results: four <dt>/<dd> pairs give four results
        html = u"""
<dl class="search_results">
<dt><a href="/xvnc?s[]=query" class="wikilink1" title="xvnc">xvnc</a>: 40 Occurrences trouvées</dt>
<dd>er = /usr/bin/Xvnc
server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 640x480 -depth 8 -Secur... er = /usr/bin/Xvnc
server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 800x600 -depth 8 -Secur... er = /usr/bin/Xvnc
server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 1024x768 -depth 8 -Secu... er = /usr/bin/Xvnc
server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 1280x1024 -depth 8 -Sec</dd>
<dt><a href="/postfix_mysql_tls_sasl_1404?s[]=query" class="wikilink1" title="postfix_mysql_tls_sasl_1404">postfix_mysql_tls_sasl_1404</a>: 14 Occurrences trouvées</dt>
<dd>tdepasse
hosts = 127.0.0.1
dbname = postfix
<strong class="search_hit">query</strong> = SELECT goto FROM alias WHERE address='%s' AND a... tdepasse
hosts = 127.0.0.1
dbname = postfix
<strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s'
#optional <strong class="search_hit">query</strong> to use when relaying for backup MX
#<strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s' and backupmx =</dd><dt><a href="/tutoriel/comment_creer_un_terminal_x_ou_recycler_une_vieille_machine?s[]=query" class="wikilink1" title="tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine">tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine</a>: 13 Occurrences trouvées</dt><dd>z gdm (ubuntu) tapez sudo /etc/init.d/gdm stop
X -<strong class="search_hit">query</strong> 192.168.1.2
</code>
:)
Si vous désirez, sur la mê... ans une console (tjs sur le vieil ordi)
<code>
X -<strong class="search_hit">query</strong> 192.168.1.2 :1
</code>
Un écran de login devrait ... ure.
<note tip>Rajouter "-once" à la commande "X -<strong class="search_hit">query</strong> 192.168.1.2 :1" permet de quitter la session et r... d'une ubuntu/kubuntu\\
Testez d'abord que le //X -<strong class="search_hit">query</strong> ...// fonctionne, dans une console (CTRL-ALT-F1) </dd>
<dt><a href="/bind9?s[]=query" class="wikilink1" title="bind9">bind9</a>: 12 Occurrences trouvées</dt>
<dd> printcmd
;; Got answer:
;; ->>HEADER<<- opcode: <strong class="search_hit">QUERY</strong>, status: NOERROR, id: 13427
;; flags: qr aa rd ra; <strong class="search_hit">QUERY</strong>: 1, ANSWER: 1, AUTHORITY: 1, ADDITIONAL: 1

[...]

;; <strong class="search_hit">Query</strong> time: 1 msec
;; SERVER: 127.0.0.1#53(127.0.0.1)
;... ne énorme diminution du temps mis par la requête (<strong class="search_hit">Query</strong> time) , entre la première et la deuxième requête.</dd>
</dl>
"""
        response = mock.Mock(text=html)
        results = doku.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 4)
        self.assertEqual(results[0]['title'], 'xvnc')
        # the url is the engine's base_url followed by the href of the <dt> link
        self.assertEqual(results[0]['url'], u'http://localhost:8090/xvnc?s[]=query')
        self.assertEqual(results[3]['title'], 'bind9')
        self.assertEqual(results[3]['url'], u'http://localhost:8090/bind9?s[]=query')
        # FIXME: results[0]['content'] is still unchecked; the exact text
        # depends on how extract_text normalises the <dd> whitespace.
|
Loading…
Reference in a new issue