"""
|
|
Recoll (local search engine)
|
|
|
|
@using-api yes
|
|
@results JSON
|
|
@stable yes
|
|
@parse url, content, size, abstract, author, mtype, subtype, time, \
|
|
filename, label, type, embedded
|
|
"""
|
|
|

from datetime import date, timedelta
from json import loads
from urllib.parse import urlencode, quote

# engine dependent config
time_range_support = True

# parameters from settings.yml
base_url = None
search_dir = ''
mount_prefix = None
dl_prefix = None
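
# A minimal, illustrative settings.yml entry for this engine; the name,
# shortcut and all URLs/paths below are placeholders, adjust them to where
# your recoll-webui instance and the indexed files are actually reachable:
#
#   - name: recoll
#     engine: recoll
#     shortcut: rcl
#     base_url: 'https://recoll.example.org/'
#     mount_prefix: /export
#     dl_prefix: 'https://download.example.org'
#     search_dir: ''  # empty: search the whole indexed hierarchy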

# embedded: HTML template for the 'embedded' preview of audio/video results
embedded_url = '<{ttype} controls height="166px" ' +\
    'src="{url}" type="{mtype}"></{ttype}>'
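# For an audio/mpeg result, for example, the template above renders roughly
# (the src URL is only a placeholder):
#   <audio controls height="166px" src="https://download.example.org/track.mp3"
#          type="audio/mpeg"></audio>

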
# helper functions
def get_time_range(time_range):
    # translate searx's time_range ('day', 'week', 'month', 'year') into the
    # ISO date that many days in the past; return '' for no or unknown range
    sw = {
        'day': 1,
        'week': 7,
        'month': 30,
        'year': 365
    }

    offset = sw.get(time_range, 0)
    if not offset:
        return ''

    return (date.today() - timedelta(days=offset)).isoformat()


# do search-request
def request(query, params):
    search_after = get_time_range(params['time_range'])
    # query recoll-webui's JSON endpoint; 'after' limits results to documents
    # dated after the given ISO date and 'dir' restricts the search to a
    # subtree of the index
    search_url = base_url + 'json?{query}&highlight=0'
    params['url'] = search_url.format(query=urlencode({
        'query': query,
        'after': search_after,
        'dir': search_dir}))

    return params


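# With the placeholder base_url above and the query 'xapian' (no time range,
# empty search_dir), request() would build a URL roughly like:
#   https://recoll.example.org/json?query=xapian&after=&dir=&highlight=0

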
# get response from search-request
def response(resp):
    results = []

    response_json = loads(resp.text)

    if not response_json:
        return []

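    # The recoll-webui reply is expected to look roughly like the sketch
    # below (illustrative, based only on the fields read in this loop):
    #   {"nres": 42,
    #    "results": [{"label": "report.pdf",
    #                 "url": "file:///export/docs/report.pdf",
    #                 "snippet": "...matching text...",
    #                 "size": "123456",
    #                 "filename": "report.pdf",
    #                 "abstract": "...",
    #                 "author": "...",
    #                 "mtype": "application/pdf",
    #                 "time": "..."}, ...]}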
    for result in response_json.get('results', []):
        title = result['label']
        # map the indexed file:// path to a web-reachable download URL
        url = result['url'].replace('file://' + mount_prefix, dl_prefix)
        content = '{}'.format(result['snippet'])

        # append result
        item = {'url': url,
                'title': title,
                'content': content,
                'template': 'files.html'}

        if result['size']:
            item['size'] = int(result['size'])

        for parameter in ['filename', 'abstract', 'author', 'mtype', 'time']:
            if result[parameter]:
                item[parameter] = result[parameter]

        # facilitate preview support for known mime types
        if 'mtype' in result and '/' in result['mtype']:
            (mtype, subtype) = result['mtype'].split('/')
            item['mtype'] = mtype
            item['subtype'] = subtype

            if mtype in ['audio', 'video']:
                item['embedded'] = embedded_url.format(
                    ttype=mtype,
                    url=quote(url.encode('utf8'), '/:'),
                    mtype=result['mtype'])

            if mtype in ['image'] and subtype in ['bmp', 'gif', 'jpeg', 'png']:
                item['img_src'] = url

        results.append(item)

    if 'nres' in response_json:
        results.append({'number_of_results': response_json['nres']})

    return results