mirror of
https://github.com/searxng/searxng.git
synced 2025-01-24 09:08:10 +00:00
[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.
This commit is contained in:
parent
7fdfeca3a4
commit
15eef0ebdb
4 changed files with 133 additions and 38 deletions
32
searx/exceptions.py
Normal file
32
searx/exceptions.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
'''
|
||||
searx is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
searx is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||
|
||||
(C) 2017- by Alexandre Flament, <alex@al-f.net>
|
||||
'''
|
||||
|
||||
|
||||
class SearxException(Exception):
    """Base class of the exceptions raised on purpose by searx."""


class SearxParameterException(SearxException):
    """Signal that a request parameter is empty or has an invalid value.

    Attributes:
        message: human readable description, identical to ``str(exception)``.
        parameter_name: name of the offending parameter.
        parameter_value: the rejected value, kept exactly as it was received.
    """

    def __init__(self, name, value):
        """Build the error message from the parameter name and its value.

        :param name: name of the request parameter.
        :param value: rejected value; ``''`` and ``None`` are reported as an
            empty parameter. The value may be of any type (for example an
            int coming from the preferences).
        """
        if value == '' or value is None:
            message = 'Empty %s parameter' % (name,)
        else:
            # %-formatting instead of "+": concatenating a non-string value
            # (e.g. an out-of-range integer) raises TypeError and would hide
            # the real validation error.
            message = 'Invalid value "%s" for parameter %s' % (value, name)
        super(SearxParameterException, self).__init__(message)
        # Exceptions have no implicit ``message`` attribute on Python 3,
        # so expose it explicitly for code that reads ``e.message``.
        self.message = message
        self.parameter_name = name
        self.parameter_value = value
|
|
@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery
|
|||
from searx.results import ResultContainer
|
||||
from searx import logger
|
||||
from searx.plugins import plugins
|
||||
from searx.languages import language_codes
|
||||
from searx.exceptions import SearxParameterException
|
||||
|
||||
# child logger dedicated to this module
logger = logger.getChild('search')

# module-level search counter, initialised to zero
number_of_searches = 0

# lower-cased first field of every language_codes entry, plus the special
# value 'all'; the language of a query is checked against this set
language_code_set = {entry[0].lower() for entry in language_codes}
language_code_set.add('all')
|
||||
|
||||
|
||||
def send_http_request(engine, request_params, start_time, timeout_limit):
|
||||
# for page_load_time stats
|
||||
|
@ -182,33 +187,13 @@ def default_request_params():
|
|||
|
||||
|
||||
def get_search_query_from_webapp(preferences, form):
|
||||
query = None
|
||||
query_engines = []
|
||||
query_categories = []
|
||||
query_pageno = 1
|
||||
query_lang = 'all'
|
||||
query_time_range = None
|
||||
# no text for the query ?
|
||||
if not form.get('q'):
|
||||
raise SearxParameterException('q', '')
|
||||
|
||||
# set blocked engines
|
||||
disabled_engines = preferences.engines.get_disabled()
|
||||
|
||||
# set specific language if set
|
||||
query_lang = preferences.get_value('language')
|
||||
|
||||
# safesearch
|
||||
query_safesearch = preferences.get_value('safesearch')
|
||||
|
||||
# TODO better exceptions
|
||||
if not form.get('q'):
|
||||
raise Exception('noquery')
|
||||
|
||||
# set pagenumber
|
||||
pageno_param = form.get('pageno', '1')
|
||||
if not pageno_param.isdigit() or int(pageno_param) < 1:
|
||||
pageno_param = 1
|
||||
|
||||
query_pageno = int(pageno_param)
|
||||
|
||||
# parse query, if tags are set, which change
|
||||
# the search engine or search-language
|
||||
raw_text_query = RawTextQuery(form['q'], disabled_engines)
|
||||
|
@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form):
|
|||
# set query
|
||||
query = raw_text_query.getSearchQuery()
|
||||
|
||||
# get and check page number
|
||||
pageno_param = form.get('pageno', '1')
|
||||
if not pageno_param.isdigit() or int(pageno_param) < 1:
|
||||
raise SearxParameterException('pageno', pageno_param)
|
||||
query_pageno = int(pageno_param)
|
||||
|
||||
# get language
|
||||
# set specific language if set on request, query or preferences
|
||||
# TODO support search with multiple languages
|
||||
if len(raw_text_query.languages):
|
||||
|
@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form):
|
|||
else:
|
||||
query_lang = preferences.get_value('language')
|
||||
|
||||
# check language
|
||||
if query_lang not in language_code_set:
|
||||
raise SearxParameterException('language', query_lang)
|
||||
|
||||
# get safesearch
|
||||
if 'safesearch' in form:
|
||||
query_safesearch = form.get('safesearch')
|
||||
# first check safesearch
|
||||
if not query_safesearch.isdigit():
|
||||
raise SearxParameterException('safesearch', query_safesearch)
|
||||
query_safesearch = int(query_safesearch)
|
||||
else:
|
||||
query_safesearch = preferences.get_value('safesearch')
|
||||
|
||||
# safesearch : second check
|
||||
if query_safesearch < 0 or query_safesearch > 2:
|
||||
raise SearxParameterException('safesearch', query_safesearch)
|
||||
|
||||
# get time_range
|
||||
query_time_range = form.get('time_range')
|
||||
|
||||
# check time_range
|
||||
if not(query_time_range is None)\
|
||||
and not (query_time_range in ['', 'day', 'week', 'month', 'year']):
|
||||
raise SearxParameterException('time_range', query_time_range)
|
||||
|
||||
# query_engines
|
||||
query_engines = raw_text_query.engines
|
||||
|
||||
# query_categories
|
||||
query_categories = []
|
||||
|
||||
# if engines are calculated from query,
|
||||
# set categories by using that information
|
||||
if query_engines and raw_text_query.specific:
|
||||
|
|
|
@ -11,6 +11,12 @@
|
|||
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
|
||||
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
|
||||
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
|
||||
{% if error_message %}
|
||||
<item>
|
||||
<title>Error</title>
|
||||
<description>{{ error_message|e }}</description>
|
||||
</item>
|
||||
{% endif %}
|
||||
{% for r in results %}
|
||||
<item>
|
||||
<title>{{ r.title }}</title>
|
||||
|
|
|
@ -52,6 +52,7 @@ from flask import (
|
|||
from flask_babel import Babel, gettext, format_date, format_decimal
|
||||
from flask.json import jsonify
|
||||
from searx import settings, searx_dir, searx_debug
|
||||
from searx.exceptions import SearxException, SearxParameterException
|
||||
from searx.engines import (
|
||||
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
|
||||
)
|
||||
|
@ -400,6 +401,33 @@ def pre_request():
|
|||
request.user_plugins.append(plugin)
|
||||
|
||||
|
||||
def index_error(output_format, error_message):
    """Build the error response matching the requested output format.

    :param output_format: 'json', 'csv' or 'rss'; any other value is
        handled as html.
    :param error_message: text reported to the client. It is embedded in
        the json and rss outputs only: the csv output has an empty body and
        the html output always shows the generic 'search error' text.
    :return: a ``Response`` object for json, csv and rss, otherwise the
        rendered 'index.html' template.
    """
    if output_format == 'json':
        payload = json.dumps({'error': error_message})
        return Response(payload, mimetype='application/json')

    if output_format == 'csv':
        # empty csv attachment: the message itself is not part of the body
        csv_response = Response('', mimetype='application/csv')
        csv_response.headers.add('Content-Disposition',
                                 'attachment;Filename=searx.csv')
        return csv_response

    if output_format == 'rss':
        # echo the submitted query back in the feed when there is one
        if 'q' in request.form:
            submitted_query = request.form['q']
        else:
            submitted_query = ''
        rss_body = render(
            'opensearch_response_rss.xml',
            results=[],
            q=submitted_query,
            number_of_results=0,
            base_url=get_base_url(),
            error_message=error_message
        )
        return Response(rss_body, mimetype='text/xml')

    # html
    request.errors.append(gettext('search error'))
    return render('index.html')
|
||||
|
||||
|
||||
@app.route('/search', methods=['GET', 'POST'])
|
||||
@app.route('/', methods=['GET', 'POST'])
|
||||
def index():
|
||||
|
@ -408,10 +436,19 @@ def index():
|
|||
Supported outputs: html, json, csv, rss.
|
||||
"""
|
||||
|
||||
# output_format
|
||||
output_format = request.form.get('format', 'html')
|
||||
if output_format not in ['html', 'csv', 'json', 'rss']:
|
||||
output_format = 'html'
|
||||
|
||||
# check if there is query
|
||||
if request.form.get('q') is None:
|
||||
if output_format == 'html':
|
||||
return render(
|
||||
'index.html',
|
||||
)
|
||||
else:
|
||||
return index_error(output_format, 'No query'), 400
|
||||
|
||||
# search
|
||||
search_query = None
|
||||
|
@ -421,20 +458,24 @@ def index():
|
|||
# search = Search(search_query) # without plugins
|
||||
search = SearchWithPlugins(search_query, request)
|
||||
result_container = search.search()
|
||||
except:
|
||||
request.errors.append(gettext('search error'))
|
||||
except Exception as e:
|
||||
# log exception
|
||||
logger.exception('search error')
|
||||
return render(
|
||||
'index.html',
|
||||
)
|
||||
|
||||
# is it an invalid input parameter or something else ?
|
||||
if (issubclass(e.__class__, SearxParameterException)):
|
||||
return index_error(output_format, e.message), 400
|
||||
else:
|
||||
return index_error(output_format, gettext('search error')), 500
|
||||
|
||||
# results
|
||||
results = result_container.get_ordered_results()
|
||||
number_of_results = result_container.results_number()
|
||||
if number_of_results < result_container.results_length():
|
||||
number_of_results = 0
|
||||
|
||||
# UI
|
||||
advanced_search = request.form.get('advanced_search', None)
|
||||
output_format = request.form.get('format', 'html')
|
||||
if output_format not in ['html', 'csv', 'json', 'rss']:
|
||||
output_format = 'html'
|
||||
|
||||
# output
|
||||
for result in results:
|
||||
|
@ -470,10 +511,6 @@ def index():
|
|||
else:
|
||||
result['publishedDate'] = format_date(result['publishedDate'])
|
||||
|
||||
number_of_results = result_container.results_number()
|
||||
if number_of_results < result_container.results_length():
|
||||
number_of_results = 0
|
||||
|
||||
if output_format == 'json':
|
||||
return Response(json.dumps({'query': search_query.query,
|
||||
'number_of_results': number_of_results,
|
||||
|
|
Loading…
Reference in a new issue