mirror of
https://github.com/searxng/searxng.git
synced 2024-11-25 12:21:00 +00:00
Feature/standalone searx update (#1591)
* chg: dev: update standalone_searx parent d8a5df721b33dd8a7cc9e21dba4060f21d629f69 author rachmadaniHaryono <foreturiga@gmail.com> 1603896594 +0800 committer rachmadaniHaryono <foreturiga@gmail.com> 1603896619 +0800 chg: dev: debug engine_shortcuts chg: dev: only initialize if engine is given chg: dev: split main chg: dev: standalone_searx chg: dev: update standalone_searx chg: doc: remove unnecessary log chg: test: differentiate travis chg: test: disable shortcut chg: test: use default engine settings fix: dev: category choices fix: dev: duplicate engine shortcut fix: dev: travis python3 fix: test: use empty string as shortcut fix: test: apkm fix: test: engine shortcut fix: test: mypy fix: test: parameter fix: test: pep8 fix: test: py2 compatibilities fix: test: searx settings fix: test: travis engines new: dev: deduplicate engine new: dev: main receive engines parameter new: dev: parse_argument accept engines parameter new: dev: split search query from get_result func new: test: basic result case Suggestions: use RawTextQuery to make the suggestions URLs. Update all themes accordingly. 
* new: doc: searx import and init * chg: dev: parse_argument - doc - run on __main__ - simple parse_args * chg: doc: module * chg: dev: import section - remove unused python path modification - new required package * chg: dev: script run - parse_argument func return directly parsed results - main func return dict instead json text - dump directly on sys.stdout.write * chg: dev: get_search_query and get_search_query func * chg: dev: main func - move inner function outside - return dict instead of json text * new: dev: add utils to doc sys path * new: doc: standalone_searx * fix: doc: run script * chg: dev: mypy type hint * chg: dev: SearchQuery don't have attr engines * chg: dev: reset engines __init__ * chg: test: unit test update * chg: dev: pylint and flake8 * new: test: standalone_searx * chg: dev: main func and doc * chg: dev: import and type hint * new: dev: main func - remove get_result func - single func which just translate dict * chg: test: put mypy on dev requirement * chg: doc: update * new: doc: add standalone_searx module member * chg: doc: shell command line * chg: dev: remove mypy * chg: doc: module docstring
This commit is contained in:
parent
1b42d42695
commit
c03e4c86bc
5 changed files with 313 additions and 69 deletions
|
@ -87,6 +87,7 @@ issues_github_path = "searx/searx"
|
||||||
# HTML -----------------------------------------------------------------
|
# HTML -----------------------------------------------------------------
|
||||||
|
|
||||||
sys.path.append(os.path.abspath('_themes'))
|
sys.path.append(os.path.abspath('_themes'))
|
||||||
|
sys.path.insert(0, os.path.abspath("../utils/"))
|
||||||
html_theme_path = ['_themes']
|
html_theme_path = ['_themes']
|
||||||
html_theme = "searx"
|
html_theme = "searx"
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ developers.
|
||||||
filtron.sh
|
filtron.sh
|
||||||
morty.sh
|
morty.sh
|
||||||
lxc.sh
|
lxc.sh
|
||||||
|
standalone_searx.py
|
||||||
|
|
||||||
.. _toolboxing common:
|
.. _toolboxing common:
|
||||||
|
|
||||||
|
|
11
docs/utils/standalone_searx.py.rst
Normal file
11
docs/utils/standalone_searx.py.rst
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
|
||||||
|
.. _standalone_searx.py:
|
||||||
|
|
||||||
|
=============================
|
||||||
|
``utils/standalone_searx.py``
|
||||||
|
=============================
|
||||||
|
|
||||||
|
.. automodule:: standalone_searx
|
||||||
|
:members:
|
||||||
|
|
||||||
|
|
118
tests/unit/test_standalone_searx.py
Normal file
118
tests/unit/test_standalone_searx.py
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Test utils/standalone_searx.py"""
|
||||||
|
import datetime
|
||||||
|
import importlib.util
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from mock import Mock, patch
|
||||||
|
from nose2.tools import params
|
||||||
|
|
||||||
|
from searx.testing import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
def get_standalone_searx_module():
|
||||||
|
"""Get standalone_searx module."""
|
||||||
|
module_name = 'utils.standalone_searx'
|
||||||
|
filename = 'utils/standalone_searx.py'
|
||||||
|
spec = importlib.util.spec_from_file_location(module_name, filename)
|
||||||
|
sas = importlib.util.module_from_spec(spec)
|
||||||
|
spec.loader.exec_module(sas)
|
||||||
|
return sas
|
||||||
|
|
||||||
|
|
||||||
|
class StandaloneSearx(SearxTestCase):
|
||||||
|
"""Unit test for standalone_searx."""
|
||||||
|
|
||||||
|
def test_parse_argument_no_args(self):
|
||||||
|
"""Test parse argument without args."""
|
||||||
|
sas = get_standalone_searx_module()
|
||||||
|
with patch.object(sys, 'argv', ['standalone_searx']), \
|
||||||
|
self.assertRaises(SystemExit):
|
||||||
|
sas.parse_argument()
|
||||||
|
|
||||||
|
def test_parse_argument_basic_args(self):
|
||||||
|
"""Test parse argument with basic args."""
|
||||||
|
sas = get_standalone_searx_module()
|
||||||
|
query = 'red box'
|
||||||
|
exp_dict = {
|
||||||
|
'query': query, 'category': 'general', 'lang': 'all', 'pageno': 1,
|
||||||
|
'safesearch': '0', 'timerange': None}
|
||||||
|
args = ['standalone_searx', query]
|
||||||
|
with patch.object(sys, 'argv', args):
|
||||||
|
res = sas.parse_argument()
|
||||||
|
self.assertEqual(exp_dict, vars(res))
|
||||||
|
res2 = sas.parse_argument(args[1:])
|
||||||
|
self.assertEqual(exp_dict, vars(res2))
|
||||||
|
|
||||||
|
def test_to_dict(self):
|
||||||
|
"""test to_dict."""
|
||||||
|
sas = get_standalone_searx_module()
|
||||||
|
self.assertEqual(
|
||||||
|
sas.to_dict(
|
||||||
|
sas.get_search_query(sas.parse_argument(['red box']))),
|
||||||
|
{
|
||||||
|
'search': {
|
||||||
|
'q': 'red box', 'pageno': 1, 'lang': 'all',
|
||||||
|
'safesearch': 0, 'timerange': None
|
||||||
|
},
|
||||||
|
'results': [], 'infoboxes': [], 'suggestions': [],
|
||||||
|
'answers': [], 'paging': False, 'results_number': 0
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_to_dict_with_mock(self):
|
||||||
|
"""test to dict."""
|
||||||
|
sas = get_standalone_searx_module()
|
||||||
|
with patch.object(sas.searx.search, 'Search') as mock_s:
|
||||||
|
m_search = mock_s().search()
|
||||||
|
m_sq = Mock()
|
||||||
|
self.assertEqual(
|
||||||
|
sas.to_dict(m_sq),
|
||||||
|
{
|
||||||
|
'answers': [],
|
||||||
|
'infoboxes': m_search.infoboxes,
|
||||||
|
'paging': m_search.paging,
|
||||||
|
'results': m_search.get_ordered_results(),
|
||||||
|
'results_number': m_search.results_number(),
|
||||||
|
'search': {
|
||||||
|
'lang': m_sq.lang,
|
||||||
|
'pageno': m_sq.pageno,
|
||||||
|
'q': m_sq.query,
|
||||||
|
'safesearch': m_sq.safesearch,
|
||||||
|
'timerange': m_sq.time_range,
|
||||||
|
},
|
||||||
|
'suggestions': []
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_get_search_query(self):
|
||||||
|
"""test get_search_query."""
|
||||||
|
sas = get_standalone_searx_module()
|
||||||
|
args = sas.parse_argument(['rain', ])
|
||||||
|
search_q = sas.get_search_query(args)
|
||||||
|
self.assertTrue(search_q)
|
||||||
|
self.assertEqual(str(search_q), 'rain;[]')
|
||||||
|
|
||||||
|
def test_no_parsed_url(self):
|
||||||
|
"""test no_parsed_url func"""
|
||||||
|
sas = get_standalone_searx_module()
|
||||||
|
self.assertEqual(
|
||||||
|
sas.no_parsed_url([{'parsed_url': 'http://example.com'}]),
|
||||||
|
[{}]
|
||||||
|
)
|
||||||
|
|
||||||
|
@params(
|
||||||
|
(datetime.datetime(2020, 1, 1), '2020-01-01T00:00:00'),
|
||||||
|
('a'.encode('utf8'), 'a'),
|
||||||
|
(set([1]), [1])
|
||||||
|
)
|
||||||
|
def test_json_serial(self, arg, exp_res):
|
||||||
|
"""test json_serial func"""
|
||||||
|
sas = get_standalone_searx_module()
|
||||||
|
self.assertEqual(sas.json_serial(arg), exp_res)
|
||||||
|
|
||||||
|
def test_json_serial_error(self):
|
||||||
|
"""test error on json_serial."""
|
||||||
|
sas = get_standalone_searx_module()
|
||||||
|
with self.assertRaises(TypeError):
|
||||||
|
sas.json_serial('a')
|
|
@ -1,5 +1,63 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
"""Script to run searx from terminal.
|
||||||
|
|
||||||
|
Getting the categories without initializing the engines will only return `['general']`
|
||||||
|
|
||||||
|
>>> import searx.engines
|
||||||
|
... list(searx.engines.categories.keys())
|
||||||
|
['general']
|
||||||
|
>>> import searx
|
||||||
|
... searx.engines.initialize_engines(searx.settings['engines'])
|
||||||
|
... list(searx.engines.categories.keys())
|
||||||
|
['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']
|
||||||
|
|
||||||
|
Example to use this script:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
$ SEARX_DEBUG=1 python3 utils/standalone_searx.py rain
|
||||||
|
|
||||||
|
Example to run it from python:
|
||||||
|
|
||||||
|
>>> import importlib
|
||||||
|
... import json
|
||||||
|
... import sys
|
||||||
|
... import searx
|
||||||
|
... import searx.engines
|
||||||
|
... search_query = 'rain'
|
||||||
|
... # initialize engines
|
||||||
|
... searx.engines.initialize_engines(searx.settings['engines'])
|
||||||
|
... # load the engine categories once instead of each time the function is called
|
||||||
|
... engine_cs = list(searx.engines.categories.keys())
|
||||||
|
... # load module
|
||||||
|
... spec = importlib.util.spec_from_file_location(
|
||||||
|
... 'utils.standalone_searx', 'utils/standalone_searx.py')
|
||||||
|
... sas = importlib.util.module_from_spec(spec)
|
||||||
|
... spec.loader.exec_module(sas)
|
||||||
|
... # use function from module
|
||||||
|
... prog_args = sas.parse_argument([search_query], category_choices=engine_cs)
|
||||||
|
... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs)
|
||||||
|
... res_dict = sas.to_dict(search_q)
|
||||||
|
... sys.stdout.write(json.dumps(
|
||||||
|
... res_dict, sort_keys=True, indent=4, ensure_ascii=False,
|
||||||
|
... default=sas.json_serial))
|
||||||
|
{
|
||||||
|
"answers": [],
|
||||||
|
"infoboxes": [ {...} ],
|
||||||
|
"paging": true,
|
||||||
|
"results": [... ],
|
||||||
|
"results_number": 820000000.0,
|
||||||
|
"search": {
|
||||||
|
"lang": "all",
|
||||||
|
"pageno": 1,
|
||||||
|
"q": "rain",
|
||||||
|
"safesearch": 0,
|
||||||
|
"timerange": null
|
||||||
|
},
|
||||||
|
"suggestions": [...]
|
||||||
|
}
|
||||||
|
""" # noqa: E501
|
||||||
|
# pylint: disable=pointless-string-statement
|
||||||
'''
|
'''
|
||||||
searx is free software: you can redistribute it and/or modify
|
searx is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU Affero General Public License as published by
|
it under the terms of the GNU Affero General Public License as published by
|
||||||
|
@ -16,90 +74,145 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
|
|
||||||
(C) 2016- by Alexandre Flament, <alex@al-f.net>
|
(C) 2016- by Alexandre Flament, <alex@al-f.net>
|
||||||
'''
|
'''
|
||||||
|
# pylint: disable=wrong-import-position
|
||||||
# set path
|
import argparse
|
||||||
from sys import path
|
|
||||||
from os.path import realpath, dirname
|
|
||||||
path.append(realpath(dirname(realpath(__file__)) + '/../'))
|
|
||||||
|
|
||||||
# initialization
|
|
||||||
from json import dumps
|
|
||||||
from searx import settings
|
|
||||||
import sys
|
import sys
|
||||||
import codecs
|
from datetime import datetime
|
||||||
|
from json import dumps
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import searx
|
||||||
|
import searx.engines
|
||||||
|
import searx.preferences
|
||||||
import searx.query
|
import searx.query
|
||||||
import searx.search
|
import searx.search
|
||||||
import searx.engines
|
|
||||||
import searx.webapdater
|
|
||||||
import searx.preferences
|
|
||||||
import searx.webadapter
|
import searx.webadapter
|
||||||
import argparse
|
|
||||||
|
|
||||||
searx.engines.initialize_engines(settings['engines'])
|
EngineCategoriesVar = Optional[List[str]]
|
||||||
|
|
||||||
# command line parsing
|
|
||||||
parser = argparse.ArgumentParser(description='Standalone searx.')
|
|
||||||
parser.add_argument('query', type=str,
|
|
||||||
help='Text query')
|
|
||||||
parser.add_argument('--category', type=str, nargs='?',
|
|
||||||
choices=searx.engines.categories.keys(),
|
|
||||||
default='general',
|
|
||||||
help='Search category')
|
|
||||||
parser.add_argument('--lang', type=str, nargs='?',default='all',
|
|
||||||
help='Search language')
|
|
||||||
parser.add_argument('--pageno', type=int, nargs='?', default=1,
|
|
||||||
help='Page number starting from 1')
|
|
||||||
parser.add_argument('--safesearch', type=str, nargs='?', choices=['0', '1', '2'], default='0',
|
|
||||||
help='Safe content filter from none to strict')
|
|
||||||
parser.add_argument('--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'],
|
|
||||||
help='Filter by time range')
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# search results for the query
|
def get_search_query(
|
||||||
form = {
|
args: argparse.Namespace, engine_categories: EngineCategoriesVar = None
|
||||||
"q":args.query,
|
) -> searx.search.SearchQuery:
|
||||||
"categories":args.category.decode(),
|
"""Get search results for the query"""
|
||||||
"pageno":str(args.pageno),
|
if engine_categories is None:
|
||||||
"language":args.lang,
|
engine_categories = list(searx.engines.categories.keys())
|
||||||
"time_range":args.timerange
|
try:
|
||||||
}
|
category = args.category.decode('utf-8')
|
||||||
preferences = searx.preferences.Preferences(['oscar'], searx.engines.categories.keys(), searx.engines.engines, [])
|
except AttributeError:
|
||||||
preferences.key_value_settings['safesearch'].parse(args.safesearch)
|
category = args.category
|
||||||
|
form = {
|
||||||
|
"q": args.query,
|
||||||
|
"categories": category,
|
||||||
|
"pageno": str(args.pageno),
|
||||||
|
"language": args.lang,
|
||||||
|
"time_range": args.timerange
|
||||||
|
}
|
||||||
|
preferences = searx.preferences.Preferences(
|
||||||
|
['oscar'], engine_categories, searx.engines.engines, [])
|
||||||
|
preferences.key_value_settings['safesearch'].parse(args.safesearch)
|
||||||
|
|
||||||
search_query, raw_text_query, _, _ = searx.webadapter.get_search_query_from_webapp(preferences, form)
|
search_query = searx.webadapter.get_search_query_from_webapp(
|
||||||
search = searx.search.Search(search_query)
|
preferences, form)[0]
|
||||||
result_container = search.search()
|
return search_query
|
||||||
|
|
||||||
# output
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
def no_parsed_url(results):
|
def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||||
|
"""Remove parsed url from dict."""
|
||||||
for result in results:
|
for result in results:
|
||||||
del result['parsed_url']
|
del result['parsed_url']
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def json_serial(obj):
|
|
||||||
"""JSON serializer for objects not serializable by default json code"""
|
def json_serial(obj: Any) -> Any:
|
||||||
|
"""JSON serializer for objects not serializable by default json code.
|
||||||
|
|
||||||
|
:raise TypeError: raised when **obj** is not serializable
|
||||||
|
"""
|
||||||
if isinstance(obj, datetime):
|
if isinstance(obj, datetime):
|
||||||
serial = obj.isoformat()
|
serial = obj.isoformat()
|
||||||
return serial
|
return serial
|
||||||
raise TypeError ("Type not serializable")
|
if isinstance(obj, bytes):
|
||||||
|
return obj.decode('utf8')
|
||||||
|
if isinstance(obj, set):
|
||||||
|
return list(obj)
|
||||||
|
raise TypeError("Type ({}) not serializable".format(type(obj)))
|
||||||
|
|
||||||
result_container_json = {
|
|
||||||
"search": {
|
def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]:
|
||||||
"q": search_query.query,
|
"""Get result from parsed arguments."""
|
||||||
"pageno": search_query.pageno,
|
result_container = searx.search.Search(search_query).search()
|
||||||
"lang": search_query.lang,
|
result_container_json = {
|
||||||
"safesearch": search_query.safesearch,
|
"search": {
|
||||||
"timerange": search_query.time_range,
|
"q": search_query.query,
|
||||||
"engines": search_query.engines
|
"pageno": search_query.pageno,
|
||||||
},
|
"lang": search_query.lang,
|
||||||
"results": no_parsed_url(result_container.get_ordered_results()),
|
"safesearch": search_query.safesearch,
|
||||||
"infoboxes": result_container.infoboxes,
|
"timerange": search_query.time_range,
|
||||||
"suggestions": list(result_container.suggestions),
|
},
|
||||||
"answers": list(result_container.answers),
|
"results": no_parsed_url(result_container.get_ordered_results()),
|
||||||
"paging": result_container.paging,
|
"infoboxes": result_container.infoboxes,
|
||||||
"results_number": result_container.results_number()
|
"suggestions": list(result_container.suggestions),
|
||||||
}
|
"answers": list(result_container.answers),
|
||||||
sys.stdout = codecs.getwriter("UTF-8")(sys.stdout)
|
"paging": result_container.paging,
|
||||||
sys.stdout.write(dumps(result_container_json, sort_keys=True, indent=4, ensure_ascii=False, encoding="utf-8", default=json_serial))
|
"results_number": result_container.results_number()
|
||||||
|
}
|
||||||
|
return result_container_json
|
||||||
|
|
||||||
|
|
||||||
|
def parse_argument(
|
||||||
|
args: Optional[List[str]]=None,
|
||||||
|
category_choices: EngineCategoriesVar=None
|
||||||
|
) -> argparse.Namespace:
|
||||||
|
"""Parse command line.
|
||||||
|
|
||||||
|
:raise SystemExit: Query argument required on `args`
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
>>> import importlib
|
||||||
|
... # load module
|
||||||
|
... spec = importlib.util.spec_from_file_location(
|
||||||
|
... 'utils.standalone_searx', 'utils/standalone_searx.py')
|
||||||
|
... sas = importlib.util.module_from_spec(spec)
|
||||||
|
... spec.loader.exec_module(sas)
|
||||||
|
... sas.parse_argument()
|
||||||
|
usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]]
|
||||||
|
query
|
||||||
|
SystemExit: 2
|
||||||
|
>>> sas.parse_argument(['rain'])
|
||||||
|
Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None)
|
||||||
|
""" # noqa: E501
|
||||||
|
if not category_choices:
|
||||||
|
category_choices = list(searx.engines.categories.keys())
|
||||||
|
parser = argparse.ArgumentParser(description='Standalone searx.')
|
||||||
|
parser.add_argument('query', type=str,
|
||||||
|
help='Text query')
|
||||||
|
parser.add_argument('--category', type=str, nargs='?',
|
||||||
|
choices=category_choices,
|
||||||
|
default='general',
|
||||||
|
help='Search category')
|
||||||
|
parser.add_argument('--lang', type=str, nargs='?', default='all',
|
||||||
|
help='Search language')
|
||||||
|
parser.add_argument('--pageno', type=int, nargs='?', default=1,
|
||||||
|
help='Page number starting from 1')
|
||||||
|
parser.add_argument(
|
||||||
|
'--safesearch', type=str, nargs='?',
|
||||||
|
choices=['0', '1', '2'], default='0',
|
||||||
|
help='Safe content filter from none to strict')
|
||||||
|
parser.add_argument(
|
||||||
|
'--timerange', type=str,
|
||||||
|
nargs='?', choices=['day', 'week', 'month', 'year'],
|
||||||
|
help='Filter by time range')
|
||||||
|
return parser.parse_args(args)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
searx.engines.initialize_engines(searx.settings['engines'])
|
||||||
|
engine_cs = list(searx.engines.categories.keys())
|
||||||
|
prog_args = parse_argument(category_choices=engine_cs)
|
||||||
|
search_q = get_search_query(prog_args, engine_categories=engine_cs)
|
||||||
|
res_dict = to_dict(search_q)
|
||||||
|
sys.stdout.write(dumps(
|
||||||
|
res_dict, sort_keys=True, indent=4, ensure_ascii=False,
|
||||||
|
default=json_serial))
|
||||||
|
|
Loading…
Reference in a new issue