From 0ca2520115ecbdab40c746ce03d3331b5c21886d Mon Sep 17 00:00:00 2001 From: Bnyro Date: Wed, 27 Nov 2024 14:13:23 +0100 Subject: [PATCH] [feat] json/xpath engine: config option for method and body --- searx/engines/json_engine.py | 34 +++++++++++++++++++++------------- searx/engines/xpath.py | 20 +++++++++++++++++++- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 9d919a63d..942f6ae8a 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -16,23 +16,17 @@ from json import loads from urllib.parse import urlencode from searx.utils import to_string, html_to_text - +# parameters for generating a request search_url = None -url_query = None -url_prefix = "" -content_query = None -title_query = None -content_html_to_text = False -title_html_to_text = False -paging = False -suggestion_query = '' -results_query = '' +method = 'GET' +request_body = '' cookies = {} headers = {} '''Some engines might offer different result based on cookies or headers. 
Possible use-case: To set safesearch cookie or header to moderate.''' +paging = False # parameters for engines with paging support # # number of results on each page @@ -41,6 +35,16 @@ page_size = 1 # number of the first page (usually 0 or 1) first_page_num = 1 +# parameters for parsing the response +results_query = '' +url_query = None +url_prefix = "" +title_query = None +content_query = None +suggestion_query = '' +title_html_to_text = False +content_html_to_text = False + def iterate(iterable): if isinstance(iterable, dict): @@ -98,9 +102,8 @@ def query(data, query_string): def request(query, params): # pylint: disable=redefined-outer-name - query = urlencode({'q': query})[2:] + fp = {'query': urlencode({'q': query})[2:]} # pylint: disable=invalid-name - fp = {'query': query} # pylint: disable=invalid-name if paging and search_url.find('{pageno}') >= 0: fp['pageno'] = (params['pageno'] - 1) * page_size + first_page_num @@ -108,7 +111,12 @@ def request(query, params): # pylint: disable=redefined-outer-name params['headers'].update(headers) params['url'] = search_url.format(**fp) - params['query'] = query + params['method'] = method + + if request_body: + # don't url-encode the query if it's in the request body + fp['query'] = query + params['data'] = request_body.format(**fp) return params diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 558531880..97c84bd09 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -12,6 +12,8 @@ Request: - :py:obj:`search_url` - :py:obj:`lang_all` - :py:obj:`soft_max_redirects` +- :py:obj:`method` +- :py:obj:`request_body` - :py:obj:`cookies` - :py:obj:`headers` @@ -151,6 +153,16 @@ headers = {} '''Some engines might offer different result based headers. Possible use-case: To set header to moderate.''' +method = 'GET' +'''Some engines might require POST requests for search.''' + +request_body = '' +'''The body of the request. This can only be used if a different :py:obj:`method` +is set, e.g. 
``POST``. For formatting see the documentation of :py:obj:`search_url`:: + + search={query}&page={pageno}{time_range}{safe_search} +''' + paging = False '''Engine supports paging [True or False].''' @@ -236,8 +248,14 @@ def request(query, params): params['headers'].update(headers) params['url'] = search_url.format(**fargs) - params['soft_max_redirects'] = soft_max_redirects + params['method'] = method + if request_body: + # don't url-encode the query if it's in the request body + fargs['query'] = query + params['data'] = request_body.format(**fargs) + + params['soft_max_redirects'] = soft_max_redirects params['raise_for_httperror'] = False return params