[json_engine] document existing options

2025-02-05 23:02:22 +00:00 · 2025-01-11 01:51:33 +01:00 · 2025-01-11 01:51:33 +01:00 · 591d9c2505
commit 591d9c2505
parent 09cce18518
3 changed files with 137 additions and 13 deletions
--- a/docs/dev/engines/index.rst
+++ b/docs/dev/engines/index.rst
@ -45,6 +45,7 @@ Online Engines
   demo/demo_online
   xpath
   mediawiki
   json_engine
 .. toctree::
   :maxdepth: 1
--- a/docs/dev/engines/json_engine.rst
+++ b/docs/dev/engines/json_engine.rst
@ -0,0 +1,13 @@
 .. _json_engine engine:
 ============
 JSON Engine
 ============
 .. contents::
   :depth: 2
   :local:
   :backlinks: entry
 .. automodule:: searx.engines.json_engine
  :members:
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@ -2,12 +2,58 @@
 """The JSON engine is a *generic* engine with which it is possible to configure
 engines in the settings.
-.. todo::
+Configuration
 =============
-   - The JSON engine needs documentation!!
+Request:
-   - The parameters of the JSON engine should be adapted to those of the XPath
+- :py:obj:`search_url`
-     engine.
+- :py:obj:`method`
 - :py:obj:`request_body`
 - :py:obj:`cookies`
 - :py:obj:`headers`
 Paging:
 - :py:obj:`paging`
 - :py:obj:`page_size`
 - :py:obj:`first_page_num`
 Response:
 - :py:obj:`title_html_to_text`
 - :py:obj:`content_html_to_text`
 JSON query:
 - :py:obj:`results_query`
 - :py:obj:`url_query`
 - :py:obj:`url_prefix`
 - :py:obj:`title_query`
 - :py:obj:`content_query`
 - :py:obj:`suggestion_query`
 Example
 =======
 Here is a simple example of a JSON engine configure in the :ref:`settings
 engine` section, further read :ref:`engines-dev`.
 .. code:: yaml
  - name : mdn
    engine : json_engine
    paging : True
    search_url : https://developer.mozilla.org/api/v1/search?q={query}&page={pageno}
    results_query : documents
    url_query : mdn_url
    url_prefix : https://developer.mozilla.org
    title_query : title
    content_query : summary
 Implementations
 ===============
 """
@ -16,34 +62,92 @@ from json import loads
 from urllib.parse import urlencode
 from searx.utils import to_string, html_to_text
 # parameters for generating a request
 search_url = None
 """
 Search URL of the engine.  Example::
    https://example.org/?search={query}&page={pageno}
 Replacements are:
 ``{query}``:
  Search terms from user.
 ``{pageno}``:
  Page number if engine supports paging :py:obj:`paging`
 """
 method = 'GET'
 '''Some engines might require to do POST requests for search.'''
 request_body = ''
 '''The body of the request.  This can only be used if different :py:obj:`method`
 is set, e.g. ``POST``. For formatting see the documentation of :py:obj:`search_url`.
 Note: Curly brackets which aren't encapsulating a replacement placeholder
 must be escaped by doubling each ``{`` and ``}``.
 .. code:: yaml
    request_body: >-
      {{
        "search": "{query}",
        "page": {pageno},
        "extra": {{
          "time_range": {time_range},
          "rating": "{safe_search}"
        }}
      }}
 '''
 cookies = {}
 '''Some engines might offer different result based on cookies.
 Possible use-case: To set safesearch cookie.'''
 headers = {}
 '''Some engines might offer different result based on cookies or headers.
 Possible use-case: To set safesearch cookie or header to moderate.'''
 paging = False
-# parameters for engines with paging support
+'''Engine supports paging [True or False].'''
-#
+
-# number of results on each page
+page_size = 1
-# (only needed if the site requires not a page number, but an offset)
+'''Number of results on each page.  Only needed if the site requires not a page
-page_size = 1
+number, but an offset.'''
-# number of the first page (usually 0 or 1)
+
-first_page_num = 1
+first_page_num = 1
 '''Number of the first page (usually 0 or 1).'''
 # parameters for parsing the response
 results_query = ''
 '''JSON query for the list of result items.
 The query string is a slash `/` separated path of JSON key names.
 Array entries can be specified using the index or can be omitted entirely,
 in which case each entry is considered -
 most implementations will default to the first entry in this case.
 '''
 url_query = None
 '''JSON query of result's ``url``. For the query string documentation see :py:obj:`results_query`'''
 url_prefix = ""
 '''String to prepend to the result's ``url``.'''
 title_query = None
 '''JSON query of result's ``title``. For the query string documentation see :py:obj:`results_query`'''
 content_query = None
 '''JSON query of result's ``content``. For the query string documentation see :py:obj:`results_query`'''
 suggestion_query = ''
 '''JSON query of result's ``suggestion``. For the query string documentation see :py:obj:`results_query`'''
 title_html_to_text = False
 '''Extract text from a HTML title string'''
 content_html_to_text = False
 '''Extract text from a HTML content string'''
 def iterate(iterable):
@ -102,6 +206,7 @@ def query(data, query_string):
 def request(query, params):  # pylint: disable=redefined-outer-name
    '''Build request parameters (see :ref:`engine request`).'''
    fp = {'query': urlencode({'q': query})[2:]}  # pylint: disable=invalid-name
    if paging and search_url.find('{pageno}') >= 0:
@ -126,7 +231,12 @@ def identity(arg):
 def response(resp):
    '''Scrap *results* from the response (see :ref:`engine results`).'''
    results = []
    if not resp.text:
        return results
    json = loads(resp.text)
    title_filter = html_to_text if title_html_to_text else identity