Merge pull request #97 from return42/drop-searx-admin

[docs] reorder blog articles
This commit is contained in:
Markus Heiser 2021-06-08 10:56:18 +00:00 committed by GitHub
commit 5c5db719d2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 1084 additions and 948 deletions

View file

@ -1,88 +0,0 @@
=======
Engines
=======
Special Engine Settings
=======================
.. sidebar:: Further reading ..
- :ref:`settings engine`
- :ref:`engine settings` & :ref:`engine file`
.. toctree::
:maxdepth: 1
engines/recoll.rst
.. _engines generic:
General Engine Settings
=======================
Explanation of the :ref:`general engine configuration` shown in the table
:ref:`configured engines`.
============= =========== ==================== ============
:ref:`engine settings` :ref:`engine file`
------------------------- ---------------------------------
Name (cfg) Categories
------------------------- ---------------------------------
Engine .. Paging support **P**
------------------------- -------------------- ------------
Shortcut **S** Language support **L**
Timeout **TO** Time range support **TR**
Disabled **D** Engine type **ET**
------------- ----------- -------------------- ------------
Safe search **SS**
------------- ----------- ---------------------------------
Weight **W**
------------- ----------- ---------------------------------
Disabled **D**
------------- ----------- ---------------------------------
Show errors **DE**
============= =========== =================================
.. _configured engines:
.. jinja:: searx
.. flat-table:: Engines configured at build time (defaults)
:header-rows: 1
:stub-columns: 2
* - Name (cfg)
- S
- Engine
- TO
- Categories
- P
- L
- SS
- D
- TR
- ET
- W
- D
- DE
{% for name, mod in engines.items() %}
* - {{name}}
- !{{mod.shortcut}}
- {{mod.__name__}}
- {{mod.timeout}}
- {{", ".join(mod.categories)}}
- {{(mod.paging and "y") or ""}}
- {{(mod.language_support and "y") or ""}}
- {{(mod.safesearch and "y") or ""}}
- {{(mod.disabled and "y") or ""}}
- {{(mod.time_range_support and "y") or ""}}
- {{mod.engine_type or ""}}
- {{mod.weight or 1 }}
- {{(mod.disabled and "y") or ""}}
- {{(mod.display_error_messages and "y") or ""}}
{% endfor %}

View file

@ -0,0 +1,79 @@
.. _engine command:
====================
Command Line Engines
====================
.. sidebar:: info
- :origin:`command.py <searx/engines/command.py>`
- :ref:`offline engines`
With *command engines* administrators can run engines to integrate arbitrary
shell commands.
When creating and enabling a ``command`` engine on a public instance, you must
be careful to avoid leaking private data. The easiest solution is to limit the
access by setting ``tokens`` as described in section :ref:`private engines`.
The engine base is flexible. Only your imagination can limit the power of this
engine (and maybe security concerns). The following options are available:
``command``:
A comma separated list of the elements of the command. A special token
``{{QUERY}}`` tells where to put the search terms of the user. Example:
.. code:: yaml
['ls', '-l', '-h', '{{QUERY}}']
``delimiter``:
A mapping containing the delimiter ``chars`` and the *titles* of each element in
``keys``.
``parse_regex``:
A dict containing the regular expressions for each result key.
``query_type``:
The expected type of user search terms. Possible values: ``path`` and
``enum``.
``path``:
Checks if the user provided path is inside the working directory. If not,
the query is not executed.
``enum``:
Is a list of allowed search terms. If the user submits something which is
not included in the list, the query returns an error.
``query_enum``:
A list containing allowed search terms if ``query_type`` is set to ``enum``.
``working_dir``:
The directory where the command has to be executed. Default: ``./``
``result_separator``:
The character that separates results. Default: ``\n``
The example engine below can be used to find files with a specific name in the
configured working directory:
.. code:: yaml
- name: find
engine: command
command: ['find', '.', '-name', '{{QUERY}}']
query_type: path
shortcut: fnd
delimiter:
chars: ' '
keys: ['line']
Acknowledgment
==============
This development was sponsored by `Search and Discovery Fund
<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_.

View file

@ -0,0 +1,77 @@
.. _configured engines:
==================
Configured Engines
==================
.. sidebar:: Further reading ..
- :ref:`engines-dev`
- :ref:`settings engine`
Explanation of the :ref:`general engine configuration` shown in the table
:ref:`configured engines`.
.. table:: The legend for the following table
:width: 100%
============= =========== ==================== ============
:ref:`engine settings` :ref:`engine file`
------------------------- ---------------------------------
Name (cfg) .. Categories
------------- ----------- -------------------- ------------
Engine .. Paging support **P**
------------- ----------- -------------------- ------------
Shortcut **S** Language support **L**
Timeout **TO** Time range support **TR**
Disabled **D** Engine type **ET**
------------- ----------- -------------------- ------------
Safe search **SS**
------------- ----------- ---------------------------------
Weight **W**
------------- ----------- ---------------------------------
Disabled **D**
------------- ----------- ---------------------------------
Show errors **DE**
============= =========== =================================
.. jinja:: searx
.. flat-table:: Engines configured at build time (defaults)
:header-rows: 1
:stub-columns: 2
* - Name (cfg)
- S
- Engine
- TO
- Categories
- P
- L
- SS
- D
- TR
- ET
- W
- D
- DE
{% for name, mod in engines.items() %}
* - {{name}}
- !{{mod.shortcut}}
- {{mod.__name__}}
- {{mod.timeout}}
- {{", ".join(mod.categories)}}
- {{(mod.paging and "y") or ""}}
- {{(mod.language_support and "y") or ""}}
- {{(mod.safesearch and "y") or ""}}
- {{(mod.disabled and "y") or ""}}
- {{(mod.time_range_support and "y") or ""}}
- {{mod.engine_type or ""}}
- {{mod.weight or 1 }}
- {{(mod.disabled and "y") or ""}}
- {{(mod.display_error_messages and "y") or ""}}
{% endfor %}

View file

@ -0,0 +1,22 @@
.. _engines and settings:
==================
Engines & Settings
==================
.. sidebar:: Further reading ..
- :ref:`settings engine`
- :ref:`engine settings` & :ref:`engine file`
.. toctree::
:maxdepth: 1
settings
configured_engines
private-engines
recoll
sql-engines
search-indexer-engines
command-line-engines
searx.engines.xpath

View file

@ -0,0 +1,49 @@
.. _private engines:
============================
Private Engines (``tokens``)
============================
Administrators might find themselves wanting to limit access to some of the
enabled engines on their instances. It might be because they do not want to
expose some private information through :ref:`offline engines`. Or they would
rather share engines only with their trusted friends or colleagues.
To solve this issue the concept of *private engines* exists.
A new option named ``tokens`` was added to engines. It expects a list of
strings. If the user making a request presents one of the tokens of an engine,
they can access information about the engine and make search requests.
Example configuration to restrict access to the Arch Linux Wiki engine:
.. code:: yaml
- name: arch linux wiki
engine: archlinux
shortcut: al
tokens: [ 'my-secret-token' ]
Unless a user has configured the right token, the engine is going
to be hidden from them. It is not going to be included in the
list of engines on the Preferences page or in the output of the
``/config`` REST API call.
Tokens can be added to one's configuration on the Preferences page
under "Engine tokens". The input expects a comma separated list of
strings.
The distribution of the tokens from the administrator to the users
is not carved in stone. As providing access to such engines
implies that the admin knows and trusts the user, we do not consider it
necessary to come up with a strict process. Instead,
we would like to add guidelines to the documentation of the feature.
Acknowledgment
==============
This development was sponsored by `Search and Discovery Fund
<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_.

View file

@ -1,17 +1,17 @@
.. _engine recoll:
======
Recoll
======
=============
Recoll Engine
=============
.. sidebar:: info
- `Recoll <https://www.lesbonscomptes.com/recoll/>`_
- `recoll-webui <https://framagit.org/medoc92/recollwebui.git>`_
- :origin:`searx/engines/recoll.py`
Recoll_ is a desktop full-text search tool based on Xapian. By itself Recoll_
does not offer web or API access, this can be achieved using recoll-webui_
does not offer WEB or API access, this can be achieved using recoll-webui_
Configuration

View file

@ -0,0 +1,136 @@
====================
Local Search Engines
====================
.. sidebar:: further read
- `Comparison to alternatives
<https://docs.meilisearch.com/learn/what_is_meilisearch/comparison_to_alternatives.html>`_
Administrators might find themselves wanting to integrate locally running search
engines. The following ones are supported for now:
* `Elasticsearch`_
* `Meilisearch`_
* `Solr`_
Each search engine is powerful, capable of full-text search. All of the engines
above are added to ``settings.yml`` just commented out, as you have to
set ``base_url`` for all of them.
Please note that if you are not using HTTPS to access these engines, you have to enable
HTTP requests by setting ``enable_http`` to ``True``.
Furthermore, if you do not want to expose these engines on a public instance, you
can still add them and limit the access by setting ``tokens`` as described in
section :ref:`private engines`.
.. _engine meilisearch:
MeiliSearch
===========
.. sidebar:: info
- :origin:`meilisearch.py <searx/engines/meilisearch.py>`
- `MeiliSearch <https://www.meilisearch.com>`_
- `MeiliSearch Documentation <https://docs.meilisearch.com/>`_
- `Install MeiliSearch
<https://docs.meilisearch.com/learn/getting_started/installation.html>`_
MeiliSearch_ is aimed at individuals and small companies. It is designed for
small-scale (less than 10 million documents) data collections. E.g. it is great
for storing web pages you have visited and searching in the contents later.
The engine supports faceted search, so you can search in a subset of documents
of the collection. Furthermore, you can search in MeiliSearch_ instances that
require authentication by setting ``auth_token``.
Here is a simple example to query a Meilisearch instance:
.. code:: yaml
- name: meilisearch
engine: meilisearch
shortcut: mes
base_url: http://localhost:7700
index: my-index
enable_http: true
.. _engine elasticsearch:
Elasticsearch
=============
.. sidebar:: info
- :origin:`elasticsearch.py <searx/engines/elasticsearch.py>`
- `Elasticsearch <https://www.elastic.co/elasticsearch/>`_
- `Elasticsearch Guide
<https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html>`_
- `Install Elasticsearch
<https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html>`_
Elasticsearch_ supports numerous ways to query the data it is storing. At the
moment the engine supports the most popular search methods (``query_type``):
- ``match``,
- ``simple_query_string``,
- ``term`` and
- ``terms``.
If none of the methods fit your use case, you can select ``custom`` query type
and provide the JSON payload to submit to Elasticsearch in
``custom_query_json``.
The following is an example configuration for an Elasticsearch_ instance with
authentication configured to read from ``my-index`` index.
.. code:: yaml
- name: elasticsearch
shortcut: es
engine: elasticsearch
base_url: http://localhost:9200
username: elastic
password: changeme
index: my-index
query_type: match
# custom_query_json: '{ ... }'
enable_http: true
.. _engine solr:
Solr
====
.. sidebar:: info
- :origin:`solr.py <searx/engines/solr.py>`
- `Solr <https://solr.apache.org>`_
- `Solr Resources <https://solr.apache.org/resources.html>`_
- `Install Solr <https://solr.apache.org/guide/installing-solr.html>`_
Solr_ is a popular search engine based on Lucene, just like Elasticsearch_. But
instead of searching in indices, you can search in collections.
This is an example configuration for searching in the collection
``my-collection`` and get the results in ascending order.
.. code:: yaml
- name: solr
engine: solr
shortcut: slr
base_url: http://localhost:8983
collection: my-collection
sort: asc
enable_http: true
Acknowledgment
==============
This development was sponsored by `Search and Discovery Fund
<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_.

View file

@ -0,0 +1,9 @@
.. _xpath engine:
============
XPath Engine
============
.. automodule:: searx.engines.xpath
:members:

View file

@ -22,13 +22,14 @@ file.
settings.yml location
=====================
First, searx will try to load settings.yml from these locations:
The initial ``settings.yml`` will be loaded from these locations:
1. the full path specified in the ``SEARX_SETTINGS_PATH`` environment variable.
2. ``/etc/searx/settings.yml``
If these files don't exist (or are empty or can't be read), searx uses the
:origin:`searx/settings.yml` file.
:origin:`searx/settings.yml` file. Read :ref:`settings use_default_settings` to
see how you can simplify your *user defined* ``settings.yml``.
.. _settings global:
@ -42,9 +43,11 @@ Global Settings
.. code:: yaml
general:
debug : False # Debug mode, only for development
instance_name : "searxng" # displayed name
contact_url: False # mailto:contact@example.com
debug: false # Debug mode, only for development
instance_name: "searxng" # displayed name
contact_url: false # mailto:contact@example.com
.. code:: yaml
brand:
git_url: https://github.com/searxng/searxng
@ -68,10 +71,10 @@ Global Settings
If you host your own documentation, change this URL.
``wiki_url``:
Link to your wiki (or ``False``)
Link to your wiki (or ``false``)
``twitter_url``:
Link to your tweets (or ``False``)
Link to your tweets (or ``false``)
``server:``
@ -80,13 +83,13 @@ Global Settings
.. code:: yaml
server:
port : 8888
bind_address : "127.0.0.1" # address to listen on
secret_key : "ultrasecretkey" # change this!
base_url : False # set custom base_url (or False)
image_proxy : False # proxying image results through searx
default_locale : "" # default interface locale
default_theme : oscar # ui theme
port: 8888
bind_address: "127.0.0.1" # address to listen on
secret_key: "ultrasecretkey" # change this!
base_url: false # set custom base_url (or false)
image_proxy: false # proxying image results through searx
default_locale: "" # default interface locale
default_theme: oscar # ui theme
default_http_headers:
X-Content-Type-Options : nosniff
X-XSS-Protection : 1; mode=block
@ -125,15 +128,19 @@ Global Settings
``outgoing:``
-------------
Communication with search engines.
.. code:: yaml
outgoing: # communication with search engines
request_timeout : 2.0 # default timeout in seconds, can be override by engine
# max_request_timeout: 10.0 # the maximum timeout in seconds
useragent_suffix : "" # informations like an email address to the administrator
pool_connections : 100 # Maximum number of allowable connections, or None for no limits. The default is 100.
pool_maxsize : 10 # Number of allowable keep-alive connections, or None to always allow. The default is 10.
enable_http2: True # See https://www.python-httpx.org/http2/
outgoing:
request_timeout: 2.0 # default timeout in seconds, can be overridden by engine
max_request_timeout: 10.0 # the maximum timeout in seconds
useragent_suffix: "" # information such as an email address for the administrator
pool_connections: 100 # Maximum number of allowable connections, or null
# for no limits. The default is 100.
pool_maxsize: 10 # Number of allowable keep-alive connections, or null
# to always allow. The default is 10.
enable_http2: true # See https://www.python-httpx.org/http2/
# uncomment below section if you want to use a proxy
# proxies:
# all://:
@ -180,8 +187,8 @@ Global Settings
* ``[ 192.168.0.1, fe80::/126 ]``
``retries`` :
Number of retry in case of an HTTP error.
On each retry, searx uses an different proxy and source ip.
Number of retries in case of an HTTP error. On each retry, searx uses a
different proxy and source IP.
``retry_on_http_error`` :
Retry request on some HTTP status code.
@ -193,7 +200,7 @@ Global Settings
* ``[403, 429]``: on HTTP status code 403 and 429.
``enable_http2`` :
Enable by default. Set to ``False`` to disable HTTP/2.
Enabled by default. Set to ``false`` to disable HTTP/2.
``max_redirects`` :
30 by default. Maximum redirect before it is an error.
@ -205,18 +212,18 @@ Global Settings
.. code:: yaml
locales:
en : English
de : Deutsch
he : Hebrew
hu : Magyar
fr : Français
es : Español
it : Italiano
nl : Nederlands
ja : 日本語 (Japanese)
tr : Türkçe
ru : Russian
ro : Romanian
en: English
de: Deutsch
he: Hebrew
hu: Magyar
fr: Français
es: Español
it: Italiano
nl: Nederlands
ja: 日本語 (Japanese)
tr: Türkçe
ru: Russian
ro: Romanian
``locales`` :
Locales codes and their names. Available translations of searx interface.
@ -229,35 +236,49 @@ Engine settings
.. sidebar:: Further reading ..
- :ref:`configured engines`
- :ref:`engines-dev`
In the code example below a *full fledged* example of a YAML setup from a dummy
engine is shown. Most of the options have a default value or even are optional.
.. code:: yaml
- name : bing
engine : bing
shortcut : bi
base_url : 'https://{language}.wikipedia.org/'
categories : general
timeout : 3.0
api_key : 'apikey'
disabled : True
language : en_US
#enable_http: False
#enable_http2: False
#retries: 1
#retry_on_http_error: True # or 403 or [404, 429]
#max_connections: 100
#max_keepalive_connections: 10
#keepalive_expiry: 5.0
#proxies:
# http:
# - http://proxy1:8080
# - http://proxy2:8080
# https:
# - http://proxy1:8080
# - http://proxy2:8080
# - socks5://user:password@proxy3:1080
# - socks5h://user:password@proxy4:1080
- name: example engine
engine: example
shortcut: demo
base_url: 'https://{language}.example.com/'
categories: general
timeout: 3.0
api_key: 'apikey'
disabled: false
language: en_US
tokens: [ 'my-secret-token' ]
weight: 1
display_error_messages: true
about:
website: https://example.com
wikidata_id: Q306656
official_api_documentation: https://example.com/api-doc
use_official_api: true
require_api_key: true
results: HTML
enable_http: false
enable_http2: false
retries: 1
retry_on_http_error: true # or 403 or [404, 429]
max_connections: 100
max_keepalive_connections: 10
keepalive_expiry: 5.0
proxies:
http:
- http://proxy1:8080
- http://proxy2:8080
https:
- http://proxy1:8080
- http://proxy2:8080
- socks5://user:password@proxy3:1080
- socks5h://user:password@proxy4:1080
``name`` :
Name that will be used across searx to define this engine. In settings, on
@ -297,10 +318,14 @@ Engine settings
by using the full ISO code of language and country, like ``fr_FR``, ``en_US``,
``de_DE``.
``tokens`` : optional
A list of secret tokens to make this engine *private*, more details see
:ref:`private engines`.
``weight`` : default ``1``
Weighting of the results of this engine.
``display_error_messages`` : default ``True``
``display_error_messages`` : default ``true``
When an engine returns an error, the message is displayed on the user interface.
``network``: optional
@ -320,7 +345,7 @@ Engine settings
use_default_settings
====================
.. sidebar:: ``use_default_settings: True``
.. sidebar:: ``use_default_settings: true``
- :ref:`settings location`
- :ref:`use_default_settings.yml`
@ -329,7 +354,7 @@ use_default_settings
The user defined ``settings.yml`` is loaded from the :ref:`settings location`
and can rely on the default configuration :origin:`searx/settings.yml` using:
``use_default_settings: True``
``use_default_settings: true``
``server:``
In the following example, the actual settings are the default settings defined
@ -338,22 +363,22 @@ and can relied on the default configuration :origin:`searx/settings.yml` using:
.. code-block:: yaml
use_default_settings: True
use_default_settings: true
server:
secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
secret_key: "ultrasecretkey" # change this!
bind_address: "0.0.0.0"
``engines:``
With ``use_default_settings: True``, each settings can be override in a
With ``use_default_settings: true``, each setting can be overridden in a
similar way, the ``engines`` section is merged according to the engine
``name``. In this example, searx will load all the engines and the arch linux
wiki engine has a :ref:`token<private engines>`:
wiki engine has a :ref:`token <private engines>`:
.. code-block:: yaml
use_default_settings: True
use_default_settings: true
server:
secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
secret_key: "ultrasecretkey" # change this!
engines:
- name: arch linux wiki
tokens: ['$ecretValue']
@ -370,7 +395,7 @@ and can relied on the default configuration :origin:`searx/settings.yml` using:
remove:
- google
server:
secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
secret_key: "ultrasecretkey" # change this!
engines:
- name: arch linux wiki
tokens: ['$ecretValue']
@ -387,7 +412,7 @@ and can relied on the default configuration :origin:`searx/settings.yml` using:
- google
- duckduckgo
server:
secret_key: "uvys6bRhKHUdFF5CqbJonSDSRN8H0sCBziNSrDGNVdpz7IeZhveVart3yvghoKHA"
secret_key: "ultrasecretkey" # change this!
engines:
- name: google
tokens: ['$ecretValue']

View file

@ -1,5 +1,7 @@
.. _sql engines:
===========
SQL engines
SQL Engines
===========
.. sidebar:: further read
@ -36,9 +38,10 @@ place the templates at::
searx/templates/{theme_name}/result_templates/{template_name}
As mentioned in previous blog posts, if you do not wish to expose these engines
on a public instance, you can still add them and limit the access by setting
``tokens`` as described in section :ref:`private engines`.
If you do not wish to expose these engines on a public instance, you can still
add them and limit the access by setting ``tokens`` as described in section
:ref:`private engines`.
Configure the engines
=====================
@ -58,6 +61,10 @@ returned results use the option ``limit``.
SQLite
------
.. sidebar:: info
- :origin:`sqlite.py <searx/engines/sqlite.py>`
.. _MediathekView: https://mediathekview.de/
SQLite is a small, fast and reliable SQL database engine. It does not require
@ -106,9 +113,10 @@ PostgreSQL
.. _psycopg2: https://www.psycopg.org/install
.. sidebar:: requirements
.. sidebar:: info
``pip install`` psycopg2_
- :origin:`postgresql.py <searx/engines/postgresql.py>`
- ``pip install`` psycopg2_
PostgreSQL is a powerful and robust open source database. Before configuring
the PostgreSQL engine, you must install the dependency ``psycopg2``. You can
@ -130,9 +138,10 @@ MySQL
.. _mysql-connector-python: https://pypi.org/project/mysql-connector-python
.. sidebar:: requirements
.. sidebar:: info
``pip install`` mysql-connector-python_
- :origin:`mysql_server.py <searx/engines/mysql_server.py>`
- ``pip install`` mysql-connector-python_
MySQL is said to be the most popular open source database. Before enabling MySQL
engine, you must install the package ``mysql-connector-python``.
@ -152,9 +161,9 @@ example configuration for quering a MySQL server:
query_str: 'SELECT * from my_table WHERE my_column=%(query)s'
Acknowledgement
===============
Acknowledgment
==============
This development was sponsored by `Search and Discovery Fund
<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_ .
<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_.

View file

@ -13,11 +13,10 @@ Administrator documentation
installation-apache
installation-docker
update-searx
settings
engines/index
api
architecture
filtron
morty
engines
plugins
buildhosts

View file

@ -1,43 +0,0 @@
=============================================================
Searx admin interface
=============================================================
.. _searx-admin: https://github.com/kvch/searx-admin#searx-admin
.. _NLnet Foundation: https://nlnet.nl/
manage your instance from your browser
.. sidebar:: Installation
Installation guide can be found in the repository of searx-admin_.
One of the biggest advantages of searx is being extremely customizable. But at
first it can be daunting to newcomers. A barrier of taking advantage of this
feature is our ugly settings file which is sometimes hard to understand and
edit.
To make self-hosting searx more accessible a new tool is introduced, called
``searx-admin``. It is a web application which is capable of managing your
instance and manipulating its settings via a web UI. It aims to replace editing
of ``settings.yml`` for less experienced administrators or people who prefer
graphical admin interfaces.
.. figure:: searx-admin-engines.png
:alt: Screenshot of engine list
Configuration page of engines
Since ``searx-admin`` acts as a supervisor for searx, we have decided to
implement it as a standalone tool instead of part of searx. Another reason for
making it a standalone tool is that the codebase and dependencies of searx
should not grow because of a fully optional feature, which does not affect
existing instances.
Acknowledgements
================
This development was sponsored by `NLnet Foundation`_.
| Happy hacking.
| kvch // 2017.08.22 21:25

View file

@ -1,65 +0,0 @@
========================================
Running shell commands to fetch results
========================================
Previously, with searx you could search over the Internet on other people's
computers. Now it is possible to fetch results from your local machine without
connecting to any networks from the same graphical user interface.
Command line engines
====================
In :pull-searx:`2128` a new type of engine has been introduced called ``command``.
This engine lets administrators add engines which run arbitrary shell commands
and show its output on the web UI of searx.
When creating and enabling a ``command`` engine on a public searx instance,
you must be careful to avoid leaking private data. The easiest solution
is to add tokens to the engine. Thus, only those who have the appropriate token
can retrieve results from it.
The engine base is flexible. Only your imagination can limit the power of this engine. (And
maybe security concerns.) The following options are available:
* ``command``: A comma separated list of the elements of the command. A special token {{QUERY}} tells searx where to put the search terms of the user. Example: ``['ls', '-l', '-h', '{{QUERY}}']``
* ``delimiter``: A dict containing a delimiter char and the "titles" of each element in keys.
* ``parse_regex``: A dict containing the regular expressions for each result key.
* ``query_type``: The expected type of user search terms. Possible values: ``path`` and ``enum``. ``path`` checks if the user provided path is inside the working directory. If not, the query is not executed. ``enum`` is a list of allowed search terms. If the user submits something which is not included in the list, the query returns an error.
* ``query_enum``: A list containing allowed search terms if ``query_type`` is set to ``enum``.
* ``working_dir``: The directory where the command has to be executed. Default: ``.``
* ``result_separator``: The character that separates results. Default: ``\n``
The example engine below can be used to find files with a specific name in the configured
working directory.
.. code:: yaml
- name: find
engine: command
command: ['find', '.', '-name', '{{QUERY}}']
query_type: path
shortcut: fnd
delimiter:
chars: ' '
keys: ['line']
Next steps
==========
In the next milestone, support for local search engines and indexers (e.g. Elasticsearch)
are going to be added. This way, you will be able to query your own databases/indexers.
Acknowledgement
===============
This development was sponsored by `Search and Discovery Fund`_ of `NLnet Foundation`_ .
.. _Search and Discovery Fund: https://nlnet.nl/discovery
.. _NLnet Foundation: https://nlnet.nl/
| Happy hacking.
| kvch // 2020.09.28 21:26

View file

@ -1,16 +0,0 @@
====
Blog
====
.. toctree::
:maxdepth: 2
:caption: Contents
lxcdev-202006
python3
admin
intro-offline
private-engines
command-line-engines
search-indexer-engines
sql-engines

View file

@ -1,77 +0,0 @@
===============================
Preparation for offline engines
===============================
Offline engines
===============
To extend the functionality of searx, offline engines are going to be
introduced. An offline engine is an engine which does not need Internet
connection to perform a search and does not use HTTP to communicate.
Offline engines can be configured as online engines, by adding those to the
`engines` list of :origin:`settings.yml <searx/settings.yml>`. Thus, searx
finds the engine file and imports it.
Example skeleton for the new engines:
.. code:: python
from subprocess import PIPE, Popen
categories = ['general']
offline = True
def init(settings):
pass
def search(query, params):
process = Popen(['ls', query], stdout=PIPE)
return_code = process.wait()
if return_code != 0:
raise RuntimeError('non-zero return code', return_code)
results = []
line = process.stdout.readline()
while line:
result = parse_line(line)
results.append(results)
line = process.stdout.readline()
return results
Development progress
====================
First, a proposal has been created as a Github issue. Then it was moved to the
wiki as a design document. You can read it here: :wiki:`Offline-engines`.
In this development step, searx core was prepared to accept and perform offline
searches. Offline search requests are scheduled together with regular online
requests.
As offline searches can return arbitrary results depending on the engine, the
current result templates were insufficient to present such results. Thus, a new
template is introduced which is capable of presenting arbitrary key value pairs
as a table. For more details, see the pull request
:pull-searx:`1700`.
Next steps
==========
Today, it is possible to create/run an offline engine. However, it is going to be publicly available for everyone who knows the searx instance. So the next step is to introduce token based access for engines. This way administrators are able to limit the access to private engines.
Acknowledgement
===============
This development was sponsored by `Search and Discovery Fund`_ of `NLnet Foundation`_ .
.. _Search and Discovery Fund: https://nlnet.nl/discovery
.. _NLnet Foundation: https://nlnet.nl/
| Happy hacking.
| kvch // 2019.10.21 17:03

View file

@ -1,65 +0,0 @@
==================================
Limit access to your searx engines
==================================
Administrators might find themselves wanting to limit access to some of the
enabled engines on their instances. It might be because they do not want to
expose some private information through an offline engine. Or they
would rather share engines only with their trusted friends or colleagues.
.. _private engines:
Private engines
===============
To solve this issue private engines were introduced in :pull-searx:`1823`.
A new option was added to engines named `tokens`. It expects a list
of strings. If the user making a request presents one of the tokens
of an engine, they can access information about the engine
and make search requests.
Example configuration to restrict access to the Arch Linux Wiki engine:
.. code:: yaml
- name : arch linux wiki
engine : archlinux
shortcut : al
tokens : [ 'my-secret-token' ]
Unless a user has configured the right token, the engine is going
to be hidden from him/her. It is not going to be included in the
list of engines on the Preferences page and in the output of
`/config` REST API call.
Tokens can be added to one's configuration on the Preferences page
under "Engine tokens". The input expects a comma separated list of
strings.
The distribution of the tokens from the administrator to the users
is not carved in stone. As providing access to such engines
implies that the admin knows and trusts the user, we do not see
necessary to come up with a strict process. Instead,
we would like to add guidelines to the documentation of the feature.
Next steps
==========
Now that searx has support for both offline engines and private engines,
it is possible to add concrete engines which benefit from these features.
For example engines which search on the local host running the instance.
Be it searching your file system or querying a private database. Be creative
and come up with new solutions which fit your use case.
Acknowledgement
===============
This development was sponsored by `Search and Discovery Fund`_ of `NLnet Foundation`_ .
.. _Search and Discovery Fund: https://nlnet.nl/discovery
.. _NLnet Foundation: https://nlnet.nl/
| Happy hacking.
| kvch // 2020.02.28 22:26

View file

@ -1,65 +0,0 @@
============================
Introducing Python 3 support
============================
.. _Python 2.7 clock: https://pythonclock.org/
.. sidebar:: Python 2.7 to 3 upgrade
This chapter exists for historical reasons. The Python 2.7 release schedule ends
(`Python 2.7 clock`_) after Python 3 has existed for 11 years.
As most operating systems come with Python3 installed by default, it is
time for searx to support Python3. But don't worry, support of Python2.7 won't be
dropped.
.. image:: searxpy3.png
:scale: 50 %
:alt: hurray
:align: center
How to run searx using Python 3
===============================
Please make sure that you run at least Python 3.5.
To run searx, first a Python3 virtualenv should be created. After entering the
virtualenv, dependencies and searx must be installed. Then run searx from the
command line.
.. code:: sh
python3 -m venv venv3
source venv3/bin/activate
pip install -U pip setuptools wheel pyyaml
pip install -e .
searx-run
Fun facts
=========
- 115 files were changed when implementing the support for both Python versions.
- All of the dependencies were compatible except for the robotframework used for
browser tests. Thus, these tests were migrated to splinter. So from now on
both versions are being tested on Travis and can be tested locally.
If you found bugs
=================
Please open an issue on `GitHub`_. Make sure that you mention your Python
version in your issue, so we can investigate it properly.
.. _GitHub: https://github.com/searxng/searxng/issues
Acknowledgment
==============
This development was sponsored by `NLnet Foundation`_.
.. _NLnet Foundation: https://nlnet.nl/
| Happy hacking.
| kvch // 2017.05.13 22:57

View file

@ -1,114 +0,0 @@
===============================
Query your local search engines
===============================
From now on, searx lets you query your locally running search engines. The following
ones are supported now:
* `Elasticsearch`_
* `Meilisearch`_
* `Solr`_
All of the engines above are added to ``settings.yml`` just commented out, as you have to
set ``base_url`` for all of them.
Please note that if you are not using HTTPS to access these engines, you have to enable
HTTP requests by setting ``enable_http`` to ``True``.
Furthermore, if you do not want to expose these engines on a public instance, you can
still add them and limit the access by setting ``tokens`` as described in the `blog post about
private engines`_.
Configuring searx for search engines
====================================
Each search engine is powerful, capable of full-text search.
Elasticsearch
-------------
Elasticsearch supports numerous ways to query the data it is storing. At the moment
the engine supports the most popular search methods: ``match``, ``simple_query_string``, ``term`` and ``terms``.
If none of the methods fit your use case, you can select ``custom`` query type and provide the JSON payload
searx has to submit to Elasticsearch in ``custom_query_json``.
The following is an example configuration for an Elasticsearch instance with authentication
configured to read from ``my-index`` index.
.. code:: yaml
- name : elasticsearch
shortcut : es
engine : elasticsearch
base_url : http://localhost:9200
username : elastic
password : changeme
index : my-index
query_type : match
enable_http : True
Meilisearch
-----------
This search engine is aimed at individuals and small companies. It is designed for
small-scale (less than 10 million documents) data collections. E.g. it is great for storing
web pages you have visited and searching in the contents later.
The engine supports faceted search, so you can search in a subset of documents of the collection.
Furthermore, you can search in Meilisearch instances that require authentication by setting ``auth_token``.
Here is a simple example to query a Meilisearch instance:
.. code:: yaml
- name : meilisearch
engine : meilisearch
shortcut: mes
base_url : http://localhost:7700
index : my-index
enable_http: True
Solr
----
Solr is a popular search engine based on Lucene, just like Elasticsearch.
But instead of searching in indices, you can search in collections.
This is an example configuration for searching in the collection ``my-collection`` and get
the results in ascending order.
.. code:: yaml
- name : solr
engine : solr
shortcut : slr
base_url : http://localhost:8983
collection : my-collection
sort : asc
enable_http : True
Next steps
==========
The next step is to add support for various SQL databases.
Acknowledgement
===============
This development was sponsored by `Search and Discovery Fund`_ of `NLnet Foundation`_ .
.. _blog post about private engines: private-engines.html#private-engines
.. _Elasticsearch: https://www.elastic.co/elasticsearch/
.. _Meilisearch: https://www.meilisearch.com/
.. _Solr: https://solr.apache.org/
.. _Search and Discovery Fund: https://nlnet.nl/discovery
.. _NLnet Foundation: https://nlnet.nl/
| Happy hacking.
| kvch // 2021.04.07 23:16

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

View file

@ -58,11 +58,11 @@ extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '')
#extlinks['role'] = (
# 'https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-%s', '')
extlinks['duref'] = (
'https://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#%s', '')
'https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#%s', '')
extlinks['durole'] = (
'https://docutils.sourceforge.net/docs/ref/rst/roles.html#%s', '')
'https://docutils.sourceforge.io/docs/ref/rst/roles.html#%s', '')
extlinks['dudir'] = (
'https://docutils.sourceforge.net/docs/ref/rst/directives.html#%s', '')
'https://docutils.sourceforge.io/docs/ref/rst/directives.html#%s', '')
extlinks['ctan'] = (
'https://ctan.org/pkg/%s', 'CTAN: ')

View file

@ -1,12 +1,20 @@
.. _engines-dev:
===============
Engine overview
Engine Overview
===============
.. _metasearch-engine: https://en.wikipedia.org/wiki/Metasearch_engine
.. sidebar:: Further reading ..
- :ref:`configured engines`
- :ref:`settings engine`
.. contents::
:depth: 3
:backlinks: entry
searx is a metasearch-engine_, so it uses different search engines to provide
better results.
@ -14,297 +22,296 @@ Because there is no general search API which could be used for every search
engine, an adapter has to be built between searx and the external search
engines. Adapters are stored under the folder :origin:`searx/engines`.
.. contents::
:depth: 3
:backlinks: entry
.. _general engine configuration:
general engine configuration
General Engine Configuration
============================
It is required to tell searx the type of results the engine provides. The
arguments can be set in the engine file or in the settings file
(normally ``settings.yml``). The arguments in the settings file override
the ones in the engine file.
arguments can be set in the engine file or in the settings file (normally
``settings.yml``). The arguments in the settings file override the ones in the
engine file.
It does not matter if an option is stored in the engine file or in the
settings. However, the standard way is the following:
It does not matter if an option is stored in the engine file or in the settings.
However, the standard way is the following:
.. _engine file:
engine file
Engine File
-----------
======================= =========== ========================================================
argument type information
======================= =========== ========================================================
categories list pages, in which the engine is working
paging boolean support multiple pages
time_range_support boolean support search time range
engine_type str ``online`` by default, other possibles values are
``offline``, ``online_dictionnary``, ``online_currency``
======================= =========== ========================================================
.. table:: Common options in the engine module
:width: 100%
======================= =========== ========================================================
argument type information
======================= =========== ========================================================
categories list pages, in which the engine is working
paging boolean support multiple pages
time_range_support boolean support search time range
engine_type str - ``online`` :ref:`[ref] <demo online engine>` by
default, other possible values are:
- ``offline`` :ref:`[ref] <offline engines>`
- ``online_dictionary``
- ``online_currency``
======================= =========== ========================================================
.. _engine settings:
settings.yml
------------
Engine ``settings.yml``
-----------------------
======================= =========== =============================================
argument type information
======================= =========== =============================================
name string name of search-engine
engine string name of searx-engine
(filename without ``.py``)
enable_http bool enable HTTP
(by default only HTTPS is enabled).
shortcut string shortcut of search-engine
timeout string specific timeout for search-engine
display_error_messages boolean display error messages on the web UI
proxies dict set proxies for a specific engine
For a more detailed description, see :ref:`settings engine` in the :ref:`settings.yml`.
.. table:: Common options in the engine setup (``settings.yml``)
:width: 100%
======================= =========== ===============================================
argument type information
======================= =========== ===============================================
name string name of search-engine
engine string name of searx-engine (filename without ``.py``)
enable_http bool enable HTTP (by default only HTTPS is enabled).
shortcut string shortcut of search-engine
timeout string specific timeout for search-engine
display_error_messages boolean display error messages on the web UI
proxies dict set proxies for a specific engine
(e.g. ``proxies : {http: socks5://proxy:port,
https: socks5://proxy:port}``)
======================= =========== =============================================
======================= =========== ===============================================
.. _engine overrides:
overrides
Overrides
---------
A few of the options have default values in the engine, but are often
overwritten by the settings. If ``None`` is assigned to an option in the engine
file, it has to be redefined in the settings, otherwise searx will not start
with that engine.
.. sidebar:: engine's global names
The naming of overrides is arbitrary. But the recommended overrides are the
following:
Global names with a leading underline are *private to the engine* and will
not be overwritten.
======================= =========== ===========================================
argument type information
======================= =========== ===========================================
base_url string base-url, can be overwritten to use same
engine on other URL
number_of_results int maximum number of results per request
language string ISO code of language and country like en_US
api_key string api-key if required by engine
======================= =========== ===========================================
A few of the options have default values in the namespace of the engine's Python
module, but are often overwritten by the settings. If ``None`` is assigned to an
option in the engine file, it has to be redefined in the settings, otherwise
searx will not start with that engine.
example code
------------
Here is a very simple example of the global names in the namespace of engine's
module:
.. code:: python
# engine dependent config
categories = ['general']
paging = True
_non_overwritten_global = 'foo'
.. table:: The naming of overrides is arbitrary / recommended overrides are:
:width: 100%
======================= =========== ===========================================
argument type information
======================= =========== ===========================================
base_url string base-url, can be overwritten to use same
engine on other URL
number_of_results int maximum number of results per request
language string ISO code of language and country like en_US
api_key string api-key if required by engine
======================= =========== ===========================================
.. _engine request:
making a request
Making a Request
================
To perform a search, a URL has to be specified. In addition to specifying a
URL, arguments can be passed to the query.
passed arguments
----------------
.. _engine request arguments:
Passed Arguments (request)
--------------------------
These arguments can be used to construct the search query. Furthermore,
parameters with default value can be redefined for special purposes.
If the ``engine_type`` is ``online``:
====================== ============== ========================================================================
argument type default-value, information
====================== ============== ========================================================================
url str ``''``
method str ``'GET'``
headers set ``{}``
data set ``{}``
cookies set ``{}``
verify bool ``True``
headers.User-Agent str a random User-Agent
category str current category, like ``'general'``
safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict)
time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year``
pageno int current pagenumber
language str specific language code like ``'en_US'``, or ``'all'`` if unspecified
====================== ============== ========================================================================
.. table:: If the ``engine_type`` is ``online``
:width: 100%
====================== ============== ========================================================================
argument type default-value, information
====================== ============== ========================================================================
url str ``''``
method str ``'GET'``
headers set ``{}``
data set ``{}``
cookies set ``{}``
verify bool ``True``
headers.User-Agent str a random User-Agent
category str current category, like ``'general'``
safesearch int ``0``, between ``0`` and ``2`` (normal, moderate, strict)
time_range Optional[str] ``None``, can be ``day``, ``week``, ``month``, ``year``
pageno int current pagenumber
language str specific language code like ``'en_US'``, or ``'all'`` if unspecified
====================== ============== ========================================================================
If the ``engine_type`` is ``online_dictionnary``, in addition to the ``online`` arguments:
.. table:: If the ``engine_type`` is ``online_dictionary``, in addition to the
``online`` arguments:
:width: 100%
====================== ============ ========================================================================
argument type default-value, information
====================== ============ ========================================================================
from_lang str specific language code like ``'en_US'``
to_lang str specific language code like ``'en_US'``
query str the text query without the languages
====================== ============ ========================================================================
====================== ============== ========================================================================
argument type default-value, information
====================== ============== ========================================================================
from_lang str specific language code like ``'en_US'``
to_lang str specific language code like ``'en_US'``
query str the text query without the languages
====================== ============== ========================================================================
If the ``engine_type`` is ``online_currency``, in addition to the ``online`` arguments:
.. table:: If the ``engine_type`` is ``online_currency``, in addition to the
``online`` arguments:
:width: 100%
====================== ============ ========================================================================
argument type default-value, information
====================== ============ ========================================================================
amount float the amount to convert
from str ISO 4217 code
to str ISO 4217 code
from_name str currency name
to_name str currency name
====================== ============ ========================================================================
====================== ============== ========================================================================
argument type default-value, information
====================== ============== ========================================================================
amount float the amount to convert
from str ISO 4217 code
to str ISO 4217 code
from_name str currency name
to_name str currency name
====================== ============== ========================================================================
parsed arguments
----------------
Specify Request
---------------
The function ``def request(query, params):`` always returns the ``params``
variable. Inside searx, the following parameters can be used to specify a search
request:
The function :py:func:`def request(query, params):
<searx.engines.demo_online.request>` always returns the ``params`` variable, the
following parameters can be used to specify a search request:
=================== =========== ==========================================================================
argument type information
=================== =========== ==========================================================================
url str requested url
method str HTTP request method
headers set HTTP header information
data set HTTP data information
cookies set HTTP cookies
verify bool Performing SSL-Validity check
allow_redirects bool Follow redirects
max_redirects int maximum redirects, hard limit
soft_max_redirects int maximum redirects, soft limit. Record an error but don't stop the engine
raise_for_httperror bool True by default: raise an exception if the HTTP code of response is >= 300
=================== =========== ==========================================================================
.. table::
:width: 100%
example code
------------
.. code:: python
# search-url
base_url = 'https://example.com/'
search_string = 'search?{query}&page={page}'
# do search-request
def request(query, params):
search_path = search_string.format(
query=urlencode({'q': query}),
page=params['pageno'])
params['url'] = base_url + search_path
return params
=================== =========== ==========================================================================
argument type information
=================== =========== ==========================================================================
url str requested url
method str HTTP request method
headers set HTTP header information
data set HTTP data information
cookies set HTTP cookies
verify bool Performing SSL-Validity check
allow_redirects bool Follow redirects
max_redirects int maximum redirects, hard limit
soft_max_redirects int maximum redirects, soft limit. Record an error but don't stop the engine
raise_for_httperror bool True by default: raise an exception if the HTTP code of response is >= 300
=================== =========== ==========================================================================
.. _engine results:
.. _engine media types:
returned results
================
Media Types
===========
Searx is able to return results of different media-types. Currently the
following media-types are supported:
Each result item of an engine can be of different media-types. Currently the
following media-types are supported. To set another media-type as ``default``,
the parameter ``template`` must be set to the desired type.
- default_
- images_
- videos_
- torrent_
- map_
.. table:: Parameter of the **default** media type:
:width: 100%
To set another media-type as default, the parameter ``template`` must be set to
the desired type.
========================= =====================================================
result-parameter information
========================= =====================================================
url string, url of the result
title string, title of the result
content string, general result-text
publishedDate :py:class:`datetime.datetime`, time of publish
========================= =====================================================
default
-------
========================= =====================================================
result-parameter information
========================= =====================================================
url string, url of the result
title string, title of the result
content string, general result-text
publishedDate :py:class:`datetime.datetime`, time of publish
========================= =====================================================
.. table:: Parameter of the **images** media type:
:width: 100%
images
------
To use this template, the parameter:
========================= =====================================================
result-parameter information
========================= =====================================================
template is set to ``images.html``
url string, url to the result site
title string, title of the result *(partly implemented)*
content *(partly implemented)*
publishedDate :py:class:`datetime.datetime`,
========================= =====================================================
result-parameter information
------------------------- -----------------------------------------------------
template is set to ``images.html``
========================= =====================================================
url string, url to the result site
title string, title of the result *(partly implemented)*
content *(partly implemented)*
publishedDate :py:class:`datetime.datetime`,
time of publish *(partly implemented)*
img\_src string, url to the result image
thumbnail\_src string, url to a small-preview image
========================= =====================================================
img\_src string, url to the result image
thumbnail\_src string, url to a small-preview image
========================= =====================================================
videos
------
========================= =====================================================
result-parameter information
========================= =====================================================
template is set to ``videos.html``
url string, url of the result
title string, title of the result
content *(not implemented yet)*
publishedDate :py:class:`datetime.datetime`, time of publish
thumbnail string, url to a small-preview image
========================= =====================================================
.. table:: Parameter of the **videos** media type:
:width: 100%
torrent
-------
========================= =====================================================
result-parameter information
------------------------- -----------------------------------------------------
template is set to ``videos.html``
========================= =====================================================
url string, url of the result
title string, title of the result
content *(not implemented yet)*
publishedDate :py:class:`datetime.datetime`, time of publish
thumbnail string, url to a small-preview image
========================= =====================================================
.. _magnetlink: https://en.wikipedia.org/wiki/Magnet_URI_scheme
========================= =====================================================
result-parameter information
========================= =====================================================
template is set to ``torrent.html``
url string, url of the result
title string, title of the result
content string, general result-text
publishedDate :py:class:`datetime.datetime`,
.. table:: Parameter of the **torrent** media type:
:width: 100%
========================= =====================================================
result-parameter information
------------------------- -----------------------------------------------------
template is set to ``torrent.html``
========================= =====================================================
url string, url of the result
title string, title of the result
content string, general result-text
publishedDate :py:class:`datetime.datetime`,
time of publish *(not implemented yet)*
seed int, number of seeder
leech int, number of leecher
filesize int, size of file in bytes
files int, number of files
magnetlink string, magnetlink_ of the result
torrentfile string, torrentfile of the result
========================= =====================================================
seed int, number of seeder
leech int, number of leecher
filesize int, size of file in bytes
files int, number of files
magnetlink string, magnetlink_ of the result
torrentfile string, torrentfile of the result
========================= =====================================================
.. table:: Parameter of the **map** media type:
:width: 100%
map
---
========================= =====================================================
result-parameter information
========================= =====================================================
url string, url of the result
title string, title of the result
content string, general result-text
publishedDate :py:class:`datetime.datetime`, time of publish
latitude latitude of result (in decimal format)
longitude longitude of result (in decimal format)
boundingbox boundingbox of result (array of 4. values
========================= =====================================================
result-parameter information
------------------------- -----------------------------------------------------
template is set to ``map.html``
========================= =====================================================
url string, url of the result
title string, title of the result
content string, general result-text
publishedDate :py:class:`datetime.datetime`, time of publish
latitude latitude of result (in decimal format)
longitude longitude of result (in decimal format)
boundingbox boundingbox of result (array of 4 values
``[lat-min, lat-max, lon-min, lon-max]``)
geojson geojson of result (https://geojson.org/)
osm.type type of osm-object (if OSM-Result)
osm.id id of osm-object (if OSM-Result)
address.name name of object
address.road street name of object
address.house_number house number of object
address.locality city, place of object
address.postcode postcode of object
address.country country of object
========================= =====================================================
geojson geojson of result (https://geojson.org/)
osm.type type of osm-object (if OSM-Result)
osm.id id of osm-object (if OSM-Result)
address.name name of object
address.road street name of object
address.house_number house number of object
address.locality city, place of object
address.postcode postcode of object
address.country country of object
========================= =====================================================

View file

@ -9,8 +9,10 @@ Developer documentation
quickstart
contribution_guide
engine_overview
offline_engines
search_api
plugins
translation
lxcdev
makefile
reST

View file

@ -1,48 +1,48 @@
.. _blog-lxcdev-202006:
.. _lxcdev:
=======================================
Developing in Linux containers [202006]
=======================================
==============================
Developing in Linux Containers
==============================
.. _LXC: https://linuxcontainers.org/lxc/introduction/
In this article we will show, how you can make use of Linux Containers (LXC_) in
*distributed and heterogeneous development cycles* (TL;DR; jump to the
:ref:`lxcdev summary`).
.. sidebar:: Audience
This blog post is written for experienced admins and developers / readers
should have a serious meaning about: *distributed*, *merge* and *linux
container*.
This blog post is written for experienced admins and developers. Readers
should have a solid understanding of the terms: *distributed*, *merge* and
*linux container*.
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
In PR :PR:`1803` we added a lot of scripts to Searx's boilerplate. In this blog
post I will show you, how you can make use of them in *distributed and
heterogeneous development cycles* (TL;DR; jump to the :ref:`blog-lxcdev-202006
abstract`).
Motivation
==========
Normally in our development cycle, we edit the sources and run some test and/or
builds by using ``make`` before we commit. This cycle is simple and perfect but
might fail in some aspects we should not overlook.
Usually in our development cycle, we edit the sources and run some test and/or
builds by using ``make`` :ref:`[ref] <makefile>` before we commit. This cycle
is simple and perfect but might fail in some aspects we should not overlook.
The environment in which we run all our development processes matters!
**The environment in which we run all our development processes matters!**
The :ref:`makefile` and the :ref:`make install` encapsulate a lot for us, but they
do not have access to all prerequisites. For example, there may be
The :ref:`makefile` and the :ref:`make install` encapsulate a lot for us, but
they do not have access to all prerequisites. For example, there may be
dependencies on packages that are installed on the developer's desktop, but
usually are not preinstalled on a server or client system. Another examples
are; settings have been made to the software on the developer's host that would
never be set on a *production* system.
usually are not preinstalled on a server or client system. Another example is;
settings have been made to the software on developer's desktop that would never
be set on a *production* system.
*Linux Containers* (LXC_) are isolate environments and not to mix up on
developer's all the prerequisites of all the projects he contribute to, is
always a good choice.
**Linux Containers are isolated environments, and not mixing up all the
prerequisites from various projects on the developer's desktop is always a good
choice.**
The scripts from PR :PR:`1803` can divide in those to install and maintain
The scripts from :ref:`searx_utils` can be divided into those that install and maintain
software:
- :ref:`searx.sh`
@ -50,8 +50,10 @@ software:
- :ref:`morty.sh`
and the script :ref:`lxc.sh`, with which we can scale our installation, maintenance or
even development tasks over a stack of containers, what we call: *Searx's lxc
suite*.
even development tasks over a stack of isolated containers / what we call the:
**searxNG LXC suite**
Gentlemen, start your engines!
==============================
@ -141,18 +143,18 @@ and once for the content sanitizer (content proxy morty):
...
INFO: got 200 from http://10.174.184.156/morty/
.. sidebar:: Fully functional searx suite
.. sidebar:: Fully functional searXNG suite
From here on you have a fully functional searx suite running with bot blocker
(filtron) and Web content sanitizer (content proxy morty) needed for a
*privacy protecting* search engine.
From here on you have a fully functional searXNG suite running with bot
blocker (filtron) and WEB content sanitizer (content proxy morty), both are
needed for a *privacy protecting* search engine.
On your system, the IP of your ``searx-archlinux`` container differs from
http://10.174.184.156/searx, just open the URL reported in your installation
protocol in your WEB browser from the desktop to test the instance from outside
of the container.
In such a searx suite admins can maintain and access the debug log of the
In such a searXNG suite admins can maintain and access the debug log of the
different services quite easily.
.. _working in containers:
@ -176,7 +178,7 @@ searx-archlinux``:
/share/searx
The prompt ``[root@searx-archlinux ...]`` signals, that you are the root user in
the searx-container. To debug the running searx instance use:
the searx-container. To debug the running searXNG instance use:
.. tabs::
@ -192,8 +194,8 @@ the searx-container. To debug the running searx instance use:
Back in the browser on your desktop open the service http://10.174.184.156/searx
and run your application tests while the debug log is shown in the terminal from
above. You can stop monitoring using ``CTRL-C``, this also disables the *"debug
option"* in searx's settings file and restarts the searx uwsgi application. To
debug services from filtron and morty analogous use:
option"* in searXNG's settings file and restarts the searXNG uwsgi application.
To debug services from filtron and morty analogously, use:
.. tabs::
@ -250,18 +252,18 @@ user ``searx`` in the ``searx-archlinux`` container and the python *virtualenv*
Wrap production into developer suite
====================================
In this section we will see how to change the *"Fully functional searx suite"*
In this section we will see how to change the *"Fully functional searXNG suite"*
from a LXC container (which is quite ready for production) into a developer
suite. For this, we have to keep an eye on the :ref:`installation basic`:
- searx setup in: ``/etc/searx/settings.yml``
- searx user's home: ``/usr/local/searx``
- searXNG setup in: ``/etc/searx/settings.yml``
- searXNG user's home: ``/usr/local/searx``
- virtualenv in: ``/usr/local/searx/searx-pyenv``
- searx software in: ``/usr/local/searx/searx-src``
- searXNG software in: ``/usr/local/searx/searx-src``
The searx software is a clone of the ``git_url`` (see :ref:`settings global`) and
the working tree is checked out from the ``git_branch``. With the use of the
:ref:`searx.sh` the searx service was installed as :ref:`uWSGI application
The searXNG software is a clone of the ``git_url`` (see :ref:`settings global`)
and the working tree is checked out from the ``git_branch``. With the use of
the :ref:`searx.sh` the searx service was installed as :ref:`uWSGI application
<searx uwsgi>`. To maintain this service, we can use ``systemctl`` (compare
:ref:`service architectures on distributions <uwsgi configuration>`).
@ -292,7 +294,7 @@ least you should attend the settings of ``uid``, ``chdir``, ``env`` and
If you have read the :ref:`"Good to know section" <lxc.sh>` you remember, that
each container shares the root folder of the repository and the command
``utils/lxc.sh cmd`` handles relative path names **transparent**. To wrap the
searx installation into a developer one, we simple have to create a smylink to
searXNG installation into a developer one, we simply have to create a symlink to
the **transparent** repository from the desktop. Now let's replace the
repository at ``searx-src`` in the container with the working tree from outside
of the container:
@ -330,7 +332,7 @@ daily usage:
.. group-tab:: desktop
To *inspect* the searx instance (already described above):
To *inspect* the searXNG instance (already described above):
.. code:: sh
@ -358,12 +360,12 @@ daily usage:
$ sudo -H ./utils/lxc.sh cmd searx-archlinux \
make docs.html
.. _blog-lxcdev-202006 abstract:
.. _lxcdev summary:
Abstract
========
Summary
=======
We build up a fully functional searx suite in a archlinux container:
We build up a fully functional searXNG suite in an archlinux container:
.. code:: sh
@ -395,7 +397,8 @@ the container :
$ ln -s /share/searx/ /usr/local/searx/searx-src
$ systemctl restart uwsgi@searx
To get remarks from the suite of the archlinux container we can use:
To get information about the searxNG suite in the archlinux container we can
use:
.. tabs::

View file

@ -29,8 +29,8 @@ Calling the ``help`` target gives a first overview (``make help``):
.. _make install:
Python environment
==================
Python Environment (``make install``)
=====================================
.. sidebar:: activate environment

View file

@ -0,0 +1,78 @@
.. _offline engines:
===============
Offline Engines
===============
.. sidebar:: offline engines
- :ref:`demo offline engine`
- :ref:`sql engines`
- :ref:`engine command`
- :origin:`Redis <searx/engines/redis_server.py>`
To extend the functionality of SearxNG, offline engines are going to be
introduced. An offline engine is an engine which does not need Internet
connection to perform a search and does not use HTTP to communicate.
Offline engines can be configured, by adding those to the `engines` list of
:origin:`settings.yml <searx/settings.yml>`. An example skeleton for offline
engines can be found in :ref:`demo offline engine` (:origin:`demo_offline.py
<searx/engines/demo_offline.py>`).
Programming Interface
=====================
:py:func:`init(engine_settings=None) <searx.engines.demo_offline.init>`
All offline engines can have their own init function to setup the engine before
accepting requests. The function gets the settings from settings.yml as a
parameter. This function can be omitted, if there is no need to setup anything
in advance.
:py:func:`search(query, params) <searx.engines.demo_offline.search>`
Each offline engine has a function named ``search``. This function is
responsible to perform a search and return the results in a presentable
format. (Where *presentable* means presentable by the selected result
template.)
The return value is a list of results retrieved by the engine.
Engine representation in ``/config``
If an engine is offline, the attribute ``offline`` is set to ``True``.
.. _offline requirements:
Extra Dependencies
==================
If an offline engine depends on an external tool, SearxNG does not install it by
default. When an administrator configures such an engine and starts the instance,
the process returns an error with the list of missing dependencies. Also,
required dependencies will be added to the comment/description of the engine, so
admins can install packages in advance.
If there is a need to install additional packages in *Python's Virtual
Environment* of your SearxNG instance you need to switch into the environment
(:ref:`searx-src`) first, for this you can use :ref:`searx.sh`::
$ sudo utils/searx.sh shell
(searx-pyenv)$ pip install ...
Private engines (Security)
==========================
To limit the access to offline engines, if an instance is available publicly,
administrators can set token(s) for each of the :ref:`private engines`. If a
query contains a valid token, then SearxNG performs the requested private
search. If not, requests to offline engines return errors.
Acknowledgement
===============
This development was sponsored by `Search and Discovery Fund
<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_ .

View file

@ -1281,10 +1281,10 @@ Templating
Templating is suitable for documentation which is created generic at the build
time. The sphinx-jinja_ extension evaluates jinja_ templates in the :ref:`make
install` (with searx modules installed). We use this e.g. to build chapter:
:ref:`engines generic`. Below the jinja directive from the
:ref:`configured engines`. Below the jinja directive from the
:origin:`docs/admin/engines/configured_engines.rst` is shown:
.. literalinclude:: ../admin/engines.rst
.. literalinclude:: ../admin/engines/configured_engines.rst
:language: reST
:start-after: .. _configured engines:

View file

@ -20,7 +20,7 @@ Parameters
- :ref:`engines-dev`
- :ref:`settings.yml`
- :ref:`engines generic`
- :ref:`configured engines`
``q`` : required
The search query. This string is passed to external search services. Thus,

View file

@ -34,7 +34,6 @@ anyone, you can set up your own, see :ref:`installation`.
dev/index
searx_extra/index
utils/index
blog/index
src/index
.. _Searx-instances: https://searx.space

View file

@ -0,0 +1,9 @@
.. _demo offline engine:
===================
Demo Offline Engine
===================
.. automodule:: searx.engines.demo_offline
:members:

View file

@ -0,0 +1,9 @@
.. _demo online engine:
==================
Demo Online Engine
==================
.. automodule:: searx.engines.demo_online
:members:

View file

@ -1,9 +0,0 @@
.. _xpath_engine:
================
The XPath engine
================
.. automodule:: searx.engines.xpath
:members:

View file

@ -0,0 +1,74 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Within this module we implement a *demo offline engine*. Do not look too
close to the implementation, it's just a simple example. To make use of this
*demo* engine add the following entry to your engines list in ``settings.yml``:
.. code:: yaml
- name: my offline engine
engine: demo_offline
shortcut: demo
disabled: false
"""
import json
# --- engine settings of the demo offline engine ---
engine_type = "offline"
categories = ["general"]
disabled = True
timeout = 2.0

# --- static description ("about") of this engine ---
about = {
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

# if there is a need for globals, use a leading underline
_my_offline_engine = None
def init(engine_settings=None):
    """Initialization of the (offline) engine.

    The origin of this demo engine is a simple JSON string which is built in
    this example while the engine is initialized.  The first item carries the
    engine's ``name`` taken from the settings, followed by three static items.

    :param engine_settings: dict-like settings of the engine; only the key
        ``name`` is read.  ``None`` (the default) is treated like empty
        settings.
    """
    global _my_offline_engine  # pylint: disable=global-statement

    # The signature allows calling init() without settings, so do not fail on
    # ``None.get(...)``.
    name = (engine_settings or {}).get('name')

    # Serialize with json.dumps instead of string interpolation: a name that
    # contains quotes or backslashes would otherwise produce invalid JSON and
    # break json.loads() in search().  str() keeps the former '%s' rendering
    # (e.g. a missing name is still shown as "None").
    _my_offline_engine = json.dumps([
        {"value": str(name)},
        {"value": "first item"},
        {"value": "second item"},
        {"value": "third item"},
    ])
def search(query, request_params):
    """Query the (offline) engine and return the list of results.

    The rows are decoded from the module level JSON string
    ``_my_offline_engine`` and each row is turned into one result item.  In
    this demo engine the ``query`` term is not used for filtering, it is only
    echoed in every result; usually you would pass the ``query`` term to your
    local engine to filter out the results.

    :param query: the search term
    :param request_params: mapping of request parameters; the key
        ``language`` is read for every row
    :returns: list of result dicts
    """
    return [
        {
            'query': query,
            'language': request_params['language'],
            'value': item.get("value"),
            # choose a result template or comment out to use the *default*
            'template': 'key-value.html',
        }
        for item in json.loads(_my_offline_engine)
    ]

View file

@ -0,0 +1,92 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Within this module we implement a *demo online engine*. Do not look too
close to the implementation, it's just a simple example which queries `The Art
Institute of Chicago <https://www.artic.edu>`_.
To make use of this *demo* engine add the following entry to your engines
list in ``settings.yml``:
.. code:: yaml
- name: my online engine
engine: demo_online
shortcut: demo
disabled: false
"""
from json import loads
from urllib.parse import urlencode
# This module implements request() / response() and is documented as the demo
# *online* engine, so the engine type has to be 'online' (not 'offline').
engine_type = 'online'
# Only one assignment of ``categories``: the value that was effective before
# (the last one assigned) is kept.
categories = ['images']
disabled = True
timeout = 2.0
paging = True
# number of items requested per page (sent as ``limit`` in request())
page_size = 20

# base URLs used by request() and response()
search_api = 'https://api.artic.edu/api/v1/artworks/search?'
image_api = 'https://www.artic.edu/iiif/2/'

about = {
    "website": 'https://www.artic.edu',
    "wikidata_id": 'Q239303',
    "official_api_documentation": 'http://api.artic.edu/docs/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# if there is a need for globals, use a leading underline
_my_online_engine = None
def init(engine_settings):
    """Initialization hook of the (online) engine.

    Stores the ``name`` found in ``engine_settings`` in the module level
    variable ``_my_online_engine``.  If your engine needs no initialization,
    drop this init function.

    :param engine_settings: dict-like settings of the engine
    """
    global _my_online_engine  # pylint: disable=global-statement
    engine_name = engine_settings.get('name')
    _my_online_engine = engine_name
def request(query, params):
    """Assemble the ``params`` of the online request.

    In this example the URL to fetch images from `artic.edu
    <https://artic.edu>`__ is composed from ``search_api`` and the URL encoded
    query arguments, and stored in ``params['url']``.

    :param query: the search term
    :param params: request parameters; ``pageno`` is read, ``url`` is written
    :returns: the (modified) ``params``
    """
    query_args = {
        'q': query,
        'page': params['pageno'],
        'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
        'limit': page_size,
    }
    params['url'] = search_api + urlencode(query_args)
    return params
def response(resp):
    """Parse out the result items from the response.

    In this example we parse the JSON response from `api.artic.edu
    <https://artic.edu>`__ and skip all items which have no image.

    :param resp: response object; its ``text`` attribute is decoded as JSON
        and the list under the key ``data`` is iterated
    :returns: list of result dicts for the ``images.html`` template
    """
    # ``image_api`` is joined with a path that starts with a slash; strip a
    # trailing slash first, otherwise the image URL contains '//' in its path.
    iiif_base = image_api.rstrip('/')

    results = []
    for item in loads(resp.text)['data']:

        # items without an image ID are dropped
        if not item['image_id']:
            continue

        results.append({
            'url': 'https://artic.edu/artworks/%(id)s' % item,
            'title': item['title'] + " (%(date_display)s) // %(artist_display)s" % item,
            'content': item['medium_display'],
            'author': ', '.join(item['artist_titles']),
            'img_src': iiif_base + '/%(image_id)s/full/843,/0/default.jpg' % item,
            'img_format': item['dimensions'],
            'template': 'images.html'
        })

    return results

View file

@ -17,7 +17,7 @@ about = {
"results": 'HTML',
}
engine_type = 'online_dictionnary'
engine_type = 'online_dictionary'
categories = ['general']
url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100

View file

@ -13,7 +13,7 @@ about = {
"results": 'JSON',
}
engine_type = 'online_dictionnary'
engine_type = 'online_dictionary'
categories = ['general']
url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'

View file

@ -12,9 +12,9 @@ from .online import OnlineProcessor
parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
class OnlineDictionaryProcessor(OnlineProcessor):
"""Processor class used by ``online_dictionnary`` engines."""
"""Processor class used by ``online_dictionary`` engines."""
engine_type = 'online_dictionnary'
engine_type = 'online_dictionary'
def get_params(self, search_query, engine_category):
params = super().get_params(search_query, engine_category)