[docs] add documentation for the scripts in searxng_extra/update

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-01-03 12:40:06 +01:00
parent b630c5d7bc
commit ffea5d8ef5
10 changed files with 157 additions and 24 deletions

View file

@ -1,14 +1,15 @@
.. _searxng_extra:
======================================================
Tooling box ``searxng_extra`` for developers and users
======================================================
=============================
Tooling box ``searxng_extra``
=============================
In the folder :origin:`searxng_extra/` we maintain some tools useful for
In the folder :origin:`searxng_extra/` we maintain some tools useful for CI and
developers.
.. toctree::
:maxdepth: 2
:caption: Contents
update
standalone_searx.py

View file

@ -0,0 +1,88 @@
=========================
``searxng_extra/update/``
=========================
:origin:`[source] <searxng_extra/update/__init__.py>`
Scripts to update static data in :origin:`searx/data/`
.. _update_ahmia_blacklist.py:
``update_ahmia_blacklist.py``
=============================
:origin:`[source] <searxng_extra/update/update_ahmia_blacklist.py>`
.. automodule:: searxng_extra.update.update_ahmia_blacklist
:members:
``update_currencies.py``
========================
:origin:`[source] <searxng_extra/update/update_currencies.py>`
.. automodule:: searxng_extra.update.update_currencies
:members:
``update_engine_descriptions.py``
=================================
:origin:`[source] <searxng_extra/update/update_engine_descriptions.py>`
.. automodule:: searxng_extra.update.update_engine_descriptions
:members:
``update_external_bangs.py``
============================
:origin:`[source] <searxng_extra/update/update_external_bangs.py>`
.. automodule:: searxng_extra.update.update_external_bangs
:members:
``update_firefox_version.py``
=============================
:origin:`[source] <searxng_extra/update/update_firefox_version.py>`
.. automodule:: searxng_extra.update.update_firefox_version
:members:
``update_languages.py``
=======================
:origin:`[source] <searxng_extra/update/update_languages.py>`
.. automodule:: searxng_extra.update.update_languages
:members:
``update_osm_keys_tags.py``
===========================
:origin:`[source] <searxng_extra/update/update_osm_keys_tags.py>`
.. automodule:: searxng_extra.update.update_osm_keys_tags
:members:
``update_pygments.py``
======================
:origin:`[source] <searxng_extra/update/update_pygments.py>`
.. automodule:: searxng_extra.update.update_pygments
:members:
``update_wikidata_units.py``
============================
:origin:`[source] <searxng_extra/update/update_wikidata_units.py>`
.. automodule:: searxng_extra.update.update_wikidata_units
:members:

View file

@ -1,10 +1,14 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This script saves `Ahmia's blacklist`_ for onion sites.
# This script saves Ahmia's blacklist for onion sites.
# More info in https://ahmia.fi/blacklist/
Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
... <.github/workflows/data-update.yml>`).
.. _Ahmia's blacklist: https://ahmia.fi/blacklist/
"""
# set path
from os.path import join
import requests
@ -26,6 +30,7 @@ def get_ahmia_blacklist_filename():
return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
if __name__ == '__main__':
blacklist = fetch_ahmia_blacklist()
with open(get_ahmia_blacklist_filename(), "w") as f:
f.write('\n'.join(blacklist))

View file

@ -1,6 +1,12 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
<.github/workflows/data-update.yml>`).
"""
import re
import unicodedata
import json

View file

@ -1,6 +1,13 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch website description from websites and from
:origin:`searx/engines/wikidata.py` engine.
Output file: :origin:`searx/data/engine_descriptions.json`.
"""
import json
from urllib.parse import urlparse
from os.path import join

View file

@ -1,17 +1,20 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Update searx/data/external_bangs.json using the duckduckgo bangs.
"""Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs
(:origin:`CI Update data ... <.github/workflows/data-update.yml>`).
https://duckduckgo.com/newbang loads:
https://duckduckgo.com/newbang loads
* a javascript which provides the bang version ( https://duckduckgo.com/bv1.js )
* a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example )
This script loads the javascript, then the bangs.
The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ),
but most probably it will requires to update RE_BANG_VERSION
The javascript URL may change in the future ( for example
https://duckduckgo.com/bv2.js ), but most probably it will require updating
RE_BANG_VERSION.
"""
# pylint: disable=C0116

View file

@ -1,6 +1,13 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch Firefox useragent signatures.
Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
<.github/workflows/data-update.yml>`).
"""
import json
import requests
import re
@ -66,6 +73,7 @@ def get_useragents_filename():
return join(join(searx_dir, "data"), "useragents.json")
if __name__ == '__main__':
useragents["versions"] = fetch_firefox_last_versions()
with open(get_useragents_filename(), "w") as f:
with open(get_useragents_filename(), "w", encoding='utf-8') as f:
json.dump(useragents, f, indent=4, ensure_ascii=False)

View file

@ -1,9 +1,13 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This script generates languages.py by intersecting each engine's supported
languages.
# This script generates languages.py from intersecting each engine's supported languages.
#
# Output files: searx/data/engines_languages.json and searx/languages.py
Output files: :origin:`searx/data/engines_languages.json` and
:origin:`searx/languages.py` (:origin:`CI Update data ...
<.github/workflows/data-update.yml>`).
"""
import json
from pathlib import Path

View file

@ -5,7 +5,10 @@
To get the i18n names, the script uses `Wikidata Query Service`_ instead of, for
example, `OSM tags API`_ (sidenote: the actual change log from
map.atownsend.org.uk_ might be useful to normalize OSM tags)
map.atownsend.org.uk_ might be useful to normalize OSM tags).
Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
<.github/workflows/data-update.yml>`).
.. _Wikidata Query Service: https://query.wikidata.org/
.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc

View file

@ -3,6 +3,13 @@
# lint: pylint
# pylint: disable=missing-module-docstring
"""Fetch units from :origin:`searx/engines/wikidata.py` engine.
Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data
... <.github/workflows/data-update.yml>`).
"""
import json
import collections
@ -54,5 +61,6 @@ def get_wikidata_units_filename():
return join(join(searx_dir, "data"), "wikidata_units.json")
if __name__ == '__main__':
with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
json.dump(get_data(), f, indent=4, ensure_ascii=False)