[fix] searx_extra/update scripts: set_loggers(wikidata, 'wikidata') (#331)

* [fix] searx_extra/update scripts: set_loggers(wikidata, 'wikidata')

To test use::

    ./manage pyenv.cmd searx_extra/update/update_currencies.py
    ./manage pyenv.cmd searx_extra/update/update_osm_keys_tags.py
    ./manage pyenv.cmd searx_extra/update/update_wikidata_units.py

The script `update_engine_descriptions.py` seems to have some issues not related
to this patch.

    ./manage pyenv.cmd python -m pip install -U pycld3
    ./manage pyenv.cmd searx_extra/update/update_engine_descriptions.py

Closes: https://github.com/searxng/searxng/issues/328
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2021-09-19 09:10:02 +00:00 committed by GitHub
parent d95c46d646
commit 55b30b5b49
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 28 additions and 15 deletions

View file

@@ -10,8 +10,9 @@ from os.path import realpath, dirname, join
 from searx import searx_dir
 from searx.locales import LOCALE_NAMES
-from searx.engines.wikidata import send_wikidata_query
+from searx.engines import wikidata, set_loggers
+
+set_loggers(wikidata, 'wikidata')
 
 # ORDER BY (with all the query fields) is important to keep a deterministic result order
 # so multiple invokation of this script doesn't change currencies.json
@@ -83,7 +84,9 @@ def add_currency_label(db, label, iso4217, language):
 def wikidata_request_result_iterator(request):
-    result = send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    result = wikidata.send_wikidata_query(
+        request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
+    )
     if result is not None:
         for r in result['results']['bindings']:
             yield r

View file

@@ -6,13 +6,15 @@ from urllib.parse import quote, urlparse
 import detect_language
 from lxml.html import fromstring
-from searx.engines.wikidata import send_wikidata_query
+from searx.engines import wikidata, set_loggers
 from searx.utils import extract_text
 from searx.locales import LOCALE_NAMES
 
 import searx
 import searx.search
 import searx.network
 
+set_loggers(wikidata, 'wikidata')
+
 SPARQL_WIKIPEDIA_ARTICLE = """
 SELECT DISTINCT ?item ?name
 WHERE {
@@ -128,9 +130,11 @@ def initialize():
 def fetch_wikidata_descriptions():
     global IDS
-    result = send_wikidata_query(SPARQL_DESCRIPTION
-                                 .replace('%IDS%', IDS)
-                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    result = wikidata.send_wikidata_query(
+        SPARQL_DESCRIPTION
+        .replace('%IDS%', IDS)
+        .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
+    )
     if result is not None:
         for binding in result['results']['bindings']:
             wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
@@ -143,9 +147,11 @@ def fetch_wikidata_descriptions():
 def fetch_wikipedia_descriptions():
     global IDS
-    result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE
-                                 .replace('%IDS%', IDS)
-                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    result = wikidata.send_wikidata_query(
+        SPARQL_WIKIPEDIA_ARTICLE
+        .replace('%IDS%', IDS)
+        .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
+    )
     if result is not None:
         for binding in result['results']['bindings']:
             wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')

View file

@@ -46,10 +46,13 @@ from pathlib import Path
 from searx import searx_dir
 from searx.network import set_timeout_for_thread
-from searx.engines.wikidata import send_wikidata_query
+from searx.engines import wikidata, set_loggers
 from searx.languages import language_codes
 from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
 
+set_loggers(wikidata, 'wikidata')
+
 SPARQL_TAGS_REQUEST = """
 SELECT ?tag ?item ?itemLabel WHERE {
   ?item wdt:P1282 ?tag .
@@ -96,7 +99,7 @@ def get_preset_keys():
 def get_keys():
     results = get_preset_keys()
-    response = send_wikidata_query(SPARQL_KEYS_REQUEST)
+    response = wikidata.send_wikidata_query(SPARQL_KEYS_REQUEST)
 
     for key in response['results']['bindings']:
         keys = key['key']['value'].split(':')[1:]
@@ -144,7 +147,7 @@ def get_keys():
 def get_tags():
     results = collections.OrderedDict()
-    response = send_wikidata_query(SPARQL_TAGS_REQUEST)
+    response = wikidata.send_wikidata_query(SPARQL_TAGS_REQUEST)
     for tag in response['results']['bindings']:
         tag_names = tag['tag']['value'].split(':')[1].split('=')
         if len(tag_names) == 2:

View file

@@ -7,13 +7,14 @@ import collections
 from os.path import join
 
 from searx import searx_dir
-from searx.engines.wikidata import send_wikidata_query
+from searx.engines import wikidata, set_loggers
+
+set_loggers(wikidata, 'wikidata')
 
 # the response contains duplicate ?item with the different ?symbol
 # "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
 # even if a ?item has different ?symbol of the same rank.
 # A deterministic result
 # see:
 # * https://www.wikidata.org/wiki/Help:Ranking
 # * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
@@ -36,7 +37,7 @@ ORDER BY ?item DESC(?rank) ?symbol
 def get_data():
     results = collections.OrderedDict()
-    response = send_wikidata_query(SARQL_REQUEST)
+    response = wikidata.send_wikidata_query(SARQL_REQUEST)
     for unit in response['results']['bindings']:
         name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '')
         unit = unit['symbol']['value']