mirror of
https://github.com/searxng/searxng.git
synced 2024-11-26 21:01:01 +00:00
[fix] searx_extra/update scripts: set_loggers(wikidata, 'wikidata') (#331)
* [fix] searx_extra/update scripts: set_loggers(wikidata, 'wikidata')

To test use::

    ./manage pyenv.cmd searx_extra/update/update_currencies.py
    ./manage pyenv.cmd searx_extra/update/update_osm_keys_tags.py
    ./manage pyenv.cmd searx_extra/update/update_wikidata_units.py

The script `update_engine_descriptions.py` seems to have some issues not related to this patch.

    ./manage pyenv.cmd python -m pip install -U pycld3
    ./manage pyenv.cmd searx_extra/update/update_engine_descriptions.py

Closes: https://github.com/searxng/searxng/issues/328
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
d95c46d646
commit
55b30b5b49
4 changed files with 28 additions and 15 deletions
|
@ -10,8 +10,9 @@ from os.path import realpath, dirname, join
|
||||||
|
|
||||||
from searx import searx_dir
|
from searx import searx_dir
|
||||||
from searx.locales import LOCALE_NAMES
|
from searx.locales import LOCALE_NAMES
|
||||||
from searx.engines.wikidata import send_wikidata_query
|
from searx.engines import wikidata, set_loggers
|
||||||
|
|
||||||
|
set_loggers(wikidata, 'wikidata')
|
||||||
|
|
||||||
# ORDER BY (with all the query fields) is important to keep a deterministic result order
|
# ORDER BY (with all the query fields) is important to keep a deterministic result order
|
||||||
# so multiple invocations of this script don't change currencies.json
|
# so multiple invocations of this script don't change currencies.json
|
||||||
|
@ -83,7 +84,9 @@ def add_currency_label(db, label, iso4217, language):
|
||||||
|
|
||||||
|
|
||||||
def wikidata_request_result_iterator(request):
|
def wikidata_request_result_iterator(request):
|
||||||
result = send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
|
result = wikidata.send_wikidata_query(
|
||||||
|
request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
|
||||||
|
)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
for r in result['results']['bindings']:
|
for r in result['results']['bindings']:
|
||||||
yield r
|
yield r
|
||||||
|
|
|
@ -6,13 +6,15 @@ from urllib.parse import quote, urlparse
|
||||||
import detect_language
|
import detect_language
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
|
|
||||||
from searx.engines.wikidata import send_wikidata_query
|
from searx.engines import wikidata, set_loggers
|
||||||
from searx.utils import extract_text
|
from searx.utils import extract_text
|
||||||
from searx.locales import LOCALE_NAMES
|
from searx.locales import LOCALE_NAMES
|
||||||
import searx
|
import searx
|
||||||
import searx.search
|
import searx.search
|
||||||
import searx.network
|
import searx.network
|
||||||
|
|
||||||
|
set_loggers(wikidata, 'wikidata')
|
||||||
|
|
||||||
SPARQL_WIKIPEDIA_ARTICLE = """
|
SPARQL_WIKIPEDIA_ARTICLE = """
|
||||||
SELECT DISTINCT ?item ?name
|
SELECT DISTINCT ?item ?name
|
||||||
WHERE {
|
WHERE {
|
||||||
|
@ -128,9 +130,11 @@ def initialize():
|
||||||
|
|
||||||
def fetch_wikidata_descriptions():
|
def fetch_wikidata_descriptions():
|
||||||
global IDS
|
global IDS
|
||||||
result = send_wikidata_query(SPARQL_DESCRIPTION
|
result = wikidata.send_wikidata_query(
|
||||||
|
SPARQL_DESCRIPTION
|
||||||
.replace('%IDS%', IDS)
|
.replace('%IDS%', IDS)
|
||||||
.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
|
.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
|
||||||
|
)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
for binding in result['results']['bindings']:
|
for binding in result['results']['bindings']:
|
||||||
wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
|
wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
|
||||||
|
@ -143,9 +147,11 @@ def fetch_wikidata_descriptions():
|
||||||
|
|
||||||
def fetch_wikipedia_descriptions():
|
def fetch_wikipedia_descriptions():
|
||||||
global IDS
|
global IDS
|
||||||
result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE
|
result = wikidata.send_wikidata_query(
|
||||||
|
SPARQL_WIKIPEDIA_ARTICLE
|
||||||
.replace('%IDS%', IDS)
|
.replace('%IDS%', IDS)
|
||||||
.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
|
.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
|
||||||
|
)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
for binding in result['results']['bindings']:
|
for binding in result['results']['bindings']:
|
||||||
wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
|
wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
|
||||||
|
|
|
@ -46,10 +46,13 @@ from pathlib import Path
|
||||||
|
|
||||||
from searx import searx_dir
|
from searx import searx_dir
|
||||||
from searx.network import set_timeout_for_thread
|
from searx.network import set_timeout_for_thread
|
||||||
from searx.engines.wikidata import send_wikidata_query
|
from searx.engines import wikidata, set_loggers
|
||||||
from searx.languages import language_codes
|
from searx.languages import language_codes
|
||||||
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
|
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
|
||||||
|
|
||||||
|
set_loggers(wikidata, 'wikidata')
|
||||||
|
|
||||||
|
|
||||||
SPARQL_TAGS_REQUEST = """
|
SPARQL_TAGS_REQUEST = """
|
||||||
SELECT ?tag ?item ?itemLabel WHERE {
|
SELECT ?tag ?item ?itemLabel WHERE {
|
||||||
?item wdt:P1282 ?tag .
|
?item wdt:P1282 ?tag .
|
||||||
|
@ -96,7 +99,7 @@ def get_preset_keys():
|
||||||
|
|
||||||
def get_keys():
|
def get_keys():
|
||||||
results = get_preset_keys()
|
results = get_preset_keys()
|
||||||
response = send_wikidata_query(SPARQL_KEYS_REQUEST)
|
response = wikidata.send_wikidata_query(SPARQL_KEYS_REQUEST)
|
||||||
|
|
||||||
for key in response['results']['bindings']:
|
for key in response['results']['bindings']:
|
||||||
keys = key['key']['value'].split(':')[1:]
|
keys = key['key']['value'].split(':')[1:]
|
||||||
|
@ -144,7 +147,7 @@ def get_keys():
|
||||||
|
|
||||||
def get_tags():
|
def get_tags():
|
||||||
results = collections.OrderedDict()
|
results = collections.OrderedDict()
|
||||||
response = send_wikidata_query(SPARQL_TAGS_REQUEST)
|
response = wikidata.send_wikidata_query(SPARQL_TAGS_REQUEST)
|
||||||
for tag in response['results']['bindings']:
|
for tag in response['results']['bindings']:
|
||||||
tag_names = tag['tag']['value'].split(':')[1].split('=')
|
tag_names = tag['tag']['value'].split(':')[1].split('=')
|
||||||
if len(tag_names) == 2:
|
if len(tag_names) == 2:
|
||||||
|
|
|
@ -7,8 +7,9 @@ import collections
|
||||||
from os.path import join
|
from os.path import join
|
||||||
|
|
||||||
from searx import searx_dir
|
from searx import searx_dir
|
||||||
from searx.engines.wikidata import send_wikidata_query
|
from searx.engines import wikidata, set_loggers
|
||||||
|
|
||||||
|
set_loggers(wikidata, 'wikidata')
|
||||||
|
|
||||||
# the response contains duplicate ?item with the different ?symbol
|
# the response contains duplicate ?item with the different ?symbol
|
||||||
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
|
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
|
||||||
|
@ -36,7 +37,7 @@ ORDER BY ?item DESC(?rank) ?symbol
|
||||||
|
|
||||||
def get_data():
|
def get_data():
|
||||||
results = collections.OrderedDict()
|
results = collections.OrderedDict()
|
||||||
response = send_wikidata_query(SARQL_REQUEST)
|
response = wikidata.send_wikidata_query(SARQL_REQUEST)
|
||||||
for unit in response['results']['bindings']:
|
for unit in response['results']['bindings']:
|
||||||
name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '')
|
name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '')
|
||||||
unit = unit['symbol']['value']
|
unit = unit['symbol']['value']
|
||||||
|
|
Loading…
Reference in a new issue