Merge pull request #90 from searxng/enh-osm-engine

[enh] improve openstreetmap engine
This commit is contained in:
Alexandre Flament 2021-06-10 09:30:41 +02:00 committed by GitHub
commit 73dbee45a6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 16345 additions and 178 deletions

View file

@ -73,7 +73,7 @@ test.shell:
MANAGE += buildenv
MANAGE += babel.compile
MANAGE += data.all data.languages data.useragents
MANAGE += data.all data.languages data.useragents data.osm_keys_tags
MANAGE += docs.html docs.live docs.gh-pages docs.prebuild docs.clean
MANAGE += docker.build docker.push docker.buildx
MANAGE += gecko.driver

7
manage
View file

@ -115,6 +115,7 @@ babel.compile() {
data.all() {
data.languages
data.useragents
data.osm_keys_tags
build_msg DATA "update searx/data/ahmia_blacklist.txt"
pyenv.cmd python searx_extra/update/update_ahmia_blacklist.py
build_msg DATA "update searx/data/wikidata_units.json"
@ -140,6 +141,12 @@ data.useragents() {
dump_return $?
}
# Regenerate searx/data/osm_keys_tags.json by running
# searx_extra/update/update_osm_keys_tags.py through pyenv.cmd, then hand the
# script's exit status to dump_return.
data.osm_keys_tags() {
build_msg DATA "update searx/data/osm_keys_tags.json"
pyenv.cmd python searx_extra/update/update_osm_keys_tags.py
dump_return $?
}
docs.prebuild() {
build_msg DOCS "build ${DOCS_BUILD}/includes"
(

View file

@ -1,25 +1,47 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module holds the *data* created by::
make data.all
"""
__all__ = [
'ENGINES_LANGUAGES',
'CURRENCIES',
'USER_AGENTS',
'EXTERNAL_URLS',
'WIKIDATA_UNITS',
'EXTERNAL_BANGS',
'OSM_KEYS_TAGS',
'ahmia_blacklist_loader',
]
import json
from pathlib import Path
__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'EXTERNAL_URLS', 'WIKIDATA_UNITS', 'EXTERNAL_BANGS',
'bangs_loader', 'ahmia_blacklist_loader']
data_dir = Path(__file__).parent
def load(filename):
with open(data_dir / filename, encoding='utf-8') as fd:
return json.load(fd)
def _load(filename):
    """Parse the UTF-8 encoded JSON file *filename* located in ``data_dir``."""
    json_path = data_dir / filename
    with open(json_path, encoding='utf-8') as json_file:
        content = json.load(json_file)
    return content
def ahmia_blacklist_loader():
with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as fd:
return fd.read().split()
"""Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion
names. The MD5 values are fetched by::
searx_extra/update/update_ahmia_blacklist.py
ENGINES_LANGUAGES = load('engines_languages.json')
CURRENCIES = load('currencies.json')
USER_AGENTS = load('useragents.json')
EXTERNAL_URLS = load('external_urls.json')
WIKIDATA_UNITS = load('wikidata_units.json')
EXTERNAL_BANGS = load('external_bangs.json')
This function is used by :py:mod:`searx.plugins.ahmia_filter`.
"""
with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as f:
return f.read().split()
ENGINES_LANGUAGES = _load('engines_languages.json')
CURRENCIES = _load('currencies.json')
USER_AGENTS = _load('useragents.json')
EXTERNAL_URLS = _load('external_urls.json')
WIKIDATA_UNITS = _load('wikidata_units.json')
EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')

15416
searx/data/osm_keys_tags.json Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,12 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""OpenStreetMap (Map)
"""
OpenStreetMap (Map)
"""
# pylint: disable=missing-function-docstring
import re
from json import loads
from urllib.parse import urlencode
from functools import partial
from flask_babel import gettext
from searx.data import OSM_KEYS_TAGS, CURRENCIES
from searx.utils import searx_useragent
from searx.external_urls import get_external_url
from searx.engines.wikidata import send_wikidata_query, sparql_string_escape
# about
about = {
"website": 'https://www.openstreetmap.org/',
@ -23,26 +33,122 @@ paging = False
# search-url
base_url = 'https://nominatim.openstreetmap.org/'
search_string = 'search/{query}?format=json&polygon_geojson=1&addressdetails=1'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
search_string = 'search?{query}&polygon_geojson=1&format=jsonv2&addressdetails=1&extratags=1&dedupe=1'
result_id_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
result_lat_lon_url = 'https://www.openstreetmap.org/?mlat={lat}&mlon={lon}&zoom={zoom}&layers=M'
route_url = 'https://graphhopper.com/maps/?point={}&point={}&locale=en-US&vehicle=car&weighting=fastest&turn_costs=true&use_miles=false&layer=Omniscale' # noqa
route_url = 'https://graphhopper.com/maps/?point={}&point={}&locale=en-US&vehicle=car&weighting=fastest&turn_costs=true&use_miles=false&layer=Omniscale' # pylint: disable=line-too-long
route_re = re.compile('(?:from )?(.+) to (.+)')
wikidata_image_sparql = """
select ?item ?itemLabel ?image ?sign ?symbol ?website ?wikipediaName
where {
values ?item { %WIKIDATA_IDS% }
OPTIONAL { ?item wdt:P18|wdt:P8517|wdt:P4291|wdt:P5252|wdt:P3451|wdt:P4640|wdt:P5775|wdt:P2716|wdt:P1801|wdt:P4896 ?image }
OPTIONAL { ?item wdt:P1766|wdt:P8505|wdt:P8667 ?sign }
OPTIONAL { ?item wdt:P41|wdt:P94|wdt:P154|wdt:P158|wdt:P2910|wdt:P4004|wdt:P5962|wdt:P8972 ?symbol }
OPTIONAL { ?item wdt:P856 ?website }
SERVICE wikibase:label {
bd:serviceParam wikibase:language "%LANGUAGE%,en".
?item rdfs:label ?itemLabel .
}
OPTIONAL {
?wikipediaUrl schema:about ?item;
schema:isPartOf/wikibase:wikiGroup "wikipedia";
schema:name ?wikipediaName;
schema:inLanguage "%LANGUAGE%" .
}
}
ORDER by ?item
"""
# Mapping functions for keys whose values are links
# 'mapillary': P1947
# but https://github.com/kartaview/openstreetcam.org/issues/60
# but https://taginfo.openstreetmap.org/keys/kartaview ...
def value_to_https_link(value):
    """Return a ``(url, label)`` pair where a leading ``http://`` becomes ``https://``.

    Values that do not start with ``http://`` are returned unchanged. The label
    is always identical to the resulting URL.
    """
    insecure_scheme = 'http://'
    if value.startswith(insecure_scheme):
        secure_url = 'https://' + value[len(insecure_scheme):]
    else:
        secure_url = value
    return secure_url, secure_url
def value_to_website_link(value):
    """Return ``(url, label)`` built from the first ``;``-separated entry of *value*."""
    first_entry, _separator, _rest = value.partition(';')
    return first_entry, first_entry
def value_wikipedia_link(value):
    """Return ``(url, label)`` for the value of an OSM ``wikipedia`` tag.

    The value is expected to look like ``<lang>:<title>`` (for example
    ``fr:Tour Eiffel``); only the first ``:`` separates language and title.

    A value without a language prefix used to raise ``IndexError``. It is now
    treated as a title on the English Wikipedia.
    NOTE(review): defaulting to ``en`` is an assumption -- confirm it is the
    desired fallback for legacy, prefix-less tag values.
    """
    lang, separator, title = value.partition(':')
    if not separator:
        # no "lang:" prefix: the whole value is the article title
        lang, title = 'en', value
    return (
        'https://{0}.wikipedia.org/wiki/{1}'.format(lang, title),
        '{1} ({0})'.format(lang, title),
    )
def value_with_prefix(prefix, value):
    """Return ``(prefix + value, value)``: the prefixed URL and the bare value as label."""
    url = prefix + value
    return url, value
# Maps an OSM key to the function that turns its raw value into a
# (url, url_label) tuple. Keys listed here are rendered as links by get_links.
VALUE_TO_LINK = {
'website': value_to_website_link,
'contact:website': value_to_website_link,
'email': partial(value_with_prefix, 'mailto:'),
'contact:email': partial(value_with_prefix, 'mailto:'),
'contact:phone': partial(value_with_prefix, 'tel:'),
'phone': partial(value_with_prefix, 'tel:'),
'fax': partial(value_with_prefix, 'fax:'),
'contact:fax': partial(value_with_prefix, 'fax:'),
'contact:mastodon': value_to_https_link,
'facebook': value_to_https_link,
'contact:facebook': value_to_https_link,
'contact:foursquare': value_to_https_link,
'contact:instagram': value_to_https_link,
'contact:linkedin': value_to_https_link,
'contact:pinterest': value_to_https_link,
'contact:telegram': value_to_https_link,
'contact:tripadvisor': value_to_https_link,
'contact:twitter': value_to_https_link,
'contact:yelp': value_to_https_link,
'contact:youtube': value_to_https_link,
'contact:webcam': value_to_website_link,
'wikipedia': value_wikipedia_link,
'wikidata': partial(value_with_prefix, 'https://wikidata.org/wiki/'),
'brand:wikidata': partial(value_with_prefix, 'https://wikidata.org/wiki/'),
}
# OSM keys shown as plain data rows, in display order. An entry ending with
# ':*' matches every key sharing that prefix (see get_key_rank); keys that
# match no entry are not displayed by get_data.
KEY_ORDER = [
'cuisine',
'organic',
'delivery',
'delivery:covid19',
'opening_hours',
'opening_hours:covid19',
'fee',
'payment:*',
'currency:*',
'outdoor_seating',
'bench',
'wheelchair',
'level',
'building:levels',
'bin',
'public_transport',
'internet_access:ssid',
]
# key -> position in KEY_ORDER, for constant-time rank lookups
KEY_RANKS = {k: i for i, k in enumerate(KEY_ORDER)}
# do search-request
def request(query, params):
params['url'] = base_url + search_string.format(query=query)
"""do search-request"""
params['url'] = base_url + search_string.format(query=urlencode({'q': query}))
params['route'] = route_re.match(query)
params['headers']['User-Agent'] = searx_useragent()
return params
# get response from search-request
def response(resp):
"""get response from search-request"""
results = []
json = loads(resp.text)
nominatim_json = loads(resp.text)
user_language = resp.search_params['language']
if resp.search_params['route']:
results.append({
@ -50,62 +156,286 @@ def response(resp):
'url': route_url.format(*resp.search_params['route'].groups()),
})
# parse results
for r in json:
if 'display_name' not in r:
fetch_wikidata(nominatim_json, user_language)
for result in nominatim_json:
title, address = get_title_address(result)
# ignore result without title
if not title:
continue
title = r['display_name'] or ''
osm_type = r.get('osm_type', r.get('type'))
url = result_base_url.format(osm_type=osm_type,
osm_id=r['osm_id'])
url, osm, geojson = get_url_osm_geojson(result)
img_src = get_img_src(result)
links, link_keys = get_links(result, user_language)
data = get_data(result, user_language, link_keys)
osm = {'type': osm_type,
'id': r['osm_id']}
results.append({
'template': 'map.html',
'title': title,
'address': address,
'address_label': get_key_label('addr', user_language),
'url': url,
'osm': osm,
'geojson': geojson,
'img_src': img_src,
'links': links,
'data': data,
'type': get_tag_label(
result.get('category'), result.get('type', ''), user_language
),
'type_icon': result.get('icon'),
'content': '',
'longitude': result['lon'],
'latitude': result['lat'],
'boundingbox': result['boundingbox'],
})
geojson = r.get('geojson')
return results
# if no geojson is found and osm_type is a node, add geojson Point
if not geojson and osm_type == 'node':
geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}
address_raw = r.get('address')
def get_wikipedia_image(raw_value):
    """Resolve *raw_value* through ``get_external_url('wikimedia_image', ...)``.

    Returns ``None`` without any lookup when *raw_value* is empty or ``None``.
    """
    return get_external_url('wikimedia_image', raw_value) if raw_value else None
def fetch_wikidata(nominatim_json, user_langage):
    """Update nominatim_json in place using the result of a single Wikidata query.

    For each result in nominatim_json:
        If result['extratags']['wikidata'] or result['extratags']['wikidata link'] is set:
            Set result['wikidata'] to
            {'itemLabel': ..., 'image': ..., 'image_sign': ..., 'image_symbol': ...}
            Set result['extratags']['wikipedia'] when Wikidata returns an
            article name for the user language (overwrites an existing value)
            Set result['extratags']['contact:website'] if no website is defined

    Nothing is sent to Wikidata when no result carries a Wikidata id.
    """
    # Wikidata id -> every result that references it. The keys are the
    # de-duplicated id list: the previous check compared the bare id against
    # a list of "wd:"-prefixed ids, so it never filtered anything.
    wd_to_results = {}
    for result in nominatim_json:
        extratags = result.get("extratags")
        if not extratags:
            continue
        # ignore brand:wikidata
        wd_id = extratags.get("wikidata", extratags.get("wikidata link"))
        if wd_id:
            wd_to_results.setdefault(wd_id, []).append(result)

    if not wd_to_results:
        return

    wikidata_ids_str = " ".join("wd:" + wd_id for wd_id in wd_to_results)
    query = wikidata_image_sparql.replace('%WIKIDATA_IDS%', sparql_string_escape(wikidata_ids_str)).replace(
        '%LANGUAGE%', sparql_string_escape(user_langage)
    )
    wikidata_json = send_wikidata_query(query)
    for wd_result in wikidata_json.get('results', {}).get('bindings', {}):
        wd_id = wd_result['item']['value'].replace('http://www.wikidata.org/entity/', '')
        for result in wd_to_results.get(wd_id, []):
            result['wikidata'] = {
                'itemLabel': wd_result['itemLabel']['value'],
                'image': get_wikipedia_image(wd_result.get('image', {}).get('value')),
                'image_sign': get_wikipedia_image(wd_result.get('sign', {}).get('value')),
                'image_symbol': get_wikipedia_image(wd_result.get('symbol', {}).get('value')),
            }
            # overwrite wikipedia link
            wikipedia_name = wd_result.get('wikipediaName', {}).get('value')
            if wikipedia_name:
                result['extratags']['wikipedia'] = user_langage + ':' + wikipedia_name
            # get website if not already defined
            website = wd_result.get('website', {}).get('value')
            if (
                website
                and not result['extratags'].get('contact:website')
                and not result['extratags'].get('website')
            ):
                result['extratags']['contact:website'] = website
def get_title_address(result):
"""Return title and address
title may be None
"""
address_raw = result.get('address')
address_name = None
address = {}
# get name
if r['class'] == 'amenity' or\
r['class'] == 'shop' or\
r['class'] == 'tourism' or\
r['class'] == 'leisure':
if (
result['category'] == 'amenity'
or result['category'] == 'shop'
or result['category'] == 'tourism'
or result['category'] == 'leisure'
):
if address_raw.get('address29'):
address = {'name': address_raw.get('address29')}
# https://github.com/osm-search/Nominatim/issues/1662
address_name = address_raw.get('address29')
else:
address = {'name': address_raw.get(r['type'])}
address_name = address_raw.get(result['category'])
elif result['type'] in address_raw:
address_name = address_raw.get(result['type'])
# add rest of adressdata, if something is already found
if address.get('name'):
address.update({'house_number': address_raw.get('house_number'),
if address_name:
title = address_name
address.update(
{
'name': address_name,
'house_number': address_raw.get('house_number'),
'road': address_raw.get('road'),
'locality': address_raw.get('city',
address_raw.get('town', # noqa
address_raw.get('village'))), # noqa
'locality': address_raw.get(
'city', address_raw.get('town', address_raw.get('village')) # noqa
), # noqa
'postcode': address_raw.get('postcode'),
'country': address_raw.get('country'),
'country_code': address_raw.get('country_code')})
'country_code': address_raw.get('country_code'),
}
)
else:
address = None
title = result.get('display_name')
# append result
results.append({'template': 'map.html',
'title': title,
'content': '',
'longitude': r['lon'],
'latitude': r['lat'],
'boundingbox': r['boundingbox'],
'geojson': geojson,
'address': address,
'osm': osm,
'url': url})
return title, address
# return results
return results
def get_url_osm_geojson(result):
    """Return the ``(url, osm, geojson)`` triple of a Nominatim result.

    With an ``osm_id`` the URL points to the OSM object and ``osm`` holds its
    type and id; without one the URL is a lat/lon marker link and ``osm`` is
    empty. A node without geojson gets a geojson ``Point`` built from its
    coordinates.
    """
    osm_type = result.get('osm_type', result.get('type'))

    if 'osm_id' in result:
        osm_id = result['osm_id']
        url = result_id_url.format(osm_type=osm_type, osm_id=osm_id)
        osm = {'type': osm_type, 'id': osm_id}
    else:
        # see https://github.com/osm-search/Nominatim/issues/1521
        # query example: "EC1M 5RF London"
        url = result_lat_lon_url.format(lat=result['lat'], lon=result['lon'], zoom=12)
        osm = {}

    geojson = result.get('geojson')
    if osm_type == 'node' and not geojson:
        # no geojson for a node: fall back to a Point at the node position
        geojson = {'type': 'Point', 'coordinates': [result['lon'], result['lat']]}

    return url, osm, geojson
def get_img_src(result):
    """Get image URL from either result['wikidata'] or result['extratags'].

    Lookup order: Wikidata ``image``, ``image_symbol``, ``image_sign``, then
    the ``image`` extratag, then the ``wikimedia_commons`` extratag (resolved
    with ``get_external_url``).

    Side effect: the extratag that supplies the image is removed from
    result['extratags'].

    A missing or ``None`` ``extratags`` entry is treated as empty instead of
    raising ``AttributeError``.
    """
    img_src = None

    # wikidata
    wikidata = result.get('wikidata')
    if wikidata:
        img_src = wikidata['image'] or wikidata['image_symbol'] or wikidata['image_sign']

    # extratags ("or {}" also covers an explicit None value)
    extratags = result.get('extratags') or {}
    if not img_src and extratags.get('image'):
        img_src = extratags.pop('image')
    if not img_src and extratags.get('wikimedia_commons'):
        img_src = get_external_url('wikimedia_image', extratags.pop('wikimedia_commons'))

    return img_src
def get_links(result, user_language):
    """Return links built from result['extratags'].

    Returns a tuple ``(links, link_keys)``:

    - ``links``: list of ``{'label', 'url', 'url_label'}`` dicts, one per key
      of ``VALUE_TO_LINK`` that has a non-empty value in the extratags
    - ``link_keys``: set of the extratags keys that were turned into links

    A missing or ``None`` ``extratags`` entry yields ``([], set())`` instead of
    raising.
    """
    links = []
    link_keys = set()

    extratags = result.get('extratags') or {}
    if not extratags:
        return links, link_keys

    wikidata_label = (result.get('wikidata') or {}).get('itemLabel')
    for k, mapping_function in VALUE_TO_LINK.items():
        raw_value = extratags.get(k)
        if not raw_value:
            continue
        url, url_label = mapping_function(raw_value)
        if url.startswith('https://wikidata.org'):
            # prefer the human readable item label over the raw Q-id
            url_label = wikidata_label or url_label
        links.append({
            'label': get_key_label(k, user_language),
            'url': url,
            'url_label': url_label,
        })
        link_keys.add(k)
    return links, link_keys
def get_data(result, user_language, ignore_keys):
    """Return the displayable key/value entries of result['extratags'].

    Must be called after get_links: *ignore_keys* is expected to hold the keys
    already rendered as links.

    Only keys that have a rank (see get_key_rank) and a label are kept. The
    returned list of ``{'label', 'key', 'value'}`` dicts is sorted by rank,
    then by label.

    Note: the values are not translated.

    A missing or ``None`` ``extratags`` entry yields an empty list instead of
    raising.
    """
    extratags = result.get('extratags') or {}

    ranked_entries = []
    for k, v in extratags.items():
        if k in ignore_keys:
            continue
        key_rank = get_key_rank(k)
        if key_rank is None:
            continue
        k_label = get_key_label(k, user_language)
        if k_label:
            # keep the rank with the entry so it is computed once per key
            ranked_entries.append((key_rank, k_label, {
                'label': k_label,
                'key': k,
                'value': v,
            }))
    ranked_entries.sort(key=lambda item: (item[0], item[1]))
    return [entry for _rank, _label, entry in ranked_entries]
def get_key_rank(k):
    """Return the display rank of the OSM key *k*, or ``None`` if it has none.

    The rank is the position used to order the keys in the HTML result. An
    exact match in ``KEY_RANKS`` wins; otherwise the wildcard entry of the
    key's first ``:``-separated component is looked up.
    """
    exact_rank = KEY_RANKS.get(k)
    if exact_rank is not None:
        return exact_rank
    # "payment:*" in KEY_ORDER matches "payment:cash", "payment:debit card", etc...
    namespace = k.split(':')[0]
    return KEY_RANKS.get(namespace + ':*')
def get_label(labels, lang):
    """Get the label for *lang* from *labels* (a language code -> label dict).

    In OSM_KEYS_TAGS, labels have key == '*'.

    Lookup order: the lowercased language, its primary subtag (``zh`` for
    ``zh-hk``), ``en``, then the first available label. Returns ``None`` when
    *labels* is empty.
    """
    # lowercase once so that the region fallback also works for e.g. 'zh-HK'
    lang = lang.lower()
    tag_label = labels.get(lang)
    if tag_label is None:
        # example: if 'zh-hk' is not found, check 'zh'
        tag_label = labels.get(lang.split('-')[0])
    if tag_label is None and lang != 'en':
        # example: if 'zh' is not found, check 'en'
        tag_label = labels.get('en')
    if tag_label is None and labels:
        # example: if still not found, use the first entry
        # (dict views are not subscriptable, hence next(iter(...)))
        tag_label = next(iter(labels.values()))
    return tag_label
def get_tag_label(tag_category, tag_name, lang):
    """Look up the label of the tag ``tag_category=tag_name`` in OSM_KEYS_TAGS.

    A ``None`` *tag_name* is looked up as the empty string; unknown categories
    or names are resolved against an empty label set.
    """
    if tag_name is None:
        tag_name = ''
    category_tags = OSM_KEYS_TAGS['tags'].get(tag_category, {})
    return get_label(category_tags.get(tag_name, {}), lang)
def get_key_label(key_name, lang):
    """Get key label from OSM_KEYS_TAGS.

    Keys starting with ``currency:`` are resolved through
    ``CURRENCIES['iso4217']`` instead; the bare currency code is returned when
    no name is found. For every other key the ``:``-separated components are
    followed through ``OSM_KEYS_TAGS['keys']`` and the ``'*'`` entry of the
    final node provides the labels. Returns ``None`` for an unknown key.
    """
    if key_name.startswith('currency:'):
        # currency:EUR --> get the name from the CURRENCIES variable
        # see https://wiki.openstreetmap.org/wiki/Key%3Acurrency
        # and for example https://taginfo.openstreetmap.org/keys/currency:EUR#values
        # but there is also currency=EUR (currently not handled)
        # https://taginfo.openstreetmap.org/keys/currency#values
        #
        # Use the code itself as lookup key: the list returned by split() is
        # unhashable and made the lookup raise TypeError.
        currency = key_name.split(':')[1]
        currency_names = CURRENCIES['iso4217'].get(currency)
        if currency_names:
            currency_label = get_label(currency_names, lang)
            if currency_label:
                return currency_label.lower()
        return currency

    labels = OSM_KEYS_TAGS['keys']
    for k in key_name.split(':') + ['*']:
        labels = labels.get(k)
        if labels is None:
            return None
    return get_label(labels, lang)

View file

@ -10,6 +10,7 @@ IMDB_PREFIX_TO_URL_ID = {
'co': 'imdb_company',
'ev': 'imdb_event'
}
HTTP_WIKIMEDIA_IMAGE = 'http://commons.wikimedia.org/wiki/Special:FilePath/'
def get_imdb_url_id(imdb_item_id):
@ -17,6 +18,14 @@ def get_imdb_url_id(imdb_item_id):
return IMDB_PREFIX_TO_URL_ID.get(id_prefix)
def get_wikimedia_image_id(url):
    """Strip a leading ``HTTP_WIKIMEDIA_IMAGE`` or ``File:`` prefix from *url*.

    The first matching prefix is removed; a value with neither prefix is
    returned unchanged.
    """
    for prefix in (HTTP_WIKIMEDIA_IMAGE, 'File:'):
        if url.startswith(prefix):
            return url[len(prefix):]
    return url
def get_external_url(url_id, item_id, alternative="default"):
"""Return an external URL or None if url_id is not found.
@ -25,8 +34,11 @@ def get_external_url(url_id, item_id, alternative="default"):
If item_id is None, the raw URL with the $1 is returned.
"""
if url_id == 'imdb_id' and item_id is not None:
if item_id is not None:
if url_id == 'imdb_id':
url_id = get_imdb_url_id(item_id)
elif url_id == 'wikimedia_image':
item_id = get_wikimedia_image_id(item_id)
url_description = EXTERNAL_URLS.get(url_id)
if url_description:

View file

@ -623,6 +623,30 @@ input[type=checkbox]:not(:checked) + .label_hide_if_checked + .label_hide_if_not
.result-map {
clear: both;
}
.result-map .img-thumbnail {
float: right;
width: auto;
height: 120px;
border: 0;
background: inherit;
}
.result-map .img-type {
width: 20px;
max-height: 20px;
}
.result-map .result-map-details {
font-size: 13px;
border-collapse: separate;
border-spacing: 0 0.35rem;
}
.result-map .result-map-details th {
font-weight: inherit;
width: 20rem;
vertical-align: top;
}
.result-map .result-map-details td {
vertical-align: top;
}
.result-code {
clear: both;
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -650,6 +650,30 @@ input[type=checkbox]:not(:checked) + .label_hide_if_checked + .label_hide_if_not
.result-map {
clear: both;
}
.result-map .img-thumbnail {
float: right;
width: auto;
height: 120px;
border: 0;
background: inherit;
}
.result-map .img-type {
width: 20px;
max-height: 20px;
}
.result-map .result-map-details {
font-size: 13px;
border-collapse: separate;
border-spacing: 0 0.35rem;
}
.result-map .result-map-details th {
font-weight: inherit;
width: 20rem;
vertical-align: top;
}
.result-map .result-map-details td {
vertical-align: top;
}
.result-code {
clear: both;
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -171,6 +171,36 @@
// map formating of results
.result-map {
clear: both;
.img-thumbnail {
float: right;
width: auto;
height: 120px;
border: 0;
background: inherit;
}
.img-type {
width: 20px;
max-height: 20px;
}
.result-map-details {
font-size: 13px;
border-collapse: separate;
border-spacing: 0 0.35rem;
th {
font-weight: inherit;
width: 20rem;
vertical-align: top;
}
td {
vertical-align: top;
}
}
}
// code formating of results

View file

@ -1,4 +1,4 @@
/*! searx | 01-06-2021 | https://github.com/searxng/searxng */
/*! searx | 09-06-2021 | https://github.com/searxng/searxng */
/*
* searx, A privacy-respecting, hackable metasearch engine
*
@ -2346,6 +2346,30 @@ article.result-images[data-vim-selected]::before {
background-color: rgba(0, 0, 0, 0.6);
font-size: 0.7em;
}
.result-map img.image {
float: right !important;
height: 100px !important;
width: auto !important;
}
.result-map table {
font-size: 0.9em;
width: auto;
border-collapse: separate;
border-spacing: 0 0.35rem;
}
.result-map table th {
font-weight: inherit;
width: 17rem;
vertical-align: top;
text-align: left;
}
.result-map table td {
vertical-align: top;
text-align: left;
}
.hidden {
display: none !important;
}
.torrent_result {
border-left: 10px solid lightgray;
padding-left: 3px;

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,4 +1,4 @@
/*! searx | 01-06-2021 | https://github.com/searxng/searxng */
/*! searx | 09-06-2021 | https://github.com/searxng/searxng */
/*
* searx, A privacy-respecting, hackable metasearch engine
*
@ -2346,6 +2346,30 @@ article.result-images[data-vim-selected]::before {
background-color: rgba(0, 0, 0, 0.6);
font-size: 0.7em;
}
.result-map img.image {
float: right !important;
height: 100px !important;
width: auto !important;
}
.result-map table {
font-size: 0.9em;
width: auto;
border-collapse: separate;
border-spacing: 0 0.35rem;
}
.result-map table th {
font-weight: inherit;
width: 17rem;
vertical-align: top;
text-align: left;
}
.result-map table td {
vertical-align: top;
text-align: left;
}
.hidden {
display: none !important;
}
.torrent_result {
border-left: 10px solid lightgray;
padding-left: 3px;

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,4 +1,4 @@
/*! simple/searx.min.js | 01-06-2021 | https://github.com/searxng/searxng */
/*! simple/searx.min.js | 09-06-2021 | https://github.com/searxng/searxng */
(function(t,e){"use strict";var a=e.currentScript||function(){var t=e.getElementsByTagName("script");return t[t.length-1]}();t.searx={touch:"ontouchstart"in t||t.DocumentTouch&&document instanceof DocumentTouch||false,method:a.getAttribute("data-method"),autocompleter:a.getAttribute("data-autocompleter")==="true",search_on_category_select:a.getAttribute("data-search-on-category-select")==="true",infinite_scroll:a.getAttribute("data-infinite-scroll")==="true",static_path:a.getAttribute("data-static-path"),translations:JSON.parse(a.getAttribute("data-translations"))};e.getElementsByTagName("html")[0].className=t.searx.touch?"js touch":"js"})(window,document);
//# sourceMappingURL=searx.head.min.js.map

View file

@ -1,4 +1,4 @@
/*! simple/searx.min.js | 01-06-2021 | https://github.com/searxng/searxng */
/*! simple/searx.min.js | 09-06-2021 | https://github.com/searxng/searxng */
window.searx=function(t,o){"use strict";if(t.Element){(function(e){e.matches=e.matches||e.matchesSelector||e.webkitMatchesSelector||e.msMatchesSelector||function(e){var t=this,n=(t.parentNode||t.document).querySelectorAll(e),i=-1;while(n[++i]&&n[i]!=t);return!!n[i]}})(Element.prototype)}function a(e,t,n){try{e.call(t,n)}catch(e){console.log(e)}}var s=window.searx||{};s.on=function(i,e,r,t){t=t||false;if(typeof i!=="string"){i.addEventListener(e,r,t)}else{o.addEventListener(e,function(e){var t=e.target||e.srcElement,n=false;while(t&&t.matches&&t!==o&&!(n=t.matches(i)))t=t.parentElement;if(n)a(r,t,e)},t)}};s.ready=function(e){if(document.readyState!="loading"){e.call(t)}else{t.addEventListener("DOMContentLoaded",e.bind(t))}};s.http=function(e,t,n){var i=new XMLHttpRequest,r=function(){},o=function(){},a={then:function(e){r=e;return a},catch:function(e){o=e;return a}};try{i.open(e,t,true);i.onload=function(){if(i.status==200){r(i.response,i.responseType)}else{o(Error(i.statusText))}};i.onerror=function(){o(Error("Network Error"))};i.onabort=function(){o(Error("Transaction is aborted"))};i.send()}catch(e){o(e)}return a};s.loadStyle=function(e){var t=s.static_path+e,n="style_"+e.replace(".","_"),i=o.getElementById(n);if(i===null){i=o.createElement("link");i.setAttribute("id",n);i.setAttribute("rel","stylesheet");i.setAttribute("type","text/css");i.setAttribute("href",t);o.body.appendChild(i)}};s.loadScript=function(e,t){var n=s.static_path+e,i="script_"+e.replace(".","_"),r=o.getElementById(i);if(r===null){r=o.createElement("script");r.setAttribute("id",i);r.setAttribute("src",n);r.onload=t;r.onerror=function(){r.setAttribute("error","1")};o.body.appendChild(r)}else if(!r.hasAttribute("error")){try{t.apply(r,[])}catch(e){console.log(e)}}else{console.log("callback not executed : script '"+n+"' not 
loaded.")}};s.insertBefore=function(e,t){element.parentNode.insertBefore(e,t)};s.insertAfter=function(e,t){t.parentNode.insertBefore(e,t.nextSibling)};s.on(".close","click",function(e){var t=e.target||e.srcElement;this.parentNode.classList.add("invisible")});return s}(window,document);(function(e){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=e()}else if(typeof define==="function"&&define.amd){define([],e)}else{var t;if(typeof window!=="undefined"){t=window}else if(typeof global!=="undefined"){t=global}else if(typeof self!=="undefined"){t=self}else{t=this}t.AutoComplete=e()}})(function(){var e,t,n;return function o(a,s,l){function u(n,e){if(!s[n]){if(!a[n]){var t=typeof require=="function"&&require;if(!e&&t)return t(n,!0);if(c)return c(n,!0);var i=new Error("Cannot find module '"+n+"'");throw i.code="MODULE_NOT_FOUND",i}var r=s[n]={exports:{}};a[n][0].call(r.exports,function(e){var t=a[n][1][e];return u(t?t:e)},r,r.exports,o,a,s,l)}return s[n].exports}var c=typeof require=="function"&&require;for(var e=0;e<l.length;e++)u(l[e]);return u}({1:[function(e,t,n){
/*

View file

@ -304,6 +304,38 @@ article.result-images[data-vim-selected]::before {
}
}
.result-map {
img.image {
float: right !important;
height: 100px !important;
width: auto !important;
}
table {
font-size: .9em;
width: auto;
border-collapse: separate;
border-spacing: 0 0.35rem;
th {
font-weight: inherit;
width: 17rem;
vertical-align: top;
text-align: left;
}
td {
vertical-align: top;
text-align: left;
}
}
}
.hidden {
display: none !important;
}
.torrent_result {
border-left: 10px solid @color-result-torrent-border;
padding-left: 3px;

View file

@ -3,20 +3,31 @@
{{- result_header(result, favicons, loop.index) -}}
{{- result_sub_header(result, loop.index) -}}
{%- if (result.latitude and result.longitude) or result.boundingbox -%}
<small> &bull; <a class="text-info btn-collapse collapsed searx_init_map cursor-pointer disabled_if_nojs" data-toggle="collapse" data-target="#result-map-{{ index }}" data-leaflet-target="osm-map-{{ index }}" data-map-lon="{{ result.longitude }}" data-map-lat="{{ result.latitude }}" {% if result.boundingbox %}data-map-boundingbox='{{ result.boundingbox|tojson|safe }}'{% endif %} {% if result.geojson %}data-map-geojson='{{ result.geojson|tojson|safe }}'{% endif %} data-btn-text-collapsed="{{ _('show map') }}" data-btn-text-not-collapsed="{{ _('hide map') }}">{{ icon('globe') }} {{ _('show map') }}</a></small>
{%- endif -%}
{# {% if (result.latitude and result.longitude) %}
<small> &bull; <a class="text-info btn-collapse collapsed cursor-pointer disabled_if_nojs" data-toggle="collapse" data-target="#result-geodata-{{ index }}" data-btn-text-collapsed="{{ _('show geodata') }}" data-btn-text-not-collapsed="{{ _('hide geodata') }}">{{ icon('map-marker') }} {{ _('show geodata') }}</a></small>
{% endif %} #}
<div class="container-fluid">
{%- if result.address -%}
<p class="row result-content result-adress col-xs-12 col-sm-5 col-md-4" itemscope itemtype="http://schema.org/PostalAddress">
<div class="row">
{%- if result.img_src -%}<img src="{{ image_proxify(result.img_src) }}" title="{{ result.title|striptags }}" class="img-thumbnail">{%- endif -%}
{%- if result.type_icon or result.type -%}
<p class="result-content">
{%- if result.type_icon -%}
<img src="{{ image_proxify(result.type_icon) }}" title="{{ result.type|striptags }}" class="img-type">
{%- endif -%}
{{ result.type if result.type is not none else '' }}
</p>
{%- endif -%}
{%- if result.content %}<p class="result-content">{{ result.content|safe }}</p>{% endif -%}
<table class="result-content result-map-details">
{%- if result.address -%}
<tr>
<th scope="row">
{{ result.address_label or _('address') }}
</th>
<td class="result-content result-adress" itemscope itemtype="http://schema.org/PostalAddress">
{%- if result.address.name -%}
<strong itemprop="name">{{ result.address.name }}</strong><br/>
<strong itemprop="name" class="hidden">{{ result.address.name }}</strong>
{%- endif -%}
{%- if result.address.road -%}
<span itemprop="streetAddress">
@ -32,19 +43,24 @@
{%- if result.address.country -%}
<span itemprop="addressCountry">{{ result.address.country }}</span>
{%- endif -%}
</p>
{%- endif %}
</td>
</tr>
{%- endif %}
{%- for info in result.data -%}
<tr><th scope="row">{{ info.label }}</th><td>{{ info.value|safe }}</td></tr>
{%- endfor -%}
{%- for link in result.links -%}
<tr><th scope="row">{{ link.label }}</th><td><a class="text-info cursor-pointer" href="{{ link.url }}">{{ link.url_label|safe }}</a></td></tr>
{%- endfor -%}
</table>
<p class="result-content">
{%- if (result.latitude and result.longitude) or result.boundingbox -%}
<span><a class="text-info btn-collapse collapsed searx_init_map cursor-pointer disabled_if_nojs" data-toggle="collapse" data-target="#result-map-{{ index }}" data-leaflet-target="osm-map-{{ index }}" data-map-lon="{{ result.longitude }}" data-map-lat="{{ result.latitude }}" {% if result.boundingbox %}data-map-boundingbox='{{ result.boundingbox|tojson|safe }}'{% endif %} {% if result.geojson %}data-map-geojson='{{ result.geojson|tojson|safe }}'{% endif %} data-btn-text-collapsed="{{ _('show map') }}" data-btn-text-not-collapsed="{{ _('hide map') }}">{{ icon('globe') }} {{ _('show map') }}</a></span>
{%- endif -%}
</p>
{# {% if (result.latitude and result.longitude) %}
<div class="row collapse col-xs-12 col-sm-5 col-md-4" id="result-geodata-{{ index }}">
<strong>Longitude:</strong> {{ result.longitude }} <br/>
<strong>Latitude:</strong> {{ result.latitude }}
</div>
{% endif %} #}
{%- if result.content %}<p class="row result-content col-xs-12 col-sm-12 col-md-12">{{ result.content|safe }}</p>{% endif -%}
</div>
{%- if (result.latitude and result.longitude) or result.boundingbox -%}
<div class="collapse" id="result-map-{{ index }}">
@ -52,6 +68,7 @@
</div>
{%- endif -%}
</div>
{%- if rtl -%}
{{- result_footer_rtl(result, loop.index) -}}
{% else %}

View file

@ -3,20 +3,15 @@
{{ result_header(result, favicons, image_proxify) -}}
{{- result_sub_header(result) -}}
{%- if (result.latitude and result.longitude) or result.boundingbox -%}
<small> &bull; <a class="btn-collapse collapsed searx_init_map hide_if_nojs" data-target="#result-map-{{ index }}" data-btn-text-collapsed="{{ _('show map') }}" data-btn-text-not-collapsed="{{ _('hide map') }}" data-leaflet-target="osm-map-{{ index }}" data-map-lon="{{ result.longitude }}" data-map-lat="{{ result.latitude }}" {% if result.boundingbox %}data-map-boundingbox='{{ result.boundingbox|tojson|safe }}'{% endif %} {% if result.geojson %}data-map-geojson='{{ result.geojson|tojson|safe }}'{% endif %}>{{ icon( 'globe') }} {{ _('show map') }}</a></small>
{%- endif -%}
{%- if result.content %}<p class="content">{{ result.content|safe }}</p>{% endif -%}
{#-
{% if (result.latitude and result.longitude) %}
<small> &bull; <a class="btn-collapse disabled_if_nojs" data-target="#result-geodata-{{ index }}" data-btn-text-collapsed="{{ _('show geodata') }}" data-btn-text-not-collapsed="{{ _('hide geodata') }}">{{ icon('location') }} {{ _('show geodata') }}</a></small>
{% endif %}
-#}
{%- if result.address -%}
<p itemscope itemtype="http://schema.org/PostalAddress" class="content">
<table>
{%- if result.address -%}
<tr>
<th scope="row">{{ result.address_label or _('address') }}</th>
<td itemscope itemtype="http://schema.org/PostalAddress">
{%- if result.address.name -%}
<strong itemprop="name">{{ result.address.name }}</strong><br/>
<strong itemprop="name" class="hidden">{{ result.address.name }}</strong>
{%- endif -%}
{% if result.address.road -%}
<span itemprop="streetAddress">
@ -32,22 +27,23 @@
{%- if result.address.country -%}
<span itemprop="addressCountry">{{- result.address.country -}}</span>
{%- endif -%}
</p>
</td>
</tr>
{%- endif %}
{%- for info in result.data -%}
<tr><th scope="row">{{ info.label }}</th><td>{{ info.value|safe }}</td></tr>
{%- endfor -%}
{%- for link in result.links -%}
<tr><th scope="row">{{ link.label }}</th><td><a class="text-info cursor-pointer" href="{{ link.url }}">{{ link.url|safe }}</a></td></tr>
{%- endfor -%}
</table>
{%- if (result.latitude and result.longitude) or result.boundingbox -%}
<small> <a class="btn-collapse collapsed searx_init_map hide_if_nojs" data-target="#result-map-{{ index }}" data-btn-text-collapsed="{{ _('show map') }}" data-btn-text-not-collapsed="{{ _('hide map') }}" data-leaflet-target="osm-map-{{ index }}" data-map-lon="{{ result.longitude }}" data-map-lat="{{ result.latitude }}" {% if result.boundingbox %}data-map-boundingbox='{{ result.boundingbox|tojson|safe }}'{% endif %} {% if result.geojson %}data-map-geojson='{{ result.geojson|tojson|safe }}'{% endif %}>{{ icon( 'globe') }} {{ _('show map') }}</a></small>
{%- endif -%}
{%- if result.content %}<p class="content">{{ result.content|safe }}</p>{% endif -%}
{{- result_sub_footer(result, proxify) -}}
{#-
{% if (result.latitude and result.longitude) -%}
<div id="result-geodata-{{ index }}" class="invisible">
<strong>Longitude:</strong> {{ result.longitude }} <br/>
<strong>Latitude:</strong> {{ result.latitude }}
</div>
{%- endif %}
-#}
{% if (result.latitude and result.longitude) or result.boundingbox -%}
<div id="result-map-{{ index }}" class="invisible"><div id="osm-map-{{ index }}" style="height:300px; width:100%; margin: 10px 0;" ></div></div>
{%- endif %}

View file

@ -45,7 +45,7 @@ def searx_useragent():
"""Return the searx User Agent"""
return 'searx/{searx_version} {suffix}'.format(
searx_version=VERSION_STRING,
suffix=settings['outgoing']['useragent_suffix'].strip())
suffix=settings['outgoing']['useragent_suffix']).strip()
def gen_useragent(os=None):

View file

@ -0,0 +1,209 @@
#!/usr/bin/env python
# lint: pylint
# pylint: disable=missing-function-docstring
"""Fetch OSM keys and tags.
To get the i18n names, the script uses `Wikidata Query Service`_ instead of for
example `OSM tags API`_ (sidenote: the actual change log from
map.atownsend.org.uk_ might be useful to normalize OSM tags)
.. _Wikidata Query Service: https://query.wikidata.org/
.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc
.. _map.atownsend.org.uk: https://map.atownsend.org.uk/maps/map/changelog.html
:py:obj:`SPARQL_TAGS_REQUEST` :
Wikidata SPARQL query that returns *type-categories* and *types*. The
returned tag is ``Tag:{category}={type}`` (see :py:func:`get_tags`).
Example:
- https://taginfo.openstreetmap.org/tags/building=house#overview
- https://wiki.openstreetmap.org/wiki/Tag:building%3Dhouse
at the bottom of the infobox (right side), there is a link to wikidata:
https://www.wikidata.org/wiki/Q3947
see property "OpenStreetMap tag or key" (P1282)
- https://wiki.openstreetmap.org/wiki/Tag%3Abuilding%3Dbungalow
https://www.wikidata.org/wiki/Q850107
:py:obj:`SPARQL_KEYS_REQUEST` :
Wikidata SPARQL query that returns *keys*. Example with "payment":
- https://wiki.openstreetmap.org/wiki/Key%3Apayment
at the bottom of infobox (right side), there is a link to wikidata:
https://www.wikidata.org/wiki/Q1148747
link made using the "OpenStreetMap tag or key" property (P1282)
to be confirmed: is there one wiki page per key?
- https://taginfo.openstreetmap.org/keys/payment#values
- https://taginfo.openstreetmap.org/keys/payment:cash#values
``rdfs:label`` gets all the labels without language selection
(as opposed to SERVICE ``wikibase:label``).
"""
import json
import collections
from pathlib import Path
from searx import searx_dir
from searx.network import set_timeout_for_thread
from searx.engines.wikidata import send_wikidata_query
from searx.languages import language_codes
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
# Wikidata SPARQL query used by get_tags(): selects every item that has an
# "OpenStreetMap tag or key" statement (P1282) whose value starts with 'Tag',
# paired with each of the item's rdfs:label values (all languages, no
# language selection).
SPARQL_TAGS_REQUEST = """
SELECT ?tag ?item ?itemLabel WHERE {
?item wdt:P1282 ?tag .
?item rdfs:label ?itemLabel .
FILTER(STRSTARTS(?tag, 'Tag'))
}
GROUP BY ?tag ?item ?itemLabel
ORDER BY ?tag ?item ?itemLabel
"""
# Wikidata SPARQL query used by get_keys(): same shape as the query above,
# but restricted to P1282 values that start with 'Key'.
SPARQL_KEYS_REQUEST = """
SELECT ?key ?item ?itemLabel WHERE {
?item wdt:P1282 ?key .
?item rdfs:label ?itemLabel .
FILTER(STRSTARTS(?key, 'Key'))
}
GROUP BY ?key ?item ?itemLabel
ORDER BY ?key ?item ?itemLabel
"""
# Lower-cased first element of every ``language_codes`` entry; labels whose
# language is not in this list are dropped by get_keys() / get_tags().
LANGUAGES = [entry[0].lower() for entry in language_codes]

# Keys whose labels are hard-coded instead of being taken from Wikidata.
# Each dict key is the tuple of key parts (the OSM key split on ':').
PRESET_KEYS = {
    ('wikidata',): {'en': 'Wikidata'},
    ('wikipedia',): {'en': 'Wikipedia'},
    ('email',): {'en': 'Email'},
    ('facebook',): {'en': 'Facebook'},
    ('fax',): {'en': 'Fax'},
    ('internet_access', 'ssid'): {'en': 'Wi-Fi'},
}

# Key paths that get_keys() keeps even when neither get_key_rank() nor
# VALUE_TO_LINK knows them.
INCLUDED_KEYS = {
    ('addr',),
}
def get_preset_keys():
    """Build the initial key tree from :py:obj:`PRESET_KEYS`.

    Every tuple of key parts is turned into a path of nested dicts; the
    preset labels are stored under the ``'*'`` entry of the last node of
    that path.

    :return: an ``OrderedDict`` holding the nested key tree.
    """
    tree = collections.OrderedDict()
    for path, labels in PRESET_KEYS.items():
        node = tree
        for part in path:
            node = node.setdefault(part, {})
        node.setdefault('*', labels)
    return tree
def get_keys():
    """Fetch the OSM key labels from Wikidata and return them as a tree.

    The tree starts from :py:func:`get_preset_keys`.  Each Wikidata row
    carries a value such as ``Key:a:b``; the parts after the first ``:``
    form the path of nested dicts, and the (lower-cased) labels are stored
    per language under the ``'*'`` entry of the last node.  Only the first
    label seen for a given language is kept.

    :return: the nested key tree (``OrderedDict`` at the top level).
    """
    tree = get_preset_keys()
    bindings = send_wikidata_query(SPARQL_KEYS_REQUEST)['results']['bindings']

    for binding in bindings:
        parts = binding['key']['value'].split(':')[1:]
        head = parts[0]
        has_sub_key = len(parts) > 1

        if head == 'currency' and has_sub_key:
            # special case in openstreetmap.py
            continue

        if head == 'contact' and has_sub_key:
            # label for the key "contact.email" is "Email"
            # whatever the language
            tree.setdefault('contact', {})[parts[1]] = {'*': {'en': parts[1]}}
            continue

        path = tuple(parts)
        if path in PRESET_KEYS:
            # skip presets (already set above)
            continue

        full_key = ':'.join(parts)
        is_displayed = (
            get_key_rank(full_key) is not None
            or full_key in VALUE_TO_LINK
            or path in INCLUDED_KEYS
        )
        if not is_displayed:
            # keep only keys that will be displayed by openstreetmap.py
            continue

        label = binding['itemLabel']['value'].lower()
        lang = binding['itemLabel']['xml:lang']

        # walk (and create on the way) the nested dicts down to the labels
        node = tree
        for part in parts:
            node = node.setdefault(part, {})
        translations = node.setdefault('*', {})
        if lang in LANGUAGES:
            translations.setdefault(lang, label)

    # special cases: the "<key>:covid19" labels are rebuilt from the labels
    # of "<key>" with a ' (COVID19)' suffix
    for parent in ('delivery', 'opening_hours'):
        covid_labels = tree[parent]['covid19']['*']
        covid_labels.clear()
        for lang, label in tree[parent]['*'].items():
            covid_labels[lang] = label + ' (COVID19)'

    return tree
def get_tags():
    """Fetch the OSM tag labels from Wikidata.

    Each Wikidata row carries a value such as ``Tag:{category}={type}``.
    The result maps ``category -> type -> language -> label``; the type is
    the empty string when the value does not split into exactly two parts
    on ``=``.  Labels are lower-cased, restricted to :py:obj:`LANGUAGES`,
    and only the first label seen for a given language is kept.

    :return: an ``OrderedDict`` keyed by tag category.
    """
    tags = collections.OrderedDict()
    bindings = send_wikidata_query(SPARQL_TAGS_REQUEST)['results']['bindings']

    for binding in bindings:
        names = binding['tag']['value'].split(':')[1].split('=')
        category = names[0]
        tag_type = names[1] if len(names) == 2 else ''

        label = binding['itemLabel']['value'].lower()
        lang = binding['itemLabel']['xml:lang']
        if lang not in LANGUAGES:
            continue

        by_type = tags.setdefault(category, {})
        by_type.setdefault(tag_type, {}).setdefault(lang, label)

    return tags
def optimize_data_lang(translations):
    """Remove redundant entries from a ``language -> label`` dict, in place.

    Two passes are made:

    1. a regional variant (a language code containing ``-``, e.g. "zh-hk")
       is removed when its label equals the label of its base language
       ("zh"); same for "en-ca" / "en" etc...
    2. when there is a non-empty "en" label, every other entry that has the
       same label as "en" is removed.

    :param translations: dict mapping language codes to labels; it is
        modified in place.
    :return: ``None``
    """
    # pass 1: regional variants identical to their base language
    redundant_variants = [
        code for code in translations
        if '-' in code
        and translations.get(code.split('-')[0]) == translations.get(code)
    ]
    for code in redundant_variants:
        del translations[code]

    # pass 2: entries identical to the "en" entry
    english = translations.get('en')
    if not english:
        return
    same_as_english = [
        code for code, text in translations.items()
        if code != 'en' and text == english
    ]
    for code in same_as_english:
        del translations[code]
def optimize_tags(data):
    """Apply :py:func:`optimize_data_lang` to every label dict of the tags.

    :param data: mapping ``category -> type -> {language: label}``; the
        innermost dicts are modified in place.
    :return: the same ``data`` object.
    """
    for types in data.values():
        for labels in types.values():
            optimize_data_lang(labels)
    return data
def optimize_keys(data):
    """Apply :py:func:`optimize_data_lang` to every ``'*'`` entry of the tree.

    The tree is walked recursively: the value stored under ``'*'`` is
    treated as a label dict, any other dict value as a sub-tree.

    :param data: nested key tree; label dicts are modified in place.
    :return: the same ``data`` object.
    """
    for name, child in data.items():
        if name == '*':
            optimize_data_lang(child)
            continue
        if isinstance(child, dict):
            optimize_keys(child)
    return data
def get_osm_tags_filename():
    """Return the path of ``data/osm_keys_tags.json`` below ``searx_dir``."""
    return Path(searx_dir, "data", "osm_keys_tags.json")
if __name__ == '__main__':
    # Script entry point: query Wikidata for the OSM keys and tags, strip the
    # redundant translations and write the result to the JSON data file.

    # NOTE(review): the timeout value is presumably in seconds -- confirm
    # against searx.network.set_timeout_for_thread.
    set_timeout_for_thread(60)
    result = {
        'keys': optimize_keys(get_keys()),
        'tags': optimize_tags(get_tags()),
    }
    # The labels are dumped with ensure_ascii=False, so the file contains raw
    # non-ASCII characters.  Write it explicitly as UTF-8 (the data loader
    # reads the JSON files with encoding='utf-8') instead of relying on the
    # locale's default encoding.
    with open(get_osm_tags_filename(), 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4, ensure_ascii=False)