[fix] url_for(..., _external=True) in templates

The `url_for` function in the template context is not the one from Flask; it is
the one from `webapp`.  The `webapp.url_for_theme` is different from its
namesake in Flask and has its quirks when called with the argument `_external=True`.

The `webapp.url_for_theme` can't handle absolute URLs since it strips a leading
'/'; here is the snippet of the old code::

    url = url_for(endpoint, **values)
    if settings['server']['base_url']:
        if url.startswith('/'):
            url = url[1:]
        url = urljoin(settings['server']['base_url'], url)

Another drawback of (Flask's) `_external=True` is that it will not return the
right HTTP scheme when searx (the Flask app) listens on http and is proxied by
an https server.

To get the right scheme, `HTTP_X_SCHEME` is needed by Flask (werkzeug).  Since
this is not provided in every environment (e.g. behind Apache mod_wsgi, or the
HTTP header is not fully set for some other reason), it is recommended to
get *script_name*, *server* and *scheme* from the configured `base_url`.  If
`base_url` is specified, then these values are given preference over any of
Flask's generics.

BTW this patch normalizes `opensearch.xml` to use `url_for` and drops the
need for `host` and `urljoin` in the template's context.

Signed-off-by: Markus Heiser <markus@darmarit.de>
This commit is contained in:
Markus Heiser 2021-04-03 13:56:47 +02:00 committed by Markus Heiser
parent 9292571304
commit 87e4c47621
2 changed files with 32 additions and 24 deletions

View file

@ -3,7 +3,7 @@
<ShortName>{{ instance_name }}</ShortName> <ShortName>{{ instance_name }}</ShortName>
<Description>a privacy-respecting, hackable metasearch engine</Description> <Description>a privacy-respecting, hackable metasearch engine</Description>
<InputEncoding>UTF-8</InputEncoding> <InputEncoding>UTF-8</InputEncoding>
<Image>{{ urljoin(host, url_for('static', filename='img/favicon.png')) }}</Image> <Image>{{ url_for('static', filename='img/favicon.png', _external=True) }}</Image>
<LongName>searx metasearch</LongName> <LongName>searx metasearch</LongName>
{% if opensearch_method == 'get' %} {% if opensearch_method == 'get' %}
<Url rel="results" type="text/html" method="get" template="{{ url_for('search', _external=True) }}?q={searchTerms}"/> <Url rel="results" type="text/html" method="get" template="{{ url_for('search', _external=True) }}?q={searchTerms}"/>
@ -13,7 +13,7 @@
</Url> </Url>
{% endif %} {% endif %}
{% if autocomplete %} {% if autocomplete %}
<Url rel="suggestions" type="application/x-suggestions+json" template="{{ host }}autocompleter?q={searchTerms}"/> <Url rel="suggestions" type="application/x-suggestions+json" template="{{ url_for('autocompleter', _external=True) }}?q={searchTerms}"/>
{% endif %} {% endif %}
<Url type="application/opensearchdescription+xml" <Url type="application/opensearchdescription+xml"

View file

@ -40,7 +40,7 @@ from datetime import datetime, timedelta
from time import time from time import time
from html import escape from html import escape
from io import StringIO from io import StringIO
from urllib.parse import urlencode, urljoin, urlparse from urllib.parse import urlencode, urlparse
from pygments import highlight from pygments import highlight
from pygments.lexers import get_lexer_by_name from pygments.lexers import get_lexer_by_name
@ -269,14 +269,7 @@ def extract_domain(url):
def get_base_url(): def get_base_url():
if settings['server']['base_url']: return url_for('index', _external=True)
hostname = settings['server']['base_url']
else:
scheme = 'http'
if request.is_secure:
scheme = 'https'
hostname = url_for('index', _external=True, _scheme=scheme)
return hostname
def get_current_theme_name(override=None): def get_current_theme_name(override=None):
@ -309,10 +302,6 @@ def url_for_theme(endpoint, override_theme=None, **values):
if filename_with_theme in static_files: if filename_with_theme in static_files:
values['filename'] = filename_with_theme values['filename'] = filename_with_theme
url = url_for(endpoint, **values) url = url_for(endpoint, **values)
if settings['server']['base_url']:
if url.startswith('/'):
url = url[1:]
url = urljoin(settings['server']['base_url'], url)
return url return url
@ -812,7 +801,7 @@ def preferences():
# save preferences # save preferences
if request.method == 'POST': if request.method == 'POST':
resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index')))) resp = make_response(url_for('index', _external=True))
try: try:
request.preferences.parse_form(request.form) request.preferences.parse_form(request.form)
except ValidationException: except ValidationException:
@ -1002,11 +991,11 @@ def opensearch():
if request.headers.get('User-Agent', '').lower().find('webkit') >= 0: if request.headers.get('User-Agent', '').lower().find('webkit') >= 0:
method = 'get' method = 'get'
ret = render('opensearch.xml', ret = render(
opensearch_method=method, 'opensearch.xml',
host=get_base_url(), opensearch_method=method,
urljoin=urljoin, override_theme='__common__'
override_theme='__common__') )
resp = Response(response=ret, resp = Response(response=ret,
status=200, status=200,
@ -1027,7 +1016,7 @@ def favicon():
@app.route('/clear_cookies') @app.route('/clear_cookies')
def clear_cookies(): def clear_cookies():
resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index')))) resp = make_response(redirect(url_for('index', _external=True)))
for cookie_name in request.cookies: for cookie_name in request.cookies:
resp.delete_cookie(cookie_name) resp.delete_cookie(cookie_name)
return resp return resp
@ -1128,19 +1117,38 @@ class ReverseProxyPathFix:
''' '''
def __init__(self, app): def __init__(self, app):
self.app = app self.app = app
self.script_name = None
self.scheme = None
self.server = None
if settings['server']['base_url']:
# If base_url is specified, then these values are given
# preference over any of Flask's generics.
base_url = urlparse(settings['server']['base_url'])
self.script_name = base_url.path
self.scheme = base_url.scheme
self.server = base_url.netloc
def __call__(self, environ, start_response): def __call__(self, environ, start_response):
script_name = environ.get('HTTP_X_SCRIPT_NAME', '')
script_name = self.script_name or environ.get('HTTP_X_SCRIPT_NAME', '')
if script_name: if script_name:
environ['SCRIPT_NAME'] = script_name environ['SCRIPT_NAME'] = script_name
path_info = environ['PATH_INFO'] path_info = environ['PATH_INFO']
if path_info.startswith(script_name): if path_info.startswith(script_name):
environ['PATH_INFO'] = path_info[len(script_name):] environ['PATH_INFO'] = path_info[len(script_name):]
scheme = environ.get('HTTP_X_SCHEME', '') scheme = self.scheme or environ.get('HTTP_X_SCHEME', '')
if scheme: if scheme:
environ['wsgi.url_scheme'] = scheme environ['wsgi.url_scheme'] = scheme
server = self.server or environ.get('HTTP_X_FORWARDED_HOST', '')
if server:
environ['HTTP_HOST'] = server
return self.app(environ, start_response) return self.app(environ, start_response)