mirror of
https://github.com/searxng/searxng.git
synced 2024-11-15 22:51:04 +00:00
[fix] limiter: replace real_ip by IPv4/v6 network
Closes: https://github.com/searxng/searxng/issues/2477 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
38431d2e14
commit
281e36f4b7
12 changed files with 208 additions and 106 deletions
|
@ -24,3 +24,4 @@ X-Forwarded-For
|
||||||
|
|
||||||
from ._helpers import dump_request
|
from ._helpers import dump_request
|
||||||
from ._helpers import get_real_ip
|
from ._helpers import get_real_ip
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
|
@ -1,11 +1,19 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# lint: pylint
|
# lint: pylint
|
||||||
# pylint: disable=missing-module-docstring, invalid-name
|
# pylint: disable=missing-module-docstring, invalid-name
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Optional
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
IPv6Address,
|
||||||
|
ip_address,
|
||||||
|
ip_network,
|
||||||
|
)
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
|
||||||
logger = logger.getChild('botdetection')
|
logger = logger.getChild('botdetection')
|
||||||
|
@ -13,7 +21,7 @@ logger = logger.getChild('botdetection')
|
||||||
|
|
||||||
def dump_request(request: flask.Request):
|
def dump_request(request: flask.Request):
|
||||||
return (
|
return (
|
||||||
"%s: %s" % (get_real_ip(request), request.path)
|
request.path
|
||||||
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
|
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
|
||||||
+ " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
|
+ " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
|
||||||
+ " || form: %s" % request.form
|
+ " || form: %s" % request.form
|
||||||
|
@ -27,12 +35,30 @@ def dump_request(request: flask.Request):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def too_many_requests(request: flask.Request, log_msg: str) -> Optional[werkzeug.Response]:
|
def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
|
||||||
log_prefix = 'BLOCK %s: ' % get_real_ip(request)
|
"""Returns an HTTP 429 response object and writes a DEBUG message to the
|
||||||
logger.debug(log_prefix + log_msg)
|
'botdetection' logger. This function is used in part by the filter methods
|
||||||
|
to return the default ``Too Many Requests`` response.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
logger.debug("BLOCK %s: %s", network.compressed, log_msg)
|
||||||
return flask.make_response(('Too Many Requests', 429))
|
return flask.make_response(('Too Many Requests', 429))
|
||||||
|
|
||||||
|
|
||||||
|
def get_network(real_ip: str, cfg: config.Config) -> IPv4Network | IPv6Network:
|
||||||
|
"""Returns the (client) network that the real_ip is part of."""
|
||||||
|
|
||||||
|
ip = ip_address(real_ip)
|
||||||
|
if isinstance(ip, IPv6Address):
|
||||||
|
prefix = cfg['real_ip.ipv6_prefix']
|
||||||
|
else:
|
||||||
|
prefix = cfg['real_ip.ipv4_prefix']
|
||||||
|
network = ip_network(f"{real_ip}/{prefix}", strict=False)
|
||||||
|
# logger.debug("get_network(): %s", network.compressed)
|
||||||
|
return network
|
||||||
|
|
||||||
|
|
||||||
def get_real_ip(request: flask.Request) -> str:
|
def get_real_ip(request: flask.Request) -> str:
|
||||||
"""Returns real IP of the request. Since not all proxies set all the HTTP
|
"""Returns real IP of the request. Since not all proxies set all the HTTP
|
||||||
headers and incoming headers can be faked it may happen that the IP cannot
|
headers and incoming headers can be faked it may happen that the IP cannot
|
||||||
|
@ -63,7 +89,9 @@ def get_real_ip(request: flask.Request) -> str:
|
||||||
forwarded_for = request.headers.get("X-Forwarded-For")
|
forwarded_for = request.headers.get("X-Forwarded-For")
|
||||||
real_ip = request.headers.get('X-Real-IP')
|
real_ip = request.headers.get('X-Real-IP')
|
||||||
remote_addr = request.remote_addr
|
remote_addr = request.remote_addr
|
||||||
logger.debug("X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr)
|
# logger.debug(
|
||||||
|
# "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
|
||||||
|
# )
|
||||||
|
|
||||||
if not forwarded_for:
|
if not forwarded_for:
|
||||||
logger.error("X-Forwarded-For header is not set!")
|
logger.error("X-Forwarded-For header is not set!")
|
||||||
|
@ -89,5 +117,5 @@ def get_real_ip(request: flask.Request) -> str:
|
||||||
logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
|
logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
|
||||||
|
|
||||||
request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0'
|
request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0'
|
||||||
logger.debug("get_real_ip() -> %s", request_ip)
|
# logger.debug("get_real_ip() -> %s", request_ip)
|
||||||
return request_ip
|
return request_ip
|
||||||
|
|
|
@ -15,7 +15,12 @@ Accept_ header ..
|
||||||
"""
|
"""
|
||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
|
@ -23,7 +28,12 @@ from searx.tools import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
if 'text/html' not in request.accept_mimetypes:
|
if 'text/html' not in request.accept_mimetypes:
|
||||||
return too_many_requests(request, "HTTP header Accept did not contain text/html")
|
return too_many_requests(network, "HTTP header Accept did not contain text/html")
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -16,7 +16,12 @@ bot if the Accept-Encoding_ header ..
|
||||||
"""
|
"""
|
||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
|
@ -24,8 +29,13 @@ from searx.tools import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
|
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
|
||||||
if not ('gzip' in accept_list or 'deflate' in accept_list):
|
if not ('gzip' in accept_list or 'deflate' in accept_list):
|
||||||
return too_many_requests(request, "HTTP header Accept-Encoding did not contain gzip nor deflate")
|
return too_many_requests(network, "HTTP header Accept-Encoding did not contain gzip nor deflate")
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -12,8 +12,12 @@ if the Accept-Language_ header is unset.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
|
@ -21,7 +25,11 @@ from searx.tools import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
if request.headers.get('Accept-Language', '').strip() == '':
|
if request.headers.get('Accept-Language', '').strip() == '':
|
||||||
return too_many_requests(request, "missing HTTP header Accept-Language")
|
return too_many_requests(network, "missing HTTP header Accept-Language")
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -13,7 +13,12 @@ the Connection_ header is set to ``close``.
|
||||||
"""
|
"""
|
||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
|
@ -21,7 +26,12 @@ from searx.tools import config
|
||||||
from ._helpers import too_many_requests
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
if request.headers.get('Connection', '').strip() == 'close':
|
if request.headers.get('Connection', '').strip() == 'close':
|
||||||
return too_many_requests(request, "HTTP header 'Connection=close")
|
return too_many_requests(network, "HTTP header 'Connection=close")
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -14,8 +14,13 @@ the User-Agent_ header is unset or matches the regular expression
|
||||||
"""
|
"""
|
||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional
|
from __future__ import annotations
|
||||||
import re
|
import re
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
|
|
||||||
|
@ -50,8 +55,13 @@ def regexp_user_agent():
|
||||||
return _regexp
|
return _regexp
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
user_agent = request.headers.get('User-Agent', 'unknown')
|
user_agent = request.headers.get('User-Agent', 'unknown')
|
||||||
if regexp_user_agent().match(user_agent):
|
if regexp_user_agent().match(user_agent):
|
||||||
return too_many_requests(request, f"bot detected, HTTP header User-Agent: {user_agent}")
|
return too_many_requests(network, f"bot detected, HTTP header User-Agent: {user_agent}")
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -38,8 +38,12 @@ droped.
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
import flask
|
import flask
|
||||||
import werkzeug
|
import werkzeug
|
||||||
from searx.tools import config
|
from searx.tools import config
|
||||||
|
@ -49,7 +53,7 @@ from searx import logger
|
||||||
from searx.redislib import incr_sliding_window, drop_counter
|
from searx.redislib import incr_sliding_window, drop_counter
|
||||||
|
|
||||||
from . import link_token
|
from . import link_token
|
||||||
from ._helpers import too_many_requests, get_real_ip
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('botdetection.ip_limit')
|
logger = logger.getChild('botdetection.ip_limit')
|
||||||
|
@ -85,49 +89,58 @@ SUSPICIOUS_IP_MAX = 3
|
||||||
"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
|
"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
# pylint: disable=too-many-return-statements
|
# pylint: disable=too-many-return-statements
|
||||||
redis_client = redisdb.client()
|
redis_client = redisdb.client()
|
||||||
|
|
||||||
client_ip = get_real_ip(request)
|
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
|
||||||
|
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
|
||||||
|
return None
|
||||||
|
|
||||||
if request.args.get('format', 'html') != 'html':
|
if request.args.get('format', 'html') != 'html':
|
||||||
c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW)
|
c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
|
||||||
if c > API_MAX:
|
if c > API_MAX:
|
||||||
return too_many_requests(request, "too many request in API_WINDOW")
|
return too_many_requests(network, "too many request in API_WINDOW")
|
||||||
|
|
||||||
if cfg['botdetection.ip_limit.link_token']:
|
if cfg['botdetection.ip_limit.link_token']:
|
||||||
|
|
||||||
suspicious = link_token.is_suspicious(request, True)
|
suspicious = link_token.is_suspicious(network, request, True)
|
||||||
|
|
||||||
if not suspicious:
|
if not suspicious:
|
||||||
# this IP is no longer suspicious: release ip again / delete the counter of this IP
|
# this IP is no longer suspicious: release ip again / delete the counter of this IP
|
||||||
drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip)
|
drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# this IP is suspicious: count requests from this IP
|
# this IP is suspicious: count requests from this IP
|
||||||
c = incr_sliding_window(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip, SUSPICIOUS_IP_WINDOW)
|
c = incr_sliding_window(
|
||||||
|
redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
|
||||||
|
)
|
||||||
if c > SUSPICIOUS_IP_MAX:
|
if c > SUSPICIOUS_IP_MAX:
|
||||||
logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", client_ip)
|
logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
|
||||||
return flask.redirect(flask.url_for('index'), code=302)
|
return flask.redirect(flask.url_for('index'), code=302)
|
||||||
|
|
||||||
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW)
|
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
|
||||||
if c > BURST_MAX_SUSPICIOUS:
|
if c > BURST_MAX_SUSPICIOUS:
|
||||||
return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
|
return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
|
||||||
|
|
||||||
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW)
|
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
|
||||||
if c > LONG_MAX_SUSPICIOUS:
|
if c > LONG_MAX_SUSPICIOUS:
|
||||||
return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
|
return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# vanilla limiter without extensions counts BURST_MAX and LONG_MAX
|
# vanilla limiter without extensions counts BURST_MAX and LONG_MAX
|
||||||
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW)
|
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
|
||||||
if c > BURST_MAX:
|
if c > BURST_MAX:
|
||||||
return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX)")
|
return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)")
|
||||||
|
|
||||||
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW)
|
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
|
||||||
if c > LONG_MAX:
|
if c > LONG_MAX:
|
||||||
return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX)")
|
return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)")
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -37,14 +37,16 @@ and set the redis-url connection. Check the value, it depends on your redis DB
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from __future__ import annotations
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import flask
|
import flask
|
||||||
import pytomlpp as toml
|
import werkzeug
|
||||||
|
|
||||||
from searx import logger
|
|
||||||
from searx.tools import config
|
from searx.tools import config
|
||||||
from searx.botdetection import (
|
from searx import logger
|
||||||
|
|
||||||
|
from . import (
|
||||||
http_accept,
|
http_accept,
|
||||||
http_accept_encoding,
|
http_accept_encoding,
|
||||||
http_accept_language,
|
http_accept_language,
|
||||||
|
@ -53,6 +55,16 @@ from searx.botdetection import (
|
||||||
ip_limit,
|
ip_limit,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from ._helpers import (
|
||||||
|
get_network,
|
||||||
|
get_real_ip,
|
||||||
|
dump_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logger.getChild('botdetection.limiter')
|
||||||
|
|
||||||
|
CFG: config.Config = None # type: ignore
|
||||||
|
|
||||||
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
||||||
"""Base configuration (schema) of the botdetection."""
|
"""Base configuration (schema) of the botdetection."""
|
||||||
|
|
||||||
|
@ -63,40 +75,21 @@ CFG_DEPRECATED = {
|
||||||
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
||||||
}
|
}
|
||||||
|
|
||||||
CFG = None
|
|
||||||
|
|
||||||
|
|
||||||
def get_cfg() -> config.Config:
|
def get_cfg() -> config.Config:
|
||||||
|
global CFG # pylint: disable=global-statement
|
||||||
if CFG is None:
|
if CFG is None:
|
||||||
init_cfg(logger)
|
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
|
||||||
return CFG
|
return CFG
|
||||||
|
|
||||||
|
|
||||||
def init_cfg(log):
|
def filter_request(request: flask.Request) -> werkzeug.Response | None:
|
||||||
global CFG # pylint: disable=global-statement
|
|
||||||
CFG = config.Config(cfg_schema=toml.load(LIMITER_CFG_SCHEMA), deprecated=CFG_DEPRECATED)
|
|
||||||
|
|
||||||
if not LIMITER_CFG.exists():
|
cfg = get_cfg()
|
||||||
log.warning("missing config file: %s", LIMITER_CFG)
|
real_ip = get_real_ip(request)
|
||||||
return
|
network = get_network(real_ip, cfg)
|
||||||
|
if network.is_link_local:
|
||||||
log.info("load config file: %s", LIMITER_CFG)
|
return None
|
||||||
try:
|
|
||||||
upd_cfg = toml.load(LIMITER_CFG)
|
|
||||||
except toml.DecodeError as exc:
|
|
||||||
msg = str(exc).replace('\t', '').replace('\n', ' ')
|
|
||||||
log.error("%s: %s", LIMITER_CFG, msg)
|
|
||||||
raise
|
|
||||||
|
|
||||||
is_valid, issue_list = CFG.validate(upd_cfg)
|
|
||||||
for msg in issue_list:
|
|
||||||
log.error(str(msg))
|
|
||||||
if not is_valid:
|
|
||||||
raise TypeError(f"schema of {LIMITER_CFG} is invalid, can't cutomize limiter configuration from!")
|
|
||||||
CFG.update(upd_cfg)
|
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
|
||||||
|
|
||||||
if request.path == '/healthz':
|
if request.path == '/healthz':
|
||||||
return None
|
return None
|
||||||
|
@ -104,7 +97,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||||
for func in [
|
for func in [
|
||||||
http_user_agent,
|
http_user_agent,
|
||||||
]:
|
]:
|
||||||
val = func.filter_request(request, CFG)
|
val = func.filter_request(network, request, cfg)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
@ -118,8 +111,8 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||||
http_user_agent,
|
http_user_agent,
|
||||||
ip_limit,
|
ip_limit,
|
||||||
]:
|
]:
|
||||||
val = func.filter_request(request, CFG)
|
val = func.filter_request(network, request, cfg)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
return val
|
return val
|
||||||
|
logger.debug(f"OK {network}: %s", dump_request(flask.request))
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -1,8 +1,22 @@
|
||||||
[botdetection.ip_limit]
|
|
||||||
|
|
||||||
link_token = false
|
|
||||||
|
|
||||||
[real_ip]
|
[real_ip]
|
||||||
|
|
||||||
# Number of values to trust for X-Forwarded-For.
|
# Number of values to trust for X-Forwarded-For.
|
||||||
|
|
||||||
x_for = 1
|
x_for = 1
|
||||||
|
|
||||||
|
# The prefix defines the number of leading bits in an address that are compared
|
||||||
|
# to determine whether or not an address is part of a (client) network.
|
||||||
|
|
||||||
|
ipv4_prefix = 32
|
||||||
|
ipv6_prefix = 48
|
||||||
|
|
||||||
|
[botdetection.ip_limit]
|
||||||
|
|
||||||
|
# To get unlimited access in a local network, by default link-local addresses
|
||||||
|
# (networks) are not monitored by the ip_limit
|
||||||
|
filter_link_local = false
|
||||||
|
|
||||||
|
# activate link_token method in the ip_limit method
|
||||||
|
link_token = false
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ Method ``link_token``
|
||||||
|
|
||||||
The ``link_token`` method evaluates a request as :py:obj:`suspicious
|
The ``link_token`` method evaluates a request as :py:obj:`suspicious
|
||||||
<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
|
<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
|
||||||
client. By adding a random component (the token) in the URL a bot can not send
|
client. By adding a random component (the token) in the URL, a bot can not send
|
||||||
a ping by request a static URL.
|
a ping by request a static URL.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
@ -35,6 +35,11 @@ And in the HTML template from flask a stylesheet link is needed (the value of
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
import string
|
import string
|
||||||
import random
|
import random
|
||||||
|
@ -43,7 +48,11 @@ import flask
|
||||||
from searx import logger
|
from searx import logger
|
||||||
from searx import redisdb
|
from searx import redisdb
|
||||||
from searx.redislib import secret_hash
|
from searx.redislib import secret_hash
|
||||||
from ._helpers import get_real_ip
|
|
||||||
|
from ._helpers import (
|
||||||
|
get_network,
|
||||||
|
get_real_ip,
|
||||||
|
)
|
||||||
|
|
||||||
TOKEN_LIVE_TIME = 600
|
TOKEN_LIVE_TIME = 600
|
||||||
"""Livetime (sec) of limiter's CSS token."""
|
"""Livetime (sec) of limiter's CSS token."""
|
||||||
|
@ -60,29 +69,26 @@ TOKEN_KEY = 'SearXNG_limiter.token'
|
||||||
logger = logger.getChild('botdetection.link_token')
|
logger = logger.getChild('botdetection.link_token')
|
||||||
|
|
||||||
|
|
||||||
def is_suspicious(request: flask.Request, renew: bool = False):
|
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
|
||||||
"""Checks if there is a valid ping for this request, if not this request is
|
"""Checks whether a valid ping exists for this (client) network, if not
|
||||||
rated as *suspicious*. If a valid ping exists and argument ``renew`` is
|
this request is rated as *suspicious*. If a valid ping exists and argument
|
||||||
``True`` the expire time of this ping is reset to :py:obj:`PING_LIVE_TIME`.
|
``renew`` is ``True`` the expire time of this ping is reset to
|
||||||
|
:py:obj:`PING_LIVE_TIME`.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
redis_client = redisdb.client()
|
redis_client = redisdb.client()
|
||||||
if not redis_client:
|
if not redis_client:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
ping_key = get_ping_key(request)
|
ping_key = get_ping_key(network, request)
|
||||||
if not redis_client.get(ping_key):
|
if not redis_client.get(ping_key):
|
||||||
logger.warning(
|
logger.warning("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
|
||||||
"missing ping (IP: %s) / request: %s",
|
|
||||||
get_real_ip(request),
|
|
||||||
ping_key,
|
|
||||||
)
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if renew:
|
if renew:
|
||||||
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||||
|
|
||||||
logger.debug("found ping for client request: %s", ping_key)
|
logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@ -92,27 +98,31 @@ def ping(request: flask.Request, token: str):
|
||||||
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
|
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
from . import limiter # pylint: disable=import-outside-toplevel, cyclic-import
|
||||||
|
|
||||||
redis_client = redisdb.client()
|
redis_client = redisdb.client()
|
||||||
if not redis_client:
|
if not redis_client:
|
||||||
return
|
return
|
||||||
if not token_is_valid(token):
|
if not token_is_valid(token):
|
||||||
return
|
return
|
||||||
ping_key = get_ping_key(request)
|
|
||||||
logger.debug("store ping for: %s", ping_key)
|
cfg = limiter.get_cfg()
|
||||||
|
real_ip = get_real_ip(request)
|
||||||
|
network = get_network(real_ip, cfg)
|
||||||
|
|
||||||
|
ping_key = get_ping_key(network, request)
|
||||||
|
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
|
||||||
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||||
|
|
||||||
|
|
||||||
def get_ping_key(request: flask.Request):
|
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
|
||||||
"""Generates a hashed key that fits (more or less) to a client (request).
|
"""Generates a hashed key that fits (more or less) to a *WEB-browser
|
||||||
At least X-Forwarded-For_ is needed to be able to assign the request to an
|
session* in a network."""
|
||||||
IP.
|
|
||||||
|
|
||||||
"""
|
|
||||||
return (
|
return (
|
||||||
PING_KEY
|
PING_KEY
|
||||||
+ "["
|
+ "["
|
||||||
+ secret_hash(
|
+ secret_hash(
|
||||||
get_real_ip(request) + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
|
network.compressed + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
|
||||||
)
|
)
|
||||||
+ "]"
|
+ "]"
|
||||||
)
|
)
|
||||||
|
|
|
@ -8,7 +8,6 @@ import flask
|
||||||
from searx import redisdb
|
from searx import redisdb
|
||||||
from searx.plugins import logger
|
from searx.plugins import logger
|
||||||
from searx.botdetection import limiter
|
from searx.botdetection import limiter
|
||||||
from searx.botdetection import dump_request
|
|
||||||
|
|
||||||
name = "Request limiter"
|
name = "Request limiter"
|
||||||
description = "Limit the number of request"
|
description = "Limit the number of request"
|
||||||
|
@ -20,10 +19,7 @@ logger = logger.getChild('limiter')
|
||||||
|
|
||||||
def pre_request():
|
def pre_request():
|
||||||
"""See :ref:`flask.Flask.before_request`"""
|
"""See :ref:`flask.Flask.before_request`"""
|
||||||
ret_val = limiter.filter_request(flask.request)
|
return limiter.filter_request(flask.request)
|
||||||
if ret_val is None:
|
|
||||||
logger.debug("OK: %s" % dump_request(flask.request))
|
|
||||||
return ret_val
|
|
||||||
|
|
||||||
|
|
||||||
def init(app: flask.Flask, settings) -> bool:
|
def init(app: flask.Flask, settings) -> bool:
|
||||||
|
@ -32,6 +28,5 @@ def init(app: flask.Flask, settings) -> bool:
|
||||||
if not redisdb.client():
|
if not redisdb.client():
|
||||||
logger.error("The limiter requires Redis")
|
logger.error("The limiter requires Redis")
|
||||||
return False
|
return False
|
||||||
limiter.init_cfg(logger)
|
|
||||||
app.before_request(pre_request)
|
app.before_request(pre_request)
|
||||||
return True
|
return True
|
||||||
|
|
Loading…
Reference in a new issue