mirror of
https://github.com/searxng/searxng.git
synced 2024-11-29 14:11:02 +00:00
[mod] limiter: add config file /etc/searxng/limiter.toml
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
1ec325adcc
commit
66fdec0eb9
12 changed files with 459 additions and 12 deletions
|
@ -16,3 +16,4 @@ redis==4.5.5
|
||||||
markdown-it-py==2.2.0
|
markdown-it-py==2.2.0
|
||||||
typing_extensions==4.6.2
|
typing_extensions==4.6.2
|
||||||
fasttext-predict==0.9.2.1
|
fasttext-predict==0.9.2.1
|
||||||
|
pytomlpp==1.0.13
|
||||||
|
|
|
@ -13,12 +13,15 @@ Accept_ header ..
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
import flask
|
import flask
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
|
||||||
|
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||||
if 'text/html' not in request.accept_mimetypes:
|
if 'text/html' not in request.accept_mimetypes:
|
||||||
return 429, "bot detected, HTTP header Accept did not contain text/html"
|
return 429, "bot detected, HTTP header Accept did not contain text/html"
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -14,12 +14,15 @@ bot if the Accept-Encoding_ header ..
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
import flask
|
import flask
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
|
||||||
|
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||||
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
|
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
|
||||||
if not ('gzip' in accept_list or 'deflate' in accept_list):
|
if not ('gzip' in accept_list or 'deflate' in accept_list):
|
||||||
return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
|
return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
|
||||||
|
|
|
@ -11,13 +11,15 @@ if the Accept-Language_ header is unset.
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
import flask
|
import flask
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
|
||||||
|
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||||
if request.headers.get('Accept-Language', '').strip() == '':
|
if request.headers.get('Accept-Language', '').strip() == '':
|
||||||
return 429, "bot detected, missing HTTP header Accept-Language"
|
return 429, "bot detected, missing HTTP header Accept-Language"
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -11,13 +11,15 @@ the Connection_ header is set to ``close``.
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
import flask
|
import flask
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
|
||||||
|
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||||
if request.headers.get('Connection', '').strip() == 'close':
|
if request.headers.get('Connection', '').strip() == 'close':
|
||||||
return 429, "bot detected, HTTP header 'Connection=close'"
|
return 429, "bot detected, HTTP header 'Connection=close'"
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -12,11 +12,15 @@ the User-Agent_ header is unset or matches the regular expression
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
import re
|
import re
|
||||||
import flask
|
import flask
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
|
||||||
|
|
||||||
USER_AGENT = (
|
USER_AGENT = (
|
||||||
r'('
|
r'('
|
||||||
+ r'unknown'
|
+ r'unknown'
|
||||||
|
@ -44,7 +48,7 @@ def regexp_user_agent():
|
||||||
return _regexp
|
return _regexp
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||||
user_agent = request.headers.get('User-Agent', 'unknown')
|
user_agent = request.headers.get('User-Agent', 'unknown')
|
||||||
if regexp_user_agent().match(user_agent):
|
if regexp_user_agent().match(user_agent):
|
||||||
return (
|
return (
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
"""
|
""".. _botdetection.ip_limit:
|
||||||
|
|
||||||
Method ``ip_limit``
|
Method ``ip_limit``
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
|
@ -22,6 +23,8 @@ The :py:obj:`link_token` method is used to investigate whether a request is
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
import flask
|
import flask
|
||||||
|
from searx.tools import config
|
||||||
|
|
||||||
|
|
||||||
from searx import redisdb
|
from searx import redisdb
|
||||||
from searx import logger
|
from searx import logger
|
||||||
|
@ -56,7 +59,7 @@ API_MAX = 4
|
||||||
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
|
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
|
||||||
redis_client = redisdb.client()
|
redis_client = redisdb.client()
|
||||||
|
|
||||||
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
|
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
|
||||||
|
@ -68,7 +71,9 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||||
if c > API_MAX:
|
if c > API_MAX:
|
||||||
return 429, "BLOCK %s: API limit exceeded"
|
return 429, "BLOCK %s: API limit exceeded"
|
||||||
|
|
||||||
suspicious = link_token.is_suspicious(request)
|
suspicious = False
|
||||||
|
if cfg['botdetection.ip_limit.link_token']:
|
||||||
|
suspicious = link_token.is_suspicious(request)
|
||||||
|
|
||||||
if suspicious:
|
if suspicious:
|
||||||
c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
|
c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
|
||||||
|
|
|
@ -38,8 +38,11 @@ and set the redis-url connection. Check the value, it depends on your redis DB
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
from pathlib import Path
|
||||||
import flask
|
import flask
|
||||||
|
import pytomlpp as toml
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
from searx.botdetection import (
|
from searx.botdetection import (
|
||||||
http_accept,
|
http_accept,
|
||||||
http_accept_encoding,
|
http_accept_encoding,
|
||||||
|
@ -49,6 +52,42 @@ from searx.botdetection import (
|
||||||
ip_limit,
|
ip_limit,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
||||||
|
"""Base configuration (schema) of the botdetection."""
|
||||||
|
|
||||||
|
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
|
||||||
|
"""Local limiter configuration."""
|
||||||
|
|
||||||
|
CFG_DEPRECATED = {
|
||||||
|
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
||||||
|
}
|
||||||
|
|
||||||
|
CFG = config.Config({}, {})
|
||||||
|
|
||||||
|
|
||||||
|
def init_cfg(log):
    """Initialize the module-level limiter configuration :py:obj:`CFG`.

    :py:obj:`CFG` is first (re-)built from the schema file
    :py:obj:`LIMITER_CFG_SCHEMA`, which also provides the default values.  If
    the local file :py:obj:`LIMITER_CFG` exists, it is loaded, validated against
    the schema and merged into :py:obj:`CFG`.

    :param log: logger-like object; its ``warning()`` and ``error()`` methods
        are used to report progress and problems.
    :raises toml.DecodeError: if :py:obj:`LIMITER_CFG` is not valid TOML (the
        error is logged and re-raised).
    :raises TypeError: if :py:obj:`LIMITER_CFG` does not match the schema.
    """
    global CFG  # pylint: disable=global-statement
    CFG = config.Config(cfg_schema=toml.load(LIMITER_CFG_SCHEMA), deprecated=CFG_DEPRECATED)

    if not LIMITER_CFG.exists():
        # without a local config file the schema defaults stay in effect
        log.warning("missing config file: %s", LIMITER_CFG)
        return

    log.warning("load config file: %s", LIMITER_CFG)
    try:
        upd_cfg = toml.load(LIMITER_CFG)
    except toml.DecodeError as exc:
        # flatten the multi-line parser message into a single log line
        msg = str(exc).replace('\t', '').replace('\n', ' ')
        log.error("%s: %s", LIMITER_CFG, msg)
        raise

    is_valid, issue_list = CFG.validate(upd_cfg)
    # issues are logged even when the config is valid (e.g. deprecation warnings)
    for msg in issue_list:
        log.error(str(msg))
    if not is_valid:
        raise TypeError(f"schema of {LIMITER_CFG} is invalid, can't customize the limiter configuration from it!")
    CFG.update(upd_cfg)
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||||
|
|
||||||
|
@ -58,7 +97,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||||
for func in [
|
for func in [
|
||||||
http_user_agent,
|
http_user_agent,
|
||||||
]:
|
]:
|
||||||
val = func.filter_request(request)
|
val = func.filter_request(request, CFG)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
@ -72,7 +111,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
|
||||||
http_user_agent,
|
http_user_agent,
|
||||||
ip_limit,
|
ip_limit,
|
||||||
]:
|
]:
|
||||||
val = func.filter_request(request)
|
val = func.filter_request(request, CFG)
|
||||||
if val is not None:
|
if val is not None:
|
||||||
return val
|
return val
|
||||||
|
|
||||||
|
|
3
searx/botdetection/limiter.toml
Normal file
3
searx/botdetection/limiter.toml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
[botdetection.ip_limit]
|
||||||
|
|
||||||
|
link_token = true
|
|
@ -38,5 +38,6 @@ def init(app: flask.Flask, settings) -> bool:
|
||||||
if not redisdb.client():
|
if not redisdb.client():
|
||||||
logger.error("The limiter requires Redis")
|
logger.error("The limiter requires Redis")
|
||||||
return False
|
return False
|
||||||
|
limiter.init_cfg(logger)
|
||||||
app.before_request(pre_request)
|
app.before_request(pre_request)
|
||||||
return True
|
return True
|
||||||
|
|
8
searx/tools/__init__.py
Normal file
8
searx/tools/__init__.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _tools src:
|
||||||
|
|
||||||
|
A collection of *utilities* used by SearXNG, but without SearXNG specific
|
||||||
|
peculiarities.
|
||||||
|
|
||||||
|
"""
|
376
searx/tools/config.py
Normal file
376
searx/tools/config.py
Normal file
|
@ -0,0 +1,376 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Configuration class :py:class:`Config` with deep-update, schema validation
|
||||||
|
and deprecated names.
|
||||||
|
|
||||||
|
The :py:class:`Config` class implements a configuration that is based on
|
||||||
|
structured dictionaries. The configuration schema is defined in a dictionary
|
||||||
|
structure and the configuration data is given in a dictionary structure.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import typing
|
||||||
|
import logging
|
||||||
|
import pathlib
|
||||||
|
import pytomlpp as toml
|
||||||
|
|
||||||
|
__all__ = ['Config', 'UNSET', 'SchemaIssue']
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class FALSE:
    """Falsy marker object that carries a descriptive message.

    Instances always evaluate to ``False`` in a boolean context; ``str()`` and
    ``repr()`` both return the message given to the constructor.
    """

    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        return self.msg

    def __repr__(self):
        return self.msg

    def __bool__(self):
        return False


# module-wide singleton used to mark "no value given / no value found"
UNSET = FALSE('<UNSET>')
|
||||||
|
|
||||||
|
|
||||||
|
class SchemaIssue(ValueError):
    """A single finding of a schema validation.

    Instances can be collected in a list of issues or raised as an exception.
    The attribute ``level`` holds the severity (``'warn'`` or ``'invalid'``),
    the message itself is stored as the first exception argument.
    """

    def __init__(self, level: typing.Literal['warn', 'invalid'], msg: str):
        super().__init__(msg)
        self.level = level

    def __str__(self):
        # prefix the plain message with the severity of the issue
        return f"[cfg schema {self.level}] {self.args[0]}"
|
||||||
|
|
||||||
|
|
||||||
|
class Config:
    """Configuration store built from a schema dictionary.

    The schema doubles as the set of default values: a new instance starts with
    a deep copy of the schema and :py:obj:`Config.update` layers further values
    on top.  Values are addressed by dotted names, e.g.
    ``cfg['botdetection.ip_limit.link_token']``.
    """

    UNSET = UNSET

    @classmethod
    def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
        """Build a configuration from a schema TOML file and a config TOML file.

        :param schema_file: TOML file with the schema (and default values)
        :param cfg_file: TOML file with the customized values; if the file
            does not exist a warning is logged and the defaults are returned
        :param deprecated: maps deprecated configuration names to a message
        :raises toml.DecodeError: if ``cfg_file`` is not valid TOML (the error
            is logged and re-raised)
        :raises TypeError: if ``cfg_file`` does not match the schema
        """

        # init schema

        log.debug("load schema file: %s", schema_file)
        cfg = cls(cfg_schema=toml.load(schema_file), deprecated=deprecated)
        if not cfg_file.exists():
            log.warning("missing config file: %s", cfg_file)
            return cfg

        # load configuration

        log.debug("load config file: %s", cfg_file)
        try:
            upd_cfg = toml.load(cfg_file)
        except toml.DecodeError as exc:
            # flatten the multi-line parser message into a single log line
            msg = str(exc).replace('\t', '').replace('\n', ' ')
            log.error("%s: %s", cfg_file, msg)
            raise

        is_valid, issue_list = cfg.validate(upd_cfg)
        # issues are logged even when the config is valid (e.g. deprecations)
        for msg in issue_list:
            log.error(str(msg))
        if not is_valid:
            raise TypeError(f"schema of {cfg_file} is invalid!")
        cfg.update(upd_cfg)
        return cfg

    def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
        """Constructor of class Config.

        :param cfg_schema: Schema of the configuration
        :param deprecated: dictionary that maps deprecated configuration names
            to a message

        These values are needed for validation, see :py:obj:`validate`.  The
        configuration itself starts as a deep copy of ``cfg_schema``.
        """
        self.cfg_schema = cfg_schema
        self.deprecated = deprecated
        self.cfg = copy.deepcopy(cfg_schema)

    def __getitem__(self, key: str):
        """Shortcut for ``self.get(key)``; raises :py:obj:`KeyError` if unset."""
        return self.get(key)

    def validate(self, cfg: dict):
        """Validation of dictionary ``cfg`` against ``self.cfg_schema``.

        Validation is done by the module-level function :py:obj:`validate`,
        whose ``(is_valid, issue_list)`` tuple is returned unchanged.
        """
        return validate(self.cfg_schema, cfg, self.deprecated)

    def update(self, upd_cfg: dict):
        """Update this configuration by ``upd_cfg`` (see :py:obj:`dict_deepupdate`)."""
        dict_deepupdate(self.cfg, upd_cfg)

    def default(self, name: str):
        """Returns default value of field ``name`` in ``self.cfg_schema``."""
        return value(name, self.cfg_schema)

    def get(self, name: str, default=UNSET, replace=True):
        """Returns the value to which ``name`` points in the configuration.

        If there is no such ``name`` in the config and the ``default`` is
        :py:obj:`UNSET`, a :py:obj:`KeyError` is raised.  If a ``default`` is
        given, it is returned for any missing ``name``, including names whose
        parent table does not exist.

        If ``replace`` is true and the resulting value is a string, it is
        %-formatted with this configuration as the mapping, so that
        ``%(other.name)s`` placeholders are resolved via :py:obj:`Config.get`.
        """
        try:
            parent = self._get_parent_dict(name)
        except KeyError:
            if default is UNSET:
                raise
            # a missing parent table is just another kind of "not set"
            parent = {}

        val = parent.get(name.split('.')[-1], UNSET)
        if val is UNSET:
            if default is UNSET:
                raise KeyError(name)
            val = default

        if replace and isinstance(val, str):
            val = val % self
        return val

    def set(self, name: str, val):
        """Set the value to which ``name`` points in the configuration.

        If the parent table of ``name`` does not exist in the config (or is
        not a dictionary), a :py:obj:`KeyError` is raised.
        """
        parent = self._get_parent_dict(name)
        parent[name.split('.')[-1]] = val

    def _get_parent_dict(self, name):
        """Return the dictionary that holds the last part of dotted ``name``.

        For a name without dots this is ``self.cfg`` itself.  Raises
        :py:obj:`KeyError` if the parent is missing or is not a dictionary.
        """
        parent_name = '.'.join(name.split('.')[:-1])
        if parent_name:
            parent = value(parent_name, self.cfg)
        else:
            parent = self.cfg
        if (parent is UNSET) or (not isinstance(parent, dict)):
            raise KeyError(parent_name)
        return parent

    def path(self, name: str, default=UNSET):
        """Get a :py:class:`pathlib.Path` object from a config string."""

        val = self.get(name, default)
        # NOTE(review): get() has already raised KeyError for a missing name
        # without default, so this guard looks purely defensive.  A given
        # default goes through the same Path(str(...)) conversion as a
        # configured value -- confirm that this is intended.
        if val is UNSET:
            if default is UNSET:
                raise KeyError(name)
            return default
        return pathlib.Path(str(val))

    def pyobj(self, name, default=UNSET):
        """Get python object referred by full qualified name (FQN) in the config
        string."""

        fqn = self.get(name, default)
        # NOTE(review): same as in path(): a given default is treated like a
        # configured FQN string and imported -- confirm that this is intended.
        if fqn is UNSET:
            if default is UNSET:
                raise KeyError(name)
            return default
        # split "package.module.attr" into module path and attribute name
        (modulename, name) = str(fqn).rsplit('.', 1)
        m = __import__(modulename, {}, {}, [name], 0)
        return getattr(m, name)
|
||||||
|
|
||||||
|
|
||||||
|
# working with dictionaries
|
||||||
|
|
||||||
|
|
||||||
|
def value(name: str, data_dict: dict):
    """Returns the value to which ``name`` points in the ``data_dict``.

    ``name`` is a dotted path, each part is used as a key in the next nested
    dictionary.  If the path can't be followed to its end (a key is missing or
    an intermediate value is not a dictionary) :py:obj:`UNSET` is returned.

    .. code:: python

        >>> data_dict = {
                "foo": {"bar": 1 },
                "bar": {"foo": 2 },
                "foobar": [1, 2, 3],
            }
        >>> value('foobar', data_dict)
        [1, 2, 3]
        >>> value('foo.bar', data_dict)
        1
        >>> value('foo.bar.xxx', data_dict)
        <UNSET>

    """
    node = data_dict
    for part in name.split('.'):
        # descending below a non-dict value means the name does not exist
        if not isinstance(node, dict) or part not in node:
            return UNSET
        node = node[part]
    return node
|
||||||
|
|
||||||
|
|
||||||
|
def validate(
    schema_dict: typing.Dict, data_dict: typing.Dict, deprecated: typing.Dict[str, str]
) -> typing.Tuple[bool, list]:
    """Deep validation of the dictionary ``data_dict`` against the dictionary
    ``schema_dict``.

    ``deprecated`` maps deprecated configuration names to a message::

        deprecated = {
            "foo.bar" : "config 'foo.bar' is deprecated, use 'bar.foo'",
            "..."     : "..."
        }

    Returns a tuple ``(is_valid, issue_list)``:

    ``is_valid``:
      A bool value indicating whether ``data_dict`` is valid.

    ``issue_list``:
      A list of :py:obj:`SchemaIssue` objects collected by the validation;
      their string form looks like::

          [cfg schema warn] data_dict 'foo.bar': deprecated - <message>
          [cfg schema invalid] data_dict 'foo.bar': key unknown in schema_dict
          [cfg schema invalid] data_dict: type mismatch 'foo.bar': expected ..., is: ...

    If ``schema_dict`` or ``data_dict`` is not a dictionary type a
    :py:obj:`SchemaIssue` is raised.
    """
    if not isinstance(schema_dict, dict):
        raise SchemaIssue('invalid', "schema_dict is not a dict type")

    # the validation starts at the root, the dotted path is still empty
    names = []
    if not isinstance(data_dict, dict):
        raise SchemaIssue('invalid', f"data_dict issue{'.'.join(names)} is not a dict type")

    return _validate(names, [], schema_dict, data_dict, deprecated)
|
||||||
|
|
||||||
|
|
||||||
|
def _validate(
    names: typing.List,
    issue_list: typing.List,
    schema_dict: typing.Dict,
    data_dict: typing.Dict,
    deprecated: typing.Dict[str, str],
) -> typing.Tuple[bool, typing.List]:
    """Recursive worker of :py:obj:`validate`.

    ``names`` is the list of keys leading from the root to ``data_dict``,
    ``schema_dict`` is always the root schema (fields are looked up by their
    full dotted name).  Findings are appended to ``issue_list``, which is also
    returned together with the validity flag.
    """
    all_valid = True

    for key, data_value in data_dict.items():
        path = names + [key]
        name = '.'.join(path)

        deprecated_msg = deprecated.get(name)
        if deprecated_msg:
            issue_list.append(SchemaIssue('warn', f"data_dict '{name}': deprecated - {deprecated_msg}"))

        schema_value = value(name, schema_dict)

        if schema_value is UNSET:
            # unknown keys are tolerated only if they are known as deprecated
            if not deprecated_msg:
                issue_list.append(SchemaIssue('invalid', f"data_dict '{name}': key unknown in schema_dict"))
                all_valid = False
            continue

        if type(schema_value) != type(data_value):  # pylint: disable=unidiomatic-typecheck
            issue_list.append(
                SchemaIssue(
                    'invalid',
                    (f"data_dict: type mismatch '{name}':" f" expected {type(schema_value)}, is: {type(data_value)}"),
                )
            )
            all_valid = False
            continue

        if isinstance(data_value, dict):
            # descend into the nested table, issues go into the same list
            sub_valid, _ = _validate(path, issue_list, schema_dict, data_value, deprecated)
            all_valid = all_valid and sub_valid

    return all_valid, issue_list
|
||||||
|
|
||||||
|
|
||||||
|
def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
    """Deep-update of dictionary in ``base_dict`` by dictionary in ``upd_dict``.

    For each ``upd_key`` & ``upd_val`` pair in ``upd_dict``:

    0. If ``upd_val`` is a dict, list or set and ``base_dict[upd_key]`` exists
       but is not of the same kind, raise a :py:obj:`TypeError` naming the
       dotted path of the offending key.

    1. If ``base_dict[upd_key]`` is a dict: recursively deep-update it by
       ``upd_val``.

    2. If ``base_dict[upd_key]`` does not exist: set ``base_dict[upd_key]``
       from a deep copy of ``upd_val``.

    3. If ``upd_val`` is a list, extend list in ``base_dict[upd_key]`` by a
       deep copy of the list in ``upd_val``.

    4. If ``upd_val`` is a set, update set in ``base_dict[upd_key]`` by set in
       ``upd_val``.

    5. Any other type of ``upd_val`` replaces (or adds) ``base_dict[upd_key]``
       by a shallow copy of ``upd_val``.

    :param base_dict: dictionary that is updated in place
    :param upd_dict: dictionary with the new values, it is not modified
    :param names: keys leading from the root to ``base_dict``; only used to
        build the path shown in error messages
    :raises TypeError: if an argument is not a dictionary or on a type
        mismatch as described in (0)
    """
    if not isinstance(base_dict, dict):
        raise TypeError("argument 'base_dict' is not a dictionary type")
    if not isinstance(upd_dict, dict):
        raise TypeError("argument 'upd_dict' is not a dictionary type")

    if names is None:
        names = []

    for upd_key, upd_val in upd_dict.items():
        # full path of the current key, used for recursion and error messages
        path = names + [upd_key]

        if isinstance(upd_val, dict):
            kind, kind_name = dict, 'dict'
        elif isinstance(upd_val, list):
            kind, kind_name = list, 'list'
        elif isinstance(upd_val, set):
            kind, kind_name = set, 'set'
        else:
            # for any other type of upd_val replace or add base_dict[upd_key]
            # by a copy of upd_val
            base_dict[upd_key] = copy.copy(upd_val)
            continue

        if upd_key not in base_dict:
            # nothing to merge with: take over an independent copy
            base_dict[upd_key] = copy.deepcopy(upd_val)
            continue

        base_val = base_dict[upd_key]
        if not isinstance(base_val, kind):
            dotted = '.'.join(str(part) for part in path)
            raise TypeError(f"type mismatch {dotted}: is not a {kind_name} type in base_dict")

        if kind is dict:
            dict_deepupdate(base_val, upd_val, path)
        elif kind is list:
            # copy the items, base_dict must not share mutable items with upd_dict
            base_val.extend(copy.deepcopy(upd_val))
        else:
            base_val.update(upd_val)
|
Loading…
Reference in a new issue