mirror of
https://github.com/searxng/searxng.git
synced 2024-12-01 15:11:03 +00:00
[mod] limiter: minor improvements
- Requests without the HTTP header 'Connection' or with a missing 'User-Agent' header will be blocked by the limiter.
- re_bot is related to 'User-Agent' and has been renamed to block_user_agent.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
7592d85982
commit
66810ce711
1 changed file with 10 additions and 5 deletions
|
@ -26,13 +26,17 @@ default_on = False
|
||||||
preference_section = 'service'
|
preference_section = 'service'
|
||||||
logger = logger.getChild('limiter')
|
logger = logger.getChild('limiter')
|
||||||
|
|
||||||
re_bot = re.compile(
|
block_user_agent = re.compile(
|
||||||
r'('
|
r'('
|
||||||
+ r'[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
|
+ r'unknown'
|
||||||
|
+ r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
|
||||||
+ r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
|
+ r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
|
||||||
+ r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
|
+ r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
|
||||||
+ r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
|
+ r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
|
||||||
+ r'|ZmEu|BLEXBot|bitlybot'
|
+ r'|ZmEu|BLEXBot|bitlybot'
|
||||||
|
# when you block requests from Farside instances, your instance will
|
||||||
|
# disappear from https://farside.link/
|
||||||
|
# + r'|Farside'
|
||||||
+ r')'
|
+ r')'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -40,14 +44,15 @@ re_bot = re.compile(
|
||||||
def is_accepted_request() -> bool:
|
def is_accepted_request() -> bool:
|
||||||
# pylint: disable=too-many-return-statements
|
# pylint: disable=too-many-return-statements
|
||||||
redis_client = redisdb.client()
|
redis_client = redisdb.client()
|
||||||
user_agent = request.headers.get('User-Agent', '')
|
user_agent = request.headers.get('User-Agent', 'unknown')
|
||||||
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
|
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
|
||||||
|
|
||||||
if re_bot.match(user_agent):
|
if block_user_agent.match(user_agent):
|
||||||
logger.debug("BLOCK %s: detected bot", x_forwarded_for)
|
logger.debug("BLOCK %s: %s --> detected User-Agent: %s" % (x_forwarded_for, request.path, user_agent))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if request.path == '/search':
|
if request.path == '/search':
|
||||||
|
|
||||||
c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
|
c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
|
||||||
c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
|
c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
|
||||||
if c_burst > 15 or c_10min > 150:
|
if c_burst > 15 or c_10min > 150:
|
||||||
|
|
Loading…
Reference in a new issue