mirror of
https://github.com/searxng/searxng.git
synced 2025-04-16 00:14:11 +00:00
Merge 7d40733afe
into b146b745a7
This commit is contained in:
commit
a0c5889dd3
2 changed files with 84 additions and 28 deletions
69
searx/engine_cache.py
Normal file
69
searx/engine_cache.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""This provides an easy to use interface for engine implementations to store and read key-value pairs.
|
||||
|
||||
For instance, this can be used to remember programmatically extracted API keys or other kinds of secret tokens.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from searx import redisdb, redislib
|
||||
|
||||
|
||||
class EngineCache:
    """Minimal key-value cache interface for engine implementations.

    Both methods are no-ops in this base class; concrete caches override them.
    """

    def store(self, key: str, value: str):
        """Remember ``value`` under ``key``.  The base implementation does nothing."""
        return None

    def get(self, key: str) -> Optional[str]:
        """Look up ``key``.  The base implementation always returns ``None``."""
        return None
|
||||
|
||||
|
||||
class MemoryEngineCache(EngineCache):
    """In-process :py:class:`EngineCache` backed by a plain dictionary.

    The cache keeps at most ``max_size`` entries (but always at least the most
    recently stored one).  When the limit is reached, the entry that was stored
    longest ago is evicted first.
    """

    def __init__(self, max_size: int = 100):
        # insertion-ordered mapping: oldest entry first, newest entry last
        self.__STORAGE = {}
        self.max_size = max_size

    def store(self, key, value):
        """Store the provided key-value pair in the cache.

        Storing an already known key replaces its value and makes it the most
        recent entry; no other entry is evicted in that case.
        """
        # Drop a previous value first: the key then moves to the end of the
        # (insertion-ordered) dictionary and does not count against the limit.
        self.__STORAGE.pop(key, None)

        # Make room by dropping the oldest entries.  ``dict.popitem()`` is not
        # usable here, it removes the *newest* entry (LIFO order).
        while self.__STORAGE and len(self.__STORAGE) >= self.max_size:
            del self.__STORAGE[next(iter(self.__STORAGE))]

        self.__STORAGE[key] = value

    def get(self, key):
        """Return the value stored for ``key`` or ``None`` if there is none."""
        return self.__STORAGE.get(key)
|
||||
|
||||
|
||||
class RedisEngineCache(EngineCache):
    """:py:class:`EngineCache` that keeps its entries in the database reached
    through ``redisdb.client()``.

    Database keys are built from ``key_prefix`` and a hash of the cache key
    (see :py:meth:`_get_cache_key`); entries are written with an expiration of
    ``expiration_seconds``.
    """

    def __init__(self, key_prefix: str, expiration_seconds: int = 600):
        self.key_prefix = key_prefix
        self.expiration_seconds = expiration_seconds

    def _get_cache_key(self, key):
        """Return the database key for ``key``: the prefix followed by
        ``redislib.secret_hash(key)``."""
        return self.key_prefix + redislib.secret_hash(key)

    def store(self, key, value):
        """Store the provided key-value pair; ``expiration_seconds`` is passed
        to the client as the entry's expiration (``ex``)."""
        client = redisdb.client()
        client.set(self._get_cache_key(key), value, ex=self.expiration_seconds)

    def get(self, key):
        """Return the cached value for ``key`` as ``str``, or ``None`` if the
        database has no entry for it."""
        client = redisdb.client()
        value = client.get(self._get_cache_key(key))
        if value is None:
            return None
        # The client hands back raw bytes; decode them so that callers get a
        # ``str`` here just like from the in-memory cache.
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        return value
|
||||
|
||||
|
||||
def get_or_create_cache(database_prefix: str) -> EngineCache:
    """Build the cache object an engine should use.

    When ``redisdb.client()`` yields a client, a :py:class:`RedisEngineCache`
    using ``database_prefix`` as key prefix is returned.  Otherwise the cache
    is a :py:class:`MemoryEngineCache` with default settings.
    """
    if not redisdb.client():
        # no database client available: keep the values in process memory
        return MemoryEngineCache()

    return RedisEngineCache(database_prefix)
|
|
@ -15,7 +15,6 @@ import lxml.html
|
|||
|
||||
from searx import (
|
||||
locales,
|
||||
redislib,
|
||||
external_bang,
|
||||
)
|
||||
from searx.utils import (
|
||||
|
@ -25,7 +24,7 @@ from searx.utils import (
|
|||
extract_text,
|
||||
)
|
||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||
from searx import redisdb
|
||||
from searx.engine_cache import get_or_create_cache, EngineCache
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
from searx.result_types import EngineResults
|
||||
|
@ -61,25 +60,23 @@ url = "https://html.duckduckgo.com/html"
|
|||
|
||||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
||||
__CACHE = []
|
||||
|
||||
__CACHE: EngineCache = get_or_create_cache('SearXNG_ddg_web_vqd')
|
||||
|
||||
|
||||
def _cache_key(query: str, region: str):
    """Return the cache key of a ``vqd`` value: the fixed prefix followed by
    ``redislib.secret_hash`` of ``"<query>//<region>"``."""
    digest = redislib.secret_hash(f"{query}//{region}")
    return 'SearXNG_ddg_web_vqd' + digest
|
||||
def init(_):
    """Rebind the module-level ``__CACHE`` to the cache returned by
    ``get_or_create_cache('SearXNG_ddg_web_vqd')``.

    The single positional argument is ignored.
    """
    # TODO: find out why ``__CACHE`` stays ``None`` when the module-level
    # variable is initialized with ``None``, although it is reassigned here
    # and this function is confirmed to be called.
    global __CACHE  # pylint: disable=global-statement
    __CACHE = get_or_create_cache('SearXNG_ddg_web_vqd')
|
||||
|
||||
|
||||
def cache_vqd(query: str, region: str, value: str):
|
||||
"""Caches a ``vqd`` value from a query."""
|
||||
c = redisdb.client()
|
||||
if c:
|
||||
logger.debug("VALKEY cache vqd value: %s (%s)", value, region)
|
||||
c.set(_cache_key(query, region), value, ex=600)
|
||||
|
||||
else:
|
||||
logger.debug("MEM cache vqd value: %s (%s)", value, region)
|
||||
if len(__CACHE) > 100: # cache vqd from last 100 queries
|
||||
__CACHE.pop(0)
|
||||
__CACHE.append((_cache_key(query, region), value))
|
||||
__CACHE.store(f"{query}//{region}", value)
|
||||
logger.debug("cached vqd value: %s (%s)", value, region)
|
||||
|
||||
|
||||
def get_vqd(query: str, region: str, force_request: bool = False):
|
||||
|
@ -114,20 +111,10 @@ def get_vqd(query: str, region: str, force_request: bool = False):
|
|||
seems the block list is a sliding window: to get my IP rid from the bot list
|
||||
I had to cool down my IP for 1h (send no requests from that IP to DDG).
|
||||
"""
|
||||
key = _cache_key(query, region)
|
||||
|
||||
c = redisdb.client()
|
||||
if c:
|
||||
value = c.get(key)
|
||||
if value or value == b'':
|
||||
value = value.decode('utf-8') # type: ignore
|
||||
logger.debug("re-use CACHED vqd value: %s", value)
|
||||
return value
|
||||
|
||||
for k, value in __CACHE:
|
||||
if k == key:
|
||||
logger.debug("MEM re-use CACHED vqd value: %s", value)
|
||||
return value
|
||||
value = __CACHE.get(f"{query}//{region}")
|
||||
if value is not None:
|
||||
logger.debug("re-use CACHED vqd value: %s", value)
|
||||
return value
|
||||
|
||||
if force_request:
|
||||
resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
|
||||
|
|
Loading…
Reference in a new issue