[refactor] engines: outsource cache implementation from duckduckgo engine source

This commit is contained in:
Bnyro 2025-02-12 16:20:43 +01:00
parent d456f3dd9f
commit 7d40733afe
No known key found for this signature in database
2 changed files with 84 additions and 28 deletions

69
searx/engine_cache.py Normal file
View file

@ -0,0 +1,69 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This module provides an easy-to-use interface for engine implementations to store and read key-value pairs.
For instance, this can be used to remember programmatically extracted API keys or other kinds of secret tokens.
"""
from typing import Optional
from searx import redisdb, redislib
class EngineCache:
    """Interface of a simple key-value cache for engine implementations.

    The methods of this base class do nothing; subclasses override them with
    an actual storage backend.
    """

    def store(self, key: str, value: str):
        """Store ``value`` under ``key`` (no-op in the base class)."""
        return None

    def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key`` (always ``None`` in the base class)."""
        return None
class MemoryEngineCache(EngineCache):
    """In-process cache backed by a plain ``dict`` with a bounded number of entries.

    Entries are kept in insertion order (dictionaries are ordered since Python
    3.7): the oldest entry sits at the front of the dictionary and is the first
    one to be evicted.
    """

    def __init__(self, max_size: int = 100):
        # maps cache key -> cached value, oldest entry first
        self._storage = {}
        self.max_size = max_size

    def store(self, key: str, value: str):
        """Store the provided key-value pair in the cache.

        Storing an already known key replaces its value and makes it the newest
        entry.  When the cache is full, the oldest entries are evicted to make
        room, so the cache does not grow beyond ``max_size`` entries (a single
        entry is always accepted, even if ``max_size`` is smaller than one).
        """
        # Drop a previous value first: the key then moves to the end of the
        # dictionary and a plain update does not evict an unrelated entry.
        self._storage.pop(key, None)
        # ``dict.popitem()`` would remove the *newest* entry (LIFO order), so
        # delete the first key of the dictionary instead.
        while self._storage and len(self._storage) >= self.max_size:
            del self._storage[next(iter(self._storage))]
        self._storage[key] = value

    def get(self, key: str) -> Optional[str]:
        """Return the value cached under ``key`` or ``None`` if there is none."""
        return self._storage.get(key)
class RedisEngineCache(EngineCache):
    """Cache that keeps its entries in the database behind ``redisdb.client()``.

    :param key_prefix: string prepended to the hashed key of every entry
    :param expiration_seconds: passed as ``ex`` argument when an entry is set
    """

    def __init__(self, key_prefix: str, expiration_seconds: int = 600):
        self.key_prefix = key_prefix
        self.expiration_seconds = expiration_seconds

    def _get_cache_key(self, key: str):
        """Return the database key for ``key``: the prefix followed by the
        ``redislib.secret_hash`` of ``key``."""
        return self.key_prefix + redislib.secret_hash(key)

    def store(self, key: str, value: str):
        """Store ``value`` under ``key`` with the configured expiration."""
        c = redisdb.client()
        c.set(self._get_cache_key(key), value, ex=self.expiration_seconds)

    def get(self, key: str) -> Optional[str]:
        """Return the value stored under ``key`` as ``str`` or ``None`` if
        there is no such entry."""
        c = redisdb.client()
        value = c.get(self._get_cache_key(key))
        if value is None:
            return None
        # The client may hand back raw bytes; callers of the EngineCache
        # interface expect a string, exactly like the memory backend returns.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value
def get_or_create_cache(database_prefix: str) -> EngineCache:
    """Create a cache for an engine.

    Returns a :py:obj:`RedisEngineCache` using ``database_prefix`` as key prefix
    when ``redisdb.client()`` yields a client, otherwise a
    :py:obj:`MemoryEngineCache` with its default size.
    """
    if not redisdb.client():
        return MemoryEngineCache()
    return RedisEngineCache(database_prefix)

View file

@ -15,7 +15,6 @@ import lxml.html
from searx import (
locales,
redislib,
external_bang,
)
from searx.utils import (
@ -24,7 +23,7 @@ from searx.utils import (
extract_text,
)
from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx import redisdb
from searx.engine_cache import get_or_create_cache, EngineCache
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineCaptchaException
from searx.result_types import EngineResults
@ -60,25 +59,23 @@ url = "https://html.duckduckgo.com/html"
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
__CACHE = []
__CACHE: EngineCache = get_or_create_cache('SearXNG_ddg_web_vqd')
def _cache_key(query: str, region: str):
    """Build the cache key of a query/region pair: a fixed prefix followed by
    the secret hash of ``<query>//<region>``."""
    hashed = redislib.secret_hash(f"{query}//{region}")
    return 'SearXNG_ddg_web_vqd' + hashed
def init(_):
# Initialisation hook of the engine.  The single argument is ignored.
# Rebinds the module-level ``__CACHE`` to a cache obtained from
# ``get_or_create_cache`` with the prefix 'SearXNG_ddg_web_vqd'.
global __CACHE # pylint: disable=global-statement
__CACHE = get_or_create_cache('SearXNG_ddg_web_vqd')
# TODO: why is the __CACHE always None if initialized as None,
# even though it should be changed here and this method is
# confirmed to be called? ...
def cache_vqd(query: str, region: str, value: str):
"""Caches a ``vqd`` value from a query."""
# NOTE(review): this span comes from a diff view without +/- markers; the
# statements from ``c = redisdb.client()`` down to ``__CACHE.append(...)``
# look like the removed pre-refactor implementation and the final two
# statements like its replacement -- confirm against the repository.
#
# Variant 1: with a redisdb client the value is set under
# ``_cache_key(query, region)`` with an expiry of 600 seconds; without one it
# is appended to the in-memory list ``__CACHE``, dropping the first element
# once the list holds more than 100 entries.
c = redisdb.client()
if c:
logger.debug("VALKEY cache vqd value: %s (%s)", value, region)
c.set(_cache_key(query, region), value, ex=600)
else:
logger.debug("MEM cache vqd value: %s (%s)", value, region)
if len(__CACHE) > 100: # cache vqd from last 100 queries
__CACHE.pop(0)
__CACHE.append((_cache_key(query, region), value))
# Variant 2: the value is handed to ``__CACHE.store`` under the key
# ``<query>//<region>``.
__CACHE.store(f"{query}//{region}", value)
logger.debug("cached vqd value: %s (%s)", value, region)
def get_vqd(query: str, region: str, force_request: bool = False):
@ -113,20 +110,10 @@ def get_vqd(query: str, region: str, force_request: bool = False):
seems the block list is a sliding window: to get my IP rid from the bot list
I had to cool down my IP for 1h (send no requests from that IP to DDG).
"""
key = _cache_key(query, region)
c = redisdb.client()
if c:
value = c.get(key)
if value or value == b'':
value = value.decode('utf-8') # type: ignore
logger.debug("re-use CACHED vqd value: %s", value)
return value
for k, value in __CACHE:
if k == key:
logger.debug("MEM re-use CACHED vqd value: %s", value)
return value
value = __CACHE.get(f"{query}//{region}")
if value is not None:
logger.debug("re-use CACHED vqd value: %s", value)
return value
if force_request:
resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}')