mirror of
https://github.com/searxng/searxng.git
synced 2024-11-22 02:41:00 +00:00
[mod] Revision of the favicon solution
All favicon implementations have been documented and moved to the Python package: searx.favicons There is a configuration (based on Pydantic) for the favicons and all its components: searx.favicons.config A solution for caching favicons has been implemented: searx.favicons.cache If the favicon is already in the cache, the returned URL is a data URL [1] (something like `data:image/png;base64,...`). By generating a data URL from the FaviconCache, additional HTTP round trips via the favicon_proxy are saved: favicons.proxy.favicon_url The favicon proxy service now sets an HTTP header "Cache-Control: max-age=...": favicons.proxy.favicon_proxy The resolvers now also provide the mime type (data, mime): searx.favicons.resolvers [1] https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
c49a2707c1
commit
7ab577a1fb
21 changed files with 1039 additions and 181 deletions
|
@ -43,7 +43,8 @@
|
|||
- ``wikipedia``
|
||||
|
||||
``favicon_resolver``:
|
||||
Favicon resolver, leave blank to turn off the feature by default.
|
||||
:ref:`Favicon resolver <favicons>`, leave blank to turn off the feature by
|
||||
default.
|
||||
|
||||
- ``allesedv``
|
||||
- ``duckduckgo``
|
||||
|
|
|
@ -127,6 +127,7 @@ extensions = [
|
|||
"sphinx_tabs.tabs", # https://github.com/djungelorm/sphinx-tabs
|
||||
'myst_parser', # https://www.sphinx-doc.org/en/master/usage/markdown.html
|
||||
'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page
|
||||
'sphinxcontrib.autodoc_pydantic', # https://github.com/mansenfranzen/autodoc_pydantic
|
||||
]
|
||||
|
||||
autodoc_default_options = {
|
||||
|
|
48
docs/src/searx.favicons.rst
Normal file
48
docs/src/searx.favicons.rst
Normal file
|
@ -0,0 +1,48 @@
|
|||
.. _favicons:
|
||||
|
||||
========
|
||||
Favicons
|
||||
========
|
||||
|
||||
.. contents::
|
||||
:depth: 2
|
||||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
.. automodule:: searx.favicons
|
||||
:members:
|
||||
|
||||
.. _favicons.config:
|
||||
|
||||
Favicons Config
|
||||
===============
|
||||
|
||||
.. automodule:: searx.favicons.config
|
||||
:members:
|
||||
|
||||
.. _favicons.proxy:
|
||||
|
||||
Favicons Proxy
|
||||
==============
|
||||
|
||||
.. automodule:: searx.favicons.proxy
|
||||
:members:
|
||||
|
||||
.. _favicons.resolver:
|
||||
|
||||
Favicons Resolver
|
||||
=================
|
||||
|
||||
.. automodule:: searx.favicons.resolvers
|
||||
:members:
|
||||
|
||||
.. _favicons.cache:
|
||||
|
||||
Favicons Cache
|
||||
==============
|
||||
|
||||
.. automodule:: searx.favicons.cache
|
||||
:members:
|
||||
|
||||
|
||||
|
|
@ -21,3 +21,4 @@ wlc==1.15
|
|||
coloredlogs==15.0.1
|
||||
docutils>=0.21.2
|
||||
parameterized==0.9.0
|
||||
autodoc_pydantic==2.2.0
|
||||
|
|
|
@ -16,3 +16,6 @@ redis==5.0.8
|
|||
markdown-it-py==3.0.0
|
||||
fasttext-predict==0.9.2.2
|
||||
pytomlpp==1.0.13; python_version < '3.11'
|
||||
pydantic==2.8.2
|
||||
eval_type_backport; python_version < '3.9'
|
||||
typer-slim==0.12.5
|
||||
|
|
18
searx/compat.py
Normal file
18
searx/compat.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Compatibility with older versions"""
|
||||
|
||||
# pylint: disable=unused-import
|
||||
|
||||
__all__ = [
|
||||
"tomllib",
|
||||
]
|
||||
|
||||
import sys
|
||||
|
||||
# TOML (lib) compatibility
|
||||
# ------------------------
|
||||
|
||||
if sys.version_info >= (3, 11):
|
||||
import tomllib
|
||||
else:
|
||||
import tomli as tomllib
|
|
@ -1,105 +0,0 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""This module implements functions needed for the favicon resolver.
|
||||
|
||||
"""
|
||||
# pylint: disable=use-dict-literal
|
||||
|
||||
from httpx import HTTPError
|
||||
|
||||
from searx import settings
|
||||
|
||||
from searx.network import get as http_get, post as http_post
|
||||
from searx.exceptions import SearxEngineResponseException
|
||||
|
||||
|
||||
def update_kwargs(**kwargs):
    """Return ``kwargs`` completed with the defaults for favicon requests.

    ``timeout`` falls back to ``settings['outgoing']['request_timeout']`` when
    the caller did not pass one, and ``raise_for_httperror`` is always forced
    to ``False``.

    The keyword arguments are received as a *copy* of the caller's dict, so the
    completed dict has to be returned; mutating it in place is invisible to the
    caller.
    """
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    kwargs['raise_for_httperror'] = False
    return kwargs


def get(*args, **kwargs):
    """Call ``http_get`` with the favicon request defaults applied."""
    return http_get(*args, **update_kwargs(**kwargs))


def post(*args, **kwargs):
    """Call ``http_post`` with the favicon request defaults applied."""
    return http_post(*args, **update_kwargs(**kwargs))
|
||||
|
||||
|
||||
def allesedv(domain):
    """Favicon Resolver from allesedv.com

    Returns the body of the response, or an empty list when the response is
    of content type ``image/gif``.
    """
    template = 'https://f1.allesedv.com/32/{domain}'

    # The service answers with a 200 regardless of the favicon existing or
    # not; the icon size is sometimes correct, sometimes not.
    resp = get(template.format(domain=domain))

    # An image/gif is what the service returns if the favicon does not exist.
    icon_found = resp.headers['Content-Type'] != 'image/gif'
    return resp.content if icon_found else []
|
||||
|
||||
|
||||
def duckduckgo(domain):
    """Favicon Resolver from duckduckgo.com

    Returns the body of the response on HTTP 200, otherwise an empty list.
    """
    template = 'https://icons.duckduckgo.com/ip2/{domain}.ico'

    # The service returns a 404 if the favicon does not exist and a 200 if it
    # does; on success the API responds with a 32x32 png image.
    resp = get(template.format(domain=domain))
    if resp.status_code != 200:
        return []
    return resp.content
|
||||
|
||||
|
||||
def google(domain):
    """Favicon Resolver from google.com

    Returns the body of the response on HTTP 200, otherwise an empty list.
    """
    template = 'https://www.google.com/s2/favicons?sz=32&domain={domain}'

    # The service returns a 404 if the favicon does not exist and a 200 if it
    # does; on success the API responds with a 32x32 png image.
    resp = get(template.format(domain=domain))
    if resp.status_code != 200:
        return []
    return resp.content
|
||||
|
||||
|
||||
def yandex(domain):
    """Favicon Resolver from yandex.com

    Returns the body of the response when it is an HTTP 200 with more than 70
    bytes of content, otherwise an empty list.
    """
    template = 'https://favicon.yandex.net/favicon/{domain}'

    # The service will always return 200.
    resp = get(template.format(domain=domain))

    # The API responds with a 16x16 png image; if the favicon doesn't exist it
    # is a 1x1 png image (70 bytes).
    has_icon = resp.status_code == 200 and len(resp.content) > 70
    return resp.content if has_icon else []
|
||||
|
||||
|
||||
# Registry of the favicon resolvers: maps the resolver's name (which is also
# the name of its function) to the resolver function.
backends = {resolver.__name__: resolver for resolver in (allesedv, duckduckgo, google, yandex)}
|
||||
|
||||
|
||||
def search_favicon(backend_name, domain):
    """Resolve the favicon of ``domain`` with the backend ``backend_name``.

    Returns whatever the backend returns.  An empty list is returned when no
    backend is registered under ``backend_name`` or when the backend raises
    ``HTTPError`` or ``SearxEngineResponseException``.
    """
    resolver = backends.get(backend_name)
    if resolver is not None:
        try:
            return resolver(domain)
        except (HTTPError, SearxEngineResponseException):
            pass
    return []
|
37
searx/favicons/__init__.py
Normal file
37
searx/favicons/__init__.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementations for providing the favicons in SearXNG"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["init", "favicon_url", "favicon_proxy"]
|
||||
|
||||
import pathlib
|
||||
from searx import logger
|
||||
from searx import get_setting
|
||||
from .proxy import favicon_url, favicon_proxy
|
||||
|
||||
logger = logger.getChild('favicons')
|
||||
|
||||
|
||||
def is_active():
    """Return ``True`` when the setting ``search.favicon_resolver`` has a
    truthy value, otherwise ``False``."""
    resolver_name = get_setting("search.favicon_resolver", False)
    return bool(resolver_name)
|
||||
|
||||
|
||||
def init():
    """Load the favicon configuration and initialize cache and proxy.

    The configuration is read from ``/etc/searxng/favicons.toml``.  If this
    file does not exist, the default TOML shipped with the package
    (``config.DEFAULT_CFG_TOML``) is used instead; the missing file is logged
    as an error only when the favicon feature is active.
    """

    # pylint: disable=import-outside-toplevel

    from . import config, cache, proxy

    system_cfg = pathlib.Path("/etc/searxng/favicons.toml")
    if system_cfg.exists():
        toml_file = system_cfg
    else:
        if is_active():
            logger.error(f"missing favicon config: {system_cfg}")
        toml_file = config.DEFAULT_CFG_TOML

    logger.debug(f"load favicon config: {toml_file}")
    favicon_cfg = config.FaviconConfig.from_toml_file(toml_file, use_cache=True)
    cache.init(favicon_cfg.cache)
    proxy.init(favicon_cfg.proxy)
|
12
searx/favicons/__main__.py
Normal file
12
searx/favicons/__main__.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Command line implementation"""
|
||||
|
||||
import typer
|
||||
|
||||
from . import cache
|
||||
from . import init
|
||||
|
||||
# The command line application is assembled at import time, but it is only
# executed when the module is run as a script (``python -m searx.favicons``).
# Importing the module no longer initializes the favicon tools nor starts the
# command line parser as a side effect.
app = typer.Typer()
app.add_typer(cache.app, name="cache", help="commands related to the cache")

if __name__ == "__main__":
    # The cache commands work on the global cache object, which is created by
    # init(), therefore init() has to run before the commands are dispatched.
    init()
    app()
|
476
searx/favicons/cache.py
Normal file
476
searx/favicons/cache.py
Normal file
|
@ -0,0 +1,476 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementations for caching favicons.
|
||||
|
||||
:py:obj:`FaviconCacheConfig`:
|
||||
Configuration of the favicon cache
|
||||
|
||||
:py:obj:`FaviconCache`:
|
||||
Abstract base class for the implementation of a favicon cache.
|
||||
|
||||
:py:obj:`FaviconCacheSQLite`:
|
||||
Favicon cache that manages the favicon BLOBs in a SQLite DB.
|
||||
|
||||
:py:obj:`FaviconCacheNull`:
|
||||
Fallback solution if the configured cache cannot be used for system reasons.
|
||||
|
||||
----
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import Literal
|
||||
|
||||
import abc
|
||||
import dataclasses
|
||||
import hashlib
|
||||
import logging
|
||||
import pathlib
|
||||
import sqlite3
|
||||
import tempfile
|
||||
import time
|
||||
import typer
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from searx import sqlitedb
|
||||
from searx import logger
|
||||
from searx.utils import humanize_bytes, humanize_number
|
||||
|
||||
CACHE: "FaviconCache"
|
||||
FALLBACK_ICON = b"FALLBACK_ICON"
|
||||
|
||||
logger = logger.getChild('favicons.cache')
|
||||
app = typer.Typer()
|
||||
|
||||
|
||||
@app.command()
def state():
    """show state of the cache"""
    # hint: the docstring above is the help text of the command
    stats = CACHE.state()
    print(stats.report())
|
||||
|
||||
|
||||
@app.command()
def maintenance(force: bool = True, debug: bool = False):
    """perform maintenance of the cache"""
    # hint: the docstring above is the help text of the command

    root = logging.getLogger()
    if not debug:
        # Drop the handlers of the root logger and report only the messages
        # of this module's logger, in a plain format.
        root.handlers = []
        stream = logging.StreamHandler()
        stream.setFormatter(logging.Formatter("%(message)s"))
        logger.addHandler(stream)
        logger.setLevel(logging.DEBUG)
    else:
        root.setLevel(logging.DEBUG)

    before = CACHE.state()
    CACHE.maintenance(force=force)
    after = CACHE.state()

    # difference of the cache's state before and after the maintenance
    reduced_by = before - after
    print("The cache has been reduced by:")
    print(reduced_by.report("\n- {descr}: {val}").lstrip("\n"))
|
||||
|
||||
|
||||
def init(cfg: "FaviconCacheConfig"):
    """Initialization of a global ``CACHE``

    The cache implementation is selected by ``cfg.db_type``:

    ``sqlite``:
      :py:obj:`FaviconCacheSQLite`, or :py:obj:`FaviconCacheNull` if the
      version check of the SQLite library fails.

    ``mem``:
      :py:obj:`FaviconCacheMEM`

    Any other value raises a :py:obj:`NotImplementedError`.
    """

    global CACHE  # pylint: disable=global-statement

    db_type = cfg.db_type

    if db_type == "mem":
        logger.error("Favicons are cached in memory, don't use this in production!")
        CACHE = FaviconCacheMEM(cfg)
        return

    if db_type != "sqlite":
        raise NotImplementedError(f"favicons db_type '{cfg.db_type}' is unknown")

    sqlite_too_old = sqlite3.sqlite_version_info <= (3, 35)
    if sqlite_too_old:
        logger.critical(
            "Disable favicon caching completely: SQLite library (%s) is too old! (require >= 3.35)",
            sqlite3.sqlite_version,
        )
    cache_cls = FaviconCacheNull if sqlite_too_old else FaviconCacheSQLite
    CACHE = cache_cls(cfg)
|
||||
|
||||
|
||||
class FaviconCacheConfig(BaseModel):
    """Configuration of the favicon cache."""

    db_type: Literal["sqlite", "mem"] = "sqlite"
    """Type of the database, selects the implementation of the cache:

    ``sqlite``:
      :py:obj:`.cache.FaviconCacheSQLite`

    ``mem``:
      :py:obj:`.cache.FaviconCacheMEM` (not recommended)
    """

    db_url: pathlib.Path = pathlib.Path(tempfile.gettempdir(), "faviconcache.db")
    """URL of the SQLite DB, the path to the database file.  By default the
    file ``faviconcache.db`` in the folder for temporary files is used."""

    HOLD_TIME: int = 30 * 24 * 60 * 60  # 30 days
    """Hold time (default in sec.), after which a BLOB is removed from the
    cache."""

    LIMIT_TOTAL_BYTES: int = 50 * 1024 * 1024  # 50 MB
    """Maximum of bytes (default) stored in the cache of all blobs.  Note: The
    limit is only enforced at each maintenance interval, in which the oldest
    BLOBs are deleted; between two maintenance runs the limit can be exceeded.
    If the maintenance period is *too long* or maintenance is switched off
    completely, the cache grows uncontrollably."""

    BLOB_MAX_BYTES: int = 20 * 1024  # 20 KB
    """The maximum BLOB size in bytes that a favicon may have so that it can be
    saved in the cache.  A larger favicon is not saved in the cache and must be
    requested by the client via the proxy."""

    MAINTENANCE_PERIOD: int = 60 * 60
    """Maintenance period in seconds / applies when
    :py:obj:`MAINTENANCE_MODE` is set to ``auto``."""

    MAINTENANCE_MODE: Literal["auto", "off"] = "auto"
    """Type of maintenance mode

    ``auto``:
      Maintenance is carried out automatically as part of the maintenance
      intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.

    ``off``:
      Maintenance is switched off and must be carried out by an external process
      if required.
    """
|
||||
|
||||
|
||||
@dataclasses.dataclass
class FaviconCacheStats:
    """Dataclass which provides information on the status of the cache.

    A value of ``None`` means that the cache implementation does not provide
    this information.
    """

    favicons: int | None = None
    bytes: int | None = None
    domains: int | None = None
    resolvers: int | None = None

    # (field name, description used in the report, function to format the value)
    field_descr = (
        ("favicons", "number of favicons in cache", humanize_number),
        ("bytes", "total size (approx. bytes) of cache", humanize_bytes),
        ("domains", "total number of domains in cache", humanize_number),
        ("resolvers", "number of resolvers", str),
    )

    def __sub__(self, other) -> FaviconCacheStats:
        """Return the field-by-field difference ``self - other``.

        Fields that are ``None`` in one of the operands are left out of the
        result (they remain ``None``).  Raises a :py:obj:`TypeError` if
        ``other`` is not an instance of the class of ``self``.
        """
        if not isinstance(other, self.__class__):
            raise TypeError(f"unsupported operand type(s) for -: '{self.__class__}' and '{type(other)}'")
        kwargs = {}
        for field, _, _ in self.field_descr:
            self_val, other_val = getattr(self, field), getattr(other, field)
            if self_val is None or other_val is None:
                continue
            if isinstance(self_val, int):
                kwargs[field] = self_val - other_val
            else:
                kwargs[field] = self_val
        return self.__class__(**kwargs)

    def report(self, fmt: str = "{descr}: {val}\n"):
        """Return a report of all fields, one ``fmt`` formatted item per field.

        The placeholders ``{descr}`` and ``{val}`` of ``fmt`` are replaced by
        the description and the formatted value of the field; ``--`` is
        reported for a value of ``None``.
        """
        items = []
        for field, descr, cast in self.field_descr:
            val = getattr(self, field)
            val = "--" if val is None else cast(val)
            items.append(fmt.format(descr=descr, val=val))
        return "".join(items)
|
||||
|
||||
|
||||
class FaviconCache(abc.ABC):
    """Abstract base class for the implementation of a favicon cache."""

    @abc.abstractmethod
    def __init__(self, cfg: FaviconCacheConfig):
        """The favicon cache is instantiated from its configuration ``cfg``."""

    @abc.abstractmethod
    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
        """Look up the entry registered for ``(resolver, authority)``.

        Returns the tuple ``(data, mime)`` that has been registered in the
        cache, or ``None`` to indicate that there was no entry in the cache.
        """

    @abc.abstractmethod
    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        """Register data and mime-type in the cache.  If data is ``None``, the
        :py:obj:`FALLBACK_ICON` is registered in the cache."""

    @abc.abstractmethod
    def state(self) -> FaviconCacheStats:
        """Returns a :py:obj:`FaviconCacheStats` (key/values) with information
        on the state of the cache."""

    @abc.abstractmethod
    def maintenance(self, force=False):
        """Performs maintenance on the cache"""
|
||||
|
||||
|
||||
class FaviconCacheNull(FaviconCache):
    """A dummy favicon cache that caches nothing / a fallback solution.  The
    NullCache is used when more efficient caches such as the
    :py:obj:`FaviconCacheSQLite` cannot be used because, for example, the SQLite
    library is only available in an old version and does not meet the
    requirements."""

    def __init__(self, cfg: FaviconCacheConfig):
        # the configuration is not needed, there is nothing to set up
        return None

    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
        # there is never an entry in the cache
        return None

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        # nothing is stored
        return False

    def state(self):
        # the cache is always empty
        return FaviconCacheStats(favicons=0)

    def maintenance(self, force=False):
        # nothing to maintain
        pass
|
||||
|
||||
|
||||
class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):
    """Favicon cache that manages the favicon BLOBs in a SQLite DB.  The DB
    model in the SQLite DB is implemented using the abstract class
    :py:obj:`sqlitedb.SQLiteAppl`.

    The following configurations are required / supported:

    - :py:obj:`FaviconCacheConfig.db_url`
    - :py:obj:`FaviconCacheConfig.HOLD_TIME`
    - :py:obj:`FaviconCacheConfig.LIMIT_TOTAL_BYTES`
    - :py:obj:`FaviconCacheConfig.BLOB_MAX_BYTES`
    - :py:obj:`FaviconCacheConfig.MAINTENANCE_PERIOD`
    - :py:obj:`FaviconCacheConfig.MAINTENANCE_MODE`
    """

    DB_SCHEMA = 1

    DDL_BLOBS = """\
CREATE TABLE IF NOT EXISTS blobs (
  sha256 TEXT,
  bytes_c INTEGER,
  mime TEXT NOT NULL,
  data BLOB NOT NULL,
  PRIMARY KEY (sha256))"""

    """Table to store BLOB objects by their sha256 hash values."""

    DDL_BLOB_MAP = """\
CREATE TABLE IF NOT EXISTS blob_map (
  m_time INTEGER DEFAULT (strftime('%s', 'now')), -- last modified (unix epoch) time in sec.
  sha256 TEXT,
  resolver TEXT,
  authority TEXT,
  PRIMARY KEY (resolver, authority))"""

    """Table to map from (resolver, authority) to sha256 hash values."""

    DDL_CREATE_TABLES = {
        "blobs": DDL_BLOBS,
        "blob_map": DDL_BLOB_MAP,
    }

    SQL_DROP_LEFTOVER_BLOBS = (
        "DELETE FROM blobs WHERE sha256 IN ("
        " SELECT b.sha256"
        "   FROM blobs b"
        "   LEFT JOIN blob_map bm"
        "     ON b.sha256 = bm.sha256"
        "  WHERE bm.sha256 IS NULL)"
    )
    """Delete blobs.sha256 (BLOBs) no longer in blob_map.sha256."""

    SQL_ITER_BLOBS_SHA256_BYTES_C = (
        "SELECT b.sha256, b.bytes_c FROM blobs b"
        "  JOIN blob_map bm "
        "    ON b.sha256 = bm.sha256"
        " ORDER BY bm.m_time ASC"
    )
    """Iterate over (sha256, bytes_c) of the mapped BLOBs, oldest mapping
    first.  A BLOB that is mapped more than once is listed more than once."""

    SQL_INSERT_BLOBS = (
        "INSERT INTO blobs (sha256, bytes_c, mime, data) VALUES (?, ?, ?, ?)"
        "    ON CONFLICT (sha256) DO NOTHING"
    )  # fmt: skip

    SQL_INSERT_BLOB_MAP = (
        "INSERT INTO blob_map (sha256, resolver, authority) VALUES (?, ?, ?)"
        "    ON CONFLICT DO UPDATE "
        "   SET sha256=excluded.sha256, m_time=strftime('%s', 'now')"
    )

    def __init__(self, cfg: FaviconCacheConfig):
        """An instance of the favicon cache is build up from the configuration."""

        # db_url is a pathlib.Path and a Path object never compares equal to a
        # str, the comparison has to be done on the string representation.
        if str(cfg.db_url) == ":memory:":
            logger.critical("don't use SQLite DB in :memory: in production!!")
        super().__init__(cfg.db_url)
        self.cfg = cfg

    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
        """Returns ``None`` if there is no entry for ``(resolver, authority)``
        in the cache, otherwise the tuple ``(data, mime)``.  The tuple is
        ``(None, None)`` if the :py:obj:`FALLBACK_ICON` has been registered or
        if the BLOB of the entry no longer exists."""

        sql = "SELECT sha256 FROM blob_map WHERE resolver = ? AND authority = ?"
        res = self.DB.execute(sql, (resolver, authority)).fetchone()
        if res is None:
            return None

        data, mime = (None, None)
        sha256 = res[0]
        if sha256 == FALLBACK_ICON:
            return data, mime

        sql = "SELECT data, mime FROM blobs WHERE sha256 = ?"
        res = self.DB.execute(sql, (sha256,)).fetchone()
        if res is not None:
            data, mime = res
        return data, mime

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        """Register ``data`` and ``mime`` for ``(resolver, authority)``.  If
        ``data`` is ``None``, the :py:obj:`FALLBACK_ICON` is registered.

        Returns ``False`` (nothing is stored) if ``data`` is given without a
        mime-type or if ``data`` is larger than ``BLOB_MAX_BYTES``, otherwise
        ``True``.  In maintenance mode ``auto`` a due maintenance of the cache
        is performed first.
        """

        if self.cfg.MAINTENANCE_MODE == "auto" and int(time.time()) > self.next_maintenance_time:
            # Should automatic maintenance be moved to a new thread?
            self.maintenance()

        if data is not None and mime is None:
            logger.error(
                "favicon resolver %s tries to cache mime-type None for authority %s",
                resolver,
                authority,
            )
            return False

        bytes_c = len(data or b"")
        if bytes_c > self.cfg.BLOB_MAX_BYTES:
            # arguments are passed to the logger, the message is only
            # formatted when the log level is enabled
            logger.info(
                "favicon of resolver: %s / authority: %s to big to cache (bytes: %s) ",
                resolver,
                authority,
                bytes_c,
            )
            return False

        if data is None:
            sha256 = FALLBACK_ICON
        else:
            sha256 = hashlib.sha256(data).hexdigest()

        with self.connect() as conn:
            # for the fallback icon only the mapping is stored, no BLOB
            if sha256 != FALLBACK_ICON:
                conn.execute(self.SQL_INSERT_BLOBS, (sha256, bytes_c, mime, data))
            conn.execute(self.SQL_INSERT_BLOB_MAP, (sha256, resolver, authority))

        return True

    @property
    def next_maintenance_time(self) -> int:
        """Returns (unix epoch) time of the next maintenance."""

        return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE")

    def maintenance(self, force=False):
        """Performs maintenance on the cache: drops the entries that are out
        of ``HOLD_TIME`` and, if the cache is larger than
        ``LIMIT_TOTAL_BYTES``, the oldest entries.  Without ``force`` nothing
        is done as long as the next maintenance time is in the future."""

        # Prevent parallel DB maintenance cycles from other DB connections
        # (e.g. in multi thread or process environments).

        if not force and int(time.time()) < self.next_maintenance_time:
            logger.debug("no maintenance required yet, next maintenance interval is in the future")
            return
        self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!

        # do maintenance tasks

        with self.connect() as conn:

            # drop items not in HOLD time
            res = conn.execute(
                "DELETE FROM blob_map"
                " WHERE cast(m_time as integer) < cast(strftime('%s', 'now') as integer) - ?",
                (self.cfg.HOLD_TIME,),
            )
            logger.debug("dropped %s obsolete blob_map items from db", res.rowcount)
            res = conn.execute(self.SQL_DROP_LEFTOVER_BLOBS)
            logger.debug("dropped %s obsolete BLOBS from db", res.rowcount)

            # drop old items to be in LIMIT_TOTAL_BYTES
            total_bytes = conn.execute("SELECT SUM(bytes_c) FROM blobs").fetchone()[0] or 0
            if total_bytes > self.cfg.LIMIT_TOTAL_BYTES:

                bytes_to_free = total_bytes - self.cfg.LIMIT_TOTAL_BYTES
                freed = 0
                sha_list = []
                seen = set()
                for sha256, bytes_c in conn.execute(self.SQL_ITER_BLOBS_SHA256_BYTES_C):
                    # A BLOB mapped by several (resolver, authority) pairs is
                    # listed once per mapping, but its bytes are freed only
                    # once.
                    if sha256 in seen:
                        continue
                    seen.add(sha256)
                    sha_list.append(sha256)
                    freed += bytes_c
                    if freed > bytes_to_free:
                        break
                if sha_list:
                    # one parameterized statement per hash value: no SQL is
                    # built from values and there is no limit on the number of
                    # hash values
                    for sha256 in sha_list:
                        conn.execute("DELETE FROM blobs WHERE sha256 = ?", (sha256,))
                        conn.execute("DELETE FROM blob_map WHERE sha256 = ?", (sha256,))
                    logger.debug("dropped %s blobs with total size of %s bytes", len(sha_list), freed)

    def _query_val(self, sql, default=None):
        """Returns the first column of the first row of the ``sql`` query, or
        ``default`` if there is no row or the value is ``NULL``."""
        row = self.DB.execute(sql).fetchone()
        val = None if row is None else row[0]
        if val is None:
            val = default
        return val

    def state(self) -> FaviconCacheStats:
        """Returns a :py:obj:`FaviconCacheStats` with the number of BLOBs, the
        sum of their bytes and the number of distinct authorities and
        resolvers in the cache."""
        return FaviconCacheStats(
            favicons=self._query_val("SELECT count(*) FROM blobs", 0),
            bytes=self._query_val("SELECT SUM(bytes_c) FROM blobs", 0),
            domains=self._query_val("SELECT count(*) FROM (SELECT authority FROM blob_map GROUP BY authority)", 0),
            resolvers=self._query_val("SELECT count(*) FROM (SELECT resolver FROM blob_map GROUP BY resolver)", 0),
        )
|
||||
|
||||
|
||||
class FaviconCacheMEM(FaviconCache):
    """Favicon cache in process' memory.  Its just a POC that stores the
    favicons in the memory of the process.

    .. attention::

       Don't use it in production, it will blow up your memory!!

    """

    def __init__(self, cfg):

        self.cfg = cfg
        # maps sha256 (hex digest) --> data
        self._data = {}
        # maps "<resolver>:<authority>" --> (sha256, mime)
        self._sha_mime = {}

    def __call__(self, resolver: str, authority: str) -> None | tuple[bytes | None, str | None]:

        digest, mime = self._sha_mime.get(f"{resolver}:{authority}", (None, None))
        if digest is None:
            return None

        blob = self._data.get(digest)
        # the fallback icon is reported as "no data"
        return (None if blob == FALLBACK_ICON else blob), mime

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:

        if data is not None and mime is None:
            logger.error(
                "favicon resolver %s tries to cache mime-type None for authority %s",
                resolver,
                authority,
            )
            return False

        if data is None:
            # no favicon: register the fallback icon (without a mime-type)
            data, mime = FALLBACK_ICON, None

        digest = hashlib.sha256(data).hexdigest()
        self._data[digest] = data
        self._sha_mime[f"{resolver}:{authority}"] = (digest, mime)
        return True

    def state(self):
        return FaviconCacheStats(favicons=len(self._data))

    def maintenance(self, force=False):
        # there is no maintenance for the cache in memory
        pass
|
62
searx/favicons/config.py
Normal file
62
searx/favicons/config.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pylint: disable=missing-module-docstring
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib
|
||||
from pydantic import BaseModel
|
||||
|
||||
from searx.compat import tomllib
|
||||
from .cache import FaviconCacheConfig
|
||||
from .proxy import FaviconProxyConfig
|
||||
|
||||
CONFIG_SCHEMA: int = 1
|
||||
"""Version of the configuration schema."""
|
||||
|
||||
TOML_CACHE: dict[str, "FaviconConfig"] = {}
|
||||
"""Cache config objects by TOML's filename."""
|
||||
|
||||
DEFAULT_CFG_TOML = pathlib.Path(__file__).parent / "favicons.toml"
|
||||
|
||||
|
||||
class FaviconConfig(BaseModel):
    """The class aggregates configurations of the favicon tools"""

    cfg_schema: int
    """Config's schema version.  The specification of the version of the schema
    is mandatory, currently only version :py:obj:`CONFIG_SCHEMA` is supported.
    By specifying a version, it is possible to ensure downward compatibility in
    the event of future changes to the configuration schema"""

    cache: FaviconCacheConfig = FaviconCacheConfig()
    """Setup of the :py:obj:`.cache.FaviconCacheConfig`."""

    proxy: FaviconProxyConfig = FaviconProxyConfig()
    """Setup of the :py:obj:`.proxy.FaviconProxyConfig`."""

    @classmethod
    def from_toml_file(cls, cfg_file: pathlib.Path, use_cache: bool) -> "FaviconConfig":
        """Create a config object from a TOML file, the ``use_cache`` argument
        specifies whether a cache should be used.

        With ``use_cache`` the config object is cached in ``TOML_CACHE`` by
        the resolved filename and a config object that is already in the cache
        is returned without reading the file again.

        The configuration is taken from the table ``[favicons]`` of the TOML
        file, or from the top level if there is no such table.  A
        :py:obj:`ValueError` is raised if ``cfg_schema`` of the file is not
        :py:obj:`CONFIG_SCHEMA`.
        """

        # The same (resolved) key is used to read from and to write to the
        # cache, so that different spellings of a path share one cache entry.
        cache_key = str(cfg_file.resolve())
        if use_cache:
            cached = TOML_CACHE.get(cache_key)
            if cached is not None:
                return cached

        with cfg_file.open("rb") as f:
            toml_data = tomllib.load(f)
        toml_data = toml_data.get("favicons", toml_data)

        schema = toml_data.get("cfg_schema")
        if schema != CONFIG_SCHEMA:
            raise ValueError(
                f"config schema version {CONFIG_SCHEMA} is needed, version {schema} is given in {cfg_file}"
            )

        cfg = cls(**toml_data)
        if use_cache:
            TOML_CACHE[cache_key] = cfg

        return cfg
|
25
searx/favicons/favicons.toml
Normal file
25
searx/favicons/favicons.toml
Normal file
|
@ -0,0 +1,25 @@
|
|||
[favicons]
|
||||
|
||||
cfg_schema = 1 # config's schema version no.
|
||||
|
||||
[favicons.proxy]
|
||||
|
||||
# max_age = 5184000 # 60 days / default: 7 days (604800 sec)
|
||||
|
||||
# [favicons.proxy.resolver_map]
|
||||
#
|
||||
# The available favicon resolvers are registered here.
|
||||
#
|
||||
# "duckduckgo" = "searx.favicons.resolvers.duckduckgo"
|
||||
# "allesedv" = "searx.favicons.resolvers.allesedv"
|
||||
# "google" = "searx.favicons.resolvers.google"
|
||||
# "yandex" = "searx.favicons.resolvers.yandex"
|
||||
|
||||
[favicons.cache]
|
||||
|
||||
# db_url = "/var/cache/searxng/faviconcache.db" # default: "/tmp/faviconcache.db"
|
||||
# HOLD_TIME = 5184000 # 60 days / default: 30 days
|
||||
# LIMIT_TOTAL_BYTES = 2147483648 # 2 GB / default: 50 MB
|
||||
# BLOB_MAX_BYTES = 40960 # 40 KB / default 20 KB
|
||||
# MAINTENANCE_MODE = "off" # default: "auto"
|
||||
# MAINTENANCE_PERIOD = 600 # 10min / default: 1h
|
237
searx/favicons/proxy.py
Normal file
237
searx/favicons/proxy.py
Normal file
|
@ -0,0 +1,237 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementations for a favicon proxy"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Callable
|
||||
|
||||
import importlib
|
||||
import base64
|
||||
import pathlib
|
||||
import urllib.parse
|
||||
|
||||
import flask
|
||||
from httpx import HTTPError
|
||||
from pydantic import BaseModel
|
||||
|
||||
from searx import get_setting
|
||||
|
||||
from searx.webutils import new_hmac, is_hmac_of
|
||||
from searx.exceptions import SearxEngineResponseException
|
||||
|
||||
from .resolvers import DEFAULT_RESOLVER_MAP
|
||||
from . import cache
|
||||
|
||||
DEFAULT_FAVICON_URL = {}
|
||||
CFG: FaviconProxyConfig = None # type: ignore
|
||||
|
||||
|
||||
def init(cfg: FaviconProxyConfig):
    """Register ``cfg`` as the global proxy configuration ``CFG`` of this
    module."""
    global CFG  # pylint: disable=global-statement
    CFG = cfg
|
||||
|
||||
|
||||
def _initial_resolver_map():
    """Build the initial resolver map from the ``search.favicon_resolver``
    setting.

    If the setting names a resolver from ``DEFAULT_RESOLVER_MAP``, the
    returned dictionary maps this name to the fully qualified name of the
    resolver's function in ``searx.favicons.resolvers``; otherwise the
    dictionary is empty.
    """
    name: str = get_setting("search.favicon_resolver", None)  # type: ignore
    if not name:
        return {}

    func = DEFAULT_RESOLVER_MAP.get(name)
    if not func:
        return {}

    return {name: f"searx.favicons.resolvers.{func.__name__}"}
|
||||
|
||||
|
||||
class FaviconProxyConfig(BaseModel):
    """Configuration of the favicon proxy."""

    max_age: int = 60 * 60 * 24 * 7  # seven days
    """HTTP header Cache-Control_ ``max-age``

    .. _Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
    """

    secret_key: str = get_setting("server.secret_key")  # type: ignore
    """By default, the value from :ref:`server.secret_key <settings server>`
    setting is used."""

    resolver_timeout: int = get_setting("outgoing.request_timeout")  # type: ignore
    """Timeout which the resolvers should not exceed, is usually passed to the
    outgoing request of the resolver.  By default, the value from
    :ref:`outgoing.request_timeout <settings outgoing>` setting is used."""

    resolver_map: dict[str, str] = _initial_resolver_map()
    """The resolver_map is a key / value dictionary where the key is the name of
    the resolver and the value is the fully qualifying name (fqn) of resolver's
    function (the callable).  The resolvers from the python module
    :py:obj:`searx.favicons.resolvers` are available by default."""

    def get_resolver(self, name: str) -> Callable | None:
        """Returns the callable object (function) of the resolver with the
        ``name``.  If no resolver is registered for the ``name``, ``None`` is
        returned.

        A :py:obj:`ValueError` is raised if a resolver is registered for the
        ``name``, but its module does not implement the function.
        """
        fqn = self.resolver_map.get(name)
        if fqn is None:
            return None
        mod_name, _, func_name = fqn.rpartition('.')
        mod = importlib.import_module(mod_name)
        # Without the default, getattr raises an AttributeError for a missing
        # function and the check below is never reached.
        func = getattr(mod, func_name, None)
        if func is None:
            raise ValueError(f"resolver {fqn} is not implemented")
        return func

    favicon_path: str = get_setting("ui.static_path") + "/themes/{theme}/img/empty_favicon.svg"  # type: ignore
    favicon_mime_type: str = "image/svg+xml"

    def favicon(self, **replacements):
        """Returns pathname and mimetype of the default favicon.  The
        placeholders in ``favicon_path`` (e.g. ``{theme}``) are replaced by
        the keyword arguments."""
        return (
            pathlib.Path(self.favicon_path.format(**replacements)),
            self.favicon_mime_type,
        )

    def favicon_data_url(self, **replacements):
        """Returns data image URL of the default favicon.

        The data URL is built once per set of ``replacements`` and then served
        from the module's ``DEFAULT_FAVICON_URL`` cache.
        """

        cache_key = ", ".join(f"{key}:{replacements[key]}" for key in sorted(replacements, key=str))
        data_url = DEFAULT_FAVICON_URL.get(cache_key)
        if data_url is not None:
            return data_url

        # The favicon of this config object is used, the method does not
        # depend on the global config being initialized.
        fav, mimetype = self.favicon(**replacements)
        # hint: encoding utf-8 limits favicons to be a SVG image
        with fav.open("r", encoding="utf-8") as f:
            data_url = f.read()

        data_url = urllib.parse.quote(data_url)
        data_url = f"data:{mimetype};utf8,{data_url}"
        DEFAULT_FAVICON_URL[cache_key] = data_url
        return data_url
|
||||
|
||||
|
||||
def favicon_proxy():
    """REST API of SearXNG's favicon proxy service

    ::

        /favicon_proxy?authority=<...>&h=<...>

    ``authority``:
      Domain name :rfc:`3986` / see :py:obj:`favicon_url`

    ``h``:
      HMAC :rfc:`2104`, build up from the :ref:`server.secret_key <settings
      server>` setting.

    The response is the favicon found by the resolver from the user's
    preferences (with a ``Cache-Control: max-age=...`` header), or the default
    favicon of the theme if the resolver has nothing.  Invalid requests are
    answered with HTTP 400.

    """
    args = flask.request.args
    authority = args.get('authority')

    # malformed request: no authority at all or not a RFC 3986 authority
    if not authority or "/" in authority:
        return '', 400

    # malformed request / does not have authorisation
    hmac_ok = is_hmac_of(CFG.secret_key, authority.encode(), args.get('h', ''))
    if not hmac_ok:
        return '', 400

    preferences = flask.request.preferences  # type: ignore

    # an empty or unknown resolver is answered with HTTP 400
    resolver = preferences.get_value('favicon_resolver')
    if not (resolver and resolver in CFG.resolver_map):
        return "", 400

    data, mime = search_favicon(resolver, authority)

    if data is None or mime is None:
        # nothing found: serve the default favicon from the static path
        theme = preferences.get_value("theme")
        fav, mimetype = CFG.favicon(theme=theme)
        return flask.send_from_directory(fav.parent, fav.name, mimetype=mimetype)

    resp = flask.Response(data, mimetype=mime)  # type: ignore
    resp.headers['Cache-Control'] = f"max-age={CFG.max_age}"
    return resp
|
||||
|
||||
|
||||
def search_favicon(resolver: str, authority: str) -> tuple[None | bytes, None | str]:
    """Asks the favicon resolver named ``resolver`` for the favicon of
    ``authority`` and returns the tuple ``(data, mime)``.  If no favicon has
    been determined, both values are ``None``.

    ``data``:
      Binary data of the favicon.

    ``mime``:
      Mime type of the favicon.

    Whatever the resolver answered (including *nothing found*) is stored in the
    favicon cache, the cache is consulted before the resolver is called.

    """
    func = CFG.get_resolver(resolver)
    if func is None:
        return None, None

    # to avoid superfluous requests to the resolver, first look in the cache
    cached = cache.CACHE(resolver, authority)
    if cached is not None:
        return cached

    data, mime = None, None
    try:
        data, mime = func(authority, timeout=CFG.resolver_timeout)
    except (HTTPError, SearxEngineResponseException):
        # a failed request is handled like "no favicon found"
        pass
    else:
        # half an answer is no answer
        if data is None or mime is None:
            data, mime = None, None

    cache.CACHE.set(resolver, authority, mime, data)
    return data, mime
|
||||
|
||||
|
||||
def favicon_url(authority: str) -> str:
    """Generates the image URL used for favicons in SearXNG's result lists.
    The ``authority`` argument (aka netloc / :rfc:`3986`) is usually a (sub-)
    domain name.  This function is used in the HTML (jinja) templates.

    .. code:: html

       <div class="favicon">
          <img src="{{ favicon_url(result.parsed_url.netloc) }}">
       </div>

    The returned URL is a route to :py:obj:`favicon_proxy` REST API.

    If the favicon is already in the cache, the returned URL is a `data URL`_
    (something like ``data:image/png;base64,...``).  By generating a data url
    from the :py:obj:`.cache.FaviconCache`, additional HTTP round trips via the
    :py:obj:`favicon_proxy` are saved.  However, it must also be borne in mind
    that data urls are not cached in the client (web browser).

    An empty string is returned if no (valid) resolver is selected in the
    user's preferences.

    .. _data URL: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs

    """
    preferences = flask.request.preferences  # type: ignore

    # if resolver is empty or not valid, just return nothing.
    resolver = preferences.get_value('favicon_resolver')
    if not resolver or resolver not in CFG.resolver_map:
        return ""

    cached = cache.CACHE(resolver, authority)

    if cached == (None, None):
        # we have already checked, the resolver does not have a favicon
        theme = preferences.get_value("theme")
        return CFG.favicon_data_url(theme=theme)

    if cached is not None:
        data, mime = cached
        encoded = base64.b64encode(data).decode('utf-8')  # type: ignore
        return f"data:{mime};base64,{encoded}"

    # not in the cache: hand out a signed URL of the favicon proxy
    h = new_hmac(CFG.secret_key, authority.encode())
    proxy_url = flask.url_for('favicon_proxy')
    params = {"authority": authority, "h": h}
    return f"{proxy_url}?{urllib.parse.urlencode(params)}"
|
100
searx/favicons/resolvers.py
Normal file
100
searx/favicons/resolvers.py
Normal file
|
@ -0,0 +1,100 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementations of the favicon *resolvers* that are available in the favicon
|
||||
proxy by default. A *resolver* is a function that obtains the favicon from an
|
||||
external source. The *resolver* function receives two arguments (``domain,
|
||||
timeout``) and returns a tuple ``(data, mime)``.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["DEFAULT_RESOLVER_MAP", "allesedv", "duckduckgo", "google", "yandex"]
|
||||
|
||||
from typing import Callable
|
||||
from searx import network
|
||||
from searx import logger
|
||||
|
||||
DEFAULT_RESOLVER_MAP: dict[str, Callable]
|
||||
logger = logger.getChild('favicons.resolvers')
|
||||
|
||||
|
||||
def _req_args(**kwargs):
|
||||
# add the request arguments from the searx.network
|
||||
d = {"raise_for_httperror": False}
|
||||
d.update(kwargs)
|
||||
return d
|
||||
|
||||
|
||||
def allesedv(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
    """Favicon Resolver from allesedv.com / https://favicon.allesedv.com/"""
    url = f"https://f1.allesedv.com/32/{domain}"
    logger.debug("fetch favicon from: %s", url)

    # will just return a 200 regardless of the favicon existing or not
    # sometimes will be correct size, sometimes not
    response = network.get(url, **_req_args(timeout=timeout))
    if not response or response.status_code != 200:
        return None, None

    mime = response.headers['Content-Type']
    # a GIF answer is not taken as favicon (presumably the placeholder of the
    # service -- TODO confirm), only its mime type is passed on
    data = None if mime == 'image/gif' else response.content
    return data, mime
|
||||
|
||||
|
||||
def duckduckgo(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
    """Favicon Resolver from duckduckgo.com / https://blog.jim-nielsen.com/2021/displaying-favicons-for-any-domain/"""
    url = f"https://icons.duckduckgo.com/ip2/{domain}.ico"
    logger.debug("fetch favicon from: %s", url)

    # will return a 404 if the favicon does not exist and a 200 if it does,
    response = network.get(url, **_req_args(timeout=timeout))
    if not response or response.status_code != 200:
        return None, None

    # api will respond with a 32x32 png image
    headers = response.headers
    return response.content, headers['Content-Type']
|
||||
|
||||
|
||||
def google(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
    """Favicon Resolver from google.com"""

    # URL https://www.google.com/s2/favicons?sz=32&domain={domain}" will be
    # redirected (HTTP 301 Moved Permanently) to t1.gstatic.com/faviconV2:
    base_url = "https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL"
    url = base_url + f"&url=https://{domain}&size=32"
    logger.debug("fetch favicon from: %s", url)

    # will return a 404 if the favicon does not exist and a 200 if it does,
    response = network.get(url, **_req_args(timeout=timeout))
    if not response or response.status_code != 200:
        return None, None

    # api will respond with a 32x32 png image
    headers = response.headers
    return response.content, headers['Content-Type']
|
||||
|
||||
|
||||
def yandex(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
    """Favicon Resolver from yandex.com"""
    url = f"https://favicon.yandex.net/favicon/{domain}"
    logger.debug("fetch favicon from: %s", url)

    # api will respond with a 16x16 png image, if it doesn't exist, it will be a
    # 1x1 png image (70 bytes)
    response = network.get(url, **_req_args(timeout=timeout))
    if not response or response.status_code != 200:
        return None, None
    if len(response.content) <= 70:
        return None, None

    headers = response.headers
    return response.content, headers['Content-Type']
|
||||
|
||||
|
||||
# The resolvers of this module, registered by the name of their function.
DEFAULT_RESOLVER_MAP = {func.__name__: func for func in (allesedv, duckduckgo, google, yandex)}
|
|
@ -13,7 +13,7 @@ from collections import OrderedDict
|
|||
import flask
|
||||
import babel
|
||||
|
||||
from searx import settings, autocomplete, favicon_resolver
|
||||
from searx import settings, autocomplete, favicons
|
||||
from searx.enginelib import Engine
|
||||
from searx.plugins import Plugin
|
||||
from searx.locales import LOCALE_NAMES
|
||||
|
@ -409,7 +409,7 @@ class Preferences:
|
|||
'favicon_resolver': EnumStringSetting(
|
||||
settings['search']['favicon_resolver'],
|
||||
locked=is_locked('favicon_resolver'),
|
||||
choices=list(favicon_resolver.backends.keys()) + ['']
|
||||
choices=list(favicons.proxy.CFG.resolver_map.keys()) + ['']
|
||||
),
|
||||
'image_proxy': BooleanSetting(
|
||||
settings['server']['image_proxy'],
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
|
||||
<path fill="#fff" d="M0 0h24v24H0z"/>
|
||||
<path fill="#58f" d="M11 20.85a.92.92 0 0 1-1.1.93A10 10 0 0 1 2.06 13c-.06-.55.4-1 .95-1h3a1 1 0 0 1 1 1 3 3 0 0 0 3 3 1 1 0 0 1 1 1v3.85Zm6-1.92c0 .77.83 1.23 1.42.74a10 10 0 0 0 2.03-2.32c.39-.61-.09-1.35-.81-1.35H18a1 1 0 0 0-1 1v1.93ZM12 2a10 10 0 0 1 6.65 2.53c.61.55.17 1.47-.65 1.47h-.15A2.85 2.85 0 0 0 15 8.85c0 .33-.18.62-.47.77l-.08.04a1 1 0 0 1-.9 0l-.08-.04a.85.85 0 0 1-.47-.77A2.85 2.85 0 0 0 10.15 6H10a1 1 0 0 1-1-1V3.2c0-.44.28-.84.7-.94C10.45 2.1 11.22 2 12 2Z"/>
|
||||
<path fill="#58f" d="M3.42 10c-.63 0-1.1-.58-.9-1.18.6-1.8 1.7-3.36 3.12-4.53C6.2 3.82 7 4.26 7 5a3 3 0 0 0 3 3h.15c.47 0 .85.38.85.85 0 1.09.61 2.07 1.58 2.56l.08.04a3 3 0 0 0 2.68 0l.08-.04A2.85 2.85 0 0 0 17 8.85c0-.47.38-.85.85-.85h2.66c.4 0 .77.23.9.6a9.98 9.98 0 0 1 .52 4.6.94.94 0 0 1-.95.8H18a3 3 0 0 0-3 3v3.8c0 .44-.28.84-.7.94l-.2.04a.92.92 0 0 1-1.1-.93V17a3 3 0 0 0-3-3 1 1 0 0 1-1-1 3 3 0 0 0-3-3H3.42Z"/>
|
||||
</svg>
|
Before Width: | Height: | Size: 1 KiB After Width: | Height: | Size: 989 B |
|
@ -23,12 +23,7 @@
|
|||
{{- result_open_link(result.url, "url_wrapper") -}}
|
||||
{% if not rtl %}
|
||||
{%- if favicon_resolver != "" %}
|
||||
<div class="favicon">
|
||||
<img
|
||||
alt="{{ result.parsed_url.netloc }}"
|
||||
src="{{ favicon_proxify(result.parsed_url.netloc) }}"
|
||||
>
|
||||
</div>
|
||||
<div class="favicon"><img loading="lazy" src="{{ favicon_url(result.parsed_url.netloc) }}"></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- for part in get_pretty_url(result.parsed_url) -%}
|
||||
|
@ -36,12 +31,7 @@
|
|||
{%- endfor %}
|
||||
{% if rtl %}
|
||||
{%- if favicon_resolver != "" %}
|
||||
<div class="favicon">
|
||||
<img
|
||||
alt="{{ result.parsed_url.netloc }}"
|
||||
src="{{ favicon_proxify(result.parsed_url.netloc) }}"
|
||||
>
|
||||
</div>
|
||||
<div class="favicon"><img loading="lazy" src="{{ favicon_url(result.parsed_url.netloc) }}"></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{{- result_close_link() -}}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<div class="value">{{- '' -}}
|
||||
<select name="favicon_resolver" aria-labelledby="pref_favicon_resolver">{{- '' -}}
|
||||
<option value=""> - </option>
|
||||
{%- for backend in favicon_backends -%}
|
||||
{%- for backend in favicon_resolver_names -%}
|
||||
<option value="{{ backend }}"
|
||||
{%- if backend == favicon_resolver %} selected="selected" {%- endif -%}>
|
||||
{{- backend -}}
|
||||
|
|
|
@ -123,7 +123,8 @@ from searx.locales import (
|
|||
|
||||
# renaming names from searx imports ...
|
||||
from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
|
||||
from searx.favicon_resolver import search_favicon, backends as favicon_backends
|
||||
from searx import favicons
|
||||
|
||||
from searx.redisdb import initialize as redis_initialize
|
||||
from searx.sxng_locales import sxng_locales
|
||||
from searx.search import SearchWithPlugins, initialize as search_initialize
|
||||
|
@ -298,24 +299,6 @@ def morty_proxify(url: str):
|
|||
return '{0}?{1}'.format(settings['result_proxy']['url'], urlencode(url_params))
|
||||
|
||||
|
||||
def favicon_proxify(url: str):
|
||||
# url is a FQDN (e.g. example.com, en.wikipedia.org)
|
||||
|
||||
resolver = request.preferences.get_value('favicon_resolver')
|
||||
|
||||
# if resolver is empty, just return nothing
|
||||
if not resolver:
|
||||
return ""
|
||||
|
||||
# check resolver is valid
|
||||
if resolver not in favicon_backends:
|
||||
return ""
|
||||
|
||||
h = new_hmac(settings['server']['secret_key'], url.encode())
|
||||
|
||||
return '{0}?{1}'.format(url_for('favicon_proxy'), urlencode(dict(q=url.encode(), h=h)))
|
||||
|
||||
|
||||
def image_proxify(url: str):
|
||||
|
||||
if url.startswith('//'):
|
||||
|
@ -377,7 +360,6 @@ def get_client_settings():
|
|||
return {
|
||||
'autocomplete_provider': req_pref.get_value('autocomplete'),
|
||||
'autocomplete_min': get_setting('search.autocomplete_min'),
|
||||
'favicon_resolver': req_pref.get_value('favicon_resolver'),
|
||||
'http_method': req_pref.get_value('method'),
|
||||
'infinite_scroll': req_pref.get_value('infinite_scroll'),
|
||||
'translations': get_translations(),
|
||||
|
@ -452,7 +434,7 @@ def render(template_name: str, **kwargs):
|
|||
# helpers to create links to other pages
|
||||
kwargs['url_for'] = custom_url_for # override url_for function in templates
|
||||
kwargs['image_proxify'] = image_proxify
|
||||
kwargs['favicon_proxify'] = favicon_proxify
|
||||
kwargs['favicon_url'] = favicons.favicon_url
|
||||
kwargs['proxify'] = morty_proxify if settings['result_proxy']['url'] is not None else None
|
||||
kwargs['proxify_results'] = settings['result_proxy']['proxify_results']
|
||||
kwargs['cache_url'] = settings['ui']['cache_url']
|
||||
|
@ -895,42 +877,6 @@ def autocompleter():
|
|||
return Response(suggestions, mimetype=mimetype)
|
||||
|
||||
|
||||
@app.route('/favicon', methods=['GET'])
|
||||
def favicon_proxy():
|
||||
"""Return proxied favicon results"""
|
||||
url = request.args.get('q')
|
||||
|
||||
# malformed request
|
||||
if not url:
|
||||
return '', 400
|
||||
|
||||
# malformed request / does not have authorisation
|
||||
if not is_hmac_of(settings['server']['secret_key'], url.encode(), request.args.get('h', '')):
|
||||
return '', 400
|
||||
|
||||
resolver = request.preferences.get_value('favicon_resolver')
|
||||
|
||||
# check if the favicon resolver is valid
|
||||
if not resolver or resolver not in favicon_backends:
|
||||
return '', 400
|
||||
|
||||
# parse query
|
||||
raw_text_query = RawTextQuery(url, [])
|
||||
|
||||
resp = search_favicon(resolver, raw_text_query)
|
||||
|
||||
# return 404 if the favicon is not found
|
||||
if not resp:
|
||||
theme = request.preferences.get_value("theme")
|
||||
# return favicon from /static/themes/simple/img/empty_favicon.svg
|
||||
# we can't rely on an onerror event in the img tag to display a default favicon as this violates the CSP.
|
||||
# using redirect to save network bandwidth (user will have this location cached).
|
||||
return redirect(url_for('static', filename='themes/' + theme + '/img/empty_favicon.svg'))
|
||||
|
||||
# will always return a PNG image
|
||||
return Response(resp, mimetype='image/png')
|
||||
|
||||
|
||||
@app.route('/preferences', methods=['GET', 'POST'])
|
||||
def preferences():
|
||||
"""Render preferences page && save user preferences"""
|
||||
|
@ -1078,7 +1024,7 @@ def preferences():
|
|||
],
|
||||
disabled_engines = disabled_engines,
|
||||
autocomplete_backends = autocomplete_backends,
|
||||
favicon_backends = favicon_backends,
|
||||
favicon_resolver_names = favicons.proxy.CFG.resolver_map.keys(),
|
||||
shortcuts = {y: x for x, y in engine_shortcuts.items()},
|
||||
themes = themes,
|
||||
plugins = plugins,
|
||||
|
@ -1092,6 +1038,9 @@ def preferences():
|
|||
)
|
||||
|
||||
|
||||
app.add_url_rule('/favicon_proxy', methods=['GET'], endpoint="favicon_proxy", view_func=favicons.favicon_proxy)
|
||||
|
||||
|
||||
@app.route('/image_proxy', methods=['GET'])
|
||||
def image_proxy():
|
||||
# pylint: disable=too-many-return-statements, too-many-branches
|
||||
|
@ -1403,6 +1352,7 @@ if not werkzeug_reloader or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_
|
|||
plugin_initialize(app)
|
||||
search_initialize(enable_checker=True, check_network=True, enable_metrics=settings['general']['enable_metrics'])
|
||||
limiter.initialize(app, settings)
|
||||
favicons.init()
|
||||
|
||||
|
||||
def run():
|
||||
|
|
1
setup.py
1
setup.py
|
@ -61,6 +61,7 @@ setup(
|
|||
'data/*.json',
|
||||
'data/*.txt',
|
||||
'data/*.ftz',
|
||||
'favicons/*.toml',
|
||||
'infopage/*/*',
|
||||
'static/themes/simple/css/*',
|
||||
'static/themes/simple/css/*/*',
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# pylint: disable=missing-module-docstring, invalid-name
|
||||
|
||||
from tests import SearxTestCase
|
||||
from searx import favicons
|
||||
from searx.locales import locales_initialize
|
||||
from searx.preferences import (
|
||||
EnumStringSetting,
|
||||
|
@ -14,6 +15,7 @@ from searx.preferences import (
|
|||
from searx.plugins import Plugin
|
||||
|
||||
locales_initialize()
|
||||
favicons.init()
|
||||
|
||||
|
||||
class PluginStub(Plugin): # pylint: disable=missing-class-docstring, too-few-public-methods
|
||||
|
|
Loading…
Reference in a new issue