mirror of
https://github.com/searxng/searxng.git
synced 2024-11-26 12:51:02 +00:00
commit
38e0b9360b
4 changed files with 66 additions and 30 deletions
|
@ -5,6 +5,7 @@
|
|||
import asyncio
|
||||
import threading
|
||||
import concurrent.futures
|
||||
from types import MethodType
|
||||
from timeit import default_timer
|
||||
|
||||
import httpx
|
||||
|
@ -161,19 +162,32 @@ def patch(url, data=None, **kwargs):
|
|||
def delete(url, **kwargs):
|
||||
return request('delete', url, **kwargs)
|
||||
|
||||
|
||||
async def stream_chunk_to_queue(network, queue, method, url, **kwargs):
|
||||
try:
|
||||
async with network.stream(method, url, **kwargs) as response:
|
||||
queue.put(response)
|
||||
async for chunk in response.aiter_bytes(65536):
|
||||
# aiter_raw: access the raw bytes on the response without applying any HTTP content decoding
|
||||
# https://www.python-httpx.org/quickstart/#streaming-responses
|
||||
async for chunk in response.aiter_raw(65536):
|
||||
if len(chunk) > 0:
|
||||
queue.put(chunk)
|
||||
except httpx.ResponseClosed as e:
|
||||
# the response was closed
|
||||
pass
|
||||
except (httpx.HTTPError, OSError, h2.exceptions.ProtocolError) as e:
|
||||
queue.put(e)
|
||||
finally:
|
||||
queue.put(None)
|
||||
|
||||
|
||||
def _close_response_method(self):
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
self.aclose(),
|
||||
get_loop()
|
||||
)
|
||||
|
||||
|
||||
def stream(method, url, **kwargs):
|
||||
"""Replace httpx.stream.
|
||||
|
||||
|
@ -191,10 +205,19 @@ def stream(method, url, **kwargs):
|
|||
stream_chunk_to_queue(get_network(), queue, method, url, **kwargs),
|
||||
get_loop()
|
||||
)
|
||||
|
||||
# yield response
|
||||
response = queue.get()
|
||||
if isinstance(response, Exception):
|
||||
raise response
|
||||
response.close = MethodType(_close_response_method, response)
|
||||
yield response
|
||||
|
||||
# yield chunks
|
||||
chunk_or_exception = queue.get()
|
||||
while chunk_or_exception is not None:
|
||||
if isinstance(chunk_or_exception, Exception):
|
||||
raise chunk_or_exception
|
||||
yield chunk_or_exception
|
||||
chunk_or_exception = queue.get()
|
||||
return future.result()
|
||||
future.result()
|
||||
|
|
|
@ -289,6 +289,14 @@ def initialize(settings_engines=None, settings_outgoing=None):
|
|||
if isinstance(network, str):
|
||||
NETWORKS[engine_name] = NETWORKS[network]
|
||||
|
||||
# the /image_proxy endpoint has a dedicated network.
|
||||
# same parameters than the default network, but HTTP/2 is disabled.
|
||||
# It decreases the CPU load average, and the total time is more or less the same
|
||||
if 'image_proxy' not in NETWORKS:
|
||||
image_proxy_params = default_params.copy()
|
||||
image_proxy_params['enable_http2'] = False
|
||||
NETWORKS['image_proxy'] = new_network(image_proxy_params)
|
||||
|
||||
|
||||
@atexit.register
|
||||
def done():
|
||||
|
|
|
@ -262,11 +262,7 @@ def dict_subset(d, properties):
|
|||
>>> >> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'D'])
|
||||
{'A': 'a'}
|
||||
"""
|
||||
result = {}
|
||||
for k in properties:
|
||||
if k in d:
|
||||
result[k] = d[k]
|
||||
return result
|
||||
return {k: d[k] for k in properties if k in d}
|
||||
|
||||
|
||||
def get_torrent_size(filesize, filesize_multiplier):
|
||||
|
|
|
@ -108,7 +108,7 @@ from searx.autocomplete import search_autocomplete, backends as autocomplete_bac
|
|||
from searx.languages import language_codes as languages
|
||||
from searx.locales import LOCALE_NAMES, UI_LOCALE_CODES, RTL_LOCALES
|
||||
from searx.search import SearchWithPlugins, initialize as search_initialize
|
||||
from searx.network import stream as http_stream
|
||||
from searx.network import stream as http_stream, set_context_network_name
|
||||
from searx.search.checker import get_result as checker_get_result
|
||||
from searx.settings_loader import get_default_settings_path
|
||||
|
||||
|
@ -1065,7 +1065,7 @@ def _is_selected_language_supported(engine, preferences): # pylint: disable=red
|
|||
|
||||
@app.route('/image_proxy', methods=['GET'])
|
||||
def image_proxy():
|
||||
# pylint: disable=too-many-return-statements
|
||||
# pylint: disable=too-many-return-statements, too-many-branches
|
||||
|
||||
url = request.args.get('url')
|
||||
if not url:
|
||||
|
@ -1076,17 +1076,21 @@ def image_proxy():
|
|||
return '', 400
|
||||
|
||||
maximum_size = 5 * 1024 * 1024
|
||||
|
||||
forward_resp = False
|
||||
resp = None
|
||||
try:
|
||||
headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
|
||||
headers['User-Agent'] = gen_useragent()
|
||||
request_headers = {
|
||||
'User-Agent': gen_useragent(),
|
||||
'Accept': 'image/webp,*/*',
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
'Sec-GPC': '1',
|
||||
'DNT': '1',
|
||||
}
|
||||
set_context_network_name('image_proxy')
|
||||
stream = http_stream(
|
||||
method = 'GET',
|
||||
url = url,
|
||||
headers = headers,
|
||||
timeout = settings['outgoing']['request_timeout'],
|
||||
allow_redirects = True,
|
||||
max_redirects = 20
|
||||
headers = request_headers
|
||||
)
|
||||
resp = next(stream)
|
||||
content_length = resp.headers.get('Content-Length')
|
||||
|
@ -1095,32 +1099,37 @@ def image_proxy():
|
|||
and int(content_length) > maximum_size ):
|
||||
return 'Max size', 400
|
||||
|
||||
if resp.status_code == 304:
|
||||
return '', resp.status_code
|
||||
|
||||
if resp.status_code != 200:
|
||||
logger.debug(
|
||||
'image-proxy: wrong response code: {0}'.format(
|
||||
resp.status_code))
|
||||
logger.debug('image-proxy: wrong response code: %i', resp.status_code)
|
||||
if resp.status_code >= 400:
|
||||
return '', resp.status_code
|
||||
return '', 400
|
||||
|
||||
if not resp.headers.get('content-type', '').startswith('image/'):
|
||||
logger.debug(
|
||||
'image-proxy: wrong content-type: {0}'.format(
|
||||
resp.headers.get('content-type')))
|
||||
if not resp.headers.get('Content-Type', '').startswith('image/'):
|
||||
logger.debug('image-proxy: wrong content-type: %s', resp.headers.get('Content-Type', ''))
|
||||
return '', 400
|
||||
|
||||
forward_resp = True
|
||||
except httpx.HTTPError:
|
||||
logger.exception('HTTP error')
|
||||
return '', 400
|
||||
finally:
|
||||
if resp and not forward_resp:
|
||||
# the code is about to return an HTTP 400 error to the browser
|
||||
# we make sure to close the response between searxng and the HTTP server
|
||||
try:
|
||||
resp.close()
|
||||
except httpx.HTTPError:
|
||||
logger.exception('HTTP error on closing')
|
||||
|
||||
try:
|
||||
headers = dict_subset(
|
||||
resp.headers,
|
||||
{'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}
|
||||
{'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'}
|
||||
)
|
||||
|
||||
total_length = 0
|
||||
|
||||
def forward_chunk():
|
||||
nonlocal total_length
|
||||
total_length = 0
|
||||
for chunk in stream:
|
||||
total_length += len(chunk)
|
||||
if total_length > maximum_size:
|
||||
|
|
Loading…
Reference in a new issue