cuda: Introduce GST_CUDA_CRITICAL_ERRORS env to abort on critical error

Add the GST_CUDA_CRITICAL_ERRORS environment variable so that the program
can be terminated on an unrecoverable error.

Example)
GST_CUDA_CRITICAL_ERRORS=2,700 gst-launch-1.0 ...

In this example, CUDA_ERROR_OUT_OF_MEMORY(2) and
CUDA_ERROR_ILLEGAL_ADDRESS(700) are registered as critical errors,
and the program will be aborted when either of those errors occurs.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4729>
This commit is contained in:
Seungha Yang 2023-05-29 21:53:52 +09:00 committed by GStreamer Marge Bot
parent 58b166453d
commit de749fa356
3 changed files with 62 additions and 1 deletions

View file

@ -1226,7 +1226,7 @@ Retrieves the #GstCudaContext in @context and places the result in @cuda_ctx.</d
</parameter>
</parameters>
</function>
<function-macro name="cuda_result" c:identifier="gst_cuda_result" introspectable="0">
<function-macro name="cuda_result" c:identifier="gst_cuda_result" version="1.22" introspectable="0">
<source-position filename="../subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.h"/>
<parameters>
<parameter name="result">

View file

@ -5,3 +5,21 @@ gstreamer-cuda-{{ gst_api_version.md }}.pc
> NOTE: This library API is considered *unstable*
## Environment variables
The GStreamer CUDA library inspects the following environment variables:
**`GST_CUDA_CRITICAL_ERRORS`. (Since: 1.24)**
This environment variable can be set to a comma-separated list of CUresult
values (see the CUDA driver API documentation). The GStreamer CUDA library will
abort when one of the user-registered errors is detected. This environment
variable is useful for unrecoverable CUDA errors, i.e., errors after which
in-process error recovery (e.g., relaunching a new pipeline) is not expected
to work and the process should therefore be relaunched.
Example: `GST_CUDA_CRITICAL_ERRORS=2,700`
As a result of the above example, if a `CUDA_ERROR_OUT_OF_MEMORY(2)` or
`CUDA_ERROR_ILLEGAL_ADDRESS(700)` error is detected in the GStreamer CUDA library,
the process will be aborted.

View file

@ -25,6 +25,8 @@
#include "gstcudacontext.h"
#include "gstcuda-private.h"
#include <atomic>
#include <set>
#include <string>
#ifdef HAVE_CUDA_GST_GL
#include <gst/gl/gl.h>
@ -1672,6 +1674,43 @@ gst_cuda_create_user_token (void)
return user_token.fetch_add (1);
}
/*
 * _abort_on_error:
 * @result: CUDA result code to look up
 *
 * Tells whether @result was registered as a critical error through the
 * GST_CUDA_CRITICAL_ERRORS environment variable. The variable is parsed
 * inside the GST_CUDA_CALL_ONCE block (presumably executed a single time)
 * as a comma-separated list of positive integer error codes. Entries that
 * are not a plain positive integer are skipped with a warning.
 *
 * Returns: TRUE if @result is one of the registered codes, FALSE otherwise
 */
static gboolean
_abort_on_error (CUresult result)
{
  static std::set < CUresult > abort_list;

  GST_CUDA_CALL_ONCE_BEGIN {
    const gchar *env = g_getenv ("GST_CUDA_CRITICAL_ERRORS");
    if (!env)
      return;

    gchar **split = g_strsplit (env, ",", 0);
    for (gchar ** iter = split; *iter; iter++) {
      const std::string token (*iter);
      std::size_t consumed = 0;
      int error_code = 0;

      try {
        error_code = std::stoi (token, &consumed);
      } catch ( ...) {
        /* Not a number at all, or out of the int range */
        GST_WARNING ("Invalid argument \"%s\"", *iter);
        continue;
      }

      /* std::stoi() stops at the first non-numeric character, so e.g.
       * "2;700" would otherwise be silently registered as 2. Only blanks
       * may follow the number. */
      if (token.find_first_not_of (" \t", consumed) != std::string::npos) {
        GST_WARNING ("Invalid argument \"%s\"", *iter);
        continue;
      }

      /* Zero and negative values are not registered */
      if (error_code <= 0) {
        GST_WARNING ("Ignoring non-positive error code \"%s\"", *iter);
        continue;
      }

      abort_list.insert (static_cast < CUresult > (error_code));
    }

    g_strfreev (split);
  }
  GST_CUDA_CALL_ONCE_END;

  /* Lookup in an empty set simply misses, no separate empty() check needed */
  if (abort_list.find (result) != abort_list.end ())
    return TRUE;

  return FALSE;
}
/**
* _gst_cuda_debug:
* @result: CUDA result code
@ -1696,6 +1735,10 @@ _gst_cuda_debug (CUresult result, GstDebugCategory * cat,
gst_debug_log (cat, GST_LEVEL_WARNING, file, function, line,
NULL, "CUDA call failed: %s, %s", _error_name, _error_text);
#endif
if (_abort_on_error (result)) {
GST_ERROR ("Critical error %d, abort", (gint) result);
g_abort ();
}
return FALSE;
}