From de749fa3561680cb405eaed9f5188031294ee584 Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Mon, 29 May 2023 21:53:52 +0900 Subject: [PATCH] cuda: Introduce GST_CUDA_CRITICAL_ERRORS env to abort on critical error Adding the GST_CUDA_CRITICAL_ERRORS env variable so that the program can be terminated on an unrecoverable error. Example) GST_CUDA_CRITICAL_ERRORS=2,700 gst-launch-1.0 ... In this example, CUDA_ERROR_OUT_OF_MEMORY(2) and CUDA_ERROR_ILLEGAL_ADDRESS(700) are registered as critical errors and the program will be aborted on those errors. Part-of: --- girs/GstCuda-1.0.gir | 2 +- .../gst-plugins-bad/docs/libs/cuda/index.md | 18 ++++++++ .../gst-libs/gst/cuda/gstcudautils.cpp | 43 +++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) diff --git a/girs/GstCuda-1.0.gir b/girs/GstCuda-1.0.gir index 5a1f529afe..77c0c04585 100644 --- a/girs/GstCuda-1.0.gir +++ b/girs/GstCuda-1.0.gir @@ -1226,7 +1226,7 @@ Retrieves the #GstCudaContext in @context and places the result in @cuda_ctx. - + diff --git a/subprojects/gst-plugins-bad/docs/libs/cuda/index.md b/subprojects/gst-plugins-bad/docs/libs/cuda/index.md index 51acb2ca4c..5e529291b8 100644 --- a/subprojects/gst-plugins-bad/docs/libs/cuda/index.md +++ b/subprojects/gst-plugins-bad/docs/libs/cuda/index.md @@ -5,3 +5,21 @@ gstreamer-cuda-{{ gst_api_version.md }}.pc > NOTE: This library API is considered *unstable* +## Environment variables + +The GStreamer CUDA library inspects the following environment variables: + +**`GST_CUDA_CRITICAL_ERRORS`. (Since: 1.24)** + +This environment variable can be set to a comma-separated list of CUresult +values (see the CUDA driver API documentation). The GStreamer CUDA library will +abort the process when any of the registered errors is detected. This variable is useful +when an unrecoverable CUDA error happens: in that case in-process error recovery +(e.g., relaunching a new pipeline) is not expected to work, and therefore +the process should be relaunched. 
+ +Example: `GST_CUDA_CRITICAL_ERRORS=2,700` + +With the above example, if a `CUDA_ERROR_OUT_OF_MEMORY(2)` or +`CUDA_ERROR_ILLEGAL_ADDRESS(700)` error is detected in the GStreamer CUDA library, +the process will be aborted. diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.cpp index f070d5b617..2f7102e546 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.cpp @@ -25,6 +25,8 @@ #include "gstcudacontext.h" #include "gstcuda-private.h" #include <atomic> +#include <set> +#include <string> #ifdef HAVE_CUDA_GST_GL #include @@ -1672,6 +1674,43 @@ gst_cuda_create_user_token (void) return user_token.fetch_add (1); } +static gboolean +_abort_on_error (CUresult result) +{ + static std::set < CUresult > abort_list; + GST_CUDA_CALL_ONCE_BEGIN { + const gchar *env = g_getenv ("GST_CUDA_CRITICAL_ERRORS"); + if (!env) + return; + + gchar **split = g_strsplit (env, ",", 0); + gchar **iter; + for (iter = split; *iter; iter++) { + int error_code = 0; + try { + error_code = std::stoi (*iter); + } catch ( ...) { + GST_WARNING ("Invalid argument \"%s\"", *iter); + continue; + }; + + if (error_code > 0) + abort_list.insert ((CUresult) error_code); + } + + g_strfreev (split); + } + GST_CUDA_CALL_ONCE_END; + + if (abort_list.empty ()) + return FALSE; + + if (abort_list.find (result) != abort_list.end ()) + return TRUE; + + return FALSE; +} + /** * _gst_cuda_debug: * @result: CUDA result code @@ -1696,6 +1735,10 @@ _gst_cuda_debug (CUresult result, GstDebugCategory * cat, gst_debug_log (cat, GST_LEVEL_WARNING, file, function, line, NULL, "CUDA call failed: %s, %s", _error_name, _error_text); #endif + if (_abort_on_error (result)) { + GST_ERROR ("Critical error %d, abort", (gint) result); + g_abort (); + } return FALSE; }