nvcodec: Refactor plugin initialization

Create one CUDA context per device, instead of one per codec and per
encoder/decoder. Allocating a CUDA context is a heavy operation, so we
should reuse it as much as possible.

Fixes: https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/issues/1130
This commit is contained in:
Seungha Yang 2019-11-28 18:54:31 +09:00
parent 2a3205b294
commit 49bccf0433
7 changed files with 143 additions and 135 deletions

View file

@ -24,6 +24,9 @@
#include "gstcudaloader.h"
#include <gmodule.h>
GST_DEBUG_CATEGORY_EXTERN (gst_nvcodec_debug);
#define GST_CAT_DEFAULT gst_nvcodec_debug
#ifndef G_OS_WIN32
#define CUDA_LIBNAME "libcuda.so.1"
#else

View file

@ -28,6 +28,7 @@
#include <string.h>
GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug);
#define GST_CAT_DEFAULT gst_nvenc_debug
#if HAVE_NVCODEC_GST_GL

View file

@ -34,8 +34,8 @@
#include <string.h>
GST_DEBUG_CATEGORY_STATIC (gst_nvdec_debug_category);
#define GST_CAT_DEFAULT gst_nvdec_debug_category
GST_DEBUG_CATEGORY_EXTERN (gst_nvdec_debug);
#define GST_CAT_DEFAULT gst_nvdec_debug
#ifdef HAVE_NVCODEC_GST_GL
#define SUPPORTED_GL_APIS (GST_GL_API_OPENGL | GST_GL_API_OPENGL3 | GST_GL_API_GLES2)
@ -1544,13 +1544,9 @@ typedef struct
static void
gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type,
const gchar * codec, const gchar * sink_caps_string, guint rank,
gint device_count)
gint device_idx, CUcontext cuda_ctx)
{
gint i;
for (i = 0; i < device_count; i++) {
CUdevice cuda_device;
CUcontext cuda_ctx;
{
CUresult cuda_ret;
gint max_width = 0, min_width = G_MAXINT;
gint max_height = 0, min_height = G_MAXINT;
@ -1575,18 +1571,12 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type,
GValue format = G_VALUE_INIT;
GValue profile_list = G_VALUE_INIT;
if (CuDeviceGet (&cuda_device, i) != CUDA_SUCCESS)
continue;
if (CuCtxCreate (&cuda_ctx, 0, cuda_device) != CUDA_SUCCESS)
continue;
g_value_init (&format_list, GST_TYPE_LIST);
g_value_init (&format, G_TYPE_STRING);
g_value_init (&profile_list, GST_TYPE_LIST);
if (CuCtxPushCurrent (cuda_ctx) != CUDA_SUCCESS)
goto cuda_free;
goto done;
for (c_idx = 0; c_idx < G_N_ELEMENTS (chroma_list); c_idx++) {
for (b_idx = 0; b_idx < G_N_ELEMENTS (bitdepth_minus8); b_idx++) {
@ -1682,7 +1672,7 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type,
if (num_support == 0) {
GST_INFO ("device can not support %s", codec);
goto cuda_free;
goto done;
}
src_templ = gst_caps_new_simple ("video/x-raw",
@ -1717,16 +1707,14 @@ gst_nvdec_register (GstPlugin * plugin, GType type, cudaVideoCodec codec_type,
CuCtxPopCurrent (NULL);
cuda_free:
CuCtxDestroy (cuda_ctx);
done:
g_value_unset (&format_list);
g_value_unset (&format);
g_value_unset (&profile_list);
if (sink_templ && src_templ) {
gst_nvdec_subclass_register (plugin, type, codec_type, codec, i, rank,
sink_templ, src_templ);
gst_nvdec_subclass_register (plugin, type, codec_type, codec, device_idx,
rank, sink_templ, src_templ);
}
gst_clear_caps (&sink_templ);
@ -1773,14 +1761,10 @@ const GstNvCodecMap codec_map[] = {
};
void
gst_nvdec_plugin_init (GstPlugin * plugin)
gst_nvdec_plugin_init (GstPlugin * plugin, guint device_index,
CUcontext cuda_ctx)
{
gint i;
CUresult cuda_ret;
gint dev_count = 0;
GST_DEBUG_CATEGORY_INIT (gst_nvdec_debug_category, "nvdec", 0,
"Debug category for the nvdec element");
if (!gst_cuvid_can_get_decoder_caps ()) {
GstCaps *src_templ;
@ -1804,27 +1788,16 @@ gst_nvdec_plugin_init (GstPlugin * plugin)
sink_templ = gst_caps_from_string (codec_map[i].sink_caps_string);
gst_nvdec_subclass_register (plugin, GST_TYPE_NVDEC, codec_map[i].codec,
codec_map[i].codec_name, 0, GST_RANK_PRIMARY, sink_templ, src_templ);
codec_map[i].codec_name, device_index, GST_RANK_PRIMARY,
sink_templ, src_templ);
}
return;
}
cuda_ret = CuInit (0);
if (cuda_ret != CUDA_SUCCESS) {
GST_ERROR ("Failed to initialize CUDA API");
return;
}
cuda_ret = CuDeviceGetCount (&dev_count);
if (cuda_ret != CUDA_SUCCESS || dev_count == 0) {
GST_ERROR ("No CUDA devices detected");
return;
}
for (i = 0; i < G_N_ELEMENTS (codec_map); i++) {
gst_nvdec_register (plugin, GST_TYPE_NVDEC, codec_map[i].codec,
codec_map[i].codec_name, codec_map[i].sink_caps_string,
GST_RANK_PRIMARY, dev_count);
GST_RANK_PRIMARY, device_index, cuda_ctx);
}
}

View file

@ -101,7 +101,9 @@ struct _GstNvDecClass
GType gst_nvdec_get_type (void);
void gst_nvdec_plugin_init (GstPlugin * plugin);
void gst_nvdec_plugin_init (GstPlugin * plugin,
guint device_index,
CUcontext cuda_ctx);
G_END_DECLS

View file

@ -44,7 +44,7 @@ typedef NVENCSTATUS NVENCAPI
tNvEncodeAPICreateInstance (NV_ENCODE_API_FUNCTION_LIST * functionList);
tNvEncodeAPICreateInstance *nvEncodeAPICreateInstance;
GST_DEBUG_CATEGORY (gst_nvenc_debug);
GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug);
#define GST_CAT_DEFAULT gst_nvenc_debug
static NV_ENCODE_API_FUNCTION_LIST nvenc_api;
@ -290,27 +290,6 @@ gst_nvenc_get_nv_buffer_format (GstVideoFormat fmt)
return NV_ENC_BUFFER_FORMAT_UNDEFINED;
}
static gboolean
load_nvenc_library (void)
{
GModule *module;
module = g_module_open (NVENC_LIBRARY_NAME, G_MODULE_BIND_LAZY);
if (module == NULL) {
GST_WARNING ("Could not open library %s, %s",
NVENC_LIBRARY_NAME, g_module_error ());
return FALSE;
}
if (!g_module_symbol (module, "NvEncodeAPICreateInstance",
(gpointer *) & nvEncodeAPICreateInstance)) {
GST_ERROR ("%s", g_module_error ());
return FALSE;
}
return TRUE;
}
typedef struct
{
GstVideoFormat gst_format;
@ -603,13 +582,9 @@ gst_nvenc_get_supported_codec_profiles (gpointer enc, GUID codec_id)
static void
gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
guint rank, gint device_count)
guint rank, gint device_index, CUcontext cuda_ctx)
{
gint i;
for (i = 0; i < device_count; i++) {
CUdevice cuda_device;
CUcontext cuda_ctx, dummy;
{
GValue *formats = NULL;
GValue *profiles;
GValue *interlace_modes;
@ -626,23 +601,17 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
gint j;
GstNvEncDeviceCaps device_caps = { 0, };
if (CuDeviceGet (&cuda_device, i) != CUDA_SUCCESS)
continue;
if (CuCtxCreate (&cuda_ctx, 0, cuda_device) != CUDA_SUCCESS)
continue;
if (CuCtxPopCurrent (&dummy) != CUDA_SUCCESS) {
goto cuda_free;
}
params.version = gst_nvenc_get_open_encode_session_ex_params_version ();
params.apiVersion = gst_nvenc_get_api_version ();
params.device = cuda_ctx;
params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
if (CuCtxPushCurrent (cuda_ctx) != CUDA_SUCCESS)
goto done;
if (NvEncOpenEncodeSessionEx (&params, &enc) != NV_ENC_SUCCESS) {
goto cuda_free;
CuCtxPopCurrent (NULL);
goto done;
}
if (NvEncGetEncodeGUIDs (enc, guids, G_N_ELEMENTS (guids),
@ -692,7 +661,7 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
device_caps.rc_modes = 0;
} else {
GST_DEBUG ("[device-%d %s] rate control modes: 0x%x",
i, codec, device_caps.rc_modes);
device_index, codec, device_caps.rc_modes);
#define IS_SUPPORTED_RC(rc_modes,mode) \
((((rc_modes) & (mode)) == mode) ? "supported" : "not supported")
@ -744,18 +713,20 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
device_caps.bframes = 0;
}
DEBUG_DEVICE_CAPS (i,
DEBUG_DEVICE_CAPS (device_index,
codec, "weighted prediction", device_caps.weighted_prediction);
DEBUG_DEVICE_CAPS (i, codec, "custom vbv-buffer-size",
DEBUG_DEVICE_CAPS (device_index, codec, "custom vbv-buffer-size",
device_caps.custom_vbv_bufsize);
DEBUG_DEVICE_CAPS (i, codec, "rc-loockahead", device_caps.lookahead);
DEBUG_DEVICE_CAPS (device_index, codec, "rc-loockahead",
device_caps.lookahead);
DEBUG_DEVICE_CAPS (i, codec, "temporal adaptive quantization",
DEBUG_DEVICE_CAPS (device_index, codec, "temporal adaptive quantization",
device_caps.temporal_aq);
GST_DEBUG ("[device-%d %s] max bframes: %d", i, codec, device_caps.bframes);
GST_DEBUG ("[device-%d %s] max bframes: %d", device_index, codec,
device_caps.bframes);
interlace_modes = gst_nvenc_get_interlace_modes (enc, codec_id);
@ -806,18 +777,17 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
enc_free:
NvEncDestroyEncoder (enc);
CuCtxPopCurrent (NULL);
/* fall-through */
cuda_free:
CuCtxDestroy (cuda_ctx);
done:
if (sink_templ && src_templ) {
if (gst_nvenc_cmp_guid (codec_id, NV_ENC_CODEC_H264_GUID)) {
gst_nv_h264_enc_register (plugin, i, rank, sink_templ, src_templ,
&device_caps);
gst_nv_h264_enc_register (plugin, device_index, rank, sink_templ,
src_templ, &device_caps);
} else if (gst_nvenc_cmp_guid (codec_id, NV_ENC_CODEC_HEVC_GUID)) {
gst_nv_h265_enc_register (plugin, i, rank, sink_templ, src_templ,
&device_caps);
gst_nv_h265_enc_register (plugin, device_index, rank, sink_templ,
src_templ, &device_caps);
} else {
g_assert_not_reached ();
}
@ -837,19 +807,26 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
static guint32 gst_nvenc_api_version = NVENCAPI_VERSION;
void
gst_nvenc_plugin_init (GstPlugin * plugin)
gboolean
gst_nvenc_load_library (void)
{
NVENCSTATUS ret = NV_ENC_SUCCESS;
GModule *module;
NVENCSTATUS ret;
GST_DEBUG_CATEGORY_INIT (gst_nvenc_debug, "nvenc", 0, "Nvidia NVENC encoder");
nvenc_api.version = NV_ENCODE_API_FUNCTION_LIST_VER;
if (!load_nvenc_library ()) {
GST_INFO ("Failed to load nvenc library");
return;
module = g_module_open (NVENC_LIBRARY_NAME, G_MODULE_BIND_LAZY);
if (module == NULL) {
GST_WARNING ("Could not open library %s, %s",
NVENC_LIBRARY_NAME, g_module_error ());
return FALSE;
}
if (!g_module_symbol (module, "NvEncodeAPICreateInstance",
(gpointer *) & nvEncodeAPICreateInstance)) {
GST_ERROR ("%s", g_module_error ());
return FALSE;
}
nvenc_api.version = NV_ENCODE_API_FUNCTION_LIST_VER;
ret = nvEncodeAPICreateInstance (&nvenc_api);
/* WARNING: Any developers who want to bump SDK version must ensure that
@ -892,31 +869,17 @@ gst_nvenc_plugin_init (GstPlugin * plugin)
ret = nvEncodeAPICreateInstance (&nvenc_api);
}
if (ret == NV_ENC_SUCCESS) {
CUresult cuda_ret;
gint dev_count = 0;
return ret == NV_ENC_SUCCESS;
}
GST_INFO ("Created NVEncodeAPI instance, got function table");
cuda_ret = CuInit (0);
if (cuda_ret != CUDA_SUCCESS) {
GST_ERROR ("Failed to initialize CUDA API");
return;
}
cuda_ret = CuDeviceGetCount (&dev_count);
if (cuda_ret != CUDA_SUCCESS || dev_count == 0) {
GST_ERROR ("No CUDA devices detected");
return;
}
gst_nv_enc_register (plugin, NV_ENC_CODEC_H264_GUID,
"h264", GST_RANK_PRIMARY * 2, dev_count);
gst_nv_enc_register (plugin, NV_ENC_CODEC_HEVC_GUID,
"h265", GST_RANK_PRIMARY * 2, dev_count);
} else {
GST_ERROR ("too old driver, could not load api vtable");
}
/*
 * Run NVENC encoder registration for a single device.
 *
 * Calls gst_nv_enc_register() once for the H.264 codec GUID and once for the
 * HEVC codec GUID, both with the same rank (GST_RANK_PRIMARY * 2).
 *
 * plugin:       plugin handle forwarded to gst_nv_enc_register()
 * device_index: device index forwarded to gst_nv_enc_register()
 * cuda_ctx:     CUDA context forwarded to gst_nv_enc_register(); it is only
 *               passed through here and is not destroyed by this function
 */
void
gst_nvenc_plugin_init (GstPlugin * plugin, guint device_index,
    CUcontext cuda_ctx)
{
  /* Both encoders are registered with the same rank */
  const guint encoder_rank = GST_RANK_PRIMARY * 2;

  gst_nv_enc_register (plugin, NV_ENC_CODEC_H264_GUID, "h264", encoder_rank,
      device_index, cuda_ctx);

  gst_nv_enc_register (plugin, NV_ENC_CODEC_HEVC_GUID, "h265", encoder_rank,
      device_index, cuda_ctx);
}
guint32

View file

@ -26,8 +26,6 @@
#include "gstcudaloader.h"
#include "nvEncodeAPI.h"
GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug);
G_GNUC_INTERNAL
gboolean gst_nvenc_cmp_guid (GUID g1, GUID g2);
@ -48,7 +46,9 @@ GValue * gst_nvenc_get_supported_codec_profiles (gpointer enc,
GUID codec_id);
G_GNUC_INTERNAL
void gst_nvenc_plugin_init (GstPlugin * plugin);
void gst_nvenc_plugin_init (GstPlugin * plugin,
guint device_index,
CUcontext cuda_ctx);
G_GNUC_INTERNAL
guint32 gst_nvenc_get_api_version (void);
@ -113,5 +113,7 @@ guint32 gst_nvenc_get_event_params_version (void);
G_GNUC_INTERNAL
guint32 gst_nvenc_get_open_encode_session_ex_params_version (void);
G_GNUC_INTERNAL
gboolean gst_nvenc_load_library (void);
#endif /* __GST_NVENC_H_INCLUDED__ */

View file

@ -32,17 +32,81 @@
#include "gstnvdec.h"
#include "gstnvenc.h"
GST_DEBUG_CATEGORY (gst_nvcodec_debug);
GST_DEBUG_CATEGORY (gst_nvdec_debug);
GST_DEBUG_CATEGORY (gst_nvenc_debug);
#define GST_CAT_DEFAULT gst_nvcodec_debug
/*
 * Plugin entry point.
 *
 * Loads the CUDA, NVDEC (cuvid) and NVENC libraries, then creates one CUDA
 * context per device and hands it to the decoder and encoder registration
 * code, so the same context is shared by every codec probed on that device.
 * The context is destroyed once registration for the device has finished.
 *
 * Always returns TRUE, even when no library or device is usable and nothing
 * gets registered.
 * NOTE(review): presumably this keeps the plugin from being reported as
 * failed on machines without NVIDIA hardware - confirm.
 */
static gboolean
plugin_init (GstPlugin * plugin)
{
  CUresult cuda_ret;
  gint dev_count = 0;
  gint i;
  gboolean nvdec_available = TRUE;
  gboolean nvenc_available = TRUE;

  GST_DEBUG_CATEGORY_INIT (gst_nvcodec_debug, "nvcodec", 0, "nvcodec");
  GST_DEBUG_CATEGORY_INIT (gst_nvdec_debug, "nvdec", 0, "nvdec");
  GST_DEBUG_CATEGORY_INIT (gst_nvenc_debug, "nvenc", 0, "nvenc");

  /* Nothing can work without the CUDA driver library itself */
  if (!gst_cuda_load_library ()) {
    GST_WARNING ("Failed to load cuda library");
    return TRUE;
  }

  /* Decoder and encoder libraries are independent; one may be missing */
  if (!gst_cuvid_load_library ()) {
    GST_WARNING ("Failed to load nvdec library");
    nvdec_available = FALSE;
  }

  if (!gst_nvenc_load_library ()) {
    GST_WARNING ("Failed to load nvenc library");
    nvenc_available = FALSE;
  }

  if (!nvdec_available && !nvenc_available)
    return TRUE;

  cuda_ret = CuInit (0);
  if (cuda_ret != CUDA_SUCCESS) {
    GST_WARNING ("Failed to init cuda, ret: 0x%x", (gint) cuda_ret);
    return TRUE;
  }

  /* Store the result so the warning below reports the status of this call
   * instead of the (successful) CuInit() result */
  cuda_ret = CuDeviceGetCount (&dev_count);
  if (cuda_ret != CUDA_SUCCESS || !dev_count) {
    GST_WARNING ("No available device, ret: 0x%x", (gint) cuda_ret);
    return TRUE;
  }

  for (i = 0; i < dev_count; i++) {
    CUdevice cuda_device;
    CUcontext cuda_ctx;

    cuda_ret = CuDeviceGet (&cuda_device, i);
    if (cuda_ret != CUDA_SUCCESS) {
      GST_WARNING ("Failed to get device handle %d, ret: 0x%x", i,
          (gint) cuda_ret);
      continue;
    }

    cuda_ret = CuCtxCreate (&cuda_ctx, 0, cuda_device);
    if (cuda_ret != CUDA_SUCCESS) {
      GST_WARNING ("Failed to create cuda context, ret: 0x%x", (gint) cuda_ret);
      continue;
    }

    /* Pop the context again right after creation; the registration code
     * pushes it itself when it needs it.
     * NOTE(review): relies on CuCtxCreate() making the new context current
     * on the calling thread - see the CUDA driver API documentation. */
    CuCtxPopCurrent (NULL);

    if (nvdec_available)
      gst_nvdec_plugin_init (plugin, i, cuda_ctx);
    if (nvenc_available)
      gst_nvenc_plugin_init (plugin, i, cuda_ctx);

    /* The context was only needed for capability probing/registration */
    CuCtxDestroy (cuda_ctx);
  }

  return TRUE;
}