mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-03-30 12:49:40 +00:00
nvdec: Support CUDA buffer pool
If downstream can accept CUDA memory caps feature (currently nvenc only), always CUDA memory is preferred. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/1633>
This commit is contained in:
parent
cf5ef5635f
commit
8635d12929
2 changed files with 149 additions and 39 deletions
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include "gstnvdec.h"
|
||||
#include "gstcudautils.h"
|
||||
#include "gstcudabufferpool.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
@ -46,7 +47,7 @@ gst_nvdec_copy_device_to_gl (GstNvDec * nvdec,
|
|||
#endif
|
||||
|
||||
static gboolean
|
||||
gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
|
||||
gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
|
||||
CUVIDPARSERDISPINFO * dispinfo, GstBuffer * output_buffer);
|
||||
|
||||
#ifdef HAVE_NVCODEC_GST_GL
|
||||
|
@ -506,7 +507,6 @@ gst_nvdec_negotiate (GstVideoDecoder * decoder)
|
|||
state->caps = gst_video_info_to_caps (&state->info);
|
||||
nvdec->mem_type = GST_NVDEC_MEM_TYPE_SYSTEM;
|
||||
|
||||
#ifdef HAVE_NVCODEC_GST_GL
|
||||
{
|
||||
GstCaps *caps;
|
||||
caps = gst_pad_get_allowed_caps (GST_VIDEO_DECODER_SRC_PAD (nvdec));
|
||||
|
@ -515,20 +515,35 @@ gst_nvdec_negotiate (GstVideoDecoder * decoder)
|
|||
if (!caps || gst_caps_is_any (caps)) {
|
||||
GST_DEBUG_OBJECT (nvdec,
|
||||
"cannot determine output format, use system memory");
|
||||
} else if (nvdec->gl_display) {
|
||||
} else {
|
||||
GstCapsFeatures *features;
|
||||
guint size = gst_caps_get_size (caps);
|
||||
guint i;
|
||||
gboolean have_cuda = FALSE;
|
||||
gboolean have_gl = FALSE;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
features = gst_caps_get_features (caps, i);
|
||||
if (features && gst_caps_features_contains (features,
|
||||
GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
|
||||
GST_DEBUG_OBJECT (nvdec, "found GL memory feature, use gl");
|
||||
nvdec->mem_type = GST_NVDEC_MEM_TYPE_GL;
|
||||
GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
|
||||
GST_DEBUG_OBJECT (nvdec, "found CUDA memory feature");
|
||||
have_cuda = TRUE;
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_NVCODEC_GST_GL
|
||||
if (nvdec->gl_display &&
|
||||
features && gst_caps_features_contains (features,
|
||||
GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
|
||||
GST_DEBUG_OBJECT (nvdec, "found GL memory feature");
|
||||
have_gl = TRUE;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (have_cuda)
|
||||
nvdec->mem_type = GST_NVDEC_MEM_TYPE_CUDA;
|
||||
else if (have_gl)
|
||||
nvdec->mem_type = GST_NVDEC_MEM_TYPE_GL;
|
||||
}
|
||||
gst_clear_caps (&caps);
|
||||
}
|
||||
|
@ -540,15 +555,25 @@ gst_nvdec_negotiate (GstVideoDecoder * decoder)
|
|||
nvdec->mem_type = GST_NVDEC_MEM_TYPE_SYSTEM;
|
||||
}
|
||||
|
||||
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_GL) {
|
||||
gst_caps_set_features (state->caps, 0,
|
||||
gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_GL_MEMORY, NULL));
|
||||
gst_caps_set_simple (state->caps, "texture-target", G_TYPE_STRING,
|
||||
"2D", NULL);
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (nvdec, "use system memory");
|
||||
}
|
||||
switch (nvdec->mem_type) {
|
||||
case GST_NVDEC_MEM_TYPE_CUDA:
|
||||
GST_DEBUG_OBJECT (nvdec, "use cuda memory");
|
||||
gst_caps_set_features (state->caps, 0,
|
||||
gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, NULL));
|
||||
break;
|
||||
#ifdef HAVE_NVCODEC_GST_GL
|
||||
case GST_NVDEC_MEM_TYPE_GL:
|
||||
GST_DEBUG_OBJECT (nvdec, "use gl memory");
|
||||
gst_caps_set_features (state->caps, 0,
|
||||
gst_caps_features_new (GST_CAPS_FEATURE_MEMORY_GL_MEMORY, NULL));
|
||||
gst_caps_set_simple (state->caps, "texture-target", G_TYPE_STRING,
|
||||
"2D", NULL);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
GST_DEBUG_OBJECT (nvdec, "use system memory");
|
||||
break;
|
||||
}
|
||||
|
||||
if (nvdec->output_state)
|
||||
gst_video_codec_state_unref (nvdec->output_state);
|
||||
|
@ -711,7 +736,7 @@ parser_display_callback (GstNvDec * nvdec, CUVIDPARSERDISPINFO * dispinfo)
|
|||
if (!copy_ret)
|
||||
#endif
|
||||
{
|
||||
copy_ret = gst_nvdec_copy_device_to_system (nvdec, dispinfo, output_buffer);
|
||||
copy_ret = gst_nvdec_copy_device_to_memory (nvdec, dispinfo, output_buffer);
|
||||
}
|
||||
|
||||
if (!copy_ret) {
|
||||
|
@ -1103,7 +1128,7 @@ gst_nvdec_copy_device_to_gl (GstNvDec * nvdec,
|
|||
#endif
|
||||
|
||||
static gboolean
|
||||
gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
|
||||
gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
|
||||
CUVIDPARSERDISPINFO * dispinfo, GstBuffer * output_buffer)
|
||||
{
|
||||
CUVIDPROCPARAMS params = { 0, };
|
||||
|
@ -1113,16 +1138,33 @@ gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
|
|||
GstVideoFrame video_frame;
|
||||
GstVideoInfo *info = &nvdec->output_state->info;
|
||||
gint i;
|
||||
GstMemory *mem;
|
||||
GstCudaMemory *cuda_mem = NULL;
|
||||
|
||||
if (!gst_cuda_context_push (nvdec->cuda_ctx)) {
|
||||
GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (!gst_video_frame_map (&video_frame, info, output_buffer, GST_MAP_WRITE)) {
|
||||
GST_ERROR_OBJECT (nvdec, "frame map failure");
|
||||
gst_cuda_context_pop (NULL);
|
||||
return FALSE;
|
||||
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA &&
|
||||
(mem = gst_buffer_peek_memory (output_buffer, 0)) &&
|
||||
gst_is_cuda_memory (mem)) {
|
||||
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
|
||||
|
||||
/* FIXME: enhance CUDA memory copy over multiple-gpu */
|
||||
if (cmem->context == nvdec->cuda_ctx ||
|
||||
gst_cuda_context_get_handle (cmem->context) ==
|
||||
gst_cuda_context_get_handle (nvdec->cuda_ctx)) {
|
||||
cuda_mem = cmem;
|
||||
}
|
||||
}
|
||||
|
||||
if (!cuda_mem) {
|
||||
if (!gst_video_frame_map (&video_frame, info, output_buffer, GST_MAP_WRITE)) {
|
||||
GST_ERROR_OBJECT (nvdec, "frame map failure");
|
||||
gst_cuda_context_pop (NULL);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
params.progressive_frame = dispinfo->progressive_frame;
|
||||
|
@ -1139,20 +1181,27 @@ gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
|
|||
|
||||
copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
copy_params.srcPitch = pitch;
|
||||
copy_params.dstMemoryType = CU_MEMORYTYPE_HOST;
|
||||
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, 0)
|
||||
* GST_VIDEO_INFO_COMP_PSTRIDE (info, 0);
|
||||
copy_params.dstMemoryType =
|
||||
cuda_mem ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
|
||||
|
||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&video_frame); i++) {
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
|
||||
copy_params.srcDevice = dptr + (i * pitch * GST_VIDEO_INFO_HEIGHT (info));
|
||||
copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
|
||||
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
|
||||
copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (&video_frame, i);
|
||||
if (cuda_mem) {
|
||||
copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i];
|
||||
copy_params.dstPitch = cuda_mem->stride;
|
||||
} else {
|
||||
copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
|
||||
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
|
||||
}
|
||||
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i)
|
||||
* GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
|
||||
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
|
||||
|
||||
if (!gst_cuda_result (CuMemcpy2DAsync (©_params, nvdec->cuda_stream))) {
|
||||
GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i);
|
||||
CuvidUnmapVideoFrame (nvdec->decoder, dptr);
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
if (!cuda_mem)
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
gst_cuda_context_pop (NULL);
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -1160,7 +1209,8 @@ gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
|
|||
|
||||
gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
|
||||
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
if (!cuda_mem)
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
|
||||
if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr)))
|
||||
GST_WARNING_OBJECT (nvdec, "failed to unmap video frame");
|
||||
|
@ -1346,13 +1396,9 @@ gst_nvdec_ensure_gl_context (GstNvDec * nvdec)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static gboolean
|
||||
gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
|
||||
gst_nvdec_ensure_gl_pool (GstNvDec * nvdec, GstQuery * query)
|
||||
{
|
||||
#ifdef HAVE_NVCODEC_GST_GL
|
||||
GstNvDec *nvdec = GST_NVDEC (decoder);
|
||||
GstCaps *outcaps;
|
||||
GstBufferPool *pool = NULL;
|
||||
guint n, size, min, max;
|
||||
|
@ -1361,10 +1407,6 @@ gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
|
|||
|
||||
GST_DEBUG_OBJECT (nvdec, "decide allocation");
|
||||
|
||||
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_SYSTEM)
|
||||
return GST_VIDEO_DECODER_CLASS (gst_nvdec_parent_class)->decide_allocation
|
||||
(decoder, query);
|
||||
|
||||
gst_query_parse_allocation (query, &outcaps, NULL);
|
||||
n = gst_query_get_n_allocation_pools (query);
|
||||
if (n > 0)
|
||||
|
@ -1376,6 +1418,7 @@ gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
|
|||
}
|
||||
|
||||
if (!pool) {
|
||||
GST_DEBUG_OBJECT (nvdec, "no downstream pool, create our pool");
|
||||
pool = gst_gl_buffer_pool_new (nvdec->gl_context);
|
||||
|
||||
if (outcaps)
|
||||
|
@ -1393,8 +1436,74 @@ gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
|
|||
else
|
||||
gst_query_add_allocation_pool (query, pool, size, min, max);
|
||||
gst_object_unref (pool);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
static gboolean
|
||||
gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query)
|
||||
{
|
||||
GstCaps *outcaps;
|
||||
GstBufferPool *pool = NULL;
|
||||
guint n, size, min, max;
|
||||
GstVideoInfo vinfo = { 0, };
|
||||
GstStructure *config;
|
||||
|
||||
gst_query_parse_allocation (query, &outcaps, NULL);
|
||||
n = gst_query_get_n_allocation_pools (query);
|
||||
if (n > 0) {
|
||||
gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);
|
||||
if (pool && !GST_IS_CUDA_BUFFER_POOL (pool)) {
|
||||
gst_object_unref (pool);
|
||||
pool = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (!pool) {
|
||||
GST_DEBUG_OBJECT (nvdec, "no downstream pool, create our pool");
|
||||
pool = gst_cuda_buffer_pool_new (nvdec->cuda_ctx);
|
||||
|
||||
if (outcaps)
|
||||
gst_video_info_from_caps (&vinfo, outcaps);
|
||||
size = (guint) vinfo.size;
|
||||
min = max = 0;
|
||||
}
|
||||
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
|
||||
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_set_config (pool, config);
|
||||
if (n > 0)
|
||||
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
|
||||
else
|
||||
gst_query_add_allocation_pool (query, pool, size, min, max);
|
||||
gst_object_unref (pool);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_nvdec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query)
|
||||
{
|
||||
GstNvDec *nvdec = GST_NVDEC (decoder);
|
||||
|
||||
GST_DEBUG_OBJECT (nvdec, "decide allocation");
|
||||
|
||||
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_SYSTEM)
|
||||
goto done;
|
||||
|
||||
#ifdef HAVE_NVCODEC_GST_GL
|
||||
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_GL) {
|
||||
if (!gst_nvdec_ensure_gl_pool (nvdec, query))
|
||||
return FALSE;
|
||||
} else
|
||||
#endif
|
||||
if (!gst_nvdec_ensure_cuda_pool (nvdec, query)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
done:
|
||||
return GST_VIDEO_DECODER_CLASS (gst_nvdec_parent_class)->decide_allocation
|
||||
(decoder, query);
|
||||
}
|
||||
|
|
|
@ -61,7 +61,8 @@ typedef enum
|
|||
{
|
||||
GST_NVDEC_MEM_TYPE_SYSTEM = 0,
|
||||
GST_NVDEC_MEM_TYPE_GL,
|
||||
/* FIXME: add support CUDA, D3D11 memory */
|
||||
GST_NVDEC_MEM_TYPE_CUDA,
|
||||
/* FIXME: add support D3D11 memory */
|
||||
} GstNvDecMemType;
|
||||
|
||||
struct _GstNvDec
|
||||
|
|
Loading…
Reference in a new issue