nvdecoder: Add support for D3D12 output

Enable D3D12 output if device can support D3D12 interop

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7529>
This commit is contained in:
Seungha Yang 2024-09-16 23:34:15 +09:00
parent 16e94b7fc3
commit d9114db5d0
3 changed files with 227 additions and 8 deletions

View file

@ -50,6 +50,11 @@
#include <gst/gl/gstglfuncs.h> #include <gst/gl/gstglfuncs.h>
#endif #endif
#ifdef HAVE_GST_D3D12
#include <gst/d3d12/gstd3d12.h>
#include "gstcudainterop_d3d12.h"
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
#include <gst/d3d11/gstd3d11.h> #include <gst/d3d11/gstd3d11.h>
#endif #endif
@ -58,6 +63,7 @@
#include <gst/cuda/gstcuda-private.h> #include <gst/cuda/gstcuda-private.h>
#include "gstnvdecoder.h" #include "gstnvdecoder.h"
#include "gstcudaconverter.h" #include "gstcudaconverter.h"
#include "gstnvcodecutils.h"
#include <string.h> #include <string.h>
#include <string> #include <string>
#include <set> #include <set>
@ -80,6 +86,7 @@ typedef enum
GST_NV_DECODER_OUTPUT_TYPE_GL = (1 << 1), GST_NV_DECODER_OUTPUT_TYPE_GL = (1 << 1),
GST_NV_DECODER_OUTPUT_TYPE_CUDA = (1 << 2), GST_NV_DECODER_OUTPUT_TYPE_CUDA = (1 << 2),
GST_NV_DECODER_OUTPUT_TYPE_D3D11 = (1 << 3), GST_NV_DECODER_OUTPUT_TYPE_D3D11 = (1 << 3),
GST_NV_DECODER_OUTPUT_TYPE_D3D12 = (1 << 4),
} GstNvDecoderOutputType; } GstNvDecoderOutputType;
struct _GstNvDecoder struct _GstNvDecoder
@ -118,6 +125,11 @@ struct _GstNvDecoder
GstBuffer *convert_buf; GstBuffer *convert_buf;
GstVideoInfo output_info; GstVideoInfo output_info;
#ifdef HAVE_GST_D3D12
GstCudaD3D12Interop *interop_12;
GstD3D12Device *device_12;
#endif
GstNvDecoderOutputType output_type; GstNvDecoderOutputType output_type;
}; };
@ -264,6 +276,11 @@ gst_nv_decoder_close (GstNvDecoder * decoder)
gst_clear_buffer (&decoder->convert_buf); gst_clear_buffer (&decoder->convert_buf);
gst_clear_buffer (&decoder->export_buf); gst_clear_buffer (&decoder->export_buf);
#ifdef HAVE_GST_D3D12
gst_clear_object (&decoder->interop_12);
gst_clear_object (&decoder->device_12);
#endif
return TRUE; return TRUE;
} }
@ -296,6 +313,9 @@ gst_nv_decoder_configure (GstNvDecoder * decoder, cudaVideoCodec codec,
gst_clear_buffer (&decoder->export_buf); gst_clear_buffer (&decoder->export_buf);
gst_clear_buffer (&decoder->convert_buf); gst_clear_buffer (&decoder->convert_buf);
gst_clear_object (&decoder->converter); gst_clear_object (&decoder->converter);
#ifdef HAVE_GST_D3D12
gst_clear_object (&decoder->interop_12);
#endif
format = GST_VIDEO_INFO_FORMAT (info); format = GST_VIDEO_INFO_FORMAT (info);
if (decoder->info.finfo) if (decoder->info.finfo)
@ -730,6 +750,50 @@ gst_nv_decoder_copy_frame_to_d3d11 (GstNvDecoder * self,
} }
#endif #endif
#ifdef HAVE_GST_D3D12
static GstFlowReturn
gst_nv_decoder_copy_frame_to_d3d12 (GstNvDecoder * self,
GstNvDecSurface * surface, GstBuffer * buffer)
{
if (!self->interop_12) {
GST_ERROR_OBJECT (self, "D3D12 output is not configured");
return GST_FLOW_ERROR;
}
GstMemory *mem;
auto ret = gst_nv_dec_object_export_surface (self->object,
surface, self->stream, &mem);
if (ret != GST_FLOW_OK) {
GST_WARNING_OBJECT (self, "Couldn't export surface");
gst_nv_dec_object_unmap_surface (self->object, surface);
return ret;
}
auto cmem = GST_CUDA_MEMORY_CAST (mem);
if (!self->export_buf) {
self->export_buf = gst_buffer_new ();
gst_buffer_add_video_meta_full (self->export_buf, GST_VIDEO_FRAME_FLAG_NONE,
GST_VIDEO_INFO_FORMAT (&self->info),
GST_VIDEO_INFO_WIDTH (&self->info),
GST_VIDEO_INFO_HEIGHT (&self->info),
GST_VIDEO_INFO_N_PLANES (&self->info),
cmem->info.offset, cmem->info.stride);
}
gst_buffer_append_memory (self->export_buf, mem);
auto download_ret = gst_cuda_d3d12_interop_download_async (self->interop_12,
buffer, self->export_buf, self->stream);
gst_buffer_remove_all_memory (self->export_buf);
if (!download_ret) {
GST_ERROR_OBJECT (self, "Couldn't convert frame");
return GST_FLOW_ERROR;
}
return GST_FLOW_OK;
}
#endif
static GstFlowReturn static GstFlowReturn
gst_nv_decoder_copy_frame_to_system (GstNvDecoder * decoder, gst_nv_decoder_copy_frame_to_system (GstNvDecoder * decoder,
GstNvDecSurface * surface, GstBuffer * buffer) GstNvDecSurface * surface, GstBuffer * buffer)
@ -946,6 +1010,13 @@ gst_nv_decoder_output_picture (GstNvDecoder * decoder,
ret = gst_nv_decoder_copy_frame_to_system (decoder, ret = gst_nv_decoder_copy_frame_to_system (decoder,
surface, frame->output_buffer); surface, frame->output_buffer);
break; break;
#ifdef HAVE_GST_D3D12
case GST_NV_DECODER_OUTPUT_TYPE_D3D12:
ret = gst_nv_decoder_copy_frame_to_d3d12 (decoder, surface,
frame->output_buffer);
need_unmap = FALSE;
break;
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
case GST_NV_DECODER_OUTPUT_TYPE_D3D11: case GST_NV_DECODER_OUTPUT_TYPE_D3D11:
ret = gst_nv_decoder_copy_frame_to_d3d11 (decoder, surface, ret = gst_nv_decoder_copy_frame_to_d3d11 (decoder, surface,
@ -1185,8 +1256,8 @@ const GstNvdecoderCodecMap codec_map_list[] = {
}; };
gboolean gboolean
gst_nv_decoder_check_device_caps (CUcontext cuda_ctx, cudaVideoCodec codec, gst_nv_decoder_check_device_caps (GstCudaContext * context,
GstCaps ** sink_template, GstCaps ** src_template) cudaVideoCodec codec, GstCaps ** sink_template, GstCaps ** src_template)
{ {
CUresult cuda_ret; CUresult cuda_ret;
guint max_width = 0, min_width = G_MAXINT; guint max_width = 0, min_width = G_MAXINT;
@ -1213,6 +1284,7 @@ gst_nv_decoder_check_device_caps (CUcontext cuda_ctx, cudaVideoCodec codec,
gboolean ret = FALSE; gboolean ret = FALSE;
std::set < std::string > formats; std::set < std::string > formats;
std::set < std::string > planar_formats; std::set < std::string > planar_formats;
CUcontext cuda_ctx = gst_cuda_context_get_handle (context);
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
gboolean is_stateless = FALSE; gboolean is_stateless = FALSE;
@ -1419,6 +1491,21 @@ gst_nv_decoder_check_device_caps (CUcontext cuda_ctx, cudaVideoCodec codec,
gst_caps_features_new_single_static_str gst_caps_features_new_single_static_str
(GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)); (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY));
#ifdef HAVE_GST_D3D12
if (is_stateless && gst_nvcodec_is_windows_10_or_greater ()) {
gboolean have_interop = FALSE;
g_object_get (context, "external-resource-interop",
&have_interop, nullptr);
if (have_interop) {
auto d3d12_caps = gst_caps_copy (raw_caps);
gst_caps_set_features_simple (d3d12_caps,
gst_caps_features_new_single
(GST_CAPS_FEATURE_MEMORY_D3D12_MEMORY));
gst_caps_append (src_templ, d3d12_caps);
}
}
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
if (is_stateless) { if (is_stateless) {
format_str.clear (); format_str.clear ();
@ -1535,6 +1622,13 @@ gst_nv_decoder_handle_set_context (GstNvDecoder * decoder,
&decoder->context)) { &decoder->context)) {
return; return;
} }
#ifdef HAVE_GST_D3D12
if (gst_d3d12_handle_set_context_for_adapter_luid (element, context,
decoder->adapter_luid, &decoder->device_12)) {
return;
}
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
if (gst_d3d11_handle_set_context_for_adapter_luid (element, context, if (gst_d3d11_handle_set_context_for_adapter_luid (element, context,
decoder->adapter_luid, (GstD3D11Device **) & decoder->d3d11_device)) { decoder->adapter_luid, (GstD3D11Device **) & decoder->d3d11_device)) {
@ -1559,6 +1653,12 @@ gst_nv_decoder_handle_query (GstNvDecoder * decoder, GstElement * element,
if (gst_cuda_handle_context_query (element, query, decoder->context)) if (gst_cuda_handle_context_query (element, query, decoder->context))
return TRUE; return TRUE;
#ifdef HAVE_GST_D3D12
if (gst_d3d12_handle_context_query (element, query, decoder->device_12)) {
return TRUE;
}
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
if (gst_d3d11_handle_context_query (element, if (gst_d3d11_handle_context_query (element,
query, (GstD3D11Device *) decoder->d3d11_device)) { query, (GstD3D11Device *) decoder->d3d11_device)) {
@ -1746,6 +1846,30 @@ gst_nv_decoder_ensure_d3d11_output (GstNvDecoder * self, GstElement * element)
} }
#endif #endif
#ifdef HAVE_GST_D3D12
static gboolean
gst_nv_decoder_ensure_d3d12_output (GstNvDecoder * self, GstElement * element)
{
if (!gst_d3d12_ensure_element_data_for_adapter_luid (GST_ELEMENT (element),
self->adapter_luid, &self->device_12)) {
GST_WARNING_OBJECT (element, "D3D12 device is not available");
return FALSE;
}
if (!self->interop_12) {
self->interop_12 = gst_cuda_d3d12_interop_new (self->context,
self->device_12, &self->info, FALSE);
}
if (!self->interop_12) {
GST_WARNING_OBJECT (element, "Couldn't create downloader");
return FALSE;
}
return TRUE;
}
#endif
gboolean gboolean
gst_nv_decoder_negotiate (GstNvDecoder * decoder, gst_nv_decoder_negotiate (GstNvDecoder * decoder,
GstVideoDecoder * videodec, GstVideoCodecState * input_state) GstVideoDecoder * videodec, GstVideoCodecState * input_state)
@ -1786,6 +1910,13 @@ gst_nv_decoder_negotiate (GstNvDecoder * decoder,
GST_DEBUG_OBJECT (videodec, "found CUDA memory feature"); GST_DEBUG_OBJECT (videodec, "found CUDA memory feature");
available_types |= GST_NV_DECODER_OUTPUT_TYPE_CUDA; available_types |= GST_NV_DECODER_OUTPUT_TYPE_CUDA;
} }
#ifdef HAVE_GST_D3D12
if (features && gst_caps_features_contains (features,
GST_CAPS_FEATURE_MEMORY_D3D12_MEMORY)) {
GST_DEBUG_OBJECT (videodec, "found D3D12 memory feature");
available_types |= GST_NV_DECODER_OUTPUT_TYPE_D3D12;
}
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
if (features && gst_caps_features_contains (features, if (features && gst_caps_features_contains (features,
GST_CAPS_FEATURE_MEMORY_D3D11_MEMORY)) { GST_CAPS_FEATURE_MEMORY_D3D11_MEMORY)) {
@ -1815,6 +1946,8 @@ gst_nv_decoder_negotiate (GstNvDecoder * decoder,
if (selected_type == GST_NV_DECODER_OUTPUT_TYPE_UNKNOWN) { if (selected_type == GST_NV_DECODER_OUTPUT_TYPE_UNKNOWN) {
if ((available_types & GST_NV_DECODER_OUTPUT_TYPE_CUDA) != 0) if ((available_types & GST_NV_DECODER_OUTPUT_TYPE_CUDA) != 0)
selected_type = GST_NV_DECODER_OUTPUT_TYPE_CUDA; selected_type = GST_NV_DECODER_OUTPUT_TYPE_CUDA;
else if ((available_types & GST_NV_DECODER_OUTPUT_TYPE_D3D12) != 0)
selected_type = GST_NV_DECODER_OUTPUT_TYPE_D3D12;
else if ((available_types & GST_NV_DECODER_OUTPUT_TYPE_D3D11) != 0) else if ((available_types & GST_NV_DECODER_OUTPUT_TYPE_D3D11) != 0)
selected_type = GST_NV_DECODER_OUTPUT_TYPE_D3D11; selected_type = GST_NV_DECODER_OUTPUT_TYPE_D3D11;
else if ((available_types & GST_NV_DECODER_OUTPUT_TYPE_GL) != 0) else if ((available_types & GST_NV_DECODER_OUTPUT_TYPE_GL) != 0)
@ -1829,6 +1962,15 @@ gst_nv_decoder_negotiate (GstNvDecoder * decoder,
gst_clear_caps (&caps); gst_clear_caps (&caps);
} }
#ifdef HAVE_GST_D3D12
if (decoder->output_type == GST_NV_DECODER_OUTPUT_TYPE_D3D12 &&
!gst_nv_decoder_ensure_d3d12_output (decoder, GST_ELEMENT (videodec))) {
GST_WARNING_OBJECT (videodec, "D3D12 setup failed");
decoder->output_type = GST_NV_DECODER_OUTPUT_TYPE_SYSTEM;
decoder->output_info = decoder->info;
}
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
if (decoder->output_type == GST_NV_DECODER_OUTPUT_TYPE_D3D11 && if (decoder->output_type == GST_NV_DECODER_OUTPUT_TYPE_D3D11 &&
!gst_nv_decoder_ensure_d3d11_output (decoder, GST_ELEMENT (videodec))) { !gst_nv_decoder_ensure_d3d11_output (decoder, GST_ELEMENT (videodec))) {
@ -1861,6 +2003,12 @@ gst_nv_decoder_negotiate (GstNvDecoder * decoder,
gst_caps_features_new_single_static_str gst_caps_features_new_single_static_str
(GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)); (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY));
break; break;
#ifdef HAVE_GST_D3D12
case GST_NV_DECODER_OUTPUT_TYPE_D3D12:
gst_caps_set_features (state->caps, 0,
gst_caps_features_new_single (GST_CAPS_FEATURE_MEMORY_D3D12_MEMORY));
break;
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
case GST_NV_DECODER_OUTPUT_TYPE_D3D11: case GST_NV_DECODER_OUTPUT_TYPE_D3D11:
gst_caps_set_features (state->caps, 0, gst_caps_set_features (state->caps, 0,
@ -2095,6 +2243,75 @@ gst_nv_decoder_ensure_d3d11_pool (GstNvDecoder * self, GstElement * element,
} }
#endif #endif
#ifdef HAVE_GST_D3D12
static gboolean
gst_nv_decoder_ensure_d3d12_pool (GstNvDecoder * self, GstElement * element,
GstQuery * query)
{
GstCaps *outcaps;
GstBufferPool *pool = NULL;
guint n, size, min = 0, max = 0;
GstVideoInfo info;
gst_query_parse_allocation (query, &outcaps, nullptr);
if (!outcaps) {
GST_DEBUG_OBJECT (element, "No output caps");
return FALSE;
}
if (!gst_d3d12_ensure_element_data_for_adapter_luid (element,
self->adapter_luid, &self->device_12)) {
GST_ERROR_OBJECT (element, "Couldn't create d3d11 device");
return FALSE;
}
gst_video_info_from_caps (&info, outcaps);
n = gst_query_get_n_allocation_pools (query);
if (n > 0)
gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);
/* create our own pool */
if (pool) {
if (!GST_IS_D3D12_BUFFER_POOL (pool)) {
GST_DEBUG_OBJECT (self,
"Downstream pool is not d3d11, will create new one");
gst_clear_object (&pool);
} else {
auto dpool = GST_D3D12_BUFFER_POOL (pool);
if (!gst_d3d12_device_is_equal (dpool->device, self->device_12)) {
GST_DEBUG_OBJECT (element, "Different device, will create new one");
gst_clear_object (&pool);
}
}
}
if (!pool) {
pool = gst_d3d12_buffer_pool_new (self->device_12);
size = info.size;
}
auto config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_set_config (pool, config);
/* d3d12 buffer pool will update buffer size based on allocated texture,
* get size from config again */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, nullptr, &size, nullptr, nullptr);
gst_structure_free (config);
if (n > 0)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else
gst_query_add_allocation_pool (query, pool, size, min, max);
gst_object_unref (pool);
return TRUE;
}
#endif
gboolean gboolean
gst_nv_decoder_decide_allocation (GstNvDecoder * decoder, gst_nv_decoder_decide_allocation (GstNvDecoder * decoder,
GstVideoDecoder * videodec, GstQuery * query) GstVideoDecoder * videodec, GstQuery * query)
@ -2114,6 +2331,12 @@ gst_nv_decoder_decide_allocation (GstNvDecoder * decoder,
query); query);
break; break;
#endif #endif
#ifdef HAVE_GST_D3D12
case GST_NV_DECODER_OUTPUT_TYPE_D3D12:
ret = gst_nv_decoder_ensure_d3d12_pool (decoder, GST_ELEMENT (videodec),
query);
break;
#endif
#ifdef HAVE_CUDA_GST_GL #ifdef HAVE_CUDA_GST_GL
case GST_NV_DECODER_OUTPUT_TYPE_GL: case GST_NV_DECODER_OUTPUT_TYPE_GL:
ret = gst_nv_decoder_ensure_gl_pool (decoder, query); ret = gst_nv_decoder_ensure_gl_pool (decoder, query);

View file

@ -83,7 +83,7 @@ void gst_nv_decoder_set_flushing (GstNvDecoder * decoder,
void gst_nv_decoder_reset (GstNvDecoder * decoder); void gst_nv_decoder_reset (GstNvDecoder * decoder);
/* utils for class registration */ /* utils for class registration */
gboolean gst_nv_decoder_check_device_caps (CUcontext cuda_ctx, gboolean gst_nv_decoder_check_device_caps (GstCudaContext * context,
cudaVideoCodec codec, cudaVideoCodec codec,
GstCaps **sink_template, GstCaps **sink_template,
GstCaps **src_template); GstCaps **src_template);

View file

@ -201,9 +201,6 @@ plugin_init (GstPlugin * plugin)
for (i = 0; i < dev_count; i++) { for (i = 0; i < dev_count; i++) {
GstCudaContext *context = gst_cuda_context_new (i); GstCudaContext *context = gst_cuda_context_new (i);
#ifdef HAVE_NVCODEC_DGPU
CUcontext cuda_ctx;
#endif
#if defined(G_OS_WIN32) || defined(HAVE_NVCODEC_DGPU) #if defined(G_OS_WIN32) || defined(HAVE_NVCODEC_DGPU)
gint64 adapter_luid = 0; gint64 adapter_luid = 0;
#endif #endif
@ -217,7 +214,6 @@ plugin_init (GstPlugin * plugin)
#endif #endif
#ifdef HAVE_NVCODEC_DGPU #ifdef HAVE_NVCODEC_DGPU
cuda_ctx = gst_cuda_context_get_handle (context);
if (nvdec_available) { if (nvdec_available) {
gint j; gint j;
@ -227,7 +223,7 @@ plugin_init (GstPlugin * plugin)
cudaVideoCodec codec = (cudaVideoCodec) j; cudaVideoCodec codec = (cudaVideoCodec) j;
gboolean register_cuviddec = FALSE; gboolean register_cuviddec = FALSE;
if (gst_nv_decoder_check_device_caps (cuda_ctx, if (gst_nv_decoder_check_device_caps (context,
codec, &sink_template, &src_template)) { codec, &sink_template, &src_template)) {
const gchar *codec_name = gst_cuda_video_codec_to_string (codec); const gchar *codec_name = gst_cuda_video_codec_to_string (codec);