From 7a8bb85523592588a8c698fed9abcedf1ccbbdb2 Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Thu, 22 Dec 2022 02:27:36 +0900 Subject: [PATCH] cudaupload, cudadownload: Update for shared CUDA stream Use CUDA stream of memory if exists Part-of: --- .../gst-libs/gst/cuda/gstcuda-private.h | 3 +- .../gst-libs/gst/cuda/gstcudautils.c | 90 ++++++++++++++----- .../sys/nvcodec/gstcudamemorycopy.c | 12 +-- 3 files changed, 77 insertions(+), 28 deletions(-) diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h index 5c2cc1a9a8..be91a1e964 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -47,7 +48,7 @@ gboolean gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType src_type, const GstVideoInfo * src_info, GstCudaContext * context, - CUstream stream); + GstCudaStream * stream); G_END_DECLS diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.c b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.c index 647e23e049..a2b592d813 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.c +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.c @@ -1490,14 +1490,17 @@ gboolean gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type, const GstVideoInfo * dst_info, GstBuffer * src, GstCudaBufferCopyType src_type, const GstVideoInfo * src_info, - GstCudaContext * context, CUstream stream) + GstCudaContext * context, GstCudaStream * stream) { gboolean use_copy_2d = FALSE; GstMemory *dst_mem, *src_mem; #ifdef GST_CUDA_HAS_D3D D3D11_TEXTURE2D_DESC desc; #endif - GstCudaContext *cuda_context; + GstCudaContext *cuda_context = context; + GstCudaMemory *cmem = NULL; + GstCudaStream *mem_stream = NULL; + gboolean ret; g_return_val_if_fail (GST_IS_BUFFER (dst), FALSE); 
g_return_val_if_fail (dst_info != NULL, FALSE); @@ -1538,31 +1541,46 @@ gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type, if (src_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (src_mem)) { GstGLMemory *gl_mem = (GstGLMemory *) src_mem; GstGLContext *gl_context = gl_mem->mem.context; - GstCudaContext *cuda_context = context; - if (dst_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (dst_mem)) - cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context; + if (dst_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (dst_mem)) { + cmem = GST_CUDA_MEMORY_CAST (dst_mem); + cuda_context = cmem->context; + mem_stream = gst_cuda_memory_get_stream (cmem); + if (mem_stream) + stream = mem_stream; + } GST_TRACE_OBJECT (context, "GL -> %s", gst_cuda_buffer_copy_type_to_string (dst_type)); - return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context, - cuda_context, stream, TRUE, dst_type); + ret = cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context, + cuda_context, gst_cuda_stream_get_handle (stream), TRUE, dst_type); + + if (cmem) + GST_MEMORY_FLAG_UNSET (cmem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); + + return ret; } if (dst_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (dst_mem)) { GstGLMemory *gl_mem = (GstGLMemory *) dst_mem; GstGLContext *gl_context = gl_mem->mem.context; - GstCudaContext *cuda_context = context; - if (src_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (src_mem)) - cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context; + if (src_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (src_mem)) { + cmem = GST_CUDA_MEMORY_CAST (src_mem); + cuda_context = cmem->context; + + /* Use memory's stream object if available */ + mem_stream = gst_cuda_memory_get_stream (cmem); + if (mem_stream) + stream = mem_stream; + } GST_TRACE_OBJECT (context, "%s -> GL", gst_cuda_buffer_copy_type_to_string (src_type)); return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context, - 
cuda_context, stream, FALSE, src_type); + cuda_context, gst_cuda_stream_get_handle (stream), FALSE, src_type); } #endif @@ -1572,16 +1590,24 @@ gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type, && desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (dst_mem)) { GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (src_mem); GstD3D11Device *device = dmem->device; - GstCudaContext *cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context; - gboolean ret; + + cmem = GST_CUDA_MEMORY_CAST (dst_mem); + cuda_context = cmem->context; + + /* Use memory's stream object if available */ + mem_stream = gst_cuda_memory_get_stream (cmem); + if (mem_stream) + stream = mem_stream; GST_TRACE_OBJECT (context, "D3D11 -> CUDA"); gst_d3d11_device_lock (device); ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device, - cuda_context, stream, TRUE); + cuda_context, gst_cuda_stream_get_handle (stream), TRUE); gst_d3d11_device_unlock (device); + GST_MEMORY_FLAG_UNSET (cmem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); + return ret; } @@ -1590,14 +1616,20 @@ gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type, && desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (src_mem)) { GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (dst_mem); GstD3D11Device *device = dmem->device; - GstCudaContext *cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context; - gboolean ret; + + cmem = GST_CUDA_MEMORY_CAST (src_mem); + cuda_context = cmem->context; + + /* Use memory's stream object if available */ + mem_stream = gst_cuda_memory_get_stream (cmem); + if (mem_stream) + stream = mem_stream; GST_TRACE_OBJECT (context, "CUDA -> D3D11"); gst_d3d11_device_lock (device); ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device, - cuda_context, stream, FALSE); + cuda_context, gst_cuda_stream_get_handle (stream), FALSE); gst_d3d11_device_unlock (device); return ret; @@ -1605,17 +1637,31 @@ gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type, #endif 
if (gst_is_cuda_memory (dst_mem)) { - cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context; + cmem = GST_CUDA_MEMORY_CAST (dst_mem); } else if (gst_is_cuda_memory (src_mem)) { - cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context; + cmem = GST_CUDA_MEMORY_CAST (src_mem); } else { - cuda_context = context; + cmem = NULL; + } + + if (cmem) { + cuda_context = cmem->context; + mem_stream = gst_cuda_memory_get_stream (cmem); + if (mem_stream) + stream = mem_stream; } GST_TRACE_OBJECT (context, "%s -> %s", gst_cuda_buffer_copy_type_to_string (src_type), gst_cuda_buffer_copy_type_to_string (dst_type)); - return gst_cuda_buffer_copy_internal (dst, dst_type, dst_info, - src, src_type, src_info, cuda_context, stream); + ret = gst_cuda_buffer_copy_internal (dst, dst_type, dst_info, + src, src_type, src_info, cuda_context, + gst_cuda_stream_get_handle (stream)); + + /* Already synchronized */ + if (gst_is_cuda_memory (src_mem)) + GST_MEMORY_FLAG_UNSET (src_mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); + + return ret; } diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c index 53604d30a2..2c008cce0a 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c @@ -485,6 +485,7 @@ gst_cuda_memory_copy_propose_allocation (GstBaseTransform * trans, GstBufferPool *pool = NULL; GstCaps *caps; guint size; + gboolean is_cuda = FALSE; if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans, decide_query, query)) @@ -574,6 +575,8 @@ gst_cuda_memory_copy_propose_allocation (GstBaseTransform * trans, size = GST_VIDEO_INFO_SIZE (&info); gst_buffer_pool_config_set_params (config, caps, size, 0, 0); + if (is_cuda && ctrans->stream) + gst_buffer_pool_config_set_cuda_stream (config, ctrans->stream); if (!gst_buffer_pool_set_config (pool, config)) { GST_ERROR_OBJECT (ctrans, "failed to set config"); @@ -951,7 +954,7 @@
gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf, GST_TRACE_OBJECT (self, "Both in/out buffers are not CUDA"); if (!gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info, inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context, - gst_cuda_stream_get_handle (ctrans->stream))) { + ctrans->stream)) { return GST_FLOW_ERROR; } @@ -959,7 +962,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf, } ret = gst_cuda_buffer_copy (outbuf, out_type, out_info, inbuf, in_type, - in_info, ctrans->context, gst_cuda_stream_get_handle (ctrans->stream)); + in_info, ctrans->context, ctrans->stream); /* system memory <-> CUDA copy fallback if possible */ if (!ret) { @@ -1002,8 +1005,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf, gst_cuda_buffer_copy_type_to_string (fallback_out_type)); ret = gst_cuda_buffer_copy (outbuf, fallback_out_type, out_info, inbuf, - fallback_in_type, in_info, ctrans->context, - gst_cuda_stream_get_handle (ctrans->stream)); + fallback_in_type, in_info, ctrans->context, ctrans->stream); } if (ret) @@ -1018,7 +1020,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf, /* final fallback using system memory */ ret = gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info, inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context, - gst_cuda_stream_get_handle (ctrans->stream)); + ctrans->stream); if (ret) return GST_FLOW_OK;