cudaupload, cudadownload: Update for shared CUDA stream

Use the memory's own CUDA stream if it has one

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/3629>
This commit is contained in:
Seungha Yang 2022-12-22 02:27:36 +09:00 committed by GStreamer Marge Bot
parent aabcba16db
commit 7a8bb85523
3 changed files with 77 additions and 28 deletions

View file

@ -22,6 +22,7 @@
#include <gst/cuda/cuda-prelude.h> #include <gst/cuda/cuda-prelude.h>
#include <gst/cuda/cuda-gst.h> #include <gst/cuda/cuda-gst.h>
#include <gst/cuda/gstcudacontext.h> #include <gst/cuda/gstcudacontext.h>
#include <gst/cuda/gstcudastream.h>
#include <gst/video/video.h> #include <gst/video/video.h>
@ -47,7 +48,7 @@ gboolean gst_cuda_buffer_copy (GstBuffer * dst,
GstCudaBufferCopyType src_type, GstCudaBufferCopyType src_type,
const GstVideoInfo * src_info, const GstVideoInfo * src_info,
GstCudaContext * context, GstCudaContext * context,
CUstream stream); GstCudaStream * stream);
G_END_DECLS G_END_DECLS

View file

@ -1490,14 +1490,17 @@ gboolean
gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type, gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type,
const GstVideoInfo * dst_info, GstBuffer * src, const GstVideoInfo * dst_info, GstBuffer * src,
GstCudaBufferCopyType src_type, const GstVideoInfo * src_info, GstCudaBufferCopyType src_type, const GstVideoInfo * src_info,
GstCudaContext * context, CUstream stream) GstCudaContext * context, GstCudaStream * stream)
{ {
gboolean use_copy_2d = FALSE; gboolean use_copy_2d = FALSE;
GstMemory *dst_mem, *src_mem; GstMemory *dst_mem, *src_mem;
#ifdef GST_CUDA_HAS_D3D #ifdef GST_CUDA_HAS_D3D
D3D11_TEXTURE2D_DESC desc; D3D11_TEXTURE2D_DESC desc;
#endif #endif
GstCudaContext *cuda_context; GstCudaContext *cuda_context = context;
GstCudaMemory *cmem = NULL;
GstCudaStream *mem_stream = NULL;
gboolean ret;
g_return_val_if_fail (GST_IS_BUFFER (dst), FALSE); g_return_val_if_fail (GST_IS_BUFFER (dst), FALSE);
g_return_val_if_fail (dst_info != NULL, FALSE); g_return_val_if_fail (dst_info != NULL, FALSE);
@ -1538,31 +1541,46 @@ gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type,
if (src_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (src_mem)) { if (src_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (src_mem)) {
GstGLMemory *gl_mem = (GstGLMemory *) src_mem; GstGLMemory *gl_mem = (GstGLMemory *) src_mem;
GstGLContext *gl_context = gl_mem->mem.context; GstGLContext *gl_context = gl_mem->mem.context;
GstCudaContext *cuda_context = context;
if (dst_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (dst_mem)) if (dst_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (dst_mem)) {
cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context; cmem = GST_CUDA_MEMORY_CAST (dst_mem);
cuda_context = cmem->context;
mem_stream = gst_cuda_memory_get_stream (cmem);
if (mem_stream)
stream = mem_stream;
}
GST_TRACE_OBJECT (context, "GL -> %s", GST_TRACE_OBJECT (context, "GL -> %s",
gst_cuda_buffer_copy_type_to_string (dst_type)); gst_cuda_buffer_copy_type_to_string (dst_type));
return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context, ret = cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context,
cuda_context, stream, TRUE, dst_type); cuda_context, gst_cuda_stream_get_handle (stream), TRUE, dst_type);
if (cmem)
GST_MEMORY_FLAG_UNSET (cmem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
return ret;
} }
if (dst_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (dst_mem)) { if (dst_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (dst_mem)) {
GstGLMemory *gl_mem = (GstGLMemory *) dst_mem; GstGLMemory *gl_mem = (GstGLMemory *) dst_mem;
GstGLContext *gl_context = gl_mem->mem.context; GstGLContext *gl_context = gl_mem->mem.context;
GstCudaContext *cuda_context = context;
if (src_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (src_mem)) if (src_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (src_mem)) {
cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context; cmem = GST_CUDA_MEMORY_CAST (src_mem);
cuda_context = cmem->context;
/* Use memory's stream object if available */
mem_stream = gst_cuda_memory_get_stream (cmem);
if (mem_stream)
stream = mem_stream;
}
GST_TRACE_OBJECT (context, "%s -> GL", GST_TRACE_OBJECT (context, "%s -> GL",
gst_cuda_buffer_copy_type_to_string (src_type)); gst_cuda_buffer_copy_type_to_string (src_type));
return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context, return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context,
cuda_context, stream, FALSE, src_type); cuda_context, gst_cuda_stream_get_handle (stream), FALSE, src_type);
} }
#endif #endif
@ -1572,16 +1590,24 @@ gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type,
&& desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (dst_mem)) { && desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (dst_mem)) {
GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (src_mem); GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (src_mem);
GstD3D11Device *device = dmem->device; GstD3D11Device *device = dmem->device;
GstCudaContext *cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context;
gboolean ret; cmem = GST_CUDA_MEMORY_CAST (dst_mem);
cuda_context = cmem->context;
/* Use memory's stream object if available */
mem_stream = gst_cuda_memory_get_stream (cmem);
if (mem_stream)
stream = mem_stream;
GST_TRACE_OBJECT (context, "D3D11 -> CUDA"); GST_TRACE_OBJECT (context, "D3D11 -> CUDA");
gst_d3d11_device_lock (device); gst_d3d11_device_lock (device);
ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device, ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device,
cuda_context, stream, TRUE); cuda_context, gst_cuda_stream_get_handle (stream), TRUE);
gst_d3d11_device_unlock (device); gst_d3d11_device_unlock (device);
GST_MEMORY_FLAG_UNSET (cmem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
return ret; return ret;
} }
@ -1590,14 +1616,20 @@ gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type,
&& desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (src_mem)) { && desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (src_mem)) {
GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (dst_mem); GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (dst_mem);
GstD3D11Device *device = dmem->device; GstD3D11Device *device = dmem->device;
GstCudaContext *cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context;
gboolean ret; cmem = GST_CUDA_MEMORY_CAST (src_mem);
cuda_context = cmem->context;
/* Use memory's stream object if available */
mem_stream = gst_cuda_memory_get_stream (cmem);
if (mem_stream)
stream = mem_stream;
GST_TRACE_OBJECT (context, "CUDA -> D3D11"); GST_TRACE_OBJECT (context, "CUDA -> D3D11");
gst_d3d11_device_lock (device); gst_d3d11_device_lock (device);
ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device, ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device,
cuda_context, stream, FALSE); cuda_context, gst_cuda_stream_get_handle (stream), FALSE);
gst_d3d11_device_unlock (device); gst_d3d11_device_unlock (device);
return ret; return ret;
@ -1605,17 +1637,31 @@ gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type,
#endif #endif
if (gst_is_cuda_memory (dst_mem)) { if (gst_is_cuda_memory (dst_mem)) {
cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context; cmem = GST_CUDA_MEMORY_CAST (dst_mem);
} else if (gst_is_cuda_memory (src_mem)) { } else if (gst_is_cuda_memory (src_mem)) {
cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context; cmem = GST_CUDA_MEMORY_CAST (src_mem);
} else { } else {
cuda_context = context; cmem = NULL;
}
if (cmem) {
context = cmem->context;
mem_stream = gst_cuda_memory_get_stream (cmem);
if (mem_stream)
stream = mem_stream;
} }
GST_TRACE_OBJECT (context, "%s -> %s", GST_TRACE_OBJECT (context, "%s -> %s",
gst_cuda_buffer_copy_type_to_string (src_type), gst_cuda_buffer_copy_type_to_string (src_type),
gst_cuda_buffer_copy_type_to_string (dst_type)); gst_cuda_buffer_copy_type_to_string (dst_type));
return gst_cuda_buffer_copy_internal (dst, dst_type, dst_info, ret = gst_cuda_buffer_copy_internal (dst, dst_type, dst_info,
src, src_type, src_info, cuda_context, stream); src, src_type, src_info, cuda_context,
gst_cuda_stream_get_handle (stream));
/* Already synchronized */
if (gst_is_cuda_memory (src_mem))
GST_MEMORY_FLAG_UNSET (src_mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
return ret;
} }

View file

@ -485,6 +485,7 @@ gst_cuda_memory_copy_propose_allocation (GstBaseTransform * trans,
GstBufferPool *pool = NULL; GstBufferPool *pool = NULL;
GstCaps *caps; GstCaps *caps;
guint size; guint size;
gboolean is_cuda = FALSE;
if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans, if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans,
decide_query, query)) decide_query, query))
@ -574,6 +575,8 @@ gst_cuda_memory_copy_propose_allocation (GstBaseTransform * trans,
size = GST_VIDEO_INFO_SIZE (&info); size = GST_VIDEO_INFO_SIZE (&info);
gst_buffer_pool_config_set_params (config, caps, size, 0, 0); gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
if (is_cuda && ctrans->stream)
gst_buffer_pool_config_set_cuda_stream (config, ctrans->stream);
if (!gst_buffer_pool_set_config (pool, config)) { if (!gst_buffer_pool_set_config (pool, config)) {
GST_ERROR_OBJECT (ctrans, "failed to set config"); GST_ERROR_OBJECT (ctrans, "failed to set config");
@ -951,7 +954,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
GST_TRACE_OBJECT (self, "Both in/out buffers are not CUDA"); GST_TRACE_OBJECT (self, "Both in/out buffers are not CUDA");
if (!gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info, if (!gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info,
inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context, inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context,
gst_cuda_stream_get_handle (ctrans->stream))) { ctrans->stream)) {
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
} }
@ -959,7 +962,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
} }
ret = gst_cuda_buffer_copy (outbuf, out_type, out_info, inbuf, in_type, ret = gst_cuda_buffer_copy (outbuf, out_type, out_info, inbuf, in_type,
in_info, ctrans->context, gst_cuda_stream_get_handle (ctrans->stream)); in_info, ctrans->context, ctrans->stream);
/* system memory <-> CUDA copy fallback if possible */ /* system memory <-> CUDA copy fallback if possible */
if (!ret) { if (!ret) {
@ -1002,8 +1005,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
gst_cuda_buffer_copy_type_to_string (fallback_out_type)); gst_cuda_buffer_copy_type_to_string (fallback_out_type));
ret = gst_cuda_buffer_copy (outbuf, fallback_out_type, out_info, inbuf, ret = gst_cuda_buffer_copy (outbuf, fallback_out_type, out_info, inbuf,
fallback_in_type, in_info, ctrans->context, fallback_in_type, in_info, ctrans->context, ctrans->stream);
gst_cuda_stream_get_handle (ctrans->stream));
} }
if (ret) if (ret)
@ -1018,7 +1020,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
/* final fallback using system memory */ /* final fallback using system memory */
ret = gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info, ret = gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info,
inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context, inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context,
gst_cuda_stream_get_handle (ctrans->stream)); ctrans->stream);
if (ret) if (ret)
return GST_FLOW_OK; return GST_FLOW_OK;