From 1aa9e74aaffacbb3bf237c94177f40b096c59b32 Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Thu, 8 Jun 2023 21:29:32 +0900 Subject: [PATCH] cudadownload: Always download CUDA memory if it's bound to decoder Decoder-bound CUDA memory is allocated by the driver and its pool size is fixed. Since we don't know how many buffers will be held by a downstream non-CUDA element, we should download such CUDA memory and release it back to the decoder. Part-of: --- .../gst-libs/gst/cuda/gstcuda-private.h | 6 +++++ .../gst-libs/gst/cuda/gstcudamemory.cpp | 27 +++++++++++++++++++ .../sys/nvcodec/gstcudamemorycopy.c | 13 ++++++--- .../sys/nvcodec/gstnvdecobject.cpp | 2 ++ 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h index 055b6f85dc..19f93b05db 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h @@ -50,6 +50,12 @@ gboolean gst_cuda_buffer_copy (GstBuffer * dst, GstCudaContext * context, GstCudaStream * stream); +GST_CUDA_API +void gst_cuda_memory_set_from_fixed_pool (GstMemory * mem); + +GST_CUDA_API +gboolean gst_cuda_memory_is_from_fixed_pool (GstMemory * mem); + G_END_DECLS #ifdef __cplusplus diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.cpp index 183f0af1a9..c321396ade 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.cpp @@ -78,6 +78,8 @@ struct _GstCudaMemoryPrivate gboolean saw_io = FALSE; + gboolean from_fixed_pool = FALSE; + std::map < gint64, std::unique_ptr < GstCudaMemoryTokenData >> token_map; gpointer user_data = nullptr; @@ -1038,6 +1040,31 @@ gst_cuda_allocator_alloc_wrapped (GstCudaAllocator * allocator, return GST_MEMORY_CAST (mem); } +void 
+gst_cuda_memory_set_from_fixed_pool (GstMemory * mem) +{ + GstCudaMemory *cmem; + + if (!gst_is_cuda_memory (mem)) + return; + + cmem = GST_CUDA_MEMORY_CAST (mem); + cmem->priv->from_fixed_pool = TRUE; +} + +gboolean +gst_cuda_memory_is_from_fixed_pool (GstMemory * mem) +{ + GstCudaMemory *cmem; + + if (!gst_is_cuda_memory (mem)) + return FALSE; + + cmem = GST_CUDA_MEMORY_CAST (mem); + + return cmem->priv->from_fixed_pool; +} + #define GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING(alloc) (g_atomic_int_get (&alloc->priv->flushing)) struct _GstCudaPoolAllocatorPrivate diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c index 5a6090e12b..a851a0d407 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c @@ -1077,11 +1077,16 @@ gst_cuda_download_before_transform (GstBaseTransform * trans, GST_BASE_TRANSFORM_CLASS (parent_class)->before_transform (trans, buffer); old = gst_base_transform_is_passthrough (trans); - if (copy->in_type == copy->out_type || - (copy->in_type == GST_CUDA_BUFFER_COPY_CUDA && - copy->out_type == GST_CUDA_BUFFER_COPY_SYSTEM && - copy->downstream_supports_video_meta)) { + if (copy->in_type == copy->out_type) { new = TRUE; + } else if (copy->in_type == GST_CUDA_BUFFER_COPY_CUDA && + copy->out_type == GST_CUDA_BUFFER_COPY_SYSTEM && + copy->downstream_supports_video_meta) { + GstMemory *mem = gst_buffer_peek_memory (buffer, 0); + /* zero-copy decoded memory belongs to NVDEC's fixed size pool. 
+ * We should return the memory to the decoder as soon as possible */ + if (!gst_cuda_memory_is_from_fixed_pool (mem)) + new = TRUE; } if (new != old) { diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp index 56c2d9ec36..4feb67f3f8 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp @@ -29,6 +29,7 @@ #include #include #include +#include extern "C" { @@ -523,6 +524,7 @@ gst_nv_dec_object_export_surface (GstNvDecObject * object, mem = gst_cuda_allocator_alloc_wrapped (nullptr, object->context, stream, &info, output->devptr, output, (GDestroyNotify) gst_nv_dec_output_free); + gst_cuda_memory_set_from_fixed_pool (mem); priv->output_map[output->devptr] = mem; } else {