cudadownload: Always download CUDA memory if it's bound to decoder

Decoder-bound CUDA memory is allocated by the driver, and the pool size is fixed. Since we don't know how many buffers will be held by downstream non-CUDA elements, we should download such CUDA memory and release it back to the decoder. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4810>
2025-01-03 14:08:56 +00:00 · 2023-06-08 21:29:32 +09:00 · 2023-06-08 21:29:32 +09:00 · 1aa9e74aaf
commit 1aa9e74aaf
parent cf60babf87
4 changed files with 44 additions and 4 deletions
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h
@ -50,6 +50,12 @@ gboolean      gst_cuda_buffer_copy (GstBuffer * dst,
                                    GstCudaContext * context,
                                    GstCudaStream * stream);
 /* Sets the "from fixed pool" flag on @mem.
  * Does nothing when @mem is not CUDA memory. */
 GST_CUDA_API
 void          gst_cuda_memory_set_from_fixed_pool (GstMemory * mem);
 /* Returns the "from fixed pool" flag of @mem,
  * or FALSE when @mem is not CUDA memory. */
 GST_CUDA_API
 gboolean      gst_cuda_memory_is_from_fixed_pool (GstMemory * mem);
 G_END_DECLS
 #ifdef __cplusplus
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.cpp
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.cpp
@ -78,6 +78,8 @@ struct _GstCudaMemoryPrivate
  gboolean saw_io = FALSE;
  gboolean from_fixed_pool = FALSE;
  std::map < gint64, std::unique_ptr < GstCudaMemoryTokenData >> token_map;
  gpointer user_data = nullptr;
@ -1038,6 +1040,31 @@ gst_cuda_allocator_alloc_wrapped (GstCudaAllocator * allocator,
  return GST_MEMORY_CAST (mem);
 }
 /* Flags @mem as coming from a fixed-size pool by setting the
  * from_fixed_pool field of its private data to TRUE.
  * Memory that is not CUDA memory is left untouched. */
 void
 gst_cuda_memory_set_from_fixed_pool (GstMemory * mem)
 {
   if (gst_is_cuda_memory (mem)) {
     GstCudaMemory *cuda_mem = GST_CUDA_MEMORY_CAST (mem);

     cuda_mem->priv->from_fixed_pool = TRUE;
   }
 }
 /* Reports the from_fixed_pool field stored in the private data of @mem.
  * Returns FALSE for anything that is not CUDA memory. */
 gboolean
 gst_cuda_memory_is_from_fixed_pool (GstMemory * mem)
 {
   if (gst_is_cuda_memory (mem)) {
     GstCudaMemory *cuda_mem = GST_CUDA_MEMORY_CAST (mem);

     return cuda_mem->priv->from_fixed_pool;
   }

   return FALSE;
 }
 #define GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING(alloc)  (g_atomic_int_get (&alloc->priv->flushing))
 struct _GstCudaPoolAllocatorPrivate
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemorycopy.c
@ -1077,11 +1077,16 @@ gst_cuda_download_before_transform (GstBaseTransform * trans,
  GST_BASE_TRANSFORM_CLASS (parent_class)->before_transform (trans, buffer);
  old = gst_base_transform_is_passthrough (trans);
-  if (copy->in_type == copy->out_type ||
+  if (copy->in_type == copy->out_type) {
      (copy->in_type == GST_CUDA_BUFFER_COPY_CUDA &&
          copy->out_type == GST_CUDA_BUFFER_COPY_SYSTEM &&
          copy->downstream_supports_video_meta)) {
    new = TRUE;
  } else if (copy->in_type == GST_CUDA_BUFFER_COPY_CUDA &&
      copy->out_type == GST_CUDA_BUFFER_COPY_SYSTEM &&
      copy->downstream_supports_video_meta) {
    GstMemory *mem = gst_buffer_peek_memory (buffer, 0);
    /* zero-copy decoded memory belongs to NVDEC's fixed size pool.
     * We should return the memory to the decoder as soon as possible */
    if (!gst_cuda_memory_is_from_fixed_pool (mem))
      new = TRUE;
  }
  if (new != old) {
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp
@ -29,6 +29,7 @@
 #include <memory>
 #include <string.h>
 #include <algorithm>
 #include <gst/cuda/gstcuda-private.h>
 extern "C"
 {
@ -523,6 +524,7 @@ gst_nv_dec_object_export_surface (GstNvDecObject * object,
    mem = gst_cuda_allocator_alloc_wrapped (nullptr, object->context,
        stream, &info, output->devptr, output,
        (GDestroyNotify) gst_nv_dec_output_free);
    gst_cuda_memory_set_from_fixed_pool (mem);
    priv->output_map[output->devptr] = mem;
  } else {