cuda: Load stream ordered allocation related symbols

Required to support async memory allocation

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7427>
Seungha Yang, 2024-08-29 18:23:37 +09:00 (committed by GStreamer Marge Bot)
parent b9207beef6
commit 174c9bfaa5
6 changed files with 219 additions and 3 deletions
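
Not from the commit itself, but for context, a minimal usage sketch of the wrappers this change exposes. It assumes a valid CUstream (`stream`), an allocation size (`size`) and an already pushed CUDA context, and it falls back to the synchronous CuMemAlloc () / CuMemFree () wrappers (assumed to already be provided by gstcudaloader) when the driver does not offer the stream ordered symbols. gst_cuda_stream_ordered_symbol_loaded () is the library-private helper added by this commit.

/* Hedged sketch, not part of this commit */
CUdeviceptr ptr = 0;
CUresult ret;

ret = CuMemAllocAsync (&ptr, size, stream);
if (ret == CUDA_ERROR_NOT_SUPPORTED) {
  /* The loader wrappers return CUDA_ERROR_NOT_SUPPORTED when cuMemAllocAsync
   * could not be resolved from the driver library */
  ret = CuMemAlloc (&ptr, size);
}

/* ... enqueue work that uses ptr on stream ... */

if (gst_cuda_stream_ordered_symbol_loaded ())
  CuMemFreeAsync (ptr, stream);
else
  CuMemFree (ptr);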


@@ -307,6 +307,38 @@ GST_CUDA_API
CUresult CUDAAPI CuMemRetainAllocationHandle (CUmemGenericAllocationHandle *handle,
void *addr);
GST_CUDA_API
CUresult CUDAAPI CuMemAllocAsync (CUdeviceptr *dptr,
size_t bytesize,
CUstream hStream);
GST_CUDA_API
CUresult CUDAAPI CuMemAllocFromPoolAsync (CUdeviceptr *dptr,
size_t bytesize,
CUmemoryPool pool,
CUstream hStream);
GST_CUDA_API
CUresult CUDAAPI CuMemFreeAsync (CUdeviceptr dptr,
CUstream hStream);
GST_CUDA_API
CUresult CUDAAPI CuMemPoolCreate (CUmemoryPool *pool,
const CUmemPoolProps *poolProps);
GST_CUDA_API
CUresult CUDAAPI CuMemPoolDestroy (CUmemoryPool pool);
GST_CUDA_API
CUresult CUDAAPI CuMemPoolSetAttribute (CUmemoryPool pool,
CUmemPool_attribute attr,
void *value);
GST_CUDA_API
CUresult CUDAAPI CuMemPoolGetAttribute (CUmemoryPool pool,
CUmemPool_attribute attr,
void *value);
/* cudaGL.h */
GST_CUDA_API
CUresult CUDAAPI CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,


@@ -56,8 +56,6 @@ void gst_cuda_memory_set_from_fixed_pool (GstMemory * mem);
GST_CUDA_API
gboolean gst_cuda_memory_is_from_fixed_pool (GstMemory * mem);
gboolean gst_cuda_virtual_memory_symbol_loaded (void);
gpointer gst_cuda_get_win32_handle_metadata (void);
G_END_DECLS


@@ -26,6 +26,7 @@
#include "gstcudautils.h"
#include "gstcudamemory.h"
#include "gstcuda-private.h"
#include "gstcudaloader-private.h"
#ifdef G_OS_WIN32
#include <gst/d3d11/gstd3d11.h>
@@ -53,6 +54,7 @@ enum
PROP_DXGI_ADAPTER_LUID,
PROP_VIRTUAL_MEMORY,
PROP_OS_HANDLE,
PROP_STREAM_ORDERED_ALLOC,
};
struct _GstCudaContextPrivate
@@ -63,6 +65,7 @@ struct _GstCudaContextPrivate
gint64 dxgi_adapter_luid;
gboolean virtual_memory_supported;
gboolean os_handle_supported;
gboolean stream_ordered_alloc_supported;
gint tex_align;
@@ -139,6 +142,16 @@ gst_cuda_context_class_init (GstCudaContextClass * klass)
"Whether OS specific handle is supported via virtual memory", FALSE,
(GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS)));
/**
* GstCudaContext:stream-ordered-alloc:
*
* Since: 1.26
*/
g_object_class_install_property (gobject_class, PROP_STREAM_ORDERED_ALLOC,
g_param_spec_boolean ("stream-ordered-alloc", "Stream Ordered Alloc",
"Device supports stream ordered allocation", FALSE,
(GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS)));
gst_cuda_memory_init_once ();
}
@@ -190,6 +203,9 @@ gst_cuda_context_get_property (GObject * object, guint prop_id,
case PROP_OS_HANDLE:
g_value_set_boolean (value, priv->os_handle_supported);
break;
case PROP_STREAM_ORDERED_ALLOC:
g_value_set_boolean (value, priv->stream_ordered_alloc_supported);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -571,7 +587,6 @@ gst_cuda_context_new_wrapped (CUcontext handler, CUdevice device)
{
GList *iter;
gint tex_align = 0;
GstCudaContext *self;
g_return_val_if_fail (handler, nullptr);
@@ -619,6 +634,16 @@ gst_cuda_context_new_wrapped (CUcontext handler, CUdevice device)
self->priv->os_handle_supported = TRUE;
}
if (gst_cuda_stream_ordered_symbol_loaded ()) {
CUresult ret;
int supported = 0;
ret = CuDeviceGetAttribute (&supported,
CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, device);
if (ret == CUDA_SUCCESS && supported)
self->priv->stream_ordered_alloc_supported = TRUE;
}
std::lock_guard < std::mutex > lk (list_lock);
g_object_weak_ref (G_OBJECT (self),
(GWeakNotify) gst_cuda_context_weak_ref_notify, nullptr);

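Not part of the commit, but a brief sketch of how code holding a GstCudaContext (here called `context`, an assumption) could query the new read-only property:

gboolean stream_ordered = FALSE;

/* TRUE only when the stream ordered symbols were loaded and
 * CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED is non-zero for the device */
g_object_get (G_OBJECT (context), "stream-ordered-alloc", &stream_ordered, NULL);

if (stream_ordered) {
  /* safe to rely on CuMemAllocAsync () / CuMemFreeAsync () on this device */
}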

@@ -0,0 +1,30 @@
/* GStreamer
* Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
G_BEGIN_DECLS
gboolean gst_cuda_virtual_memory_symbol_loaded (void);
gboolean gst_cuda_stream_ordered_symbol_loaded (void);
G_END_DECLS


@@ -25,6 +25,7 @@
#include "gstcudaloader.h"
#include <gmodule.h>
#include "gstcuda-private.h"
#include "gstcudaloader-private.h"
#ifdef HAVE_CUDA_GST_GL
#include <gst/gl/gstglconfig.h>
@@ -62,6 +63,7 @@ typedef struct _GstNvCodecCudaVTable
{
gboolean loaded;
gboolean have_virtual_alloc;
gboolean have_stream_ordered_alloc;
CUresult (CUDAAPI * CuInit) (unsigned int Flags);
CUresult (CUDAAPI * CuGetErrorName) (CUresult error, const char **pStr);
@@ -211,6 +213,19 @@ typedef struct _GstNvCodecCudaVTable
CUresult (CUDAAPI * CuMemUnmap) (CUdeviceptr ptr, size_t size);
CUresult (CUDAAPI * CuMemRetainAllocationHandle)
(CUmemGenericAllocationHandle *handle, void *addr);
CUresult (CUDAAPI * CuMemAllocAsync) (CUdeviceptr *dptr, size_t bytesize,
CUstream hStream);
CUresult (CUDAAPI * CuMemAllocFromPoolAsync) (CUdeviceptr *dptr,
size_t bytesize, CUmemoryPool pool, CUstream hStream);
CUresult (CUDAAPI * CuMemFreeAsync) (CUdeviceptr dptr, CUstream hStream);
CUresult (CUDAAPI * CuMemPoolCreate) (CUmemoryPool *pool,
const CUmemPoolProps *poolProps);
CUresult (CUDAAPI * CuMemPoolDestroy) (CUmemoryPool pool);
CUresult (CUDAAPI * CuMemPoolSetAttribute) (CUmemoryPool pool,
CUmemPool_attribute attr, void *value);
CUresult (CUDAAPI * CuMemPoolGetAttribute) (CUmemoryPool pool,
CUmemPool_attribute attr, void *value);
} GstNvCodecCudaVTable;
/* *INDENT-ON* */
@@ -245,6 +260,24 @@ gst_cuda_load_optional_symbols (GModule * module)
vtable->have_virtual_alloc = TRUE;
}
static void
gst_cuda_load_stream_ordered_alloc_symbols (GModule * module)
{
GstNvCodecCudaVTable *vtable = &gst_cuda_vtable;
LOAD_OPTIONAL_SYMBOL (cuMemAllocAsync, CuMemAllocAsync);
LOAD_OPTIONAL_SYMBOL (cuMemAllocFromPoolAsync, CuMemAllocFromPoolAsync);
LOAD_OPTIONAL_SYMBOL (cuMemFreeAsync, CuMemFreeAsync);
LOAD_OPTIONAL_SYMBOL (cuMemPoolCreate, CuMemPoolCreate);
LOAD_OPTIONAL_SYMBOL (cuMemPoolDestroy, CuMemPoolDestroy);
LOAD_OPTIONAL_SYMBOL (cuMemPoolSetAttribute, CuMemPoolSetAttribute);
LOAD_OPTIONAL_SYMBOL (cuMemPoolGetAttribute, CuMemPoolGetAttribute);
GST_INFO ("Stream ordered alloc symbols are loaded");
vtable->have_stream_ordered_alloc = TRUE;
}
static void
gst_cuda_load_library_once_func (void)
{
@@ -353,6 +386,7 @@ gst_cuda_load_library_once_func (void)
vtable->loaded = TRUE;
gst_cuda_load_optional_symbols (module);
gst_cuda_load_stream_ordered_alloc_symbols (module);
}
/**
@@ -382,6 +416,14 @@ gst_cuda_virtual_memory_symbol_loaded (void)
return gst_cuda_vtable.have_virtual_alloc;
}
gboolean
gst_cuda_stream_ordered_symbol_loaded (void)
{
gst_cuda_load_library ();
return gst_cuda_vtable.have_stream_ordered_alloc;
}
CUresult CUDAAPI
CuInit (unsigned int Flags)
{
@@ -966,6 +1008,71 @@ CuMemRetainAllocationHandle (CUmemGenericAllocationHandle * handle, void *addr)
return gst_cuda_vtable.CuMemRetainAllocationHandle (handle, addr);
}
CUresult CUDAAPI
CuMemAllocAsync (CUdeviceptr * dptr, size_t bytesize, CUstream hStream)
{
if (!gst_cuda_vtable.CuMemAllocAsync)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemAllocAsync (dptr, bytesize, hStream);
}
CUresult CUDAAPI
CuMemAllocFromPoolAsync (CUdeviceptr * dptr, size_t bytesize, CUmemoryPool pool,
CUstream hStream)
{
if (!gst_cuda_vtable.CuMemAllocFromPoolAsync)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemAllocFromPoolAsync (dptr,
bytesize, pool, hStream);
}
CUresult CUDAAPI
CuMemFreeAsync (CUdeviceptr dptr, CUstream hStream)
{
if (!gst_cuda_vtable.CuMemFreeAsync)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemFreeAsync (dptr, hStream);
}
CUresult CUDAAPI
CuMemPoolCreate (CUmemoryPool * pool, const CUmemPoolProps * poolProps)
{
if (!gst_cuda_vtable.CuMemPoolCreate)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemPoolCreate (pool, poolProps);
}
CUresult CUDAAPI
CuMemPoolDestroy (CUmemoryPool pool)
{
if (!gst_cuda_vtable.CuMemPoolDestroy)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemPoolDestroy (pool);
}
CUresult CUDAAPI
CuMemPoolSetAttribute (CUmemoryPool pool, CUmemPool_attribute attr, void *value)
{
if (!gst_cuda_vtable.CuMemPoolSetAttribute)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemPoolSetAttribute (pool, attr, value);
}
CUresult CUDAAPI
CuMemPoolGetAttribute (CUmemoryPool pool, CUmemPool_attribute attr, void *value)
{
if (!gst_cuda_vtable.CuMemPoolGetAttribute)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemPoolGetAttribute (pool, attr, value);
}
/* cudaGL.h */
CUresult CUDAAPI
CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,


@@ -31,6 +31,7 @@ typedef gpointer CUmodule;
typedef gpointer CUfunction;
typedef gpointer CUmipmappedArray;
typedef gpointer CUevent;
typedef gpointer CUmemoryPool;
typedef guint64 CUtexObject;
typedef guintptr CUdeviceptr;
@@ -62,6 +63,7 @@ typedef enum
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
} CUdevice_attribute;
typedef enum
@@ -292,6 +294,28 @@ typedef struct
CUmemAccess_flags flags;
} CUmemAccessDesc;
typedef struct
{
CUmemAllocationType allocType;
CUmemAllocationHandleType handleTypes;
CUmemLocation location;
void *win32SecurityAttributes;
size_t maxSize;
unsigned char reserved[56];
} CUmemPoolProps;
typedef enum
{
CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1,
CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,
CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT,
CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,
CU_MEMPOOL_ATTR_USED_MEM_CURRENT,
CU_MEMPOOL_ATTR_USED_MEM_HIGH,
} CUmemPool_attribute;
#define CUDA_VERSION 10000
#ifdef _WIN32
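
Finally, a hedged sketch (not part of the commit) of how the pool wrappers and the stub types above could fit together. The CU_MEM_ALLOCATION_TYPE_PINNED and CU_MEM_LOCATION_TYPE_DEVICE values are assumed to already exist in the stub header for the virtual memory path, and `device`, `size` and `stream` come from the caller:

/* Hypothetical usage: explicit memory pool with a release threshold, then a
 * stream ordered allocation from it. Error handling trimmed for brevity. */
CUmemoryPool pool;
CUmemPoolProps props = { };
guint64 threshold = G_MAXUINT64;   /* keep freed memory cached in the pool */
CUdeviceptr ptr = 0;

props.allocType = CU_MEM_ALLOCATION_TYPE_PINNED;
props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
props.location.id = device;

if (CuMemPoolCreate (&pool, &props) == CUDA_SUCCESS) {
  CuMemPoolSetAttribute (pool, CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, &threshold);

  if (CuMemAllocFromPoolAsync (&ptr, size, pool, stream) == CUDA_SUCCESS) {
    /* ... use ptr in work submitted to stream ... */
    CuMemFreeAsync (ptr, stream);
  }

  CuMemPoolDestroy (pool);
}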