From 174c9bfaa58a3cfba985b6af29ed1f63fb0de65c Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Thu, 29 Aug 2024 18:23:37 +0900 Subject: [PATCH] cuda: Load stream ordered allocation related symbols Required to support async memory allocation Part-of: --- .../gst-libs/gst/cuda/cuda-gst.h | 32 ++++++ .../gst-libs/gst/cuda/gstcuda-private.h | 2 - .../gst-libs/gst/cuda/gstcudacontext.cpp | 27 ++++- .../gst-libs/gst/cuda/gstcudaloader-private.h | 30 +++++ .../gst-libs/gst/cuda/gstcudaloader.cpp | 107 ++++++++++++++++++ .../gst-libs/gst/cuda/stub/cuda.h | 24 ++++ 6 files changed, 219 insertions(+), 3 deletions(-) create mode 100644 subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader-private.h diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h index c2a4ec5592..fd83158e82 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h @@ -307,6 +307,38 @@ GST_CUDA_API CUresult CUDAAPI CuMemRetainAllocationHandle (CUmemGenericAllocationHandle *handle, void *addr); +GST_CUDA_API +CUresult CUDAAPI CuMemAllocAsync (CUdeviceptr *dptr, + size_t bytesize, + CUstream hStream); + +GST_CUDA_API +CUresult CUDAAPI CuMemAllocFromPoolAsync (CUdeviceptr *dptr, + size_t bytesize, + CUmemoryPool pool, + CUstream hStream); + +GST_CUDA_API +CUresult CUDAAPI CuMemFreeAsync (CUdeviceptr dptr, + CUstream hStream); + +GST_CUDA_API +CUresult CUDAAPI CuMemPoolCreate (CUmemoryPool *pool, + const CUmemPoolProps *poolProps); + +GST_CUDA_API +CUresult CUDAAPI CuMemPoolDestroy (CUmemoryPool pool); + +GST_CUDA_API +CUresult CUDAAPI CuMemPoolSetAttribute (CUmemoryPool pool, + CUmemPool_attribute attr, + void *value); + +GST_CUDA_API +CUresult CUDAAPI CuMemPoolGetAttribute (CUmemoryPool pool, + CUmemPool_attribute attr, + void *value); + /* cudaGL.h */ GST_CUDA_API CUresult CUDAAPI CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource, diff 
--git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h index be3b8e00db..f2d8761518 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h @@ -56,8 +56,6 @@ void gst_cuda_memory_set_from_fixed_pool (GstMemory * mem); GST_CUDA_API gboolean gst_cuda_memory_is_from_fixed_pool (GstMemory * mem); -gboolean gst_cuda_virtual_memory_symbol_loaded (void); - gpointer gst_cuda_get_win32_handle_metadata (void); G_END_DECLS diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudacontext.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudacontext.cpp index efc7d50157..ab6e0afa09 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudacontext.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudacontext.cpp @@ -26,6 +26,7 @@ #include "gstcudautils.h" #include "gstcudamemory.h" #include "gstcuda-private.h" +#include "gstcudaloader-private.h" #ifdef G_OS_WIN32 #include @@ -53,6 +54,7 @@ enum PROP_DXGI_ADAPTER_LUID, PROP_VIRTUAL_MEMORY, PROP_OS_HANDLE, + PROP_STREAM_ORDERED_ALLOC, }; struct _GstCudaContextPrivate @@ -63,6 +65,7 @@ struct _GstCudaContextPrivate gint64 dxgi_adapter_luid; gboolean virtual_memory_supported; gboolean os_handle_supported; + gboolean stream_ordered_alloc_supported; gint tex_align; @@ -139,6 +142,16 @@ gst_cuda_context_class_init (GstCudaContextClass * klass) "Whether OS specific handle is supported via virtual memory", FALSE, (GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS))); + /** + * GstCudaContext:stream-ordered-alloc: + * + * Since: 1.26 + */ + g_object_class_install_property (gobject_class, PROP_STREAM_ORDERED_ALLOC, + g_param_spec_boolean ("stream-ordered-alloc", "Stream Ordered Alloc", + "Device supports stream ordered allocation", FALSE, + (GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS))); + gst_cuda_memory_init_once (); 
} @@ -190,6 +203,9 @@ gst_cuda_context_get_property (GObject * object, guint prop_id, case PROP_OS_HANDLE: g_value_set_boolean (value, priv->os_handle_supported); break; + case PROP_STREAM_ORDERED_ALLOC: + g_value_set_boolean (value, priv->stream_ordered_alloc_supported); + break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; @@ -571,7 +587,6 @@ gst_cuda_context_new_wrapped (CUcontext handler, CUdevice device) { GList *iter; gint tex_align = 0; - GstCudaContext *self; g_return_val_if_fail (handler, nullptr); @@ -619,6 +634,16 @@ gst_cuda_context_new_wrapped (CUcontext handler, CUdevice device) self->priv->os_handle_supported = TRUE; } + if (gst_cuda_stream_ordered_symbol_loaded ()) { + CUresult ret; + int supported = 0; + + ret = CuDeviceGetAttribute (&supported, + CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, device); + if (ret == CUDA_SUCCESS && supported) + self->priv->stream_ordered_alloc_supported = TRUE; + } + std::lock_guard < std::mutex > lk (list_lock); g_object_weak_ref (G_OBJECT (self), (GWeakNotify) gst_cuda_context_weak_ref_notify, nullptr); diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader-private.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader-private.h new file mode 100644 index 0000000000..6e33874b3d --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader-private.h @@ -0,0 +1,30 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#pragma once + +#include <gst/gst.h> + +G_BEGIN_DECLS + +gboolean gst_cuda_virtual_memory_symbol_loaded (void); + +gboolean gst_cuda_stream_ordered_symbol_loaded (void); + +G_END_DECLS diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp index 75b1a11021..7fcb6162d0 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp @@ -25,6 +25,7 @@ #include "gstcudaloader.h" #include <gmodule.h> #include "gstcuda-private.h" +#include "gstcudaloader-private.h" #ifdef HAVE_CUDA_GST_GL #include @@ -62,6 +63,7 @@ typedef struct _GstNvCodecCudaVTable { gboolean loaded; gboolean have_virtual_alloc; + gboolean have_stream_ordered_alloc; CUresult (CUDAAPI * CuInit) (unsigned int Flags); CUresult (CUDAAPI * CuGetErrorName) (CUresult error, const char **pStr); @@ -211,6 +213,19 @@ typedef struct _GstNvCodecCudaVTable CUresult (CUDAAPI * CuMemUnmap) (CUdeviceptr ptr, size_t size); CUresult (CUDAAPI * CuMemRetainAllocationHandle) (CUmemGenericAllocationHandle *handle, void *addr); + + CUresult (CUDAAPI * CuMemAllocAsync) (CUdeviceptr *dptr, size_t bytesize, + CUstream hStream); + CUresult (CUDAAPI * CuMemAllocFromPoolAsync) (CUdeviceptr *dptr, + size_t bytesize, CUmemoryPool pool, CUstream hStream); + CUresult (CUDAAPI * CuMemFreeAsync) (CUdeviceptr dptr, CUstream hStream); + CUresult (CUDAAPI * CuMemPoolCreate) (CUmemoryPool *pool, + const CUmemPoolProps *poolProps); + CUresult (CUDAAPI * CuMemPoolDestroy) (CUmemoryPool pool); + CUresult (CUDAAPI * CuMemPoolSetAttribute) (CUmemoryPool pool, + CUmemPool_attribute attr, void *value); + CUresult (CUDAAPI * CuMemPoolGetAttribute) (CUmemoryPool pool, +
CUmemPool_attribute attr, void *value); } GstNvCodecCudaVTable; /* *INDENT-ON* */ @@ -245,6 +260,24 @@ gst_cuda_load_optional_symbols (GModule * module) vtable->have_virtual_alloc = TRUE; } +static void +gst_cuda_load_stream_ordered_alloc_symbols (GModule * module) +{ + GstNvCodecCudaVTable *vtable = &gst_cuda_vtable; + + LOAD_OPTIONAL_SYMBOL (cuMemAllocAsync, CuMemAllocAsync); + LOAD_OPTIONAL_SYMBOL (cuMemAllocFromPoolAsync, CuMemAllocFromPoolAsync); + LOAD_OPTIONAL_SYMBOL (cuMemFreeAsync, CuMemFreeAsync); + LOAD_OPTIONAL_SYMBOL (cuMemPoolCreate, CuMemPoolCreate); + LOAD_OPTIONAL_SYMBOL (cuMemPoolDestroy, CuMemPoolDestroy); + LOAD_OPTIONAL_SYMBOL (cuMemPoolSetAttribute, CuMemPoolSetAttribute); + LOAD_OPTIONAL_SYMBOL (cuMemPoolGetAttribute, CuMemPoolGetAttribute); + + GST_INFO ("Stream ordered alloc symbols are loaded"); + + vtable->have_stream_ordered_alloc = TRUE; +} + static void gst_cuda_load_library_once_func (void) { @@ -353,6 +386,7 @@ gst_cuda_load_library_once_func (void) vtable->loaded = TRUE; gst_cuda_load_optional_symbols (module); + gst_cuda_load_stream_ordered_alloc_symbols (module); } /** @@ -382,6 +416,14 @@ gst_cuda_virtual_memory_symbol_loaded (void) return gst_cuda_vtable.have_virtual_alloc; } +gboolean +gst_cuda_stream_ordered_symbol_loaded (void) +{ + gst_cuda_load_library (); + + return gst_cuda_vtable.have_stream_ordered_alloc; +} + CUresult CUDAAPI CuInit (unsigned int Flags) { @@ -966,6 +1008,71 @@ CuMemRetainAllocationHandle (CUmemGenericAllocationHandle * handle, void *addr) return gst_cuda_vtable.CuMemRetainAllocationHandle (handle, addr); } +CUresult CUDAAPI +CuMemAllocAsync (CUdeviceptr * dptr, size_t bytesize, CUstream hStream) +{ + if (!gst_cuda_vtable.CuMemAllocAsync) + return CUDA_ERROR_NOT_SUPPORTED; + + return gst_cuda_vtable.CuMemAllocAsync (dptr, bytesize, hStream); +} + +CUresult CUDAAPI +CuMemAllocFromPoolAsync (CUdeviceptr * dptr, size_t bytesize, CUmemoryPool pool, + CUstream hStream) +{ + if 
(!gst_cuda_vtable.CuMemAllocFromPoolAsync) + return CUDA_ERROR_NOT_SUPPORTED; + + return gst_cuda_vtable.CuMemAllocFromPoolAsync (dptr, + bytesize, pool, hStream); +} + +CUresult CUDAAPI +CuMemFreeAsync (CUdeviceptr dptr, CUstream hStream) +{ + if (!gst_cuda_vtable.CuMemFreeAsync) + return CUDA_ERROR_NOT_SUPPORTED; + + return gst_cuda_vtable.CuMemFreeAsync (dptr, hStream); +} + +CUresult CUDAAPI +CuMemPoolCreate (CUmemoryPool * pool, const CUmemPoolProps * poolProps) +{ + if (!gst_cuda_vtable.CuMemPoolCreate) + return CUDA_ERROR_NOT_SUPPORTED; + + return gst_cuda_vtable.CuMemPoolCreate (pool, poolProps); +} + +CUresult CUDAAPI +CuMemPoolDestroy (CUmemoryPool pool) +{ + if (!gst_cuda_vtable.CuMemPoolDestroy) + return CUDA_ERROR_NOT_SUPPORTED; + + return gst_cuda_vtable.CuMemPoolDestroy (pool); +} + +CUresult CUDAAPI +CuMemPoolSetAttribute (CUmemoryPool pool, CUmemPool_attribute attr, void *value) +{ + if (!gst_cuda_vtable.CuMemPoolSetAttribute) + return CUDA_ERROR_NOT_SUPPORTED; + + return gst_cuda_vtable.CuMemPoolSetAttribute (pool, attr, value); +} + +CUresult CUDAAPI +CuMemPoolGetAttribute (CUmemoryPool pool, CUmemPool_attribute attr, void *value) +{ + if (!gst_cuda_vtable.CuMemPoolGetAttribute) + return CUDA_ERROR_NOT_SUPPORTED; + + return gst_cuda_vtable.CuMemPoolGetAttribute (pool, attr, value); +} + /* cudaGL.h */ CUresult CUDAAPI CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource, diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cuda.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cuda.h index 0382301ad9..f6a590b600 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cuda.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cuda.h @@ -31,6 +31,7 @@ typedef gpointer CUmodule; typedef gpointer CUfunction; typedef gpointer CUmipmappedArray; typedef gpointer CUevent; +typedef gpointer CUmemoryPool; typedef guint64 CUtexObject; typedef guintptr CUdeviceptr; @@ -62,6 +63,7 @@ typedef enum 
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105, + CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115, } CUdevice_attribute; typedef enum @@ -292,6 +294,28 @@ typedef struct CUmemAccess_flags flags; } CUmemAccessDesc; +typedef struct +{ + CUmemAllocationType allocType; + CUmemAllocationHandleType handleTypes; + CUmemLocation location; + void *win32SecurityAttributes; + size_t maxSize; + unsigned char reserved[56]; +} CUmemPoolProps; + +typedef enum +{ + CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1, + CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC, + CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES, + CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, + CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT, + CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH, + CU_MEMPOOL_ATTR_USED_MEM_CURRENT, + CU_MEMPOOL_ATTR_USED_MEM_HIGH, +} CUmemPool_attribute; + #define CUDA_VERSION 10000 #ifdef _WIN32