cuda: Load stream ordered allocation related symbols

Required to support async memory allocation

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7427>
Seungha Yang, 2024-08-29 18:23:37 +09:00 (committed by GStreamer Marge Bot)
parent b9207beef6
commit 174c9bfaa5
6 changed files with 219 additions and 3 deletions
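
Not from the commit itself, but for context, a minimal usage sketch of the wrappers this change exposes. It assumes a valid CUstream (`stream`), an allocation size (`size`) and an already pushed CUDA context, and it falls back to the synchronous CuMemAlloc () / CuMemFree () wrappers (assumed to already be provided by gstcudaloader) when the driver does not offer the stream ordered symbols. gst_cuda_stream_ordered_symbol_loaded () is the library-private helper added by this commit.

/* Hedged sketch, not part of this commit */
CUdeviceptr ptr = 0;
CUresult ret;

ret = CuMemAllocAsync (&ptr, size, stream);
if (ret == CUDA_ERROR_NOT_SUPPORTED) {
  /* The loader wrappers return CUDA_ERROR_NOT_SUPPORTED when cuMemAllocAsync
   * could not be resolved from the driver library */
  ret = CuMemAlloc (&ptr, size);
}

/* ... enqueue work that uses ptr on stream ... */

if (gst_cuda_stream_ordered_symbol_loaded ())
  CuMemFreeAsync (ptr, stream);
else
  CuMemFree (ptr);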


@@ -307,6 +307,38 @@ GST_CUDA_API
CUresult CUDAAPI CuMemRetainAllocationHandle (CUmemGenericAllocationHandle *handle,
void *addr);
GST_CUDA_API
CUresult CUDAAPI CuMemAllocAsync (CUdeviceptr *dptr,
size_t bytesize,
CUstream hStream);
GST_CUDA_API
CUresult CUDAAPI CuMemAllocFromPoolAsync (CUdeviceptr *dptr,
size_t bytesize,
CUmemoryPool pool,
CUstream hStream);
GST_CUDA_API
CUresult CUDAAPI CuMemFreeAsync (CUdeviceptr dptr,
CUstream hStream);
GST_CUDA_API
CUresult CUDAAPI CuMemPoolCreate (CUmemoryPool *pool,
const CUmemPoolProps *poolProps);
GST_CUDA_API
CUresult CUDAAPI CuMemPoolDestroy (CUmemoryPool pool);
GST_CUDA_API
CUresult CUDAAPI CuMemPoolSetAttribute (CUmemoryPool pool,
CUmemPool_attribute attr,
void *value);
GST_CUDA_API
CUresult CUDAAPI CuMemPoolGetAttribute (CUmemoryPool pool,
CUmemPool_attribute attr,
void *value);
/* cudaGL.h */
GST_CUDA_API
CUresult CUDAAPI CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,


@@ -56,8 +56,6 @@ void gst_cuda_memory_set_from_fixed_pool (GstMemory * mem);
GST_CUDA_API
gboolean gst_cuda_memory_is_from_fixed_pool (GstMemory * mem);
gboolean gst_cuda_virtual_memory_symbol_loaded (void);
gpointer gst_cuda_get_win32_handle_metadata (void);
G_END_DECLS


@@ -26,6 +26,7 @@
#include "gstcudautils.h"
#include "gstcudamemory.h"
#include "gstcuda-private.h"
#include "gstcudaloader-private.h"
#ifdef G_OS_WIN32
#include <gst/d3d11/gstd3d11.h>
@@ -53,6 +54,7 @@ enum
PROP_DXGI_ADAPTER_LUID,
PROP_VIRTUAL_MEMORY,
PROP_OS_HANDLE,
PROP_STREAM_ORDERED_ALLOC,
};
struct _GstCudaContextPrivate
@@ -63,6 +65,7 @@ struct _GstCudaContextPrivate
gint64 dxgi_adapter_luid;
gboolean virtual_memory_supported;
gboolean os_handle_supported;
gboolean stream_ordered_alloc_supported;
gint tex_align;
@@ -139,6 +142,16 @@ gst_cuda_context_class_init (GstCudaContextClass * klass)
"Whether OS specific handle is supported via virtual memory", FALSE,
(GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS)));
/**
* GstCudaContext:stream-ordered-alloc:
*
* Since: 1.26
*/
g_object_class_install_property (gobject_class, PROP_STREAM_ORDERED_ALLOC,
g_param_spec_boolean ("stream-ordered-alloc", "Stream Ordered Alloc",
"Device supports stream ordered allocation", FALSE,
(GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS)));
gst_cuda_memory_init_once ();
}
@@ -190,6 +203,9 @@ gst_cuda_context_get_property (GObject * object, guint prop_id,
case PROP_OS_HANDLE:
g_value_set_boolean (value, priv->os_handle_supported);
break;
case PROP_STREAM_ORDERED_ALLOC:
g_value_set_boolean (value, priv->stream_ordered_alloc_supported);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -571,7 +587,6 @@ gst_cuda_context_new_wrapped (CUcontext handler, CUdevice device)
{
GList *iter;
gint tex_align = 0;
GstCudaContext *self;
g_return_val_if_fail (handler, nullptr);
@@ -619,6 +634,16 @@ gst_cuda_context_new_wrapped (CUcontext handler, CUdevice device)
self->priv->os_handle_supported = TRUE;
}
if (gst_cuda_stream_ordered_symbol_loaded ()) {
CUresult ret;
int supported = 0;
ret = CuDeviceGetAttribute (&supported,
CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, device);
if (ret == CUDA_SUCCESS && supported)
self->priv->stream_ordered_alloc_supported = TRUE;
}
std::lock_guard < std::mutex > lk (list_lock);
g_object_weak_ref (G_OBJECT (self),
(GWeakNotify) gst_cuda_context_weak_ref_notify, nullptr);

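Not part of the commit, but a brief sketch of how code holding a GstCudaContext (here called `context`, an assumption) could query the new read-only property:

gboolean stream_ordered = FALSE;

/* TRUE only when the stream ordered symbols were loaded and
 * CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED is non-zero for the device */
g_object_get (G_OBJECT (context), "stream-ordered-alloc", &stream_ordered, NULL);

if (stream_ordered) {
  /* safe to rely on CuMemAllocAsync () / CuMemFreeAsync () on this device */
}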

@@ -0,0 +1,30 @@
/* GStreamer
* Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
G_BEGIN_DECLS
gboolean gst_cuda_virtual_memory_symbol_loaded (void);
gboolean gst_cuda_stream_ordered_symbol_loaded (void);
G_END_DECLS


@@ -25,6 +25,7 @@
#include "gstcudaloader.h"
#include <gmodule.h>
#include "gstcuda-private.h"
#include "gstcudaloader-private.h"
#ifdef HAVE_CUDA_GST_GL
#include <gst/gl/gstglconfig.h>
@@ -62,6 +63,7 @@ typedef struct _GstNvCodecCudaVTable
{
gboolean loaded;
gboolean have_virtual_alloc;
gboolean have_stream_ordered_alloc;
CUresult (CUDAAPI * CuInit) (unsigned int Flags);
CUresult (CUDAAPI * CuGetErrorName) (CUresult error, const char **pStr);
@@ -211,6 +213,19 @@ typedef struct _GstNvCodecCudaVTable
CUresult (CUDAAPI * CuMemUnmap) (CUdeviceptr ptr, size_t size);
CUresult (CUDAAPI * CuMemRetainAllocationHandle)
(CUmemGenericAllocationHandle *handle, void *addr);
CUresult (CUDAAPI * CuMemAllocAsync) (CUdeviceptr *dptr, size_t bytesize,
CUstream hStream);
CUresult (CUDAAPI * CuMemAllocFromPoolAsync) (CUdeviceptr *dptr,
size_t bytesize, CUmemoryPool pool, CUstream hStream);
CUresult (CUDAAPI * CuMemFreeAsync) (CUdeviceptr dptr, CUstream hStream);
CUresult (CUDAAPI * CuMemPoolCreate) (CUmemoryPool *pool,
const CUmemPoolProps *poolProps);
CUresult (CUDAAPI * CuMemPoolDestroy) (CUmemoryPool pool);
CUresult (CUDAAPI * CuMemPoolSetAttribute) (CUmemoryPool pool,
CUmemPool_attribute attr, void *value);
CUresult (CUDAAPI * CuMemPoolGetAttribute) (CUmemoryPool pool,
CUmemPool_attribute attr, void *value);
} GstNvCodecCudaVTable;
/* *INDENT-ON* */
@@ -245,6 +260,24 @@ gst_cuda_load_optional_symbols (GModule * module)
vtable->have_virtual_alloc = TRUE;
}
static void
gst_cuda_load_stream_ordered_alloc_symbols (GModule * module)
{
GstNvCodecCudaVTable *vtable = &gst_cuda_vtable;
LOAD_OPTIONAL_SYMBOL (cuMemAllocAsync, CuMemAllocAsync);
LOAD_OPTIONAL_SYMBOL (cuMemAllocFromPoolAsync, CuMemAllocFromPoolAsync);
LOAD_OPTIONAL_SYMBOL (cuMemFreeAsync, CuMemFreeAsync);
LOAD_OPTIONAL_SYMBOL (cuMemPoolCreate, CuMemPoolCreate);
LOAD_OPTIONAL_SYMBOL (cuMemPoolDestroy, CuMemPoolDestroy);
LOAD_OPTIONAL_SYMBOL (cuMemPoolSetAttribute, CuMemPoolSetAttribute);
LOAD_OPTIONAL_SYMBOL (cuMemPoolGetAttribute, CuMemPoolGetAttribute);
GST_INFO ("Stream ordered alloc symbols are loaded");
vtable->have_stream_ordered_alloc = TRUE;
}
static void
gst_cuda_load_library_once_func (void)
{
@@ -353,6 +386,7 @@ gst_cuda_load_library_once_func (void)
vtable->loaded = TRUE;
gst_cuda_load_optional_symbols (module);
gst_cuda_load_stream_ordered_alloc_symbols (module);
}
/**
@@ -382,6 +416,14 @@ gst_cuda_virtual_memory_symbol_loaded (void)
return gst_cuda_vtable.have_virtual_alloc;
}
gboolean
gst_cuda_stream_ordered_symbol_loaded (void)
{
gst_cuda_load_library ();
return gst_cuda_vtable.have_stream_ordered_alloc;
}
CUresult CUDAAPI
CuInit (unsigned int Flags)
{
@@ -966,6 +1008,71 @@ CuMemRetainAllocationHandle (CUmemGenericAllocationHandle * handle, void *addr)
return gst_cuda_vtable.CuMemRetainAllocationHandle (handle, addr);
}
CUresult CUDAAPI
CuMemAllocAsync (CUdeviceptr * dptr, size_t bytesize, CUstream hStream)
{
if (!gst_cuda_vtable.CuMemAllocAsync)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemAllocAsync (dptr, bytesize, hStream);
}
CUresult CUDAAPI
CuMemAllocFromPoolAsync (CUdeviceptr * dptr, size_t bytesize, CUmemoryPool pool,
CUstream hStream)
{
if (!gst_cuda_vtable.CuMemAllocFromPoolAsync)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemAllocFromPoolAsync (dptr,
bytesize, pool, hStream);
}
CUresult CUDAAPI
CuMemFreeAsync (CUdeviceptr dptr, CUstream hStream)
{
if (!gst_cuda_vtable.CuMemFreeAsync)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemFreeAsync (dptr, hStream);
}
CUresult CUDAAPI
CuMemPoolCreate (CUmemoryPool * pool, const CUmemPoolProps * poolProps)
{
if (!gst_cuda_vtable.CuMemPoolCreate)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemPoolCreate (pool, poolProps);
}
CUresult CUDAAPI
CuMemPoolDestroy (CUmemoryPool pool)
{
if (!gst_cuda_vtable.CuMemPoolDestroy)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemPoolDestroy (pool);
}
CUresult CUDAAPI
CuMemPoolSetAttribute (CUmemoryPool pool, CUmemPool_attribute attr, void *value)
{
if (!gst_cuda_vtable.CuMemPoolSetAttribute)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemPoolSetAttribute (pool, attr, value);
}
CUresult CUDAAPI
CuMemPoolGetAttribute (CUmemoryPool pool, CUmemPool_attribute attr, void *value)
{
if (!gst_cuda_vtable.CuMemPoolGetAttribute)
return CUDA_ERROR_NOT_SUPPORTED;
return gst_cuda_vtable.CuMemPoolGetAttribute (pool, attr, value);
}
/* cudaGL.h */
CUresult CUDAAPI
CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,


@@ -31,6 +31,7 @@ typedef gpointer CUmodule;
typedef gpointer CUfunction;
typedef gpointer CUmipmappedArray;
typedef gpointer CUevent;
typedef gpointer CUmemoryPool;
typedef guint64 CUtexObject;
typedef guintptr CUdeviceptr;
@@ -62,6 +63,7 @@ typedef enum
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
} CUdevice_attribute;
typedef enum
@@ -292,6 +294,28 @@ typedef struct
CUmemAccess_flags flags;
} CUmemAccessDesc;
typedef struct
{
CUmemAllocationType allocType;
CUmemAllocationHandleType handleTypes;
CUmemLocation location;
void *win32SecurityAttributes;
size_t maxSize;
unsigned char reserved[56];
} CUmemPoolProps;
typedef enum
{
CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1,
CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,
CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT,
CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,
CU_MEMPOOL_ATTR_USED_MEM_CURRENT,
CU_MEMPOOL_ATTR_USED_MEM_HIGH,
} CUmemPool_attribute;
#define CUDA_VERSION 10000
#ifdef _WIN32
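
Finally, a hedged sketch (not part of the commit) of how the pool wrappers and the stub types above could fit together. The CU_MEM_ALLOCATION_TYPE_PINNED and CU_MEM_LOCATION_TYPE_DEVICE values are assumed to already exist in the stub header for the virtual memory path, and `device`, `size` and `stream` come from the caller:

/* Hypothetical usage: explicit memory pool with a release threshold, then a
 * stream ordered allocation from it. Error handling trimmed for brevity. */
CUmemoryPool pool;
CUmemPoolProps props = { };
guint64 threshold = G_MAXUINT64;   /* keep freed memory cached in the pool */
CUdeviceptr ptr = 0;

props.allocType = CU_MEM_ALLOCATION_TYPE_PINNED;
props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
props.location.id = device;

if (CuMemPoolCreate (&pool, &props) == CUDA_SUCCESS) {
  CuMemPoolSetAttribute (pool, CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, &threshold);

  if (CuMemAllocFromPoolAsync (&ptr, size, pool, stream) == CUDA_SUCCESS) {
    /* ... use ptr in work submitted to stream ... */
    CuMemFreeAsync (ptr, stream);
  }

  CuMemPoolDestroy (pool);
}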