cuda: Load virtual memory management and IPC API symbols

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4510>
This commit is contained in:
Seungha Yang 2023-08-10 01:58:57 +09:00 committed by GStreamer Marge Bot
parent d9894da59d
commit a712a768a4
4 changed files with 505 additions and 0 deletions

View file

@ -169,6 +169,108 @@ CUresult CUDAAPI CuLaunchKernel (CUfunction f,
void **kernelParams,
void **extra);
/* Event wrappers. The implementations assert that the matching driver entry
 * point has been resolved and then forward the call unchanged. */
GST_CUDA_API
CUresult CUDAAPI CuEventCreate (CUevent *phEvent,
unsigned int Flags);
GST_CUDA_API
CUresult CUDAAPI CuEventDestroy (CUevent hEvent);
GST_CUDA_API
CUresult CUDAAPI CuEventRecord (CUevent hEvent,
CUstream hStream);
GST_CUDA_API
CUresult CUDAAPI CuEventSynchronize (CUevent hEvent);
/* IPC event/memory handle wrappers. Like the event wrappers above, the
 * implementations assert that the entry point is available. */
GST_CUDA_API
CUresult CUDAAPI CuIpcGetEventHandle (CUipcEventHandle *pHandle,
CUevent event);
GST_CUDA_API
CUresult CUDAAPI CuIpcOpenEventHandle (CUevent* phEvent,
CUipcEventHandle handle);
GST_CUDA_API
CUresult CUDAAPI CuIpcGetMemHandle (CUipcMemHandle *pHandle,
CUdeviceptr dptr);
GST_CUDA_API
CUresult CUDAAPI CuIpcOpenMemHandle (CUdeviceptr *pdptr,
CUipcMemHandle handle,
unsigned int Flags);
GST_CUDA_API
CUresult CUDAAPI CuIpcCloseMemHandle (CUdeviceptr dptr);
/* Virtual memory management wrappers. The underlying symbols are loaded as
 * optional: each wrapper returns CUDA_ERROR_NOT_SUPPORTED when its entry
 * point was not resolved instead of asserting. */
GST_CUDA_API
CUresult CUDAAPI CuMemAddressReserve (CUdeviceptr *ptr,
size_t size,
size_t alignment,
CUdeviceptr addr,
unsigned long long flags);
GST_CUDA_API
CUresult CUDAAPI CuMemAddressFree (CUdeviceptr ptr,
size_t size);
GST_CUDA_API
CUresult CUDAAPI CuMemCreate (CUmemGenericAllocationHandle *handle,
size_t size,
const CUmemAllocationProp *prop,
unsigned long long flags);
GST_CUDA_API
CUresult CUDAAPI CuMemRelease (CUmemGenericAllocationHandle handle);
GST_CUDA_API
CUresult CUDAAPI CuMemExportToShareableHandle (void *shareableHandle,
CUmemGenericAllocationHandle handle,
CUmemAllocationHandleType handleType,
unsigned long long flags);
GST_CUDA_API
CUresult CUDAAPI CuMemImportFromShareableHandle (CUmemGenericAllocationHandle *handle,
void *osHandle,
CUmemAllocationHandleType shHandleType);
GST_CUDA_API
CUresult CUDAAPI CuMemSetAccess (CUdeviceptr ptr,
size_t size,
const CUmemAccessDesc *desc,
size_t count);
GST_CUDA_API
CUresult CUDAAPI CuMemGetAccess (unsigned long long *flags,
const CUmemLocation *location,
CUdeviceptr ptr);
GST_CUDA_API
CUresult CUDAAPI CuMemGetAllocationGranularity (size_t *granularity,
const CUmemAllocationProp *prop,
CUmemAllocationGranularity_flags option);
GST_CUDA_API
CUresult CUDAAPI CuMemGetAllocationPropertiesFromHandle (CUmemAllocationProp *prop,
CUmemGenericAllocationHandle handle);
GST_CUDA_API
CUresult CUDAAPI CuMemMap (CUdeviceptr ptr,
size_t size,
size_t offset,
CUmemGenericAllocationHandle handle,
unsigned long long flags);
GST_CUDA_API
CUresult CUDAAPI CuMemUnmap (CUdeviceptr ptr,
size_t size);
GST_CUDA_API
CUresult CUDAAPI CuMemRetainAllocationHandle (CUmemGenericAllocationHandle *handle,
void *addr);
/* cudaGL.h */
GST_CUDA_API
CUresult CUDAAPI CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,

View file

@ -56,6 +56,8 @@ void gst_cuda_memory_set_from_fixed_pool (GstMemory * mem);
GST_CUDA_API
gboolean gst_cuda_memory_is_from_fixed_pool (GstMemory * mem);
/* Triggers gst_cuda_load_library() and reports whether every optional
 * virtual memory management symbol (CuMem*) was resolved. */
gboolean gst_cuda_virtual_memory_symbol_loaded (void);
G_END_DECLS
#ifdef __cplusplus

View file

@ -43,10 +43,18 @@ GST_DEBUG_CATEGORY (gst_cudaloader_debug);
} \
} G_STMT_END;
/* Resolves the symbol `name` in `module` and stores it in `vtable->func`.
 * Requires `module` and `vtable` to be in scope at the expansion site.
 * On failure a warning is logged and the macro executes a bare `return;`,
 * so it may only be used inside functions returning void.
 * G_STRINGIFY expands `name` before stringifying, so a #define that remaps
 * `name` changes the symbol that is looked up.
 * NOTE(review): the `;` after G_STMT_END makes the expansion two statements,
 * which breaks use as the sole body of an if/else — confirm no such use. */
#define LOAD_OPTIONAL_SYMBOL(name,func) G_STMT_START { \
if (!g_module_symbol (module, G_STRINGIFY (name), (gpointer *) &vtable->func)) { \
GST_WARNING ("Failed to load '%s', %s", G_STRINGIFY (name), g_module_error()); \
return; \
} \
} G_STMT_END;
/* *INDENT-OFF* */
typedef struct _GstNvCodecCudaVTable
{
gboolean loaded;
gboolean have_virtual_alloc;
CUresult (CUDAAPI * CuInit) (unsigned int Flags);
CUresult (CUDAAPI * CuGetErrorName) (CUresult error, const char **pStr);
@ -125,6 +133,22 @@ typedef struct _GstNvCodecCudaVTable
CUdevice * pCudaDevices, unsigned int cudaDeviceCount,
CUGLDeviceList deviceList);
CUresult (CUDAAPI * CuEventCreate) (CUevent *phEvent, unsigned int Flags);
CUresult (CUDAAPI * CuEventDestroy) (CUevent hEvent);
CUresult (CUDAAPI * CuEventRecord) (CUevent hEvent, CUstream hStream);
CUresult (CUDAAPI * CuEventSynchronize) (CUevent hEvent);
CUresult (CUDAAPI * CuIpcGetEventHandle) (CUipcEventHandle *pHandle,
CUevent event);
CUresult (CUDAAPI * CuIpcOpenEventHandle) (CUevent* phEvent,
CUipcEventHandle handle);
CUresult (CUDAAPI * CuIpcGetMemHandle) (CUipcMemHandle *pHandle,
CUdeviceptr dptr);
CUresult (CUDAAPI * CuIpcOpenMemHandle) (CUdeviceptr *pdptr,
CUipcMemHandle handle, unsigned int Flags);
CUresult (CUDAAPI * CuIpcCloseMemHandle) (CUdeviceptr dptr);
#ifdef G_OS_WIN32
CUresult (CUDAAPI * CuGraphicsD3D11RegisterResource) (CUgraphicsResource *
pCudaResource, ID3D11Resource * pD3DResource, unsigned int Flags);
@ -134,11 +158,66 @@ typedef struct _GstNvCodecCudaVTable
CUdevice * pCudaDevices, unsigned int cudaDeviceCount,
ID3D11Device * pD3D11Device, CUd3d11DeviceList deviceList);
#endif
CUresult (CUDAAPI * CuMemAddressReserve) (CUdeviceptr *ptr, size_t size,
size_t alignment, CUdeviceptr addr, unsigned long long flags);
CUresult (CUDAAPI * CuMemAddressFree) (CUdeviceptr ptr, size_t size);
CUresult (CUDAAPI * CuMemCreate) (CUmemGenericAllocationHandle *handle,
size_t size, const CUmemAllocationProp *prop, unsigned long long flags);
CUresult (CUDAAPI * CuMemRelease) (CUmemGenericAllocationHandle handle);
CUresult (CUDAAPI * CuMemExportToShareableHandle) (void *shareableHandle,
CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType,
unsigned long long flags);
CUresult (CUDAAPI * CuMemImportFromShareableHandle)
(CUmemGenericAllocationHandle *handle, void *osHandle,
CUmemAllocationHandleType shHandleType);
CUresult (CUDAAPI * CuMemSetAccess) (CUdeviceptr ptr, size_t size,
const CUmemAccessDesc *desc, size_t count);
CUresult (CUDAAPI * CuMemGetAccess) (unsigned long long *flags,
const CUmemLocation *location, CUdeviceptr ptr);
CUresult (CUDAAPI * CuMemGetAllocationGranularity) (size_t *granularity,
const CUmemAllocationProp *prop, CUmemAllocationGranularity_flags option);
CUresult (CUDAAPI * CuMemGetAllocationPropertiesFromHandle)
(CUmemAllocationProp *prop, CUmemGenericAllocationHandle handle);
CUresult (CUDAAPI * CuMemMap) (CUdeviceptr ptr, size_t size, size_t offset,
CUmemGenericAllocationHandle handle, unsigned long long flags);
CUresult (CUDAAPI * CuMemUnmap) (CUdeviceptr ptr, size_t size);
CUresult (CUDAAPI * CuMemRetainAllocationHandle)
(CUmemGenericAllocationHandle *handle, void *addr);
} GstNvCodecCudaVTable;
/* *INDENT-ON* */
/* File-scope table of resolved entry points. Zero-initialized, so every
 * function pointer starts out NULL and both status flags start out FALSE. */
static GstNvCodecCudaVTable gst_cuda_vtable = { 0, };
/* Resolves the virtual memory management entry points from @module into
 * gst_cuda_vtable, in declaration order.
 *
 * Lookup stops at the first symbol that cannot be resolved: a warning is
 * logged and the function returns without setting have_virtual_alloc.
 * The flag is set to TRUE only after every entry below has been resolved. */
static void
gst_cuda_load_optional_symbols (GModule * module)
{
  GstNvCodecCudaVTable *vtable = &gst_cuda_vtable;
  /* G_STRINGIFY is used (rather than a plain string literal) so that any
   * macro remapping of the symbol name is applied before stringification */
#define OPTIONAL_ENTRY(name,func) \
    { G_STRINGIFY (name), (gpointer *) &vtable->func }
  const struct
  {
    const gchar *symbol;
    gpointer *slot;
  } entries[] = {
    OPTIONAL_ENTRY (cuMemAddressReserve, CuMemAddressReserve),
    OPTIONAL_ENTRY (cuMemAddressFree, CuMemAddressFree),
    OPTIONAL_ENTRY (cuMemCreate, CuMemCreate),
    OPTIONAL_ENTRY (cuMemRelease, CuMemRelease),
    OPTIONAL_ENTRY (cuMemExportToShareableHandle,
        CuMemExportToShareableHandle),
    OPTIONAL_ENTRY (cuMemImportFromShareableHandle,
        CuMemImportFromShareableHandle),
    OPTIONAL_ENTRY (cuMemSetAccess, CuMemSetAccess),
    OPTIONAL_ENTRY (cuMemGetAccess, CuMemGetAccess),
    OPTIONAL_ENTRY (cuMemGetAllocationGranularity,
        CuMemGetAllocationGranularity),
    OPTIONAL_ENTRY (cuMemGetAllocationPropertiesFromHandle,
        CuMemGetAllocationPropertiesFromHandle),
    OPTIONAL_ENTRY (cuMemMap, CuMemMap),
    OPTIONAL_ENTRY (cuMemUnmap, CuMemUnmap),
    OPTIONAL_ENTRY (cuMemRetainAllocationHandle,
        CuMemRetainAllocationHandle),
  };
#undef OPTIONAL_ENTRY
  guint idx;

  for (idx = 0; idx < G_N_ELEMENTS (entries); idx++) {
    if (!g_module_symbol (module, entries[idx].symbol, entries[idx].slot)) {
      GST_WARNING ("Failed to load '%s', %s", entries[idx].symbol,
          g_module_error ());
      return;
    }
  }

  GST_INFO ("Virtual alloc symbols are loaded");
  vtable->have_virtual_alloc = TRUE;
}
static void
gst_cuda_load_library_once_func (void)
{
@ -205,6 +284,18 @@ gst_cuda_load_library_once_func (void)
LOAD_SYMBOL (cuTexObjectDestroy, CuTexObjectDestroy);
LOAD_SYMBOL (cuLaunchKernel, CuLaunchKernel);
LOAD_SYMBOL (cuEventCreate, CuEventCreate);
LOAD_SYMBOL (cuEventDestroy, CuEventDestroy);
LOAD_SYMBOL (cuEventRecord, CuEventRecord);
LOAD_SYMBOL (cuEventSynchronize, CuEventSynchronize);
LOAD_SYMBOL (cuIpcGetEventHandle, CuIpcGetEventHandle);
LOAD_SYMBOL (cuIpcOpenEventHandle, CuIpcOpenEventHandle);
LOAD_SYMBOL (cuIpcGetMemHandle, CuIpcGetMemHandle);
LOAD_SYMBOL (cuIpcOpenMemHandle, CuIpcOpenMemHandle);
LOAD_SYMBOL (cuIpcCloseMemHandle, CuIpcCloseMemHandle);
/* cudaGL.h */
LOAD_SYMBOL (cuGraphicsGLRegisterImage, CuGraphicsGLRegisterImage);
LOAD_SYMBOL (cuGraphicsGLRegisterBuffer, CuGraphicsGLRegisterBuffer);
@ -219,6 +310,8 @@ gst_cuda_load_library_once_func (void)
#endif
vtable->loaded = TRUE;
gst_cuda_load_optional_symbols (module);
}
/**
@ -240,6 +333,14 @@ gst_cuda_load_library (void)
return gst_cuda_vtable.loaded;
}
gboolean
gst_cuda_virtual_memory_symbol_loaded (void)
{
gst_cuda_load_library ();
return gst_cuda_vtable.have_virtual_alloc;
}
CUresult CUDAAPI
CuInit (unsigned int Flags)
{
@ -557,6 +658,211 @@ CuLaunchKernel (CUfunction f, unsigned int gridDimX,
extra);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuEventCreate and
 * returns its result. The symbol is resolved via LOAD_SYMBOL, so a NULL
 * pointer here is treated as a programming error (assert). */
CUresult CUDAAPI
CuEventCreate (CUevent * phEvent, unsigned int Flags)
{
g_assert (gst_cuda_vtable.CuEventCreate);
return gst_cuda_vtable.CuEventCreate (phEvent, Flags);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuEventDestroy and
 * returns its result. Asserts that the pointer has been resolved. */
CUresult CUDAAPI
CuEventDestroy (CUevent hEvent)
{
g_assert (gst_cuda_vtable.CuEventDestroy);
return gst_cuda_vtable.CuEventDestroy (hEvent);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuEventRecord and
 * returns its result. Asserts that the pointer has been resolved. */
CUresult CUDAAPI
CuEventRecord (CUevent hEvent, CUstream hStream)
{
g_assert (gst_cuda_vtable.CuEventRecord);
return gst_cuda_vtable.CuEventRecord (hEvent, hStream);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuEventSynchronize
 * and returns its result. Asserts that the pointer has been resolved. */
CUresult CUDAAPI
CuEventSynchronize (CUevent hEvent)
{
g_assert (gst_cuda_vtable.CuEventSynchronize);
return gst_cuda_vtable.CuEventSynchronize (hEvent);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuIpcGetEventHandle
 * and returns its result. Asserts that the pointer has been resolved. */
CUresult CUDAAPI
CuIpcGetEventHandle (CUipcEventHandle * pHandle, CUevent event)
{
g_assert (gst_cuda_vtable.CuIpcGetEventHandle);
return gst_cuda_vtable.CuIpcGetEventHandle (pHandle, event);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuIpcOpenEventHandle
 * and returns its result. Asserts that the pointer has been resolved. */
CUresult CUDAAPI
CuIpcOpenEventHandle (CUevent * phEvent, CUipcEventHandle handle)
{
g_assert (gst_cuda_vtable.CuIpcOpenEventHandle);
return gst_cuda_vtable.CuIpcOpenEventHandle (phEvent, handle);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuIpcGetMemHandle
 * and returns its result. Asserts that the pointer has been resolved. */
CUresult CUDAAPI
CuIpcGetMemHandle (CUipcMemHandle * pHandle, CUdeviceptr dptr)
{
g_assert (gst_cuda_vtable.CuIpcGetMemHandle);
return gst_cuda_vtable.CuIpcGetMemHandle (pHandle, dptr);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuIpcOpenMemHandle
 * and returns its result. Asserts that the pointer has been resolved.
 * NOTE(review): Flags presumably takes CUipcMem_flags values — confirm. */
CUresult CUDAAPI
CuIpcOpenMemHandle (CUdeviceptr * pdptr, CUipcMemHandle handle,
unsigned int Flags)
{
g_assert (gst_cuda_vtable.CuIpcOpenMemHandle);
return gst_cuda_vtable.CuIpcOpenMemHandle (pdptr, handle, Flags);
}
/* Forwards to the entry point stored in gst_cuda_vtable.CuIpcCloseMemHandle
 * and returns its result. Asserts that the pointer has been resolved. */
CUresult CUDAAPI
CuIpcCloseMemHandle (CUdeviceptr dptr)
{
g_assert (gst_cuda_vtable.CuIpcCloseMemHandle);
return gst_cuda_vtable.CuIpcCloseMemHandle (dptr);
}
/* Optional-symbol wrapper: forwards all arguments unchanged to the resolved
 * CuMemAddressReserve entry point and returns its result, or returns
 * CUDA_ERROR_NOT_SUPPORTED when the entry point was not resolved. */
CUresult CUDAAPI
CuMemAddressReserve (CUdeviceptr * ptr, size_t size, size_t alignment,
    CUdeviceptr addr, unsigned long long flags)
{
  if (gst_cuda_vtable.CuMemAddressReserve != NULL) {
    return gst_cuda_vtable.CuMemAddressReserve (ptr, size, alignment, addr,
        flags);
  }

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved CuMemAddressFree entry
 * point and returns its result, or returns CUDA_ERROR_NOT_SUPPORTED when the
 * entry point was not resolved. */
CUresult CUDAAPI
CuMemAddressFree (CUdeviceptr ptr, size_t size)
{
  if (gst_cuda_vtable.CuMemAddressFree != NULL)
    return gst_cuda_vtable.CuMemAddressFree (ptr, size);

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved CuMemCreate entry point
 * and returns its result, or returns CUDA_ERROR_NOT_SUPPORTED when the entry
 * point was not resolved. */
CUresult CUDAAPI
CuMemCreate (CUmemGenericAllocationHandle * handle, size_t size,
    const CUmemAllocationProp * prop, unsigned long long flags)
{
  if (gst_cuda_vtable.CuMemCreate != NULL)
    return gst_cuda_vtable.CuMemCreate (handle, size, prop, flags);

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved CuMemRelease entry point
 * and returns its result, or returns CUDA_ERROR_NOT_SUPPORTED when the entry
 * point was not resolved. */
CUresult CUDAAPI
CuMemRelease (CUmemGenericAllocationHandle handle)
{
  if (gst_cuda_vtable.CuMemRelease != NULL)
    return gst_cuda_vtable.CuMemRelease (handle);

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved
 * CuMemExportToShareableHandle entry point and returns its result, or
 * returns CUDA_ERROR_NOT_SUPPORTED when the entry point was not resolved. */
CUresult CUDAAPI
CuMemExportToShareableHandle (void *shareableHandle,
    CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType,
    unsigned long long flags)
{
  if (gst_cuda_vtable.CuMemExportToShareableHandle != NULL) {
    return gst_cuda_vtable.CuMemExportToShareableHandle (shareableHandle,
        handle, handleType, flags);
  }

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved
 * CuMemImportFromShareableHandle entry point and returns its result, or
 * returns CUDA_ERROR_NOT_SUPPORTED when the entry point was not resolved. */
CUresult CUDAAPI
CuMemImportFromShareableHandle (CUmemGenericAllocationHandle * handle,
    void *osHandle, CUmemAllocationHandleType shHandleType)
{
  if (gst_cuda_vtable.CuMemImportFromShareableHandle != NULL) {
    return gst_cuda_vtable.CuMemImportFromShareableHandle (handle, osHandle,
        shHandleType);
  }

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved CuMemSetAccess entry
 * point and returns its result, or returns CUDA_ERROR_NOT_SUPPORTED when the
 * entry point was not resolved. */
CUresult CUDAAPI
CuMemSetAccess (CUdeviceptr ptr, size_t size, const CUmemAccessDesc * desc,
    size_t count)
{
  if (gst_cuda_vtable.CuMemSetAccess != NULL)
    return gst_cuda_vtable.CuMemSetAccess (ptr, size, desc, count);

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved CuMemGetAccess entry
 * point and returns its result, or returns CUDA_ERROR_NOT_SUPPORTED when the
 * entry point was not resolved. */
CUresult CUDAAPI
CuMemGetAccess (unsigned long long *flags, const CUmemLocation * location,
    CUdeviceptr ptr)
{
  if (gst_cuda_vtable.CuMemGetAccess != NULL)
    return gst_cuda_vtable.CuMemGetAccess (flags, location, ptr);

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved
 * CuMemGetAllocationGranularity entry point and returns its result, or
 * returns CUDA_ERROR_NOT_SUPPORTED when the entry point was not resolved. */
CUresult CUDAAPI
CuMemGetAllocationGranularity (size_t *granularity,
    const CUmemAllocationProp * prop, CUmemAllocationGranularity_flags option)
{
  if (gst_cuda_vtable.CuMemGetAllocationGranularity != NULL) {
    return gst_cuda_vtable.CuMemGetAllocationGranularity (granularity, prop,
        option);
  }

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved
 * CuMemGetAllocationPropertiesFromHandle entry point and returns its result,
 * or returns CUDA_ERROR_NOT_SUPPORTED when the entry point was not resolved. */
CUresult CUDAAPI
CuMemGetAllocationPropertiesFromHandle (CUmemAllocationProp * prop,
    CUmemGenericAllocationHandle handle)
{
  if (gst_cuda_vtable.CuMemGetAllocationPropertiesFromHandle != NULL) {
    return gst_cuda_vtable.CuMemGetAllocationPropertiesFromHandle (prop,
        handle);
  }

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved CuMemMap entry point and
 * returns its result, or returns CUDA_ERROR_NOT_SUPPORTED when the entry
 * point was not resolved. */
CUresult CUDAAPI
CuMemMap (CUdeviceptr ptr, size_t size, size_t offset,
    CUmemGenericAllocationHandle handle, unsigned long long flags)
{
  if (gst_cuda_vtable.CuMemMap != NULL)
    return gst_cuda_vtable.CuMemMap (ptr, size, offset, handle, flags);

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved CuMemUnmap entry point
 * and returns its result, or returns CUDA_ERROR_NOT_SUPPORTED when the entry
 * point was not resolved. */
CUresult CUDAAPI
CuMemUnmap (CUdeviceptr ptr, size_t size)
{
  if (gst_cuda_vtable.CuMemUnmap != NULL)
    return gst_cuda_vtable.CuMemUnmap (ptr, size);

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* Optional-symbol wrapper: forwards to the resolved
 * CuMemRetainAllocationHandle entry point and returns its result, or returns
 * CUDA_ERROR_NOT_SUPPORTED when the entry point was not resolved. */
CUresult CUDAAPI
CuMemRetainAllocationHandle (CUmemGenericAllocationHandle * handle, void *addr)
{
  if (gst_cuda_vtable.CuMemRetainAllocationHandle != NULL)
    return gst_cuda_vtable.CuMemRetainAllocationHandle (handle, addr);

  return CUDA_ERROR_NOT_SUPPORTED;
}
/* cudaGL.h */
CUresult CUDAAPI
CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,

View file

@ -30,6 +30,7 @@ typedef gpointer CUarray;
typedef gpointer CUmodule;
typedef gpointer CUfunction;
typedef gpointer CUmipmappedArray;
/* Opaque event handle used by the CuEvent* and CuIpc*EventHandle wrappers */
typedef gpointer CUevent;
typedef guint64 CUtexObject;
typedef guintptr CUdeviceptr;
@ -38,6 +39,8 @@ typedef gint CUdevice;
/* Result codes declared by this stub header.
 * NOTE(review): numeric values presumably mirror NVIDIA's cuda.h and must not
 * be changed independently — confirm against the official header. */
typedef enum
{
CUDA_SUCCESS = 0,
CUDA_ERROR_ALREADY_MAPPED = 208,
/* Returned by the CuMem* wrappers when their optional symbol is missing */
CUDA_ERROR_NOT_SUPPORTED = 801,
} CUresult;
typedef enum
@ -51,6 +54,7 @@ typedef enum
/* Device attribute identifiers declared by this stub header.
 * NOTE(review): numeric values presumably mirror NVIDIA's cuda.h — confirm
 * against the official header before adding or changing entries. */
typedef enum
{
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
} CUdevice_attribute;
@ -110,6 +114,14 @@ typedef enum
CU_RES_VIEW_FORMAT_NONE = 0,
} CUresourceViewFormat;
/* Event flags; every non-default value occupies its own bit.
 * NOTE(review): presumably intended for the Flags argument of CuEventCreate
 * and mirrored from cuda.h — confirm. */
typedef enum
{
CU_EVENT_DEFAULT = 0x0,
CU_EVENT_BLOCKING_SYNC = 0x1,
CU_EVENT_DISABLE_TIMING = 0x2,
CU_EVENT_INTERPROCESS = 0x4,
} CUevent_flags;
typedef struct
{
gsize srcXInBytes;
@ -192,6 +204,87 @@ typedef struct
guint reserved[16];
} CUDA_RESOURCE_VIEW_DESC;
/* IPC memory flags.
 * NOTE(review): presumably intended for the Flags argument of
 * CuIpcOpenMemHandle and mirrored from cuda.h — confirm. */
typedef enum
{
CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1
} CUipcMem_flags;
/* Size in bytes of the opaque payload carried by the IPC handle structs */
#define CU_IPC_HANDLE_SIZE 64
/* Opaque IPC memory handle, passed by value to CuIpcOpenMemHandle and filled
 * in through a pointer by CuIpcGetMemHandle.
 * NOTE(review): layout presumably must match cuda.h — confirm. */
typedef struct
{
char reserved[CU_IPC_HANDLE_SIZE];
} CUipcMemHandle;
/* Opaque IPC event handle, passed by value to CuIpcOpenEventHandle and
 * filled in through a pointer by CuIpcGetEventHandle.
 * NOTE(review): layout presumably must match cuda.h — confirm. */
typedef struct
{
char reserved[CU_IPC_HANDLE_SIZE];
} CUipcEventHandle;
/* Integer handle type used by CuMemCreate, CuMemRelease, CuMemMap and the
 * shareable-handle import/export wrappers */
typedef unsigned long long CUmemGenericAllocationHandle;
/* Shareable handle kinds, taken by CuMemExportToShareableHandle and
 * CuMemImportFromShareableHandle and stored in
 * CUmemAllocationProp::requestedHandleTypes.
 * NOTE(review): values presumably mirror cuda.h; the _MAX entry looks like a
 * sentinel forcing a 32-bit enum range — confirm. */
typedef enum
{
CU_MEM_HANDLE_TYPE_NONE = 0x0,
CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
CU_MEM_HANDLE_TYPE_WIN32 = 0x2,
CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4,
CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF
} CUmemAllocationHandleType;
/* Access protection flags stored in CUmemAccessDesc::flags.
 * NOTE(review): values presumably mirror cuda.h; the _MAX entry looks like a
 * sentinel forcing a 32-bit enum range — confirm. */
typedef enum
{
CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0,
CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1,
CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3,
CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF
} CUmemAccess_flags;
/* Location kinds stored in CUmemLocation::type.
 * NOTE(review): values presumably mirror cuda.h — confirm. */
typedef enum
{
CU_MEM_LOCATION_TYPE_INVALID = 0x0,
CU_MEM_LOCATION_TYPE_DEVICE = 0x1,
CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF
} CUmemLocationType;
/* Allocation kinds stored in CUmemAllocationProp::type.
 * NOTE(review): values presumably mirror cuda.h — confirm. */
typedef enum CUmemAllocationType_enum {
CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
CU_MEM_ALLOCATION_TYPE_PINNED = 0x1,
CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF
} CUmemAllocationType;
/* Selector passed as the `option` argument of CuMemGetAllocationGranularity.
 * NOTE(review): values presumably mirror cuda.h — confirm. */
typedef enum
{
CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0,
CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1
} CUmemAllocationGranularity_flags;
/* Memory location descriptor, embedded in CUmemAllocationProp and
 * CUmemAccessDesc and taken by pointer by CuMemGetAccess.
 * NOTE(review): layout presumably must match cuda.h — confirm. */
typedef struct
{
CUmemLocationType type;
/* NOTE(review): presumably a device ordinal when type is DEVICE — confirm */
int id;
} CUmemLocation;
/* Allocation properties, taken by pointer by CuMemCreate and
 * CuMemGetAllocationGranularity and filled in by
 * CuMemGetAllocationPropertiesFromHandle.
 * NOTE(review): field order and sizes presumably must match cuda.h exactly,
 * since the struct is handed to the driver — confirm before editing. */
typedef struct
{
CUmemAllocationType type;
CUmemAllocationHandleType requestedHandleTypes;
CUmemLocation location;
void *win32HandleMetaData;
struct
{
unsigned char compressionType;
unsigned char gpuDirectRDMACapable;
unsigned short usage;
unsigned char reserved[4];
} allocFlags;
} CUmemAllocationProp;
/* Access descriptor (location + protection flags); CuMemSetAccess takes a
 * pointer to these together with a count.
 * NOTE(review): layout presumably must match cuda.h — confirm. */
typedef struct
{
CUmemLocation location;
CUmemAccess_flags flags;
} CUmemAccessDesc;
#define CUDA_VERSION 10000
#ifdef _WIN32
@ -214,6 +307,8 @@ typedef struct
#define cuMemcpy2DAsync cuMemcpy2DAsync_v2
#define cuMemFree cuMemFree_v2
/* Remaps the unversioned name to the _v2 symbol. Because the loader builds
 * symbol names with G_STRINGIFY (which expands macros first), this
 * presumably makes it look up "cuEventDestroy_v2" — confirm in LOAD_SYMBOL. */
#define cuEventDestroy cuEventDestroy_v2
#define CU_TRSF_READ_AS_INTEGER 1
G_END_DECLS