From cee01d7fbdaa906183ac95ddaf57abae397d300d Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Tue, 21 May 2024 18:09:12 +0900 Subject: [PATCH] cuda: Load 1D memcpy method symbols Part-of: --- .../gst-libs/gst/cuda/cuda-gst.h | 33 +++++++++ .../gst-libs/gst/cuda/gstcudaloader.cpp | 72 +++++++++++++++++++ .../gst-libs/gst/cuda/stub/cuda.h | 6 ++ 3 files changed, 111 insertions(+) diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h index 6a385e8b6c..825650939c 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h @@ -94,6 +94,39 @@ CUresult CUDAAPI CuMemcpy2D (const CUDA_MEMCPY2D * pCopy); GST_CUDA_API CUresult CUDAAPI CuMemcpy2DAsync (const CUDA_MEMCPY2D *pCopy, CUstream hStream); +GST_CUDA_API +CUresult CUDAAPI CuMemcpyDtoD (CUdeviceptr dstDevice, + CUdeviceptr srcDevice, + size_t ByteCount); + +GST_CUDA_API +CUresult CUDAAPI CuMemcpyDtoDAsync (CUdeviceptr dstDevice, + CUdeviceptr srcDevice, + size_t ByteCount, + CUstream hStream); + +GST_CUDA_API +CUresult CUDAAPI CuMemcpyDtoH (void *dstHost, + CUdeviceptr srcDevice, + size_t ByteCount); + +GST_CUDA_API +CUresult CUDAAPI CuMemcpyDtoHAsync (void *dstHost, + CUdeviceptr srcDevice, + size_t ByteCount, + CUstream hStream); + +GST_CUDA_API +CUresult CUDAAPI CuMemcpyHtoD (CUdeviceptr dstDevice, + const void *srcHost, + size_t ByteCount); + +GST_CUDA_API +CUresult CUDAAPI CuMemcpyHtoDAsync (CUdeviceptr dstDevice, + const void *srcHost, + size_t ByteCount, + CUstream hStream); + GST_CUDA_API CUresult CUDAAPI CuMemFree (CUdeviceptr dptr); diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp index 11718b8dcc..21501398c9 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp @@ -91,6 +91,18 
@@ typedef struct _GstNvCodecCudaVTable CUresult (CUDAAPI * CuMemcpy2D) (const CUDA_MEMCPY2D * pCopy); CUresult (CUDAAPI * CuMemcpy2DAsync) (const CUDA_MEMCPY2D * pCopy, CUstream hStream); + CUresult (CUDAAPI *CuMemcpyDtoD) (CUdeviceptr dstDevice, + CUdeviceptr srcDevice, size_t ByteCount); + CUresult (CUDAAPI *CuMemcpyDtoDAsync) (CUdeviceptr dstDevice, + CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream); + CUresult (CUDAAPI *CuMemcpyDtoH) (void *dstHost, CUdeviceptr srcDevice, + size_t ByteCount); + CUresult (CUDAAPI *CuMemcpyDtoHAsync) (void *dstHost, CUdeviceptr srcDevice, + size_t ByteCount, CUstream hStream); + CUresult (CUDAAPI *CuMemcpyHtoD) (CUdeviceptr dstDevice, const void *srcHost, + size_t ByteCount); + CUresult (CUDAAPI *CuMemcpyHtoDAsync) (CUdeviceptr dstDevice, + const void *srcHost, size_t ByteCount, CUstream hStream); CUresult (CUDAAPI * CuMemFree) (CUdeviceptr dptr); CUresult (CUDAAPI * CuMemFreeHost) (void *p); @@ -261,6 +273,12 @@ gst_cuda_load_library_once_func (void) LOAD_SYMBOL (cuMemAllocHost, CuMemAllocHost); LOAD_SYMBOL (cuMemcpy2D, CuMemcpy2D); LOAD_SYMBOL (cuMemcpy2DAsync, CuMemcpy2DAsync); + LOAD_SYMBOL (cuMemcpyDtoD, CuMemcpyDtoD); + LOAD_SYMBOL (cuMemcpyDtoDAsync, CuMemcpyDtoDAsync); + LOAD_SYMBOL (cuMemcpyDtoH, CuMemcpyDtoH); + LOAD_SYMBOL (cuMemcpyDtoHAsync, CuMemcpyDtoHAsync); + LOAD_SYMBOL (cuMemcpyHtoD, CuMemcpyHtoD); + LOAD_SYMBOL (cuMemcpyHtoDAsync, CuMemcpyHtoDAsync); LOAD_SYMBOL (cuMemFree, CuMemFree); LOAD_SYMBOL (cuMemFreeHost, CuMemFreeHost); @@ -513,6 +531,60 @@ CuMemcpy2DAsync (const CUDA_MEMCPY2D * pCopy, CUstream hStream) return gst_cuda_vtable.CuMemcpy2DAsync (pCopy, hStream); } +CUresult CUDAAPI +CuMemcpyDtoD (CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount) +{ + g_assert (gst_cuda_vtable.CuMemcpyDtoD != nullptr); + + return gst_cuda_vtable.CuMemcpyDtoD (dstDevice, srcDevice, ByteCount); +} + +CUresult CUDAAPI +CuMemcpyDtoDAsync (CUdeviceptr dstDevice, CUdeviceptr srcDevice, + size_t 
ByteCount, CUstream hStream) +{ + g_assert (gst_cuda_vtable.CuMemcpyDtoDAsync != nullptr); + + return gst_cuda_vtable.CuMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, + hStream); +} + +CUresult CUDAAPI +CuMemcpyDtoH (void *dstHost, CUdeviceptr srcDevice, size_t ByteCount) +{ + g_assert (gst_cuda_vtable.CuMemcpyDtoH != nullptr); + + return gst_cuda_vtable.CuMemcpyDtoH (dstHost, srcDevice, ByteCount); +} + +CUresult CUDAAPI +CuMemcpyDtoHAsync (void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, + CUstream hStream) +{ + g_assert (gst_cuda_vtable.CuMemcpyDtoHAsync != nullptr); + + return gst_cuda_vtable.CuMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, + hStream); +} + +CUresult CUDAAPI +CuMemcpyHtoD (CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount) +{ + g_assert (gst_cuda_vtable.CuMemcpyHtoD != nullptr); + + return gst_cuda_vtable.CuMemcpyHtoD (dstDevice, srcHost, ByteCount); +} + +CUresult CUDAAPI +CuMemcpyHtoDAsync (CUdeviceptr dstDevice, const void *srcHost, + size_t ByteCount, CUstream hStream) +{ + g_assert (gst_cuda_vtable.CuMemcpyHtoDAsync != nullptr); + + return gst_cuda_vtable.CuMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, + hStream); +} + CUresult CUDAAPI CuMemFree (CUdeviceptr dptr) { diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cuda.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cuda.h index fb07171180..0382301ad9 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cuda.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cuda.h @@ -314,6 +314,12 @@ typedef struct #define cuMemAllocHost cuMemAllocHost_v2 #define cuMemcpy2D cuMemcpy2D_v2 #define cuMemcpy2DAsync cuMemcpy2DAsync_v2 +#define cuMemcpyDtoD cuMemcpyDtoD_v2 +#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2 +#define cuMemcpyDtoH cuMemcpyDtoH_v2 +#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2 +#define cuMemcpyHtoD cuMemcpyHtoD_v2 +#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2 #define cuMemFree cuMemFree_v2 #define cuEventDestroy
cuEventDestroy_v2