cuda/nvcodec: Add support for importing and producing embedded NVMM memory

As produced on the Nvidia Jetson series of devices.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7274>
This commit is contained in:
Matthew Waters 2024-07-29 22:49:03 +10:00 committed by GStreamer Marge Bot
parent adcc6c8d38
commit 8dac91537d
10 changed files with 355 additions and 29 deletions

View file

@ -668,6 +668,9 @@ Free with gst_cuda_graphics_resource_free</doc>
</member> </member>
<member name="d3d11_resource" value="2" c:identifier="GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE"> <member name="d3d11_resource" value="2" c:identifier="GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE">
</member> </member>
<member name="egl_resource" value="3" c:identifier="GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE" version="1.26">
<doc xml:space="preserve" filename="../subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.h">Resource represents a EGL resource.</doc>
</member>
</enumeration> </enumeration>
<record name="CudaMemory" c:type="GstCudaMemory" version="1.22"> <record name="CudaMemory" c:type="GstCudaMemory" version="1.22">
<source-position filename="../subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.h"/> <source-position filename="../subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.h"/>

View file

@ -42,6 +42,9 @@ CUresult CUDAAPI CuCtxPopCurrent (CUcontext * pctx);
GST_CUDA_API GST_CUDA_API
CUresult CUDAAPI CuCtxPushCurrent (CUcontext ctx); CUresult CUDAAPI CuCtxPushCurrent (CUcontext ctx);
GST_CUDA_API
CUresult CUDAAPI CuCtxSynchronize (void);
GST_CUDA_API GST_CUDA_API
CUresult CUDAAPI CuCtxEnablePeerAccess (CUcontext peerContext, CUresult CUDAAPI CuCtxEnablePeerAccess (CUcontext peerContext,
unsigned int Flags); unsigned int Flags);
@ -326,7 +329,6 @@ CUresult CUDAAPI CuGLGetDevices (unsigned int * pCudaDeviceCount,
unsigned int cudaDeviceCount, unsigned int cudaDeviceCount,
CUGLDeviceList deviceList); CUGLDeviceList deviceList);
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
/* cudaD3D11.h */ /* cudaD3D11.h */
GST_CUDA_API GST_CUDA_API

View file

@ -71,5 +71,24 @@ G_END_DECLS
#define GST_CUDA_CALL_ONCE_END ) #define GST_CUDA_CALL_ONCE_END )
/* cudaEGL.h */
/* Declarations of the CUDA/EGL interop wrappers. They are only visible when
 * both HAVE_CUDA_NVMM_JETSON and HAVE_CUDA_GST_GL are defined and
 * GST_GL_HAVE_PLATFORM_EGL (from gstglconfig.h) evaluates to true. */
#if defined(HAVE_CUDA_NVMM_JETSON) && defined(HAVE_CUDA_GST_GL)
#include <gst/gl/gstglconfig.h>
#if GST_GL_HAVE_PLATFORM_EGL
#include <cudaEGL.h>
#include <gst/gl/egl/egl.h>
GST_CUDA_API
CUresult CUDAAPI CuGraphicsEGLRegisterImage (CUgraphicsResource *pCudaResource,
EGLImageKHR image,
unsigned int flags);
GST_CUDA_API
CUresult CUDAAPI CuGraphicsResourceGetMappedEglFrame(CUeglFrame* eglFrame,
CUgraphicsResource resource,
unsigned int index,
unsigned int mipLevel);
#endif /* GST_GL_HAVE_PLATFORM_EGL */
#endif /* HAVE_CUDA_NVMM_JETSON && HAVE_CUDA_GST_GL */
#endif /* __cplusplus */ #endif /* __cplusplus */

View file

@ -26,6 +26,13 @@
#include <gmodule.h> #include <gmodule.h>
#include "gstcuda-private.h" #include "gstcuda-private.h"
#ifdef HAVE_CUDA_GST_GL
#include <gst/gl/gstglconfig.h>
#if GST_GL_HAVE_PLATFORM_EGL
#include <gst/gl/egl/egl.h>
#endif /* GST_GL_HAVE_PLATFORM_EGL */
#endif /* HAVE_CUDA_GST_GL */
GST_DEBUG_CATEGORY (gst_cudaloader_debug); GST_DEBUG_CATEGORY (gst_cudaloader_debug);
#define GST_CAT_DEFAULT gst_cudaloader_debug #define GST_CAT_DEFAULT gst_cudaloader_debug
@ -65,6 +72,7 @@ typedef struct _GstNvCodecCudaVTable
CUresult (CUDAAPI * CuCtxDestroy) (CUcontext ctx); CUresult (CUDAAPI * CuCtxDestroy) (CUcontext ctx);
CUresult (CUDAAPI * CuCtxPopCurrent) (CUcontext * pctx); CUresult (CUDAAPI * CuCtxPopCurrent) (CUcontext * pctx);
CUresult (CUDAAPI * CuCtxPushCurrent) (CUcontext ctx); CUresult (CUDAAPI * CuCtxPushCurrent) (CUcontext ctx);
CUresult (CUDAAPI * CuCtxSynchronize) (void);
CUresult (CUDAAPI * CuCtxEnablePeerAccess) (CUcontext peerContext, CUresult (CUDAAPI * CuCtxEnablePeerAccess) (CUcontext peerContext,
unsigned int Flags); unsigned int Flags);
@ -135,7 +143,6 @@ typedef struct _GstNvCodecCudaVTable
unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams,
void **extra); void **extra);
CUresult (CUDAAPI * CuGraphicsGLRegisterImage) (CUgraphicsResource * CUresult (CUDAAPI * CuGraphicsGLRegisterImage) (CUgraphicsResource *
pCudaResource, unsigned int image, unsigned int target, pCudaResource, unsigned int image, unsigned int target,
unsigned int Flags); unsigned int Flags);
@ -145,6 +152,14 @@ typedef struct _GstNvCodecCudaVTable
CUdevice * pCudaDevices, unsigned int cudaDeviceCount, CUdevice * pCudaDevices, unsigned int cudaDeviceCount,
CUGLDeviceList deviceList); CUGLDeviceList deviceList);
#if defined(HAVE_CUDA_NVMM_JETSON) && GST_GL_HAVE_PLATFORM_EGL
CUresult (CUDAAPI* CuGraphicsEGLRegisterImage) (CUgraphicsResource *
pCudaResource, EGLImageKHR image, unsigned int flags);
CUresult (CUDAAPI* CuGraphicsResourceGetMappedEglFrame)
(CUeglFrame* eglFrame, CUgraphicsResource resource, unsigned int index,
unsigned int mipLevel);
#endif
CUresult (CUDAAPI * CuEventCreate) (CUevent *phEvent, unsigned int Flags); CUresult (CUDAAPI * CuEventCreate) (CUevent *phEvent, unsigned int Flags);
CUresult (CUDAAPI * CuEventDestroy) (CUevent hEvent); CUresult (CUDAAPI * CuEventDestroy) (CUevent hEvent);
CUresult (CUDAAPI * CuEventRecord) (CUevent hEvent, CUstream hStream); CUresult (CUDAAPI * CuEventRecord) (CUevent hEvent, CUstream hStream);
@ -255,6 +270,7 @@ gst_cuda_load_library_once_func (void)
LOAD_SYMBOL (cuCtxCreate, CuCtxCreate); LOAD_SYMBOL (cuCtxCreate, CuCtxCreate);
LOAD_SYMBOL (cuCtxDestroy, CuCtxDestroy); LOAD_SYMBOL (cuCtxDestroy, CuCtxDestroy);
LOAD_SYMBOL (cuCtxPopCurrent, CuCtxPopCurrent); LOAD_SYMBOL (cuCtxPopCurrent, CuCtxPopCurrent);
LOAD_SYMBOL (cuCtxSynchronize, CuCtxSynchronize);
LOAD_SYMBOL (cuCtxPushCurrent, CuCtxPushCurrent); LOAD_SYMBOL (cuCtxPushCurrent, CuCtxPushCurrent);
LOAD_SYMBOL (cuCtxEnablePeerAccess, CuCtxEnablePeerAccess); LOAD_SYMBOL (cuCtxEnablePeerAccess, CuCtxEnablePeerAccess);
LOAD_SYMBOL (cuCtxDisablePeerAccess, CuCtxDisablePeerAccess); LOAD_SYMBOL (cuCtxDisablePeerAccess, CuCtxDisablePeerAccess);
@ -319,6 +335,13 @@ gst_cuda_load_library_once_func (void)
LOAD_SYMBOL (cuGraphicsGLRegisterBuffer, CuGraphicsGLRegisterBuffer); LOAD_SYMBOL (cuGraphicsGLRegisterBuffer, CuGraphicsGLRegisterBuffer);
LOAD_SYMBOL (cuGLGetDevices, CuGLGetDevices); LOAD_SYMBOL (cuGLGetDevices, CuGLGetDevices);
/* cudaEGL.h */
#if defined(HAVE_CUDA_NVMM_JETSON) && GST_GL_HAVE_PLATFORM_EGL
LOAD_SYMBOL (cuGraphicsEGLRegisterImage, CuGraphicsEGLRegisterImage);
LOAD_SYMBOL (cuGraphicsResourceGetMappedEglFrame,
CuGraphicsResourceGetMappedEglFrame);
#endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
/* cudaD3D11.h */ /* cudaD3D11.h */
LOAD_SYMBOL (cuGraphicsD3D11RegisterResource, LOAD_SYMBOL (cuGraphicsD3D11RegisterResource,
@ -415,6 +438,14 @@ CuCtxPushCurrent (CUcontext ctx)
return gst_cuda_vtable.CuCtxPushCurrent (ctx); return gst_cuda_vtable.CuCtxPushCurrent (ctx);
} }
/* Wrapper for the cuCtxSynchronize entry stored in gst_cuda_vtable: asserts
 * that the function pointer is non-null, calls it, and returns its CUresult
 * unchanged. */
CUresult CUDAAPI
CuCtxSynchronize (void)
{
g_assert (gst_cuda_vtable.CuCtxSynchronize != nullptr);
return gst_cuda_vtable.CuCtxSynchronize ();
}
CUresult CUDAAPI CUresult CUDAAPI
CuCtxEnablePeerAccess (CUcontext peerContext, unsigned int Flags) CuCtxEnablePeerAccess (CUcontext peerContext, unsigned int Flags)
{ {
@ -966,6 +997,29 @@ CuGLGetDevices (unsigned int *pCudaDeviceCount, CUdevice * pCudaDevices,
cudaDeviceCount, deviceList); cudaDeviceCount, deviceList);
} }
/* cudaEGL.h */
#if defined(HAVE_CUDA_NVMM_JETSON) && GST_GL_HAVE_PLATFORM_EGL
/* Wrapper for the cuGraphicsEGLRegisterImage entry stored in gst_cuda_vtable:
 * asserts that the function pointer is non-null, then forwards
 * @pCudaResource, @image and @Flags to it and returns its CUresult
 * unchanged. */
CUresult CUDAAPI
CuGraphicsEGLRegisterImage (CUgraphicsResource * pCudaResource,
EGLImageKHR image, unsigned int Flags)
{
g_assert (gst_cuda_vtable.CuGraphicsEGLRegisterImage != nullptr);
return gst_cuda_vtable.CuGraphicsEGLRegisterImage (pCudaResource, image,
Flags);
}
/* Wrapper for the cuGraphicsResourceGetMappedEglFrame entry stored in
 * gst_cuda_vtable: asserts that the function pointer is non-null, then
 * forwards @eglFrame, @resource, @index and @mipLevel to it and returns its
 * CUresult unchanged. */
CUresult CUDAAPI
CuGraphicsResourceGetMappedEglFrame (CUeglFrame * eglFrame,
CUgraphicsResource resource, unsigned int index, unsigned int mipLevel)
{
g_assert (gst_cuda_vtable.CuGraphicsResourceGetMappedEglFrame != nullptr);
return gst_cuda_vtable.CuGraphicsResourceGetMappedEglFrame (eglFrame,
resource, index, mipLevel);
}
#endif
/* cudaD3D11.h */ /* cudaD3D11.h */
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
CUresult CUDAAPI CUresult CUDAAPI

View file

@ -32,6 +32,9 @@
#ifdef HAVE_CUDA_GST_GL #ifdef HAVE_CUDA_GST_GL
#include <gst/gl/gl.h> #include <gst/gl/gl.h>
#include <gst/gl/gstglfuncs.h> #include <gst/gl/gstglfuncs.h>
#if GST_GL_HAVE_PLATFORM_EGL
#include <gst/gl/egl/egl.h>
#endif
#endif #endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
@ -678,6 +681,8 @@ gst_cuda_graphics_resource_free (GstCudaGraphicsResource * resource)
gst_gl_context_thread_add ((GstGLContext *) resource->graphics_context, gst_gl_context_thread_add ((GstGLContext *) resource->graphics_context,
(GstGLContextThreadFunc) unregister_resource_from_gl_thread, (GstGLContextThreadFunc) unregister_resource_from_gl_thread,
resource); resource);
} else if (resource->type == GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE) {
gst_cuda_graphics_resource_unregister (resource);
} else } else
#endif #endif
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
@ -780,7 +785,8 @@ static gboolean
map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info, map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info,
GstCudaBufferCopyType copy_type, GstVideoFrame * frame, GstCudaBufferCopyType copy_type, GstVideoFrame * frame,
GstMapInfo * map_info, gboolean is_src, GstMapInfo * map_info, gboolean is_src,
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES]) CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES],
GstCudaGraphicsResource * nvSurfaceArrayResource)
{ {
gboolean buffer_mapped = FALSE; gboolean buffer_mapped = FALSE;
guint i; guint i;
@ -817,8 +823,73 @@ map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info,
} }
switch (surface->memType) { switch (surface->memType) {
/* TODO: NVBUF_MEM_DEFAULT on jetson is SURFACE_ARRAY */ /* NVBUF_MEM_DEFAULT on jetson is SURFACE_ARRAY otherwise it is NVBUF_MEM_CUDA_DEVICE */
case NVBUF_MEM_DEFAULT: case NVBUF_MEM_DEFAULT:
#if defined(HAVE_CUDA_NVMM_JETSON)
case NVBUF_MEM_SURFACE_ARRAY:{
if (NvBufSurfaceMapEglImage (surface, 0) != 0) {
GST_ERROR ("could not map EGL Image from NvBufSurface");
goto error;
}
CUresult result;
if ((result =
CuGraphicsEGLRegisterImage (&nvSurfaceArrayResource->resource,
surface_params->mappedAddr.eglImage,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE)) != CUDA_SUCCESS) {
GST_ERROR ("cuGraphicsEGLRegisterImage failed: %u "
"with mapped EGLImageAddress: %p", result,
surface_params->mappedAddr.eglImage);
NvBufSurfaceUnMapEglImage (surface, 0);
goto error;
}
nvSurfaceArrayResource->registered = true;
CUeglFrame eglFrame;
if ((result = CuGraphicsResourceGetMappedEglFrame (&eglFrame,
nvSurfaceArrayResource->resource, 0, 0)) != CUDA_SUCCESS) {
GST_ERROR ("cuGraphicsResourceGetMappedEglFrame failed: %u", result);
NvBufSurfaceUnMapEglImage (surface, 0);
goto error;
}
if ((result = CuCtxSynchronize ()) != CUDA_SUCCESS) {
GST_ERROR ("cuCtxSynchronize failed: %u", result);
NvBufSurfaceUnMapEglImage (surface, 0);
goto error;
}
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice =
((CUdeviceptr) eglFrame.frame.pPitch[i]);
} else if (eglFrame.frameType == CU_EGL_FRAME_TYPE_ARRAY) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params[i].srcArray = eglFrame.frame.pArray[i];
}
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice =
((CUdeviceptr) eglFrame.frame.pPitch[i]);
} else if (eglFrame.frameType == CU_EGL_FRAME_TYPE_ARRAY) {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params[i].dstArray = eglFrame.frame.pArray[i];
}
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
if (NvBufSurfaceUnMapEglImage (surface, 0) != 0) {
GST_ERROR ("could not unmap EGL Image from NvBufSurface");
goto error;
}
break;
}
#endif /* HAVE_CUDA_NVMM_JETSON */
case NVBUF_MEM_CUDA_DEVICE: case NVBUF_MEM_CUDA_DEVICE:
{ {
for (i = 0; i < plane_params->num_planes; i++) { for (i = 0; i < plane_params->num_planes; i++) {
@ -891,7 +962,7 @@ map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info,
} }
} }
} else } else
#endif #endif /* HAVE_CUDA_NVMM */
{ {
GstMapFlags map_flags; GstMapFlags map_flags;
@ -981,6 +1052,7 @@ gst_cuda_buffer_copy_internal (GstBuffer * dst_buf,
GstMapInfo dst_map, src_map; GstMapInfo dst_map, src_map;
guint i; guint i;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES]; CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
GstCudaGraphicsResource *nvSurfaceArrayResource = nullptr;
memset (copy_params, 0, sizeof (copy_params)); memset (copy_params, 0, sizeof (copy_params));
memset (&dst_frame, 0, sizeof (GstVideoFrame)); memset (&dst_frame, 0, sizeof (GstVideoFrame));
@ -988,24 +1060,44 @@ gst_cuda_buffer_copy_internal (GstBuffer * dst_buf,
memset (&dst_map, 0, sizeof (GstMapInfo)); memset (&dst_map, 0, sizeof (GstMapInfo));
memset (&src_map, 0, sizeof (GstMapInfo)); memset (&src_map, 0, sizeof (GstMapInfo));
/* push context here, because if we have nvmm on jetson, we need the context */
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push our context");
goto unmap_and_out;
}
#ifdef HAVE_CUDA_NVMM_JETSON
/* create new resource for mapping the EGL image from the surface */
nvSurfaceArrayResource =
gst_cuda_graphics_resource_new (context, NULL,
GstCudaGraphicsResourceType::GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE);
#endif
if (!map_buffer_and_fill_copy2d (dst_buf, dst_info, if (!map_buffer_and_fill_copy2d (dst_buf, dst_info,
dst_type, &dst_frame, &dst_map, FALSE, copy_params)) { dst_type, &dst_frame, &dst_map, FALSE, copy_params,
nvSurfaceArrayResource)) {
GST_ERROR_OBJECT (context, "Failed to map output buffer"); GST_ERROR_OBJECT (context, "Failed to map output buffer");
#ifdef HAVE_CUDA_NVMM_JETSON
gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
nvSurfaceArrayResource = nullptr;
#endif
return FALSE; return FALSE;
} }
if (!map_buffer_and_fill_copy2d (src_buf, src_info, if (!map_buffer_and_fill_copy2d (src_buf, src_info,
src_type, &src_frame, &src_map, TRUE, copy_params)) { src_type, &src_frame, &src_map, TRUE, copy_params,
nvSurfaceArrayResource)) {
GST_ERROR_OBJECT (context, "Failed to map input buffer"); GST_ERROR_OBJECT (context, "Failed to map input buffer");
#ifdef HAVE_CUDA_NVMM_JETSON
gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
nvSurfaceArrayResource = nullptr;
#endif
unmap_buffer_or_frame (dst_buf, &dst_frame, &dst_map); unmap_buffer_or_frame (dst_buf, &dst_frame, &dst_map);
return FALSE; return FALSE;
} }
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push our context");
goto unmap_and_out;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (dst_info); i++) { for (i = 0; i < GST_VIDEO_INFO_N_PLANES (dst_info); i++) {
ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream)); ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream));
if (!ret) { if (!ret) {
@ -1015,6 +1107,13 @@ gst_cuda_buffer_copy_internal (GstBuffer * dst_buf,
} }
gst_cuda_result (CuStreamSynchronize (stream)); gst_cuda_result (CuStreamSynchronize (stream));
#ifdef HAVE_CUDA_NVMM_JETSON
gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
nvSurfaceArrayResource = NULL;
#endif
gst_cuda_context_pop (nullptr); gst_cuda_context_pop (nullptr);
unmap_and_out: unmap_and_out:
@ -1111,6 +1210,7 @@ static void
gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data) gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
{ {
GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES]; GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES];
GstCudaGraphicsResource *nvSurfaceArrayResource = nullptr;
guint num_resources; guint num_resources;
GstBuffer *gl_buf, *cuda_buf; GstBuffer *gl_buf, *cuda_buf;
GstVideoFrame cuda_frame; GstVideoFrame cuda_frame;
@ -1130,14 +1230,32 @@ gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
if (!ensure_gl_interop ()) if (!ensure_gl_interop ())
return; return;
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push context");
unmap_buffer_or_frame (data->pbo_to_cuda ? data->dst_buf : data->src_buf,
&cuda_frame, &cuda_map_info);
return;
}
#ifdef HAVE_CUDA_NVMM_JETSON
nvSurfaceArrayResource =
gst_cuda_graphics_resource_new (context, NULL,
GstCudaGraphicsResourceType::GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE);
#endif
if (data->pbo_to_cuda) { if (data->pbo_to_cuda) {
gl_buf = data->src_buf; gl_buf = data->src_buf;
cuda_buf = data->dst_buf; cuda_buf = data->dst_buf;
if (!map_buffer_and_fill_copy2d (cuda_buf, if (!map_buffer_and_fill_copy2d (cuda_buf,
data->dst_info, data->copy_type, &cuda_frame, &cuda_map_info, data->dst_info, data->copy_type, &cuda_frame, &cuda_map_info,
FALSE, copy_params)) { FALSE, copy_params, nvSurfaceArrayResource)) {
GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer"); GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer");
#ifdef HAVE_CUDA_NVMM_JETSON
gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
nvSurfaceArrayResource = nullptr;
#endif
return; return;
} }
} else { } else {
@ -1146,8 +1264,13 @@ gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
if (!map_buffer_and_fill_copy2d (cuda_buf, if (!map_buffer_and_fill_copy2d (cuda_buf,
data->src_info, data->copy_type, &cuda_frame, &cuda_map_info, data->src_info, data->copy_type, &cuda_frame, &cuda_map_info,
TRUE, copy_params)) { TRUE, copy_params, nvSurfaceArrayResource)) {
GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer"); GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer");
#ifdef HAVE_CUDA_NVMM_JETSON
gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
nvSurfaceArrayResource = nullptr;
#endif
return; return;
} }
} }
@ -1155,12 +1278,6 @@ gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
num_resources = gst_buffer_n_memory (gl_buf); num_resources = gst_buffer_n_memory (gl_buf);
g_assert (num_resources >= GST_VIDEO_INFO_N_PLANES (data->src_info)); g_assert (num_resources >= GST_VIDEO_INFO_N_PLANES (data->src_info));
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push context");
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
return;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->src_info); i++) { for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->src_info); i++) {
GstMemory *mem = gst_buffer_peek_memory (gl_buf, i); GstMemory *mem = gst_buffer_peek_memory (gl_buf, i);
GstGLMemoryPBO *pbo; GstGLMemoryPBO *pbo;
@ -1250,6 +1367,12 @@ gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
out: out:
gst_cuda_result (CuStreamSynchronize (stream)); gst_cuda_result (CuStreamSynchronize (stream));
#ifdef HAVE_CUDA_NVMM_JETSON
gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
#endif
gst_cuda_context_pop (nullptr); gst_cuda_context_pop (nullptr);
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info); unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
} }
@ -1382,7 +1505,7 @@ cuda_copy_d3d11_interop (GstBuffer * dst_buf, const GstVideoInfo * dst_info,
} }
if (!map_buffer_and_fill_copy2d (cuda_buf, if (!map_buffer_and_fill_copy2d (cuda_buf,
dst_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info, dst_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info,
FALSE, copy_params)) { FALSE, copy_params, NULL)) {
GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer"); GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer");
gst_video_frame_unmap (&d3d11_frame); gst_video_frame_unmap (&d3d11_frame);
return FALSE; return FALSE;
@ -1397,7 +1520,7 @@ cuda_copy_d3d11_interop (GstBuffer * dst_buf, const GstVideoInfo * dst_info,
} }
if (!map_buffer_and_fill_copy2d (cuda_buf, if (!map_buffer_and_fill_copy2d (cuda_buf,
src_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info, src_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info,
TRUE, copy_params)) { TRUE, copy_params, NULL)) {
GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer"); GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer");
gst_video_frame_unmap (&d3d11_frame); gst_video_frame_unmap (&d3d11_frame);
return FALSE; return FALSE;

View file

@ -69,14 +69,23 @@ typedef enum
* @GST_CUDA_GRAPHICS_RESSOURCE_NONE: Ressource represents a CUDA buffer. * @GST_CUDA_GRAPHICS_RESSOURCE_NONE: Ressource represents a CUDA buffer.
* @GST_CUDA_GRAPHICS_RESSOURCE_GL_BUFFER: Ressource represents a GL buffer. * @GST_CUDA_GRAPHICS_RESSOURCE_GL_BUFFER: Ressource represents a GL buffer.
* @GST_CUDA_GRAPHICS_RESSOURCE_D3D11_RESOURCE: Ressource represents a D3D resource. * @GST_CUDA_GRAPHICS_RESSOURCE_D3D11_RESOURCE: Ressource represents a D3D resource.
* @GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE: Resource represents an EGL resource.
* *
* Since: 1.22 * Since: 1.22
*/ */
/**
* GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE:
*
* Resource represents an EGL resource.
*
* Since: 1.26
*/
typedef enum typedef enum
{ {
GST_CUDA_GRAPHICS_RESOURCE_NONE = 0, GST_CUDA_GRAPHICS_RESOURCE_NONE = 0,
GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER = 1, GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER = 1,
GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE = 2, GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE = 2,
GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE = 3,
} GstCudaGraphicsResourceType; } GstCudaGraphicsResourceType;
/** /**

View file

@ -21,6 +21,7 @@ cuda_headers = files([
'gstcudautils.h', 'gstcudautils.h',
]) ])
extra_deps = []
gstcuda_dep = dependency('', required : false) gstcuda_dep = dependency('', required : false)
cuda_stubinc = include_directories('./stub') cuda_stubinc = include_directories('./stub')
gstcuda_stub_dep = declare_dependency( gstcuda_stub_dep = declare_dependency(
@ -29,6 +30,7 @@ gstcuda_stub_dep = declare_dependency(
have_nvbufsurface_h = false have_nvbufsurface_h = false
gstcuda_nvmm_inc = [] gstcuda_nvmm_inc = []
nvbuf_dep = dependency('', required: false)
gstcuda_platform_dep = [] gstcuda_platform_dep = []
if host_system not in ['windows', 'linux'] if host_system not in ['windows', 'linux']
@ -92,12 +94,35 @@ else
gstcuda_nvmm_inc = [include_directories(nvmm_inc_opt)] gstcuda_nvmm_inc = [include_directories(nvmm_inc_opt)]
endif endif
have_nvbufsurface_h = cc.has_header('nvbufsurface.h', if cc.has_header('nvbufsurface.h',
include_directories: gstcuda_nvmm_inc, include_directories: gstcuda_nvmm_inc,
required: nvmm_opt) required: false)
have_nvbufsurface_h = true
# try some other default locations
elif cc.has_header('/usr/src/jetson_multimedia_api/include/nvbufsurface.h',
required: false)
have_nvbufsurface_h = true
gstcuda_nvmm_inc = [include_directories('/usr/src/jetson_multimedia_api/include')]
elif cc.has_header('/opt/nvidia/deepstream/deepstream/sources/includes/nvbufsurface.h',
required: false)
have_nvbufsurface_h = true
gstcuda_nvmm_inc = [include_directories('/opt/nvidia/deepstream/deepstream/sources/includes')]
endif
if nvmm_opt.enabled() and not have_nvbufsurface_h
error('Could not find required header: "nvbufsurface.h"')
subdir_done()
endif
endif endif
if have_nvbufsurface_h if have_nvbufsurface_h
extra_args += ['-DHAVE_CUDA_NVMM'] extra_args += ['-DHAVE_CUDA_NVMM']
# check if we have a tegra based system (jetson)
nvbuf_dep = cc.find_library('nvbufsurface', dirs: '/usr/lib/aarch64-linux-gnu/tegra/', required: false)
if nvbuf_dep.found()
extra_deps += [nvbuf_dep]
extra_args += ['-DHAVE_CUDA_NVMM_JETSON']
endif
endif endif
endif endif
@ -110,7 +135,7 @@ gstcuda= library('gstcuda-' + api_version,
version : libversion, version : libversion,
soversion : soversion, soversion : soversion,
install : true, install : true,
dependencies : [gstbase_dep, gmodule_dep, gstvideo_dep, gstglproto_dep, gstd3d11_dep, gstcuda_platform_dep] dependencies : [gstbase_dep, gmodule_dep, gstvideo_dep, gstglproto_dep, gstd3d11_dep, gstcuda_platform_dep, extra_deps]
) )
gen_sources = [] gen_sources = []

View file

@ -0,0 +1,65 @@
/* CUDA EGL stub header
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_CUDA_EGLSTUB_H__
#define __GST_CUDA_EGLSTUB_H__

/* Stub replacement for the CUDA toolkit's cudaEGL.h. It declares only the
 * constants and types needed to describe an EGL frame mapped into CUDA
 * (CUeglFrame and the enums it refers to); no functions are declared here.
 *
 * NOTE(review): the layout of CUeglFrame is assumed to mirror the definition
 * in NVIDIA's cudaEGL.h - confirm against the CUDA version in use. */

#include "cuda.h"

#ifdef CUDA_FORCE_API_VERSION
#error "CUDA_FORCE_API_VERSION is no longer supported."
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Number of per-plane slots in CUeglFrame.frame */
#define CU_EGL_MAX_PLANES 3

/* Tells which member of CUeglFrame.frame is the valid one */
typedef enum CUeglFrameType_enum {
  CU_EGL_FRAME_TYPE_ARRAY = 0,  /* planes are CUarray handles (frame.pArray) */
  CU_EGL_FRAME_TYPE_PITCH = 1,  /* planes are pitched pointers (frame.pPitch) */
} CUeglFrameType;

/* Only the values declared below are provided by this stub */
typedef enum CUeglColorFormat_enum {
  CU_EGL_COLOR_FORMAT_RGBA = 0x07,
  CU_EGL_COLOR_FORMAT_MAX = 0x72,
} CUeglColorFormat;

/* Description of an EGL frame as seen from CUDA. Plain `unsigned int` is used
 * for the integer fields so that this header does not depend on GLib typedefs
 * being made visible by "cuda.h". */
typedef struct CUeglFrame {
  union {
    CUarray pArray[CU_EGL_MAX_PLANES];  /* used with CU_EGL_FRAME_TYPE_ARRAY */
    void* pPitch[CU_EGL_MAX_PLANES];    /* used with CU_EGL_FRAME_TYPE_PITCH */
  } frame;
  unsigned int width;
  unsigned int height;
  unsigned int depth;
  unsigned int pitch;
  unsigned int planeCount;
  unsigned int numChannels;
  CUeglFrameType frameType;
  CUeglColorFormat eglColorFormat;
  CUarray_format cuFormat;
} CUeglFrame;

#ifdef __cplusplus
}
#endif

#endif /* __GST_CUDA_EGLSTUB_H__ */

View file

@ -8,6 +8,10 @@ nvcodec_sources = [
'gstcudaipcsink.cpp', 'gstcudaipcsink.cpp',
'gstcudaipcsrc.cpp', 'gstcudaipcsrc.cpp',
'gstcudamemorycopy.c', 'gstcudamemorycopy.c',
'plugin.c'
]
nvcodec_dgpu_sources = [
'gstcuvidloader.c', 'gstcuvidloader.c',
'gstnvav1dec.cpp', 'gstnvav1dec.cpp',
'gstnvav1encoder.cpp', 'gstnvav1encoder.cpp',
@ -23,8 +27,7 @@ nvcodec_sources = [
'gstnvh265encoder.cpp', 'gstnvh265encoder.cpp',
'gstnvjpegenc.cpp', 'gstnvjpegenc.cpp',
'gstnvvp8dec.cpp', 'gstnvvp8dec.cpp',
'gstnvvp9dec.cpp', 'gstnvvp9dec.cpp'
'plugin.c',
] ]
nvcodec_win32_sources = [ nvcodec_win32_sources = [
@ -70,6 +73,12 @@ else
nvcodec_sources += nvcodec_win32_sources nvcodec_sources += nvcodec_win32_sources
endif endif
# if the system is not a tegra based system we add the other sources for encoding and decoding
if not nvbuf_dep.found()
extra_args += ['-DHAVE_NVCODEC_DGPU']
nvcodec_sources += nvcodec_dgpu_sources
endif
if cc.get_id() != 'msvc' if cc.get_id() != 'msvc'
if host_system == 'windows' if host_system == 'windows'
# MinGW 32bits compiler seems to be complaining about redundant-decls # MinGW 32bits compiler seems to be complaining about redundant-decls

View file

@ -30,6 +30,7 @@
#endif #endif
#include <gst/cuda/gstcuda.h> #include <gst/cuda/gstcuda.h>
#ifdef HAVE_NVCODEC_DGPU
#include "gstnvdec.h" #include "gstnvdec.h"
#include "gstnvenc.h" #include "gstnvenc.h"
#include "gstnvav1dec.h" #include "gstnvav1dec.h"
@ -38,6 +39,7 @@
#include "gstnvvp8dec.h" #include "gstnvvp8dec.h"
#include "gstnvvp9dec.h" #include "gstnvvp9dec.h"
#include "gstnvdecoder.h" #include "gstnvdecoder.h"
#endif
#include "gstcudamemorycopy.h" #include "gstcudamemorycopy.h"
#include "gstcudaconvertscale.h" #include "gstcudaconvertscale.h"
#include <gst/cuda/gstcudanvmm-private.h> #include <gst/cuda/gstcudanvmm-private.h>
@ -45,13 +47,16 @@
#ifdef G_OS_WIN32 #ifdef G_OS_WIN32
#include <gst/d3d11/gstd3d11.h> #include <gst/d3d11/gstd3d11.h>
#endif #endif
#ifdef HAVE_NVCODEC_DGPU
#include "gstnvh264encoder.h" #include "gstnvh264encoder.h"
#include "gstnvh265encoder.h" #include "gstnvh265encoder.h"
#include "gstnvav1encoder.h" #include "gstnvav1encoder.h"
#include "gstnvjpegenc.h"
#endif
#include "gstcudaipcsink.h" #include "gstcudaipcsink.h"
#include "gstcudaipcsrc.h" #include "gstcudaipcsrc.h"
#include "gstnvcodecutils.h" #include "gstnvcodecutils.h"
#include "gstnvjpegenc.h"
#include <glib/gi18n-lib.h> #include <glib/gi18n-lib.h>
@ -114,6 +119,7 @@ plugin_init (GstPlugin * plugin)
const char *err_name = NULL, *err_desc = NULL; const char *err_name = NULL, *err_desc = NULL;
gint dev_count = 0; gint dev_count = 0;
guint i; guint i;
#ifdef HAVE_NVCODEC_DGPU
gboolean nvdec_available = TRUE; gboolean nvdec_available = TRUE;
gboolean nvenc_available = TRUE; gboolean nvenc_available = TRUE;
/* hardcoded minimum supported version */ /* hardcoded minimum supported version */
@ -122,6 +128,7 @@ plugin_init (GstPlugin * plugin)
GList *h264_enc_cdata = NULL; GList *h264_enc_cdata = NULL;
GList *h265_enc_cdata = NULL; GList *h265_enc_cdata = NULL;
GList *av1_enc_cdata = NULL; GList *av1_enc_cdata = NULL;
#endif
gboolean have_nvrtc = FALSE; gboolean have_nvrtc = FALSE;
GST_DEBUG_CATEGORY_INIT (gst_nvcodec_debug, "nvcodec", 0, "nvcodec"); GST_DEBUG_CATEGORY_INIT (gst_nvcodec_debug, "nvcodec", 0, "nvcodec");
@ -135,6 +142,7 @@ plugin_init (GstPlugin * plugin)
return TRUE; return TRUE;
} }
#ifdef HAVE_NVCODEC_DGPU
/* get available API version from nvenc and it will be passed to /* get available API version from nvenc and it will be passed to
* nvdec */ * nvdec */
if (!gst_nvenc_load_library (&api_major_ver, &api_minor_ver)) { if (!gst_nvenc_load_library (&api_major_ver, &api_minor_ver)) {
@ -153,6 +161,7 @@ plugin_init (GstPlugin * plugin)
if (!nvdec_available && !nvenc_available) if (!nvdec_available && !nvenc_available)
return TRUE; return TRUE;
#endif
cuda_ret = CuInit (0); cuda_ret = CuInit (0);
if (cuda_ret != CUDA_SUCCESS) { if (cuda_ret != CUDA_SUCCESS) {
@ -192,8 +201,12 @@ plugin_init (GstPlugin * plugin)
for (i = 0; i < dev_count; i++) { for (i = 0; i < dev_count; i++) {
GstCudaContext *context = gst_cuda_context_new (i); GstCudaContext *context = gst_cuda_context_new (i);
#ifdef HAVE_NVCODEC_DGPU
CUcontext cuda_ctx; CUcontext cuda_ctx;
#endif
#if defined(G_OS_WIN32) || defined(HAVE_NVCODEC_DGPU)
gint64 adapter_luid = 0; gint64 adapter_luid = 0;
#endif
if (!context) { if (!context) {
GST_WARNING ("Failed to create context for device %d", i); GST_WARNING ("Failed to create context for device %d", i);
@ -203,6 +216,7 @@ plugin_init (GstPlugin * plugin)
g_object_get (context, "dxgi-adapter-luid", &adapter_luid, NULL); g_object_get (context, "dxgi-adapter-luid", &adapter_luid, NULL);
#endif #endif
#ifdef HAVE_NVCODEC_DGPU
cuda_ctx = gst_cuda_context_get_handle (context); cuda_ctx = gst_cuda_context_get_handle (context);
if (nvdec_available) { if (nvdec_available) {
gint j; gint j;
@ -310,9 +324,11 @@ plugin_init (GstPlugin * plugin)
gst_nv_jpeg_enc_register (plugin, context, GST_RANK_NONE, have_nvrtc); gst_nv_jpeg_enc_register (plugin, context, GST_RANK_NONE, have_nvrtc);
#endif
gst_object_unref (context); gst_object_unref (context);
} }
#ifdef HAVE_NVCODEC_DGPU
if (h264_enc_cdata) { if (h264_enc_cdata) {
gst_nv_h264_encoder_register_auto_select (plugin, h264_enc_cdata, gst_nv_h264_encoder_register_auto_select (plugin, h264_enc_cdata,
GST_RANK_NONE); GST_RANK_NONE);
@ -327,6 +343,7 @@ plugin_init (GstPlugin * plugin)
gst_nv_av1_encoder_register_auto_select (plugin, av1_enc_cdata, gst_nv_av1_encoder_register_auto_select (plugin, av1_enc_cdata,
GST_RANK_NONE); GST_RANK_NONE);
} }
#endif
gst_cuda_memory_copy_register (plugin, GST_RANK_NONE); gst_cuda_memory_copy_register (plugin, GST_RANK_NONE);