cuda/nvcodec: Add support for importing and producing embedded NVMM memory

As produced on the Nvidia Jetson series of devices. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7274>
2024-11-25 19:21:06 +00:00 · 2024-07-29 22:49:03 +10:00 · 2024-07-29 22:49:03 +10:00 · 8dac91537d
commit 8dac91537d
parent adcc6c8d38
10 changed files with 355 additions and 29 deletions
--- a/girs/GstCuda-1.0.gir
+++ b/girs/GstCuda-1.0.gir
@ -668,6 +668,9 @@ Free with gst_cuda_graphics_resource_free</doc>
      </member>
      <member name="d3d11_resource" value="2" c:identifier="GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE">
      </member>
+      <member name="egl_resource" value="3" c:identifier="GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE" version="1.26">
+        <doc xml:space="preserve" filename="../subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.h">Resource represents an EGL resource.</doc>
+      </member>
    </enumeration>
    <record name="CudaMemory" c:type="GstCudaMemory" version="1.22">
      <source-position filename="../subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.h"/>
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/cuda-gst.h
@ -42,6 +42,9 @@ CUresult CUDAAPI CuCtxPopCurrent    (CUcontext * pctx);
 GST_CUDA_API
 CUresult CUDAAPI CuCtxPushCurrent   (CUcontext ctx);

+GST_CUDA_API
+CUresult CUDAAPI CuCtxSynchronize   (void);
+
 GST_CUDA_API
 CUresult CUDAAPI CuCtxEnablePeerAccess (CUcontext peerContext,
                                             unsigned int Flags);
@ -326,7 +329,6 @@ CUresult CUDAAPI CuGLGetDevices (unsigned int * pCudaDeviceCount,
                                 unsigned int cudaDeviceCount,
                                 CUGLDeviceList deviceList);

-
 #ifdef G_OS_WIN32
 /* cudaD3D11.h */
 GST_CUDA_API
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcuda-private.h
@ -71,5 +71,24 @@ G_END_DECLS

 #define GST_CUDA_CALL_ONCE_END )

+/* cudaEGL.h */
+#if defined(HAVE_CUDA_NVMM_JETSON) && defined(HAVE_CUDA_GST_GL)
+#include <gst/gl/gstglconfig.h>
+#if GST_GL_HAVE_PLATFORM_EGL
+#include <cudaEGL.h>
+#include <gst/gl/egl/egl.h>
+GST_CUDA_API
+CUresult CUDAAPI CuGraphicsEGLRegisterImage (CUgraphicsResource *pCudaResource,
+                                             EGLImageKHR image,
+                                             unsigned int flags);
+
+GST_CUDA_API
+CUresult CUDAAPI CuGraphicsResourceGetMappedEglFrame(CUeglFrame* eglFrame,
+                                                     CUgraphicsResource resource,
+                                                     unsigned int index,
+                                                     unsigned int mipLevel);
+#endif /* GST_GL_HAVE_PLATFORM_EGL */
+#endif /* HAVE_CUDA_NVMM_JETSON && HAVE_CUDA_GST_GL */
+
 #endif /* __cplusplus */

--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudaloader.cpp
@ -26,6 +26,13 @@
 #include <gmodule.h>
 #include "gstcuda-private.h"

+#ifdef HAVE_CUDA_GST_GL
+#include <gst/gl/gstglconfig.h>
+#if GST_GL_HAVE_PLATFORM_EGL
+#include <gst/gl/egl/egl.h>
+#endif /* GST_GL_HAVE_PLATFORM_EGL */
+#endif /* HAVE_CUDA_GST_GL */
+
 GST_DEBUG_CATEGORY (gst_cudaloader_debug);
 #define GST_CAT_DEFAULT gst_cudaloader_debug

@ -65,6 +72,7 @@ typedef struct _GstNvCodecCudaVTable
  CUresult (CUDAAPI * CuCtxDestroy) (CUcontext ctx);
  CUresult (CUDAAPI * CuCtxPopCurrent) (CUcontext * pctx);
  CUresult (CUDAAPI * CuCtxPushCurrent) (CUcontext ctx);
+  CUresult (CUDAAPI * CuCtxSynchronize) (void);

  CUresult (CUDAAPI * CuCtxEnablePeerAccess) (CUcontext peerContext,
      unsigned int Flags);
@ -135,7 +143,6 @@ typedef struct _GstNvCodecCudaVTable
      unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
      unsigned int sharedMemBytes, CUstream hStream, void **kernelParams,
      void **extra);
-
  CUresult (CUDAAPI * CuGraphicsGLRegisterImage) (CUgraphicsResource *
      pCudaResource, unsigned int image, unsigned int target,
      unsigned int Flags);
@ -145,6 +152,14 @@ typedef struct _GstNvCodecCudaVTable
      CUdevice * pCudaDevices, unsigned int cudaDeviceCount,
      CUGLDeviceList deviceList);

+#if defined(HAVE_CUDA_NVMM_JETSON) && GST_GL_HAVE_PLATFORM_EGL
+  CUresult (CUDAAPI* CuGraphicsEGLRegisterImage) (CUgraphicsResource *
+      pCudaResource, EGLImageKHR image, unsigned int flags);
+  CUresult (CUDAAPI* CuGraphicsResourceGetMappedEglFrame)
+      (CUeglFrame* eglFrame, CUgraphicsResource resource, unsigned int index,
+      unsigned int mipLevel);
+#endif
+
  CUresult (CUDAAPI * CuEventCreate) (CUevent *phEvent, unsigned int Flags);
  CUresult (CUDAAPI * CuEventDestroy) (CUevent hEvent);
  CUresult (CUDAAPI * CuEventRecord) (CUevent hEvent, CUstream hStream);
@ -255,6 +270,7 @@ gst_cuda_load_library_once_func (void)
  LOAD_SYMBOL (cuCtxCreate, CuCtxCreate);
  LOAD_SYMBOL (cuCtxDestroy, CuCtxDestroy);
  LOAD_SYMBOL (cuCtxPopCurrent, CuCtxPopCurrent);
+  LOAD_SYMBOL (cuCtxSynchronize, CuCtxSynchronize);
  LOAD_SYMBOL (cuCtxPushCurrent, CuCtxPushCurrent);
  LOAD_SYMBOL (cuCtxEnablePeerAccess, CuCtxEnablePeerAccess);
  LOAD_SYMBOL (cuCtxDisablePeerAccess, CuCtxDisablePeerAccess);
@ -319,6 +335,13 @@ gst_cuda_load_library_once_func (void)
  LOAD_SYMBOL (cuGraphicsGLRegisterBuffer, CuGraphicsGLRegisterBuffer);
  LOAD_SYMBOL (cuGLGetDevices, CuGLGetDevices);

+  /* cudaEGL.h */
+#if defined(HAVE_CUDA_NVMM_JETSON) && GST_GL_HAVE_PLATFORM_EGL
+  LOAD_SYMBOL (cuGraphicsEGLRegisterImage, CuGraphicsEGLRegisterImage);
+  LOAD_SYMBOL (cuGraphicsResourceGetMappedEglFrame,
+      CuGraphicsResourceGetMappedEglFrame);
+#endif
+
 #ifdef G_OS_WIN32
  /* cudaD3D11.h */
  LOAD_SYMBOL (cuGraphicsD3D11RegisterResource,
@ -415,6 +438,14 @@ CuCtxPushCurrent (CUcontext ctx)
  return gst_cuda_vtable.CuCtxPushCurrent (ctx);
 }

+CUresult CUDAAPI
+CuCtxSynchronize (void)
+{
+  g_assert (gst_cuda_vtable.CuCtxSynchronize != nullptr);
+
+  return gst_cuda_vtable.CuCtxSynchronize ();
+}
+
 CUresult CUDAAPI
 CuCtxEnablePeerAccess (CUcontext peerContext, unsigned int Flags)
 {
@ -966,6 +997,29 @@ CuGLGetDevices (unsigned int *pCudaDeviceCount, CUdevice * pCudaDevices,
      cudaDeviceCount, deviceList);
 }

+/* cudaEGL.h */
+#if defined(HAVE_CUDA_NVMM_JETSON) && GST_GL_HAVE_PLATFORM_EGL
+CUresult CUDAAPI
+CuGraphicsEGLRegisterImage (CUgraphicsResource * pCudaResource,
+    EGLImageKHR image, unsigned int Flags)
+{
+  g_assert (gst_cuda_vtable.CuGraphicsEGLRegisterImage != nullptr);
+
+  return gst_cuda_vtable.CuGraphicsEGLRegisterImage (pCudaResource, image,
+      Flags);
+}
+
+CUresult CUDAAPI
+CuGraphicsResourceGetMappedEglFrame (CUeglFrame * eglFrame,
+    CUgraphicsResource resource, unsigned int index, unsigned int mipLevel)
+{
+  g_assert (gst_cuda_vtable.CuGraphicsResourceGetMappedEglFrame != nullptr);
+
+  return gst_cuda_vtable.CuGraphicsResourceGetMappedEglFrame (eglFrame,
+      resource, index, mipLevel);
+}
+#endif
+
 /* cudaD3D11.h */
 #ifdef G_OS_WIN32
 CUresult CUDAAPI
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.cpp
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.cpp
@ -32,6 +32,9 @@
 #ifdef HAVE_CUDA_GST_GL
 #include <gst/gl/gl.h>
 #include <gst/gl/gstglfuncs.h>
+#if GST_GL_HAVE_PLATFORM_EGL
+#include <gst/gl/egl/egl.h>
+#endif
 #endif

 #ifdef G_OS_WIN32
@ -678,6 +681,8 @@ gst_cuda_graphics_resource_free (GstCudaGraphicsResource * resource)
      gst_gl_context_thread_add ((GstGLContext *) resource->graphics_context,
          (GstGLContextThreadFunc) unregister_resource_from_gl_thread,
          resource);
+    } else if (resource->type == GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE) {
+      gst_cuda_graphics_resource_unregister (resource);
    } else
 #endif
 #ifdef G_OS_WIN32
@ -780,7 +785,8 @@ static gboolean
 map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info,
    GstCudaBufferCopyType copy_type, GstVideoFrame * frame,
    GstMapInfo * map_info, gboolean is_src,
-    CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES])
+    CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES],
+    GstCudaGraphicsResource * nvSurfaceArrayResource)
 {
  gboolean buffer_mapped = FALSE;
  guint i;
@ -817,8 +823,73 @@ map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info,
    }

    switch (surface->memType) {
-        /* TODO: NVBUF_MEM_DEFAULT on jetson is SURFACE_ARRAY */
+        /* NVBUF_MEM_DEFAULT on jetson is SURFACE_ARRAY otherwise it is NVBUF_MEM_CUDA_DEVICE */
      case NVBUF_MEM_DEFAULT:
+#if defined(HAVE_CUDA_NVMM_JETSON)
+      case NVBUF_MEM_SURFACE_ARRAY:{
+        if (NvBufSurfaceMapEglImage (surface, 0) != 0) {
+          GST_ERROR ("could not map EGL Image from NvBufSurface");
+          goto error;
+        }
+
+        CUresult result;
+        if ((result =
+                CuGraphicsEGLRegisterImage (&nvSurfaceArrayResource->resource,
+                    surface_params->mappedAddr.eglImage,
+                    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE)) != CUDA_SUCCESS) {
+          GST_ERROR ("cuGraphicsEGLRegisterImage failed: %u "
+              "with mapped EGLImageAddress: %p", result,
+              surface_params->mappedAddr.eglImage);
+          NvBufSurfaceUnMapEglImage (surface, 0);
+          goto error;
+        }
+        nvSurfaceArrayResource->registered = true;
+
+        CUeglFrame eglFrame;
+        if ((result = CuGraphicsResourceGetMappedEglFrame (&eglFrame,
+                    nvSurfaceArrayResource->resource, 0, 0)) != CUDA_SUCCESS) {
+          GST_ERROR ("cuGraphicsResourceGetMappedEglFrame failed: %u", result);
+          NvBufSurfaceUnMapEglImage (surface, 0);
+          goto error;
+        }
+
+        if ((result = CuCtxSynchronize ()) != CUDA_SUCCESS) {
+          GST_ERROR ("cuCtxSynchronize failed: %u", result);
+          NvBufSurfaceUnMapEglImage (surface, 0);
+          goto error;
+        }
+
+        for (i = 0; i < plane_params->num_planes; i++) {
+          if (is_src) {
+            if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
+              copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
+              copy_params[i].srcDevice =
+                  ((CUdeviceptr) eglFrame.frame.pPitch[i]);
+            } else if (eglFrame.frameType == CU_EGL_FRAME_TYPE_ARRAY) {
+              copy_params[i].srcMemoryType = CU_MEMORYTYPE_ARRAY;
+              copy_params[i].srcArray = eglFrame.frame.pArray[i];
+            }
+            copy_params[i].srcPitch = plane_params->pitch[i];
+          } else {
+            if (eglFrame.frameType == CU_EGL_FRAME_TYPE_PITCH) {
+              copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
+              copy_params[i].dstDevice =
+                  ((CUdeviceptr) eglFrame.frame.pPitch[i]);
+            } else if (eglFrame.frameType == CU_EGL_FRAME_TYPE_ARRAY) {
+              copy_params[i].dstMemoryType = CU_MEMORYTYPE_ARRAY;
+              copy_params[i].dstArray = eglFrame.frame.pArray[i];
+            }
+            copy_params[i].dstPitch = plane_params->pitch[i];
+          }
+        }
+
+        if (NvBufSurfaceUnMapEglImage (surface, 0) != 0) {
+          GST_ERROR ("could not unmap EGL Image from NvBufSurface");
+          goto error;
+        }
+        break;
+      }
+#endif /* HAVE_CUDA_NVMM_JETSON */
      case NVBUF_MEM_CUDA_DEVICE:
      {
        for (i = 0; i < plane_params->num_planes; i++) {
@ -891,7 +962,7 @@ map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info,
      }
    }
  } else
-#endif
+#endif /* HAVE_CUDA_NVMM */
  {
    GstMapFlags map_flags;

@ -981,6 +1052,7 @@ gst_cuda_buffer_copy_internal (GstBuffer * dst_buf,
  GstMapInfo dst_map, src_map;
  guint i;
  CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
+  GstCudaGraphicsResource *nvSurfaceArrayResource = nullptr;

  memset (copy_params, 0, sizeof (copy_params));
  memset (&dst_frame, 0, sizeof (GstVideoFrame));
@ -988,24 +1060,48 @ gst_cuda_buffer_copy_internal (GstBuffer * dst_buf,
  memset (&dst_map, 0, sizeof (GstMapInfo));
  memset (&src_map, 0, sizeof (GstMapInfo));

+  /* push context here, because if we have nvmm on jetson, we need the context */
+  if (!gst_cuda_context_push (context)) {
+    GST_ERROR_OBJECT (context, "Failed to push our context");
+    goto unmap_and_out;
+  }
+
+#ifdef HAVE_CUDA_NVMM_JETSON
+  /* create new resource for mapping the EGL image from the surface */
+  nvSurfaceArrayResource =
+      gst_cuda_graphics_resource_new (context, NULL,
+      GstCudaGraphicsResourceType::GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE);
+#endif
+
  if (!map_buffer_and_fill_copy2d (dst_buf, dst_info,
-          dst_type, &dst_frame, &dst_map, FALSE, copy_params)) {
+          dst_type, &dst_frame, &dst_map, FALSE, copy_params,
+          nvSurfaceArrayResource)) {
    GST_ERROR_OBJECT (context, "Failed to map output buffer");
+#ifdef HAVE_CUDA_NVMM_JETSON
+    gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
+    gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
+    nvSurfaceArrayResource = nullptr;
+#endif
+    /* the context was pushed above; pop it before leaving */
+    gst_cuda_context_pop (nullptr);
    return FALSE;
  }

  if (!map_buffer_and_fill_copy2d (src_buf, src_info,
-          src_type, &src_frame, &src_map, TRUE, copy_params)) {
+          src_type, &src_frame, &src_map, TRUE, copy_params,
+          nvSurfaceArrayResource)) {
    GST_ERROR_OBJECT (context, "Failed to map input buffer");
+#ifdef HAVE_CUDA_NVMM_JETSON
+    gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
+    gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
+    nvSurfaceArrayResource = nullptr;
+#endif
+    /* the context was pushed above; pop it before leaving */
+    gst_cuda_context_pop (nullptr);
    unmap_buffer_or_frame (dst_buf, &dst_frame, &dst_map);
    return FALSE;
  }

-  if (!gst_cuda_context_push (context)) {
-    GST_ERROR_OBJECT (context, "Failed to push our context");
-    goto unmap_and_out;
-  }
-
  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (dst_info); i++) {
    ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream));
    if (!ret) {
@ -1015,6 +1107,13 @@ gst_cuda_buffer_copy_internal (GstBuffer * dst_buf,
  }

  gst_cuda_result (CuStreamSynchronize (stream));
+
+#ifdef HAVE_CUDA_NVMM_JETSON
+  gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
+  gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
+  nvSurfaceArrayResource = NULL;
+#endif
+
  gst_cuda_context_pop (nullptr);

 unmap_and_out:
@ -1111,6 +1210,7 @@ static void
 gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
 {
  GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES];
+  GstCudaGraphicsResource *nvSurfaceArrayResource = nullptr;
  guint num_resources;
  GstBuffer *gl_buf, *cuda_buf;
  GstVideoFrame cuda_frame;
@ -1130,14 +1230,33 @ gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
  if (!ensure_gl_interop ())
    return;

+  if (!gst_cuda_context_push (context)) {
+    /* nothing has been mapped yet, so there is nothing to unmap here */
+    GST_ERROR_OBJECT (context, "Failed to push context");
+    return;
+  }
+
+#ifdef HAVE_CUDA_NVMM_JETSON
+  nvSurfaceArrayResource =
+      gst_cuda_graphics_resource_new (context, NULL,
+      GstCudaGraphicsResourceType::GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE);
+#endif
+
  if (data->pbo_to_cuda) {
    gl_buf = data->src_buf;
    cuda_buf = data->dst_buf;

    if (!map_buffer_and_fill_copy2d (cuda_buf,
            data->dst_info, data->copy_type, &cuda_frame, &cuda_map_info,
-            FALSE, copy_params)) {
+            FALSE, copy_params, nvSurfaceArrayResource)) {
      GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer");
+#ifdef HAVE_CUDA_NVMM_JETSON
+      gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
+      gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
+      nvSurfaceArrayResource = nullptr;
+#endif
+      /* the context was pushed above; pop it before leaving */
+      gst_cuda_context_pop (nullptr);
      return;
    }
  } else {
@ -1146,8 +1265,15 @ gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)

    if (!map_buffer_and_fill_copy2d (cuda_buf,
            data->src_info, data->copy_type, &cuda_frame, &cuda_map_info,
-            TRUE, copy_params)) {
+            TRUE, copy_params, nvSurfaceArrayResource)) {
      GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer");
+#ifdef HAVE_CUDA_NVMM_JETSON
+      gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
+      gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
+      nvSurfaceArrayResource = nullptr;
+#endif
+      /* the context was pushed above; pop it before leaving */
+      gst_cuda_context_pop (nullptr);
      return;
    }
  }
@ -1155,12 +1278,6 @@ gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
  num_resources = gst_buffer_n_memory (gl_buf);
  g_assert (num_resources >= GST_VIDEO_INFO_N_PLANES (data->src_info));

-  if (!gst_cuda_context_push (context)) {
-    GST_ERROR_OBJECT (context, "Failed to push context");
-    unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
-    return;
-  }
-
  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->src_info); i++) {
    GstMemory *mem = gst_buffer_peek_memory (gl_buf, i);
    GstGLMemoryPBO *pbo;
@ -1250,6 +1367,12 @@ gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)

 out:
  gst_cuda_result (CuStreamSynchronize (stream));
+
+#ifdef HAVE_CUDA_NVMM_JETSON
+  gst_cuda_graphics_resource_unregister (nvSurfaceArrayResource);
+  gst_cuda_graphics_resource_free (nvSurfaceArrayResource);
+#endif
+
  gst_cuda_context_pop (nullptr);
  unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
 }
@ -1382,7 +1505,7 @@ cuda_copy_d3d11_interop (GstBuffer * dst_buf, const GstVideoInfo * dst_info,
    }
    if (!map_buffer_and_fill_copy2d (cuda_buf,
            dst_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info,
-            FALSE, copy_params)) {
+            FALSE, copy_params, NULL)) {
      GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer");
      gst_video_frame_unmap (&d3d11_frame);
      return FALSE;
@ -1397,7 +1520,7 @@ cuda_copy_d3d11_interop (GstBuffer * dst_buf, const GstVideoInfo * dst_info,
    }
    if (!map_buffer_and_fill_copy2d (cuda_buf,
            src_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info,
-            TRUE, copy_params)) {
+            TRUE, copy_params, NULL)) {
      GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer");
      gst_video_frame_unmap (&d3d11_frame);
      return FALSE;
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.h
@ -69,14 +69,23 @@ typedef enum
 * @GST_CUDA_GRAPHICS_RESSOURCE_NONE: Ressource represents a CUDA buffer.
 * @GST_CUDA_GRAPHICS_RESSOURCE_GL_BUFFER: Ressource represents a GL buffer.
 * @GST_CUDA_GRAPHICS_RESSOURCE_D3D11_RESOURCE: Ressource represents a D3D resource.
+ * @GST_CUDA_GRAPHICS_RESSOURCE_EGL_RESOURCE: Ressource represents an EGL resource.
 *
 * Since: 1.22
 */
+/**
+ * GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE:
+ *
+ * Resource represents an EGL resource.
+ *
+ * Since: 1.26
+ */
 typedef enum
 {
  GST_CUDA_GRAPHICS_RESOURCE_NONE = 0,
  GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER = 1,
  GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE = 2,
+  GST_CUDA_GRAPHICS_RESOURCE_EGL_RESOURCE = 3,
 } GstCudaGraphicsResourceType;

 /**
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/meson.build
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/meson.build
@ -21,6 +21,7 @@ cuda_headers = files([
  'gstcudautils.h',
 ])

+extra_deps = []
 gstcuda_dep = dependency('', required : false)
 cuda_stubinc = include_directories('./stub')
 gstcuda_stub_dep = declare_dependency(
@ -29,6 +30,7 @@ gstcuda_stub_dep = declare_dependency(

 have_nvbufsurface_h = false
 gstcuda_nvmm_inc = []
+nvbuf_dep = dependency('', required: false)

 gstcuda_platform_dep = []
 if host_system not in ['windows', 'linux']
@ -92,12 +94,35 @@ else
      gstcuda_nvmm_inc = [include_directories(nvmm_inc_opt)]
    endif

-    have_nvbufsurface_h = cc.has_header('nvbufsurface.h',
+    if cc.has_header('nvbufsurface.h',
                     include_directories: gstcuda_nvmm_inc,
-                                        required: nvmm_opt)
+                     required: false)
+      have_nvbufsurface_h = true
+    # try some other default locations
+    elif cc.has_header('/usr/src/jetson_multimedia_api/include/nvbufsurface.h',
+	               required: false)
+      have_nvbufsurface_h = true
+      gstcuda_nvmm_inc = [include_directories('/usr/src/jetson_multimedia_api/include')]
+    elif cc.has_header('/opt/nvidia/deepstream/deepstream/sources/includes/nvbufsurface.h',
+                       required: false)
+      have_nvbufsurface_h = true
+      gstcuda_nvmm_inc = [include_directories('/opt/nvidia/deepstream/deepstream/sources/includes')]
    endif
+    if nvmm_opt.enabled() and not have_nvbufsurface_h
+      error('Could not find required header: "nvbufsurface.h"')
+      subdir_done()
+    endif
+  endif
+
  if have_nvbufsurface_h
    extra_args += ['-DHAVE_CUDA_NVMM']
+
+    # check if we have a tegra based system (jetson)
+    nvbuf_dep = cc.find_library('nvbufsurface', dirs: '/usr/lib/aarch64-linux-gnu/tegra/', required: false)
+    if nvbuf_dep.found()
+      extra_deps += [nvbuf_dep]
+      extra_args += ['-DHAVE_CUDA_NVMM_JETSON']
+    endif
  endif
 endif

@ -110,7 +135,7 @@ gstcuda= library('gstcuda-' + api_version,
  version : libversion,
  soversion : soversion,
  install : true,
-  dependencies : [gstbase_dep, gmodule_dep, gstvideo_dep, gstglproto_dep, gstd3d11_dep, gstcuda_platform_dep]
+  dependencies : [gstbase_dep, gmodule_dep, gstvideo_dep, gstglproto_dep, gstd3d11_dep, gstcuda_platform_dep, extra_deps]
 )

 gen_sources = []
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cudaEGL.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/stub/cudaEGL.h
@ -0,0 +1,69 @
+/* CUDA EGL stub header
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_CUDA_EGLSTUB_H__
+#define __GST_CUDA_EGLSTUB_H__
+
+#include "cuda.h"
+
+#ifdef CUDA_FORCE_API_VERSION
+#error "CUDA_FORCE_API_VERSION is no longer supported."
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Number of entries in the per-plane arrays of CUeglFrame */
+#define CU_EGL_MAX_PLANES 3
+
+/* Tells which member of the CUeglFrame "frame" union is in use */
+typedef enum CUeglFrameType_enum {
+  CU_EGL_FRAME_TYPE_ARRAY = 0,
+  CU_EGL_FRAME_TYPE_PITCH = 1,
+} CUeglFrameType;
+
+/* Color format identifiers; this stub declares only RGBA and the MAX value */
+typedef enum CUeglColorFormat_enum {
+  CU_EGL_COLOR_FORMAT_RGBA = 0x07,
+  CU_EGL_COLOR_FORMAT_MAX = 0x72,
+} CUeglColorFormat;
+
+/* Frame description written by CuGraphicsResourceGetMappedEglFrame() */
+typedef struct CUeglFrame {
+    union {
+        CUarray pArray[CU_EGL_MAX_PLANES];
+        void*   pPitch[CU_EGL_MAX_PLANES];
+    } frame;
+    guint width;
+    guint height;
+    guint depth;
+    guint pitch;
+    guint planeCount;
+    guint numChannels;
+    CUeglFrameType frameType;
+    CUeglColorFormat eglColorFormat;
+    CUarray_format cuFormat;
+} CUeglFrame;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __GST_CUDA_EGLSTUB_H__ */
+
--- a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
@ -8,6 +8,10 @@ nvcodec_sources = [
  'gstcudaipcsink.cpp',
  'gstcudaipcsrc.cpp',
  'gstcudamemorycopy.c',
+  'plugin.c'
+]
+
+nvcodec_dgpu_sources = [
  'gstcuvidloader.c',
  'gstnvav1dec.cpp',
  'gstnvav1encoder.cpp',
@ -23,8 +27,7 @@ nvcodec_sources = [
  'gstnvh265encoder.cpp',
  'gstnvjpegenc.cpp',
  'gstnvvp8dec.cpp',
-  'gstnvvp9dec.cpp',
-  'plugin.c',
+  'gstnvvp9dec.cpp'
 ]

 nvcodec_win32_sources = [
@ -70,6 +73,12 @@ else
  nvcodec_sources += nvcodec_win32_sources
 endif

+# if the system is not a tegra based system we add the other sources for encoding and decoding
+if not nvbuf_dep.found()
+  extra_args += ['-DHAVE_NVCODEC_DGPU']
+  nvcodec_sources += nvcodec_dgpu_sources
+endif
+
 if cc.get_id() != 'msvc'
  if host_system == 'windows'
    # MinGW 32bits compiler seems to be complaining about redundant-decls
--- a/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c
@ -30,6 +30,7 @@
 #endif

 #include <gst/cuda/gstcuda.h>
+#ifdef HAVE_NVCODEC_DGPU
 #include "gstnvdec.h"
 #include "gstnvenc.h"
 #include "gstnvav1dec.h"
@ -38,6 +39,7 @@
 #include "gstnvvp8dec.h"
 #include "gstnvvp9dec.h"
 #include "gstnvdecoder.h"
+#endif
 #include "gstcudamemorycopy.h"
 #include "gstcudaconvertscale.h"
 #include <gst/cuda/gstcudanvmm-private.h>
@ -45,13 +47,16 @@
 #ifdef G_OS_WIN32
 #include <gst/d3d11/gstd3d11.h>
 #endif
+#ifdef HAVE_NVCODEC_DGPU
 #include "gstnvh264encoder.h"
 #include "gstnvh265encoder.h"
 #include "gstnvav1encoder.h"
+#include "gstnvjpegenc.h"
+#endif
+
 #include "gstcudaipcsink.h"
 #include "gstcudaipcsrc.h"
 #include "gstnvcodecutils.h"
-#include "gstnvjpegenc.h"

 #include <glib/gi18n-lib.h>

@ -114,6 +119,7 @@ plugin_init (GstPlugin * plugin)
  const char *err_name = NULL, *err_desc = NULL;
  gint dev_count = 0;
  guint i;
+#ifdef HAVE_NVCODEC_DGPU
  gboolean nvdec_available = TRUE;
  gboolean nvenc_available = TRUE;
  /* hardcoded minimum supported version */
@ -122,6 +128,7 @@ plugin_init (GstPlugin * plugin)
  GList *h264_enc_cdata = NULL;
  GList *h265_enc_cdata = NULL;
  GList *av1_enc_cdata = NULL;
+#endif
  gboolean have_nvrtc = FALSE;

  GST_DEBUG_CATEGORY_INIT (gst_nvcodec_debug, "nvcodec", 0, "nvcodec");
@ -135,6 +142,7 @@ plugin_init (GstPlugin * plugin)
    return TRUE;
  }

+#ifdef HAVE_NVCODEC_DGPU
  /* get available API version from nvenc and it will be passed to
   * nvdec */
  if (!gst_nvenc_load_library (&api_major_ver, &api_minor_ver)) {
@ -153,6 +161,7 @@ plugin_init (GstPlugin * plugin)

  if (!nvdec_available && !nvenc_available)
    return TRUE;
+#endif

  cuda_ret = CuInit (0);
  if (cuda_ret != CUDA_SUCCESS) {
@ -192,8 +201,12 @@ plugin_init (GstPlugin * plugin)

  for (i = 0; i < dev_count; i++) {
    GstCudaContext *context = gst_cuda_context_new (i);
+#ifdef HAVE_NVCODEC_DGPU
    CUcontext cuda_ctx;
+#endif
+#if defined(G_OS_WIN32) || defined(HAVE_NVCODEC_DGPU)
    gint64 adapter_luid = 0;
+#endif

    if (!context) {
      GST_WARNING ("Failed to create context for device %d", i);
@ -203,6 +216,7 @@ plugin_init (GstPlugin * plugin)
    g_object_get (context, "dxgi-adapter-luid", &adapter_luid, NULL);
 #endif

+#ifdef HAVE_NVCODEC_DGPU
    cuda_ctx = gst_cuda_context_get_handle (context);
    if (nvdec_available) {
      gint j;
@ -310,9 +324,11 @@ plugin_init (GstPlugin * plugin)

    gst_nv_jpeg_enc_register (plugin, context, GST_RANK_NONE, have_nvrtc);

+#endif
    gst_object_unref (context);
  }

+#ifdef HAVE_NVCODEC_DGPU
  if (h264_enc_cdata) {
    gst_nv_h264_encoder_register_auto_select (plugin, h264_enc_cdata,
        GST_RANK_NONE);
@ -327,6 +343,7 @@ plugin_init (GstPlugin * plugin)
    gst_nv_av1_encoder_register_auto_select (plugin, av1_enc_cdata,
        GST_RANK_NONE);
  }
+#endif

  gst_cuda_memory_copy_register (plugin, GST_RANK_NONE);