nvcodec: Refactor basetransform subclasses

* cudaupload/download - Specify only the formats that nvcodec elements can actually handle, not all video formats - Support CUDA output for download and CUDA input for upload in order to make passthrough possible, like other upload/download elements. * cudabasetransform - Reset the conversion element if the upstream CUDA memory holds a different CUDA context and the element can accept it. This is the same behavior as the corresponding d3d11 filter elements. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1834>
2025-03-30 20:59:44 +00:00 · 2022-03-02 22:03:54 +09:00 · 2022-03-02 22:03:54 +09:00 · ad0e7fca14
commit ad0e7fca14
parent 89bbcf0061
11 changed files with 764 additions and 617 deletions
--- a/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.h
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.h
@ -28,10 +28,6 @@ G_BEGIN_DECLS
 typedef struct _GstCudaConverter GstCudaConverter;
 /* Format-list string, in caps list syntax ("{ A, B, ... }"), naming the raw
  * video formats associated with GstCudaConverter.
  * NOTE(review): presumably the formats the converter accepts on both its
  * input and output side - confirm against the converter implementation. */
 #define GST_CUDA_CONVERTER_FORMATS \
    "{ I420, YV12, NV12, NV21, P010_10LE, P016_LE, I420_10LE, Y444, Y444_16LE, " \
    "BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, BGR, BGR10A2_LE, RGB10A2_LE }"
 /* Creates a converter from the in_info format to the out_info format, bound
  * to cuda_ctx.  The caller in gstcudabasefilter.c treats a NULL result as
  * failure and releases a converter with gst_cuda_converter_free(). */
 GstCudaConverter *    gst_cuda_converter_new           (GstVideoInfo * in_info,
                                                         GstVideoInfo * out_info,
                                                         GstCudaContext * cuda_ctx);
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasefilter.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasefilter.c
@ -33,6 +33,7 @@
 #include "gstcudabasefilter.h"
 #include "gstcudautils.h"
 #include "gstcudaformat.h"
 #include <string.h>
 GST_DEBUG_CATEGORY_STATIC (gst_cuda_base_filter_debug);
@ -42,14 +43,14 @@ static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE_WITH_FEATURES
-        (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, GST_CUDA_CONVERTER_FORMATS))
+        (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, GST_CUDA_FORMATS))
    );
 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE_WITH_FEATURES
-        (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, GST_CUDA_CONVERTER_FORMATS))
+        (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, GST_CUDA_FORMATS))
    );
 #define gst_cuda_base_filter_parent_class parent_class
@ -57,10 +58,13 @@ G_DEFINE_ABSTRACT_TYPE (GstCudaBaseFilter,
    gst_cuda_base_filter, GST_TYPE_CUDA_BASE_TRANSFORM);
 static void gst_cuda_base_filter_dispose (GObject * object);
-static GstFlowReturn
+static gboolean
-gst_cuda_base_filter_transform_frame (GstCudaBaseTransform * btrans,
+gst_cuda_base_filter_propose_allocation (GstBaseTransform * trans,
-    GstVideoFrame * in_frame, GstCudaMemory * in_cuda_mem,
+    GstQuery * decide_query, GstQuery * query);
-    GstVideoFrame * out_frame, GstCudaMemory * out_cuda_mem);
+static gboolean gst_cuda_base_filter_decide_allocation (GstBaseTransform *
    trans, GstQuery * query);
 static GstFlowReturn gst_cuda_base_filter_transform (GstBaseTransform * trans,
    GstBuffer * inbuf, GstBuffer * outbuf);
 static gboolean gst_cuda_base_filter_set_info (GstCudaBaseTransform * btrans,
    GstCaps * incaps, GstVideoInfo * in_info, GstCaps * outcaps,
    GstVideoInfo * out_info);
@ -81,12 +85,18 @@ gst_cuda_base_filter_class_init (GstCudaBaseFilterClass * klass)
  trans_class->passthrough_on_same_caps = TRUE;
  trans_class->propose_allocation =
      GST_DEBUG_FUNCPTR (gst_cuda_base_filter_propose_allocation);
  trans_class->decide_allocation =
      GST_DEBUG_FUNCPTR (gst_cuda_base_filter_decide_allocation);
  trans_class->transform = GST_DEBUG_FUNCPTR (gst_cuda_base_filter_transform);
  btrans_class->set_info = GST_DEBUG_FUNCPTR (gst_cuda_base_filter_set_info);
  btrans_class->transform_frame =
      GST_DEBUG_FUNCPTR (gst_cuda_base_filter_transform_frame);
  GST_DEBUG_CATEGORY_INIT (gst_cuda_base_filter_debug,
      "cudabasefilter", 0, "CUDA Base Filter");
  gst_type_mark_as_plugin_api (GST_TYPE_CUDA_BASE_FILTER, 0);
 }
 static void
@ -104,218 +114,228 @@ gst_cuda_base_filter_dispose (GObject * object)
    filter->converter = NULL;
  }
  if (filter->in_fallback) {
    gst_memory_unref (GST_MEMORY_CAST (filter->in_fallback));
    filter->in_fallback = NULL;
  }
  if (filter->out_fallback) {
    gst_memory_unref (GST_MEMORY_CAST (filter->out_fallback));
    filter->out_fallback = NULL;
  }
  gst_clear_object (&filter->allocator);
  G_OBJECT_CLASS (parent_class)->dispose (object);
 }
 /* Prepares the filter for a new pair of input/output formats.
  *
  * Fallback CUDA memories left over from a previous configuration are
  * released, and the CUDA allocator is created from the base transform's
  * context the first time it is needed.  in_info and out_info are accepted
  * but not consulted here.
  *
  * Returns TRUE on success, FALSE when no allocator could be created. */
 static gboolean
 gst_cuda_base_filter_configure (GstCudaBaseFilter * filter,
    GstVideoInfo * in_info, GstVideoInfo * out_info)
 {
  GstCudaBaseTransform *base = GST_CUDA_BASE_TRANSFORM (filter);
  GstCudaMemory *stale_in = filter->in_fallback;
  GstCudaMemory *stale_out = filter->out_fallback;

  /* forget the previous fallback memories, then drop our references */
  filter->in_fallback = NULL;
  filter->out_fallback = NULL;

  if (stale_in != NULL)
    gst_memory_unref (GST_MEMORY_CAST (stale_in));

  if (stale_out != NULL)
    gst_memory_unref (GST_MEMORY_CAST (stale_out));

  /* an already existing allocator is kept and reused */
  if (filter->allocator == NULL) {
    filter->allocator = gst_cuda_allocator_new (base->context);

    if (filter->allocator == NULL) {
      GST_ERROR_OBJECT (filter, "Failed to create CUDA allocator");
      return FALSE;
    }
  }

  return TRUE;
 }
 static gboolean
 gst_cuda_base_filter_set_info (GstCudaBaseTransform * btrans, GstCaps * incaps,
    GstVideoInfo * in_info, GstCaps * outcaps, GstVideoInfo * out_info)
 {
  GstCudaBaseFilter *filter = GST_CUDA_BASE_FILTER (btrans);
  if (!gst_cuda_base_filter_configure (filter, in_info, out_info)) {
    return FALSE;
  }
  if (filter->converter)
    gst_cuda_converter_free (filter->converter);
  filter->converter =
      gst_cuda_converter_new (in_info, out_info, btrans->context);
-  if (filter->converter == NULL)
+  if (!filter->converter) {
-    goto no_converter;
+    GST_ERROR_OBJECT (filter, "could not create converter");
    return FALSE;
  }
  GST_DEBUG_OBJECT (filter, "reconfigured %d %d",
      GST_VIDEO_INFO_FORMAT (in_info), GST_VIDEO_INFO_FORMAT (out_info));
  return TRUE;
 }
-no_converter:
+static gboolean
-  {
+gst_cuda_base_filter_propose_allocation (GstBaseTransform * trans,
-    GST_ERROR_OBJECT (filter, "could not create converter");
+    GstQuery * decide_query, GstQuery * query)
 {
  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
  GstVideoInfo info;
  GstBufferPool *pool;
  GstCaps *caps;
  guint size;
  if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans,
          decide_query, query))
    return FALSE;
  /* passthrough, we're done */
  if (decide_query == NULL)
    return TRUE;
  gst_query_parse_allocation (query, &caps, NULL);
  if (caps == NULL)
    return FALSE;
  if (!gst_video_info_from_caps (&info, caps))
    return FALSE;
  if (gst_query_get_n_allocation_pools (query) == 0) {
    GstStructure *config;
    GstVideoAlignment align;
    GstAllocationParams params = { 0, 31, 0, 0, };
    GstAllocator *allocator = NULL;
    gint i;
    pool = gst_cuda_buffer_pool_new (ctrans->context);
    config = gst_buffer_pool_get_config (pool);
    gst_video_alignment_reset (&align);
    for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
      align.stride_align[i] = 31;
    }
    gst_video_info_align (&info, &align);
    gst_buffer_pool_config_add_option (config,
        GST_BUFFER_POOL_OPTION_VIDEO_META);
    gst_buffer_pool_config_add_option (config,
        GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
    gst_buffer_pool_config_set_video_alignment (config, &align);
    size = GST_VIDEO_INFO_SIZE (&info);
    gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
    gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
    gst_query_add_allocation_pool (query, pool, size, 0, 0);
    if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
      if (params.align < 31)
        params.align = 31;
      gst_query_add_allocation_param (query, allocator, &params);
      gst_buffer_pool_config_set_allocator (config, allocator, &params);
    }
    if (!gst_buffer_pool_set_config (pool, config)) {
      GST_ERROR_OBJECT (ctrans, "failed to set config");
      gst_object_unref (pool);
      return FALSE;
    }
    gst_object_unref (pool);
  }
  gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
  return TRUE;
 }
 /* GstBaseTransform::decide_allocation implementation.
  *
  * Selects the buffer pool used for output buffers.  A pool proposed by
  * downstream is only kept when it is a GstCudaBufferPool created for the
  * same CUDA context as this element; otherwise a new CUDA buffer pool is
  * created from our context.  The chosen pool is configured with the output
  * caps and video meta support, stored in the query, and the decision is
  * then chained up to the parent class.
  *
  * Returns FALSE when the query carries no caps or the caps cannot be parsed
  * as video caps, otherwise the parent class result. */
 static gboolean
 gst_cuda_base_filter_decide_allocation (GstBaseTransform * trans,
    GstQuery * query)
 {
  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
  GstCaps *outcaps = NULL;
  GstBufferPool *pool = NULL;
  guint size, min, max;
  GstStructure *config;
  gboolean update_pool = FALSE;

  gst_query_parse_allocation (query, &outcaps, NULL);
  if (!outcaps)
    return FALSE;

  if (gst_query_get_n_allocation_pools (query) > 0) {
    gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);

    /* Only a CUDA pool bound to our own context is usable as-is */
    if (pool && (!GST_IS_CUDA_BUFFER_POOL (pool) ||
            GST_CUDA_BUFFER_POOL (pool)->context != ctrans->context)) {
      gst_clear_object (&pool);
    }

    /* Slot 0 exists already, so it is overwritten rather than appended */
    update_pool = TRUE;
  } else {
    GstVideoInfo vinfo;

    /* Without a proposed pool the buffer size has to come from the caps;
     * never derive it from a GstVideoInfo that failed to parse */
    if (!gst_video_info_from_caps (&vinfo, outcaps)) {
      GST_ERROR_OBJECT (ctrans, "Invalid caps %" GST_PTR_FORMAT, outcaps);
      return FALSE;
    }

    size = GST_VIDEO_INFO_SIZE (&vinfo);
    min = max = 0;
  }

  if (!pool) {
    GST_DEBUG_OBJECT (ctrans, "create our pool");
    pool = gst_cuda_buffer_pool_new (ctrans->context);
  }

  config = gst_buffer_pool_get_config (pool);
  gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
  gst_buffer_pool_config_set_params (config, outcaps, size, min, max);

  /* Still best-effort as before, but a rejected config is no longer silent */
  if (!gst_buffer_pool_set_config (pool, config))
    GST_WARNING_OBJECT (ctrans, "Buffer pool did not accept the configuration");

  if (update_pool)
    gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
  else
    gst_query_add_allocation_pool (query, pool, size, min, max);

  /* The query holds its own reference now */
  gst_object_unref (pool);

  return GST_BASE_TRANSFORM_CLASS (parent_class)->decide_allocation (trans,
      query);
 }
 static GstFlowReturn
-gst_cuda_base_filter_transform_frame (GstCudaBaseTransform * btrans,
+gst_cuda_base_filter_transform (GstBaseTransform * trans,
-    GstVideoFrame * in_frame, GstCudaMemory * in_cuda_mem,
+    GstBuffer * inbuf, GstBuffer * outbuf)
    GstVideoFrame * out_frame, GstCudaMemory * out_cuda_mem)
 {
-  GstCudaBaseFilter *filter = GST_CUDA_BASE_FILTER (btrans);
+  GstCudaBaseFilter *self = GST_CUDA_BASE_FILTER (trans);
-  gboolean conv_ret;
+  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
-  GstCudaMemory *in_mem;
+  GstVideoFrame in_frame, out_frame;
-  GstCudaMemory *out_mem;
+  GstFlowReturn ret = GST_FLOW_OK;
-  gint i;
+  GstMemory *mem;
  GstCudaMemory *in_cuda_mem = NULL;
  GstCudaMemory *out_cuda_mem = NULL;
-  if (in_cuda_mem) {
+  if (gst_buffer_n_memory (inbuf) != 1) {
-    in_mem = in_cuda_mem;
+    GST_ERROR_OBJECT (self, "Invalid input buffer");
  } else {
    if (!filter->in_fallback) {
      GstCudaAllocationParams params;
      memset (&params, 0, sizeof (GstCudaAllocationParams));
      params.info = btrans->in_info;
      filter->in_fallback =
          (GstCudaMemory *) gst_cuda_allocator_alloc (filter->allocator,
          GST_VIDEO_INFO_SIZE (&params.info), &params);
    }
    if (!filter->in_fallback) {
      GST_ERROR_OBJECT (filter, "Couldn't allocate fallback memory");
      return GST_FLOW_ERROR;
    }
    GST_TRACE_OBJECT (filter, "use CUDA fallback memory input");
    if (!gst_cuda_context_push (btrans->context)) {
      GST_ELEMENT_ERROR (filter, LIBRARY, FAILED, (NULL),
          ("Cannot push CUDA context"));
      return FALSE;
    }
    /* upload frame to device memory */
    for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (in_frame); i++) {
      CUDA_MEMCPY2D param = { 0, };
      guint width, height;
      width = GST_VIDEO_FRAME_COMP_WIDTH (in_frame, i) *
          GST_VIDEO_FRAME_COMP_PSTRIDE (in_frame, i);
      height = GST_VIDEO_FRAME_COMP_HEIGHT (in_frame, i);
      param.srcMemoryType = CU_MEMORYTYPE_HOST;
      param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (in_frame, i);
      param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (in_frame, i);
      param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
      param.dstPitch = filter->in_fallback->stride;
      param.dstDevice =
          filter->in_fallback->data + filter->in_fallback->offset[i];
      param.WidthInBytes = width;
      param.Height = height;
      if (!gst_cuda_result (CuMemcpy2DAsync (&param, btrans->cuda_stream))) {
        gst_cuda_context_pop (NULL);
        GST_ELEMENT_ERROR (filter, LIBRARY, FAILED, (NULL),
            ("Cannot upload input video frame"));
        return GST_FLOW_ERROR;
      }
    }
    gst_cuda_result (CuStreamSynchronize (btrans->cuda_stream));
    gst_cuda_context_pop (NULL);
    in_mem = filter->in_fallback;
  }
  if (out_cuda_mem) {
    out_mem = out_cuda_mem;
  } else {
    if (!filter->out_fallback) {
      GstCudaAllocationParams params;
      memset (&params, 0, sizeof (GstCudaAllocationParams));
      params.info = btrans->out_info;
      filter->out_fallback =
          (GstCudaMemory *) gst_cuda_allocator_alloc (filter->allocator,
          GST_VIDEO_INFO_SIZE (&params.info), &params);
    }
    if (!filter->out_fallback) {
      GST_ERROR_OBJECT (filter, "Couldn't allocate fallback memory");
      return GST_FLOW_ERROR;
    }
    out_mem = filter->out_fallback;
  }
  conv_ret =
      gst_cuda_converter_frame (filter->converter, in_mem, &btrans->in_info,
      out_mem, &btrans->out_info, btrans->cuda_stream);
  if (!conv_ret) {
    GST_ERROR_OBJECT (filter, "Failed to convert frame");
    return GST_FLOW_ERROR;
  }
-  if (!out_cuda_mem) {
+  mem = gst_buffer_peek_memory (inbuf, 0);
-    if (!gst_cuda_context_push (btrans->context)) {
+  if (!gst_is_cuda_memory (mem)) {
-      GST_ELEMENT_ERROR (filter, LIBRARY, FAILED, (NULL),
+    GST_ERROR_OBJECT (self, "Input buffer is not CUDA");
-          ("Cannot push CUDA context"));
+    return GST_FLOW_ERROR;
      return FALSE;
    }
    for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (out_frame); i++) {
      CUDA_MEMCPY2D param = { 0, };
      guint width, height;
      width = GST_VIDEO_FRAME_COMP_WIDTH (out_frame, i) *
          GST_VIDEO_FRAME_COMP_PSTRIDE (out_frame, i);
      height = GST_VIDEO_FRAME_COMP_HEIGHT (out_frame, i);
      param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
      param.srcPitch = out_mem->stride;
      param.srcDevice =
          filter->out_fallback->data + filter->out_fallback->offset[i];
      param.dstMemoryType = CU_MEMORYTYPE_HOST;
      param.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (out_frame, i);
      param.dstHost = GST_VIDEO_FRAME_PLANE_DATA (out_frame, i);
      param.WidthInBytes = width;
      param.Height = height;
      if (!gst_cuda_result (CuMemcpy2DAsync (&param, btrans->cuda_stream))) {
        gst_cuda_context_pop (NULL);
        GST_ELEMENT_ERROR (filter, LIBRARY, FAILED, (NULL),
            ("Cannot upload input video frame"));
        return GST_FLOW_ERROR;
      }
    }
    gst_cuda_result (CuStreamSynchronize (btrans->cuda_stream));
    gst_cuda_context_pop (NULL);
  }
-  return GST_FLOW_OK;
+  in_cuda_mem = GST_CUDA_MEMORY_CAST (mem);
  if (gst_buffer_n_memory (outbuf) != 1) {
    GST_ERROR_OBJECT (self, "Invalid output buffer");
    return GST_FLOW_ERROR;
  }
  mem = gst_buffer_peek_memory (outbuf, 0);
  if (!gst_is_cuda_memory (mem)) {
    GST_ERROR_OBJECT (self, "Input buffer is not CUDA");
    return GST_FLOW_ERROR;
  }
  out_cuda_mem = GST_CUDA_MEMORY_CAST (mem);
  if (!gst_video_frame_map (&in_frame, &ctrans->in_info, inbuf,
          GST_MAP_READ | GST_MAP_CUDA)) {
    GST_ERROR_OBJECT (self, "Failed to map input buffer");
    return GST_FLOW_ERROR;
  }
  if (!gst_video_frame_map (&out_frame, &ctrans->out_info, outbuf,
          GST_MAP_WRITE | GST_MAP_CUDA)) {
    gst_video_frame_unmap (&in_frame);
    GST_ERROR_OBJECT (self, "Failed to map output buffer");
    return GST_FLOW_ERROR;
  }
  if (!gst_cuda_converter_frame (self->converter,
          in_cuda_mem, &ctrans->in_info,
          out_cuda_mem, &ctrans->out_info, ctrans->cuda_stream)) {
    GST_ERROR_OBJECT (self, "Failed to convert frame");
    ret = GST_FLOW_ERROR;
  }
  gst_video_frame_unmap (&out_frame);
  gst_video_frame_unmap (&in_frame);
  return ret;
 }
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasefilter.h
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasefilter.h
@ -42,11 +42,6 @@ struct _GstCudaBaseFilter
  GstCudaBaseTransform parent;
  GstCudaConverter *converter;
  /* fallback CUDA memory */
  GstAllocator *allocator;
  GstCudaMemory *in_fallback;
  GstCudaMemory *out_fallback;
 };
 struct _GstCudaBaseFilterClass
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasetransform.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasetransform.c
@ -58,20 +58,12 @@ static gboolean gst_cuda_base_transform_start (GstBaseTransform * trans);
 static gboolean gst_cuda_base_transform_stop (GstBaseTransform * trans);
 static gboolean gst_cuda_base_transform_set_caps (GstBaseTransform * trans,
    GstCaps * incaps, GstCaps * outcaps);
 static GstFlowReturn gst_cuda_base_transform_transform (GstBaseTransform *
    trans, GstBuffer * inbuf, GstBuffer * outbuf);
 static gboolean gst_cuda_base_transform_get_unit_size (GstBaseTransform * trans,
    GstCaps * caps, gsize * size);
 static gboolean gst_cuda_base_transform_propose_allocation (GstBaseTransform *
    trans, GstQuery * decide_query, GstQuery * query);
 static gboolean gst_cuda_base_transform_decide_allocation (GstBaseTransform *
    trans, GstQuery * query);
 static gboolean gst_cuda_base_transform_query (GstBaseTransform * trans,
    GstPadDirection direction, GstQuery * query);
-static GstFlowReturn
+static void gst_cuda_base_transform_before_transform (GstBaseTransform * trans,
-gst_cuda_base_transform_transform_frame_default (GstCudaBaseTransform * filter,
+    GstBuffer * buffer);
    GstVideoFrame * in_frame, GstCudaMemory * in_cuda_mem,
    GstVideoFrame * out_frame, GstCudaMemory * out_cuda_mem);
 static void
 gst_cuda_base_transform_class_init (GstCudaBaseTransformClass * klass)
@ -104,29 +96,22 @@ gst_cuda_base_transform_class_init (GstCudaBaseTransformClass * klass)
  trans_class->start = GST_DEBUG_FUNCPTR (gst_cuda_base_transform_start);
  trans_class->stop = GST_DEBUG_FUNCPTR (gst_cuda_base_transform_stop);
  trans_class->set_caps = GST_DEBUG_FUNCPTR (gst_cuda_base_transform_set_caps);
  trans_class->transform =
      GST_DEBUG_FUNCPTR (gst_cuda_base_transform_transform);
  trans_class->get_unit_size =
      GST_DEBUG_FUNCPTR (gst_cuda_base_transform_get_unit_size);
  trans_class->propose_allocation =
      GST_DEBUG_FUNCPTR (gst_cuda_base_transform_propose_allocation);
  trans_class->decide_allocation =
      GST_DEBUG_FUNCPTR (gst_cuda_base_transform_decide_allocation);
  trans_class->query = GST_DEBUG_FUNCPTR (gst_cuda_base_transform_query);
-
+  trans_class->before_transform =
-  klass->transform_frame =
+      GST_DEBUG_FUNCPTR (gst_cuda_base_transform_before_transform);
      GST_DEBUG_FUNCPTR (gst_cuda_base_transform_transform_frame_default);
  GST_DEBUG_CATEGORY_INIT (gst_cuda_base_transform_debug,
      "cudabasefilter", 0, "cudabasefilter Element");
  gst_type_mark_as_plugin_api (GST_TYPE_CUDA_BASE_TRANSFORM, 0);
 }
 /* Instance initializer: the element starts out un-negotiated and with the
  * default device id. */
 static void
 gst_cuda_base_transform_init (GstCudaBaseTransform * filter)
 {
  /* no caps have been configured yet */
  filter->negotiated = FALSE;

  /* device selection defaults to DEFAULT_DEVICE_ID */
  filter->device_id = DEFAULT_DEVICE_ID;
 }
 static void
@ -240,12 +225,16 @@ gst_cuda_base_transform_set_caps (GstBaseTransform * trans, GstCaps * incaps,
  }
  /* input caps */
-  if (!gst_video_info_from_caps (&in_info, incaps))
+  if (!gst_video_info_from_caps (&in_info, incaps)) {
-    goto invalid_caps;
+    GST_ERROR_OBJECT (filter, "invalid incaps %" GST_PTR_FORMAT, incaps);
    return FALSE;
  }
  /* output caps */
-  if (!gst_video_info_from_caps (&out_info, outcaps))
+  if (!gst_video_info_from_caps (&out_info, outcaps)) {
-    goto invalid_caps;
+    GST_ERROR_OBJECT (filter, "invalid incaps %" GST_PTR_FORMAT, incaps);
    return FALSE;
  }
  klass = GST_CUDA_BASE_TRANSFORM_GET_CLASS (filter);
  if (klass->set_info)
@ -258,17 +247,7 @@ gst_cuda_base_transform_set_caps (GstBaseTransform * trans, GstCaps * incaps,
    filter->out_info = out_info;
  }
  filter->negotiated = res;
  return res;
  /* ERRORS */
 invalid_caps:
  {
    GST_ERROR_OBJECT (filter, "invalid caps");
    filter->negotiated = FALSE;
    return FALSE;
  }
 }
 static gboolean
@ -285,315 +264,6 @@ gst_cuda_base_transform_get_unit_size (GstBaseTransform * trans, GstCaps * caps,
  return TRUE;
 }
 static GstFlowReturn
 gst_cuda_base_transform_transform (GstBaseTransform * trans,
    GstBuffer * inbuf, GstBuffer * outbuf)
 {
  GstCudaBaseTransform *filter = GST_CUDA_BASE_TRANSFORM (trans);
  GstCudaBaseTransformClass *fclass =
      GST_CUDA_BASE_TRANSFORM_GET_CLASS (filter);
  GstVideoFrame in_frame, out_frame;
  GstFlowReturn ret = GST_FLOW_OK;
  GstMapFlags in_map_flags, out_map_flags;
  GstMemory *mem;
  GstCudaMemory *in_cuda_mem = NULL;
  GstCudaMemory *out_cuda_mem = NULL;
  if (G_UNLIKELY (!filter->negotiated))
    goto unknown_format;
  in_map_flags = GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF;
  out_map_flags = GST_MAP_WRITE | GST_VIDEO_FRAME_MAP_FLAG_NO_REF;
  in_cuda_mem = out_cuda_mem = FALSE;
  if (gst_buffer_n_memory (inbuf) == 1 &&
      (mem = gst_buffer_peek_memory (inbuf, 0)) && gst_is_cuda_memory (mem)) {
    GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
    if (cmem->context == filter->context ||
        gst_cuda_context_get_handle (cmem->context) ==
        gst_cuda_context_get_handle (filter->context) ||
        (gst_cuda_context_can_access_peer (cmem->context, filter->context) &&
            gst_cuda_context_can_access_peer (filter->context,
                cmem->context))) {
      in_map_flags |= GST_MAP_CUDA;
      in_cuda_mem = cmem;
    }
  }
  if (gst_buffer_n_memory (outbuf) == 1 &&
      (mem = gst_buffer_peek_memory (outbuf, 0)) && gst_is_cuda_memory (mem)) {
    GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
    if (cmem->context == filter->context ||
        gst_cuda_context_get_handle (cmem->context) ==
        gst_cuda_context_get_handle (filter->context) ||
        (gst_cuda_context_can_access_peer (cmem->context, filter->context) &&
            gst_cuda_context_can_access_peer (filter->context,
                cmem->context))) {
      out_map_flags |= GST_MAP_CUDA;
      out_cuda_mem = cmem;
    }
  }
  if (!gst_video_frame_map (&in_frame, &filter->in_info, inbuf, in_map_flags))
    goto invalid_buffer;
  if (!gst_video_frame_map (&out_frame, &filter->out_info, outbuf,
          out_map_flags)) {
    gst_video_frame_unmap (&in_frame);
    goto invalid_buffer;
  }
  ret = fclass->transform_frame (filter, &in_frame, in_cuda_mem, &out_frame,
      out_cuda_mem);
  gst_video_frame_unmap (&out_frame);
  gst_video_frame_unmap (&in_frame);
  return ret;
  /* ERRORS */
 unknown_format:
  {
    GST_ELEMENT_ERROR (filter, CORE, NOT_IMPLEMENTED, (NULL),
        ("unknown format"));
    return GST_FLOW_NOT_NEGOTIATED;
  }
 invalid_buffer:
  {
    GST_ELEMENT_WARNING (trans, CORE, NOT_IMPLEMENTED, (NULL),
        ("invalid video buffer received"));
    return GST_FLOW_OK;
  }
 }
 /* Default transform_frame() implementation: a plain plane-by-plane copy of
  * in_frame into out_frame.
  *
  * When neither side has CUDA memory the planes are copied with
  * gst_video_frame_copy_plane().  Otherwise the CUDA context is pushed and
  * every plane is copied with CuMemcpy2DAsync() on the element's stream,
  * using the CUDA memory for a side that has one and the mapped frame data
  * for a side that has not; the stream is synchronized before the context is
  * popped again.
  *
  * Returns GST_FLOW_OK on success and GST_FLOW_ERROR when the context cannot
  * be pushed or a plane copy fails. */
 static GstFlowReturn
 gst_cuda_base_transform_transform_frame_default (GstCudaBaseTransform * filter,
    GstVideoFrame * in_frame, GstCudaMemory * in_cuda_mem,
    GstVideoFrame * out_frame, GstCudaMemory * out_cuda_mem)
 {
  gint plane;

  if (!in_cuda_mem && !out_cuda_mem) {
    /* nothing lives in CUDA memory, regular frame copy */
    for (plane = 0; plane < GST_VIDEO_FRAME_N_PLANES (in_frame); plane++) {
      if (!gst_video_frame_copy_plane (out_frame, in_frame, plane)) {
        GST_ERROR_OBJECT (filter, "Couldn't copy %dth plane", plane);
        return GST_FLOW_ERROR;
      }
    }

    return GST_FLOW_OK;
  }

  if (!gst_cuda_context_push (filter->context)) {
    GST_ELEMENT_ERROR (filter, LIBRARY, FAILED, (NULL),
        ("Cannot push CUDA context"));
    return GST_FLOW_ERROR;
  }

  for (plane = 0; plane < GST_VIDEO_FRAME_N_PLANES (in_frame); plane++) {
    CUDA_MEMCPY2D copy = { 0, };
    guint row_bytes, rows;

    row_bytes = GST_VIDEO_FRAME_COMP_WIDTH (in_frame, plane) *
        GST_VIDEO_FRAME_COMP_PSTRIDE (in_frame, plane);
    rows = GST_VIDEO_FRAME_COMP_HEIGHT (in_frame, plane);

    /* source side */
    if (!in_cuda_mem) {
      copy.srcMemoryType = CU_MEMORYTYPE_HOST;
      copy.srcHost = GST_VIDEO_FRAME_PLANE_DATA (in_frame, plane);
      copy.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (in_frame, plane);
    } else {
      copy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
      copy.srcDevice = in_cuda_mem->data + in_cuda_mem->offset[plane];
      copy.srcPitch = in_cuda_mem->stride;
    }

    /* destination side */
    if (!out_cuda_mem) {
      copy.dstMemoryType = CU_MEMORYTYPE_HOST;
      copy.dstHost = GST_VIDEO_FRAME_PLANE_DATA (out_frame, plane);
      copy.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (out_frame, plane);
    } else {
      copy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
      copy.dstDevice = out_cuda_mem->data + out_cuda_mem->offset[plane];
      copy.dstPitch = out_cuda_mem->stride;
    }

    copy.WidthInBytes = row_bytes;
    copy.Height = rows;

    if (!gst_cuda_result (CuMemcpy2DAsync (&copy, filter->cuda_stream))) {
      gst_cuda_context_pop (NULL);
      GST_ELEMENT_ERROR (filter, LIBRARY, FAILED, (NULL),
          ("Cannot upload input video frame"));
      return GST_FLOW_ERROR;
    }
  }

  /* wait for the queued copies before leaving the context */
  CuStreamSynchronize (filter->cuda_stream);
  gst_cuda_context_pop (NULL);

  return GST_FLOW_OK;
 }
 /* GstBaseTransform::propose_allocation implementation.
  *
  * After chaining up, and unless the element is in passthrough
  * (decide_query == NULL), a buffer pool is offered to upstream if the query
  * does not contain one yet: a CUDA buffer pool when the query caps carry
  * the CUDA memory feature, a plain video buffer pool otherwise.  The pool is
  * configured with video meta and video alignment support and a stride
  * alignment value of 31 for every plane.
  *
  * Returns FALSE when the parent class fails, the query has no (valid video)
  * caps, or the pool rejects its configuration; TRUE otherwise. */
 static gboolean
 gst_cuda_base_transform_propose_allocation (GstBaseTransform * trans,
    GstQuery * decide_query, GstQuery * query)
 {
  GstCudaBaseTransform *self = GST_CUDA_BASE_TRANSFORM (trans);
  GstAllocationParams alloc_params = { 0, 31, 0, 0, };
  GstAllocator *allocator = NULL;
  GstVideoAlignment alignment;
  GstCapsFeatures *features;
  GstStructure *config;
  GstBufferPool *pool;
  GstVideoInfo info;
  GstCaps *caps;
  guint size;
  gint plane;

  if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans,
          decide_query, query))
    return FALSE;

  /* passthrough, we're done */
  if (!decide_query)
    return TRUE;

  gst_query_parse_allocation (query, &caps, NULL);
  if (!caps)
    return FALSE;

  if (!gst_video_info_from_caps (&info, caps))
    return FALSE;

  /* somebody already proposed a pool, leave the query alone */
  if (gst_query_get_n_allocation_pools (query) != 0)
    return TRUE;

  features = gst_caps_get_features (caps, 0);
  if (features != NULL && gst_caps_features_contains (features,
          GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
    GST_DEBUG_OBJECT (self, "upstream support CUDA memory");
    pool = gst_cuda_buffer_pool_new (self->context);
  } else {
    pool = gst_video_buffer_pool_new ();
  }

  config = gst_buffer_pool_get_config (pool);

  /* same stride alignment value for every plane of the format */
  gst_video_alignment_reset (&alignment);
  for (plane = 0; plane < GST_VIDEO_INFO_N_PLANES (&info); plane++)
    alignment.stride_align[plane] = 31;
  gst_video_info_align (&info, &alignment);

  gst_buffer_pool_config_add_option (config,
      GST_BUFFER_POOL_OPTION_VIDEO_META);
  gst_buffer_pool_config_add_option (config,
      GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
  gst_buffer_pool_config_set_video_alignment (config, &alignment);

  /* size of one buffer after the alignment was applied */
  size = GST_VIDEO_INFO_SIZE (&info);
  gst_buffer_pool_config_set_params (config, caps, size, 0, 0);

  gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
  gst_query_add_allocation_pool (query, pool, size, 0, 0);

  if (gst_buffer_pool_config_get_allocator (config, &allocator,
          &alloc_params)) {
    /* raise the memory alignment to at least 31 */
    if (alloc_params.align < 31)
      alloc_params.align = 31;

    gst_query_add_allocation_param (query, allocator, &alloc_params);
    gst_buffer_pool_config_set_allocator (config, allocator, &alloc_params);
  }

  if (!gst_buffer_pool_set_config (pool, config)) {
    GST_ERROR_OBJECT (self, "failed to set config");
    gst_object_unref (pool);
    return FALSE;
  }

  gst_object_unref (pool);

  return TRUE;
 }
 /* GstBaseTransform::decide_allocation implementation.
  *
  * Chooses the pool for output buffers.  When the output caps carry the CUDA
  * memory feature, a pool proposed by downstream is only kept if it is a
  * GstCudaBufferPool; if no usable pool remains, a CUDA buffer pool (CUDA
  * caps) or a plain video buffer pool (other caps) is created.  The pool is
  * configured with the output caps and video meta support, stored in the
  * query, and the decision is chained up to the parent class.
  *
  * Returns FALSE when the query carries no caps or the caps cannot be parsed
  * as video caps, otherwise the parent class result. */
 static gboolean
 gst_cuda_base_transform_decide_allocation (GstBaseTransform * trans,
    GstQuery * query)
 {
  GstCudaBaseTransform *filter = GST_CUDA_BASE_TRANSFORM (trans);
  GstCaps *outcaps = NULL;
  GstBufferPool *pool = NULL;
  guint size, min, max;
  GstStructure *config;
  gboolean update_pool = FALSE;
  gboolean need_cuda = FALSE;
  GstCapsFeatures *features;

  gst_query_parse_allocation (query, &outcaps, NULL);
  if (!outcaps)
    return FALSE;

  features = gst_caps_get_features (outcaps, 0);
  if (features && gst_caps_features_contains (features,
          GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
    need_cuda = TRUE;
  }

  if (gst_query_get_n_allocation_pools (query) > 0) {
    gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);

    /* when cuda device memory is supported, but pool is not cudabufferpool */
    if (need_cuda && pool && !GST_IS_CUDA_BUFFER_POOL (pool))
      gst_clear_object (&pool);

    /* Slot 0 exists already, so it is overwritten rather than appended */
    update_pool = TRUE;
  } else {
    GstVideoInfo vinfo;

    /* Without a proposed pool the buffer size has to come from the caps;
     * never derive it from a GstVideoInfo that failed to parse */
    if (!gst_video_info_from_caps (&vinfo, outcaps)) {
      GST_ERROR_OBJECT (filter, "Invalid caps %" GST_PTR_FORMAT, outcaps);
      return FALSE;
    }

    size = GST_VIDEO_INFO_SIZE (&vinfo);
    min = max = 0;
  }

  if (!pool) {
    GST_DEBUG_OBJECT (filter, "create our pool");

    if (need_cuda)
      pool = gst_cuda_buffer_pool_new (filter->context);
    else
      pool = gst_video_buffer_pool_new ();
  }

  config = gst_buffer_pool_get_config (pool);
  gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
  gst_buffer_pool_config_set_params (config, outcaps, size, min, max);

  /* Still best-effort as before, but a rejected config is no longer silent */
  if (!gst_buffer_pool_set_config (pool, config))
    GST_WARNING_OBJECT (filter, "Buffer pool did not accept the configuration");

  if (update_pool)
    gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
  else
    gst_query_add_allocation_pool (query, pool, size, min, max);

  /* The query holds its own reference now */
  gst_object_unref (pool);

  return GST_BASE_TRANSFORM_CLASS (parent_class)->decide_allocation (trans,
      query);
 }
 static gboolean
 gst_cuda_base_transform_query (GstBaseTransform * trans,
    GstPadDirection direction, GstQuery * query)
@ -617,3 +287,72 @@ gst_cuda_base_transform_query (GstBaseTransform * trans,
  return GST_BASE_TRANSFORM_CLASS (parent_class)->query (trans, direction,
      query);
 }
 /* GstBaseTransform::before_transform implementation.
  *
  * Follows the CUDA context of incoming buffers.  When the first memory of
  * buffer is CUDA memory that belongs to a context other than ours, and the
  * element either accepts any device (device_id < 0) or that context reports
  * the configured device id through its "cuda-device-id" property, the
  * element switches to the buffer's context, re-runs set_caps() with the
  * current pad caps so the subclass can rebuild its internal objects, and
  * marks the source pad for reconfiguration so the buffer pool is updated.
  *
  * Nothing happens for buffers without memory, for non-CUDA memory, when the
  * context is unchanged, or when either pad has no current caps. */
 static void
 gst_cuda_base_transform_before_transform (GstBaseTransform * trans,
    GstBuffer * buffer)
 {
  GstCudaBaseTransform *self = GST_CUDA_BASE_TRANSFORM (trans);
  GstCudaMemory *cmem;
  GstMemory *mem;
  GstCaps *in_caps = NULL;
  GstCaps *out_caps = NULL;

  /* A buffer without memory has no context to follow, and index 0 would
   * not be a valid index for gst_buffer_peek_memory() */
  if (gst_buffer_n_memory (buffer) == 0)
    return;

  mem = gst_buffer_peek_memory (buffer, 0);
  /* The input is not necessarily CUDA memory */
  if (!gst_is_cuda_memory (mem))
    return;

  cmem = GST_CUDA_MEMORY_CAST (mem);
  /* Same context, nothing to do */
  if (self->context == cmem->context)
    return;

  /* A negative device id means any device is acceptable.  Otherwise the
   * buffer's context must live on the GPU the user asked for. */
  if (self->device_id >= 0) {
    guint device_id = 0;

    g_object_get (cmem->context, "cuda-device-id", &device_id, NULL);
    if (device_id != (guint) self->device_id)
      return;
  }

  /* The caps are only needed once a context switch is certain, so they are
   * not fetched for every buffer */
  in_caps = gst_pad_get_current_caps (GST_BASE_TRANSFORM_SINK_PAD (trans));
  if (!in_caps) {
    GST_WARNING_OBJECT (self, "sinkpad has null caps");
    goto out;
  }

  out_caps = gst_pad_get_current_caps (GST_BASE_TRANSFORM_SRC_PAD (trans));
  if (!out_caps) {
    GST_WARNING_OBJECT (self, "Has no configured output caps");
    goto out;
  }

  GST_INFO_OBJECT (self, "Updating device %" GST_PTR_FORMAT " -> %"
      GST_PTR_FORMAT, self->context, cmem->context);

  /* gst_clear_object() copes with a context that was never set */
  gst_clear_object (&self->context);
  self->context = gst_object_ref (cmem->context);

  /* subclass will update internal object.
   * Note that gst_base_transform_reconfigure() might not trigger this
   * unless caps was changed meanwhile */
  if (!gst_cuda_base_transform_set_caps (trans, in_caps, out_caps)) {
    GST_WARNING_OBJECT (self,
        "Failed to reconfigure with the new CUDA context");
  }

  /* Mark reconfigure so that we can update pool */
  gst_base_transform_reconfigure_src (trans);

 out:
  gst_clear_caps (&in_caps);
  gst_clear_caps (&out_caps);

  return;
 }
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasetransform.h
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasetransform.h
@ -42,14 +42,12 @@ struct _GstCudaBaseTransform
 {
  GstBaseTransform parent;
-  gboolean negotiated;
+  GstCudaContext *context;
  CUstream cuda_stream;
  GstVideoInfo in_info;
  GstVideoInfo out_info;
  GstCudaContext *context;
  CUstream cuda_stream;
  gint device_id;
 };
@ -57,19 +55,17 @@ struct _GstCudaBaseTransformClass
 {
  GstBaseTransformClass parent_class;
-  gboolean      (*set_info)           (GstCudaBaseTransform *filter,
+  gboolean  (*set_info) (GstCudaBaseTransform *filter,
-                                       GstCaps *incaps, GstVideoInfo *in_info,
+                         GstCaps *incaps,
-                                       GstCaps *outcaps, GstVideoInfo *out_info);
+                         GstVideoInfo *in_info,
-
+                         GstCaps *outcaps,
-  GstFlowReturn (*transform_frame)    (GstCudaBaseTransform *filter,
+                         GstVideoInfo *out_info);
                                       GstVideoFrame *in_frame,
                                       GstCudaMemory *in_cuda_mem,
                                       GstVideoFrame *out_frame,
                                       GstCudaMemory *out_cuda_mem);
 };
 GType gst_cuda_base_transform_get_type (void);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(GstCudaBaseTransform, gst_object_unref)
 G_END_DECLS
 #endif /* __GST_CUDA_BASE_TRANSFORM_H__ */
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaconvert.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaconvert.c
@ -246,8 +246,6 @@ gst_cuda_convert_class_init (GstCudaConvertClass * klass)
  GST_DEBUG_CATEGORY_INIT (gst_cuda_convert_debug,
      "cudaconvert", 0, "Video ColorSpace convert using CUDA");
  gst_type_mark_as_plugin_api (GST_TYPE_CUDA_BASE_FILTER, 0);
 }
 static void
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudadownload.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudadownload.c
@ -31,6 +31,7 @@
 #endif
 #include "gstcudadownload.h"
 #include "gstcudaformat.h"
 GST_DEBUG_CATEGORY_STATIC (gst_cuda_download_debug);
 #define GST_CAT_DEFAULT gst_cuda_download_debug
@ -38,19 +39,34 @@ GST_DEBUG_CATEGORY_STATIC (gst_cuda_download_debug);
 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
-    GST_STATIC_CAPS ("video/x-raw(" GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY
+    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE_WITH_FEATURES
-        "); video/x-raw"));
+        (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY,
            GST_CUDA_FORMATS) ";" GST_VIDEO_CAPS_MAKE (GST_CUDA_FORMATS)));
 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
-    GST_STATIC_CAPS ("video/x-raw"));
+    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE (GST_CUDA_FORMATS) ";"
        GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY,
            GST_CUDA_FORMATS)));
 /* Instance structure of the cudadownload element. It adds no state of its
 * own; the only member is the GstCudaBaseTransform parent instance. */
 struct _GstCudaDownload
 {
  GstCudaBaseTransform parent;
 };
 #define gst_cuda_download_parent_class parent_class
 G_DEFINE_TYPE (GstCudaDownload, gst_cuda_download,
    GST_TYPE_CUDA_BASE_TRANSFORM);
 /* Forward declarations of the file-local functions that class_init assigns
 * to the GstBaseTransformClass transform_caps, propose_allocation,
 * decide_allocation and transform slots. */
 static GstCaps *gst_cuda_download_transform_caps (GstBaseTransform * trans,
    GstPadDirection direction, GstCaps * caps, GstCaps * filter);
 static gboolean gst_cuda_download_propose_allocation (GstBaseTransform * trans,
    GstQuery * decide_query, GstQuery * query);
 static gboolean gst_cuda_download_decide_allocation (GstBaseTransform * trans,
    GstQuery * query);
 static GstFlowReturn gst_cuda_download_transform (GstBaseTransform * trans,
    GstBuffer * inbuf, GstBuffer * outbuf);
 static void
 gst_cuda_download_class_init (GstCudaDownloadClass * klass)
@ -73,6 +89,11 @@ gst_cuda_download_class_init (GstCudaDownloadClass * klass)
  trans_class->transform_caps =
      GST_DEBUG_FUNCPTR (gst_cuda_download_transform_caps);
  trans_class->propose_allocation =
      GST_DEBUG_FUNCPTR (gst_cuda_download_propose_allocation);
  trans_class->decide_allocation =
      GST_DEBUG_FUNCPTR (gst_cuda_download_decide_allocation);
  trans_class->transform = GST_DEBUG_FUNCPTR (gst_cuda_download_transform);
  GST_DEBUG_CATEGORY_INIT (gst_cuda_download_debug,
      "cudadownload", 0, "cudadownload Element");
@ -129,3 +150,191 @@ gst_cuda_download_transform_caps (GstBaseTransform * trans,
  return result;
 }
 /* propose_allocation implementation: answers an allocation query coming
 * from upstream.
 *
 * After chaining up to the parent class, a non-passthrough query that does
 * not carry a pool yet is given one by this element: a CUDA buffer pool on
 * ctrans->context when the query caps carry the CUDA memory feature, a plain
 * video buffer pool otherwise. The pool is configured with a stride_align of
 * 31 on every plane and with the video-meta and video-alignment options.
 * Video meta support is advertised on the query exactly once.
 *
 * Returns FALSE when the parent class fails, when the query has no caps or
 * caps that are not valid video caps, or when the pool rejects its
 * configuration; TRUE otherwise. */
 static gboolean
 gst_cuda_download_propose_allocation (GstBaseTransform * trans,
    GstQuery * decide_query, GstQuery * query)
 {
  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
  GstVideoInfo info;
  GstBufferPool *pool;
  GstCaps *caps;
  guint size;
  if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans,
          decide_query, query))
    return FALSE;
  /* passthrough, we're done */
  if (decide_query == NULL)
    return TRUE;
  gst_query_parse_allocation (query, &caps, NULL);
  if (caps == NULL)
    return FALSE;
  if (!gst_video_info_from_caps (&info, caps))
    return FALSE;
  if (gst_query_get_n_allocation_pools (query) == 0) {
    GstCapsFeatures *features;
    GstStructure *config;
    GstVideoAlignment align;
    GstAllocationParams params = { 0, 31, 0, 0, };
    GstAllocator *allocator = NULL;
    /* unsigned, like the plane count it is compared against */
    guint i;
    features = gst_caps_get_features (caps, 0);
    if (features && gst_caps_features_contains (features,
            GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
      GST_DEBUG_OBJECT (ctrans, "upstream support CUDA memory");
      pool = gst_cuda_buffer_pool_new (ctrans->context);
    } else {
      pool = gst_video_buffer_pool_new ();
    }
    config = gst_buffer_pool_get_config (pool);
    gst_video_alignment_reset (&align);
    for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
      align.stride_align[i] = 31;
    }
    /* Applying the alignment updates info, so the size read below is the
     * aligned one */
    gst_video_info_align (&info, &align);
    gst_buffer_pool_config_add_option (config,
        GST_BUFFER_POOL_OPTION_VIDEO_META);
    gst_buffer_pool_config_add_option (config,
        GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
    gst_buffer_pool_config_set_video_alignment (config, &align);
    size = GST_VIDEO_INFO_SIZE (&info);
    gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
    /* The video meta is registered once, after this block, so it is not
     * added here as well */
    gst_query_add_allocation_pool (query, pool, size, 0, 0);
    if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
      if (params.align < 31)
        params.align = 31;
      gst_query_add_allocation_param (query, allocator, &params);
      gst_buffer_pool_config_set_allocator (config, allocator, &params);
    }
    if (!gst_buffer_pool_set_config (pool, config)) {
      GST_ERROR_OBJECT (ctrans, "failed to set config");
      gst_object_unref (pool);
      return FALSE;
    }
    /* The query holds its own reference on the pool */
    gst_object_unref (pool);
  }
  gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
  return TRUE;
 }
 /* decide_allocation implementation: chooses the pool used for output
 * buffers.
 *
 * A pool offered in the query is reused unless the output caps carry the
 * CUDA memory feature and the pool is either not a CUDA buffer pool or
 * belongs to a context other than ctrans->context. When no usable pool
 * remains, a CUDA buffer pool (CUDA caps) or a video buffer pool is created.
 * The selected pool is configured with the video-meta option and stored in
 * the query before chaining up to the parent class.
 *
 * Returns FALSE when the query has no caps or, with no pool offered, the
 * caps cannot be parsed as video caps; otherwise the parent class result. */
 static gboolean
 gst_cuda_download_decide_allocation (GstBaseTransform * trans, GstQuery * query)
 {
  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
  GstCaps *outcaps = NULL;
  GstBufferPool *pool = NULL;
  guint size, min, max;
  GstStructure *config;
  gboolean update_pool = FALSE;
  gboolean need_cuda = FALSE;
  GstCapsFeatures *features;
  gst_query_parse_allocation (query, &outcaps, NULL);
  if (!outcaps)
    return FALSE;
  features = gst_caps_get_features (outcaps, 0);
  if (features && gst_caps_features_contains (features,
          GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
    need_cuda = TRUE;
  }
  if (gst_query_get_n_allocation_pools (query) > 0) {
    gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);
    /* For CUDA output, drop a pool that is not a CUDA pool or that lives on
     * a different context; it is replaced below */
    if (need_cuda && pool) {
      if (!GST_IS_CUDA_BUFFER_POOL (pool)) {
        gst_clear_object (&pool);
      } else {
        GstCudaBufferPool *cpool = GST_CUDA_BUFFER_POOL (pool);
        if (cpool->context != ctrans->context) {
          gst_clear_object (&pool);
        }
      }
    }
    update_pool = TRUE;
  } else {
    GstVideoInfo vinfo;
    /* Do not derive a size from an unparsed (uninitialised) video info */
    if (!gst_video_info_from_caps (&vinfo, outcaps)) {
      GST_ERROR_OBJECT (ctrans, "Invalid caps %" GST_PTR_FORMAT, outcaps);
      return FALSE;
    }
    size = GST_VIDEO_INFO_SIZE (&vinfo);
    min = max = 0;
  }
  if (!pool) {
    GST_DEBUG_OBJECT (ctrans, "create our pool");
    if (need_cuda)
      pool = gst_cuda_buffer_pool_new (ctrans->context);
    else
      pool = gst_video_buffer_pool_new ();
  }
  config = gst_buffer_pool_get_config (pool);
  gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
  gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
  gst_buffer_pool_set_config (pool, config);
  /* Replace the offered entry if there was one, otherwise append ours */
  if (update_pool)
    gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
  else
    gst_query_add_allocation_pool (query, pool, size, min, max);
  gst_object_unref (pool);
  return GST_BASE_TRANSFORM_CLASS (parent_class)->decide_allocation (trans,
      query);
 }
 /* transform implementation: copies the video frame in @inbuf into @outbuf.
 *
 * Both buffers are mapped as video frames using the in_info / out_info kept
 * by the base class (input for reading, output for writing), the frame is
 * copied with gst_video_frame_copy() and both mappings are released.
 *
 * Returns GST_FLOW_ERROR when either buffer cannot be mapped or the copy
 * fails, GST_FLOW_OK otherwise. */
 static GstFlowReturn
 gst_cuda_download_transform (GstBaseTransform * trans, GstBuffer * inbuf,
    GstBuffer * outbuf)
 {
  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
  GstVideoFrame in_frame, out_frame;
  gboolean ret;
  if (!gst_video_frame_map (&in_frame, &ctrans->in_info, inbuf, GST_MAP_READ)) {
    GST_ERROR_OBJECT (ctrans, "Failed to map input buffer");
    return GST_FLOW_ERROR;
  }
  if (!gst_video_frame_map (&out_frame,
          &ctrans->out_info, outbuf, GST_MAP_WRITE)) {
    /* The input frame is already mapped at this point; release it */
    gst_video_frame_unmap (&in_frame);
    GST_ERROR_OBJECT (ctrans, "Failed to map output buffer");
    return GST_FLOW_ERROR;
  }
  ret = gst_video_frame_copy (&out_frame, &in_frame);
  gst_video_frame_unmap (&out_frame);
  gst_video_frame_unmap (&in_frame);
  if (!ret) {
    GST_ERROR_OBJECT (ctrans, "Failed to copy frame");
    return GST_FLOW_ERROR;
  }
  return GST_FLOW_OK;
 }
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudadownload.h
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudadownload.h
@ -17,35 +17,15 @@
 * Boston, MA 02110-1301, USA.
 */
-#ifndef __GST_CUDA_DOWNLOAD_H__
+#pragma once
 #define __GST_CUDA_DOWNLOAD_H__
 #include "gstcudabasetransform.h"
 G_BEGIN_DECLS
-#define GST_TYPE_CUDA_DOWNLOAD             (gst_cuda_download_get_type())
+#define GST_TYPE_CUDA_DOWNLOAD (gst_cuda_download_get_type())
-#define GST_CUDA_DOWNLOAD(obj)             (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_CUDA_DOWNLOAD,GstCudaDownload))
+G_DECLARE_FINAL_TYPE (GstCudaDownload,
-#define GST_CUDA_DOWNLOAD_CLASS(klass)     (G_TYPE_CHECK_CLASS_CAST((klass), GST_TYPE_CUDA_DOWNLOAD,GstCudaDownloadClass))
+    gst_cuda_download, GST, CUDA_DOWNLOAD, GstCudaBaseTransform);
 #define GST_CUDA_DOWNLOAD_GET_CLASS(obj)   (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_CUDA_DOWNLOAD,GstCudaDownloadClass))
 #define GST_IS_CUDA_DOWNLOAD(obj)          (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_CUDA_DOWNLOAD))
 #define GST_IS_CUDA_DOWNLOAD_CLASS(klass)  (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_CUDA_DOWNLOAD))
 typedef struct _GstCudaDownload GstCudaDownload;
 typedef struct _GstCudaDownloadClass GstCudaDownloadClass;
 struct _GstCudaDownload
 {
  GstCudaBaseTransform parent;
 };
 struct _GstCudaDownloadClass
 {
  GstCudaBaseTransformClass parent_class;
 };
 GType gst_cuda_download_get_type (void);
 G_END_DECLS
 #endif /* __GST_CUDA_DOWNLOAD_H__ */
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaformat.h
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaformat.h
@ -0,0 +1,30 @@
 /* GStreamer
 * Copyright (C) 2022 Seungha Yang <seungha@centricular.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
 #pragma once
 #include <gst/gst.h>
 G_BEGIN_DECLS
 /* Caps-style list of raw video format names, written as a string literal so
 * it can be passed to GST_VIDEO_CAPS_MAKE() and
 * GST_VIDEO_CAPS_MAKE_WITH_FEATURES() when building pad template caps. */
 #define GST_CUDA_FORMATS \
    "{ I420, YV12, NV12, NV21, P010_10LE, P016_LE, I420_10LE, Y444, Y444_16LE, " \
    "BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, BGR, BGR10A2_LE, RGB10A2_LE }"
 G_END_DECLS
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaupload.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaupload.c
@ -30,6 +30,7 @@
 #endif
 #include "gstcudaupload.h"
 #include "gstcudaformat.h"
 GST_DEBUG_CATEGORY_STATIC (gst_cuda_upload_debug);
 #define GST_CAT_DEFAULT gst_cuda_upload_debug
@ -37,27 +38,38 @@ GST_DEBUG_CATEGORY_STATIC (gst_cuda_upload_debug);
 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
-    GST_STATIC_CAPS ("video/x-raw; video/x-raw("
+    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE (GST_CUDA_FORMATS) ";"
-        GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY ")"));
+        GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY,
            GST_CUDA_FORMATS)));
 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
-    GST_STATIC_CAPS ("video/x-raw(" GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY ")"));
+    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE_WITH_FEATURES
        (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY,
            GST_CUDA_FORMATS) ";" GST_VIDEO_CAPS_MAKE (GST_CUDA_FORMATS)));
 /* Instance structure of the cudaupload element. It adds no state of its
 * own; the only member is the GstCudaBaseTransform parent instance. */
 struct _GstCudaUpload
 {
  GstCudaBaseTransform parent;
 };
 #define gst_cuda_upload_parent_class parent_class
 G_DEFINE_TYPE (GstCudaUpload, gst_cuda_upload, GST_TYPE_CUDA_BASE_TRANSFORM);
 /* Forward declarations of the file-local functions that class_init assigns
 * to the GstBaseTransformClass transform_caps, propose_allocation,
 * decide_allocation and transform slots. */
 static GstCaps *gst_cuda_upload_transform_caps (GstBaseTransform * trans,
    GstPadDirection direction, GstCaps * caps, GstCaps * filter);
 static gboolean gst_cuda_upload_propose_allocation (GstBaseTransform * trans,
    GstQuery * decide_query, GstQuery * query);
 static gboolean gst_cuda_upload_decide_allocation (GstBaseTransform * trans,
    GstQuery * query);
 static GstFlowReturn gst_cuda_upload_transform (GstBaseTransform * trans,
    GstBuffer * inbuf, GstBuffer * outbuf);
 static void
 gst_cuda_upload_class_init (GstCudaUploadClass * klass)
 {
-  GstElementClass *element_class;
+  GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
-  GstBaseTransformClass *trans_class;
+  GstBaseTransformClass *trans_class = GST_BASE_TRANSFORM_CLASS (klass);
  element_class = GST_ELEMENT_CLASS (klass);
  trans_class = GST_BASE_TRANSFORM_CLASS (klass);
  gst_element_class_add_static_pad_template (element_class, &sink_template);
  gst_element_class_add_static_pad_template (element_class, &src_template);
@ -71,8 +83,12 @@ gst_cuda_upload_class_init (GstCudaUploadClass * klass)
  trans_class->transform_caps =
      GST_DEBUG_FUNCPTR (gst_cuda_upload_transform_caps);
  trans_class->propose_allocation =
      GST_DEBUG_FUNCPTR (gst_cuda_upload_propose_allocation);
  trans_class->decide_allocation =
      GST_DEBUG_FUNCPTR (gst_cuda_upload_decide_allocation);
  trans_class->transform = GST_DEBUG_FUNCPTR (gst_cuda_upload_transform);
  gst_type_mark_as_plugin_api (GST_TYPE_CUDA_BASE_TRANSFORM, 0);
  GST_DEBUG_CATEGORY_INIT (gst_cuda_upload_debug,
      "cudaupload", 0, "cudaupload Element");
 }
@ -128,3 +144,191 @@ gst_cuda_upload_transform_caps (GstBaseTransform * trans,
  return result;
 }
 /* propose_allocation implementation: answers an allocation query coming
 * from upstream.
 *
 * After chaining up to the parent class, a non-passthrough query that does
 * not carry a pool yet is given one by this element: a CUDA buffer pool on
 * ctrans->context when the query caps carry the CUDA memory feature, a plain
 * video buffer pool otherwise. The pool is configured with a stride_align of
 * 31 on every plane and with the video-meta and video-alignment options.
 * Video meta support is advertised on the query exactly once.
 *
 * Returns FALSE when the parent class fails, when the query has no caps or
 * caps that are not valid video caps, or when the pool rejects its
 * configuration; TRUE otherwise. */
 static gboolean
 gst_cuda_upload_propose_allocation (GstBaseTransform * trans,
    GstQuery * decide_query, GstQuery * query)
 {
  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
  GstVideoInfo info;
  GstBufferPool *pool;
  GstCaps *caps;
  guint size;
  if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans,
          decide_query, query))
    return FALSE;
  /* passthrough, we're done */
  if (decide_query == NULL)
    return TRUE;
  gst_query_parse_allocation (query, &caps, NULL);
  if (caps == NULL)
    return FALSE;
  if (!gst_video_info_from_caps (&info, caps))
    return FALSE;
  if (gst_query_get_n_allocation_pools (query) == 0) {
    GstCapsFeatures *features;
    GstStructure *config;
    GstVideoAlignment align;
    GstAllocationParams params = { 0, 31, 0, 0, };
    GstAllocator *allocator = NULL;
    /* unsigned, like the plane count it is compared against */
    guint i;
    features = gst_caps_get_features (caps, 0);
    if (features && gst_caps_features_contains (features,
            GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
      GST_DEBUG_OBJECT (ctrans, "upstream support CUDA memory");
      pool = gst_cuda_buffer_pool_new (ctrans->context);
    } else {
      pool = gst_video_buffer_pool_new ();
    }
    config = gst_buffer_pool_get_config (pool);
    gst_video_alignment_reset (&align);
    for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
      align.stride_align[i] = 31;
    }
    /* Applying the alignment updates info, so the size read below is the
     * aligned one */
    gst_video_info_align (&info, &align);
    gst_buffer_pool_config_add_option (config,
        GST_BUFFER_POOL_OPTION_VIDEO_META);
    gst_buffer_pool_config_add_option (config,
        GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
    gst_buffer_pool_config_set_video_alignment (config, &align);
    size = GST_VIDEO_INFO_SIZE (&info);
    gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
    /* The video meta is registered once, after this block, so it is not
     * added here as well */
    gst_query_add_allocation_pool (query, pool, size, 0, 0);
    if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
      if (params.align < 31)
        params.align = 31;
      gst_query_add_allocation_param (query, allocator, &params);
      gst_buffer_pool_config_set_allocator (config, allocator, &params);
    }
    if (!gst_buffer_pool_set_config (pool, config)) {
      GST_ERROR_OBJECT (ctrans, "failed to set config");
      gst_object_unref (pool);
      return FALSE;
    }
    /* The query holds its own reference on the pool */
    gst_object_unref (pool);
  }
  gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
  return TRUE;
 }
 /* decide_allocation implementation: chooses the pool used for output
 * buffers.
 *
 * A pool offered in the query is reused unless the output caps carry the
 * CUDA memory feature and the pool is either not a CUDA buffer pool or
 * belongs to a context other than ctrans->context. When no usable pool
 * remains, a CUDA buffer pool (CUDA caps) or a video buffer pool is created.
 * The selected pool is configured with the video-meta option and stored in
 * the query before chaining up to the parent class.
 *
 * Returns FALSE when the query has no caps or, with no pool offered, the
 * caps cannot be parsed as video caps; otherwise the parent class result. */
 static gboolean
 gst_cuda_upload_decide_allocation (GstBaseTransform * trans, GstQuery * query)
 {
  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
  GstCaps *outcaps = NULL;
  GstBufferPool *pool = NULL;
  guint size, min, max;
  GstStructure *config;
  gboolean update_pool = FALSE;
  gboolean need_cuda = FALSE;
  GstCapsFeatures *features;
  gst_query_parse_allocation (query, &outcaps, NULL);
  if (!outcaps)
    return FALSE;
  features = gst_caps_get_features (outcaps, 0);
  if (features && gst_caps_features_contains (features,
          GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
    need_cuda = TRUE;
  }
  if (gst_query_get_n_allocation_pools (query) > 0) {
    gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);
    /* For CUDA output, drop a pool that is not a CUDA pool or that lives on
     * a different context; it is replaced below */
    if (need_cuda && pool) {
      if (!GST_IS_CUDA_BUFFER_POOL (pool)) {
        gst_clear_object (&pool);
      } else {
        GstCudaBufferPool *cpool = GST_CUDA_BUFFER_POOL (pool);
        if (cpool->context != ctrans->context) {
          gst_clear_object (&pool);
        }
      }
    }
    update_pool = TRUE;
  } else {
    GstVideoInfo vinfo;
    /* Do not derive a size from an unparsed (uninitialised) video info */
    if (!gst_video_info_from_caps (&vinfo, outcaps)) {
      GST_ERROR_OBJECT (ctrans, "Invalid caps %" GST_PTR_FORMAT, outcaps);
      return FALSE;
    }
    size = GST_VIDEO_INFO_SIZE (&vinfo);
    min = max = 0;
  }
  if (!pool) {
    GST_DEBUG_OBJECT (ctrans, "create our pool");
    if (need_cuda)
      pool = gst_cuda_buffer_pool_new (ctrans->context);
    else
      pool = gst_video_buffer_pool_new ();
  }
  config = gst_buffer_pool_get_config (pool);
  gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
  gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
  gst_buffer_pool_set_config (pool, config);
  /* Replace the offered entry if there was one, otherwise append ours */
  if (update_pool)
    gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
  else
    gst_query_add_allocation_pool (query, pool, size, min, max);
  gst_object_unref (pool);
  return GST_BASE_TRANSFORM_CLASS (parent_class)->decide_allocation (trans,
      query);
 }
 /* transform implementation: copies the video frame in @inbuf into @outbuf.
 *
 * Both buffers are mapped as video frames using the in_info / out_info kept
 * by the base class (input for reading, output for writing), the frame is
 * copied with gst_video_frame_copy() and both mappings are released.
 *
 * Returns GST_FLOW_ERROR when either buffer cannot be mapped or the copy
 * fails, GST_FLOW_OK otherwise. */
 static GstFlowReturn
 gst_cuda_upload_transform (GstBaseTransform * trans, GstBuffer * inbuf,
    GstBuffer * outbuf)
 {
  GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans);
  GstVideoFrame in_frame, out_frame;
  gboolean ret;
  if (!gst_video_frame_map (&in_frame, &ctrans->in_info, inbuf, GST_MAP_READ)) {
    GST_ERROR_OBJECT (ctrans, "Failed to map input buffer");
    return GST_FLOW_ERROR;
  }
  if (!gst_video_frame_map (&out_frame,
          &ctrans->out_info, outbuf, GST_MAP_WRITE)) {
    /* The input frame is already mapped at this point; release it */
    gst_video_frame_unmap (&in_frame);
    GST_ERROR_OBJECT (ctrans, "Failed to map output buffer");
    return GST_FLOW_ERROR;
  }
  ret = gst_video_frame_copy (&out_frame, &in_frame);
  gst_video_frame_unmap (&out_frame);
  gst_video_frame_unmap (&in_frame);
  if (!ret) {
    GST_ERROR_OBJECT (ctrans, "Failed to copy frame");
    return GST_FLOW_ERROR;
  }
  return GST_FLOW_OK;
 }
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaupload.h
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaupload.h
@ -17,35 +17,15 @@
 * Boston, MA 02110-1301, USA.
 */
-#ifndef __GST_CUDA_UPLOAD_H__
+#pragma once
 #define __GST_CUDA_UPLOAD_H__
 #include "gstcudabasetransform.h"
 G_BEGIN_DECLS
-#define GST_TYPE_CUDA_UPLOAD             (gst_cuda_upload_get_type())
+#define GST_TYPE_CUDA_UPLOAD (gst_cuda_upload_get_type())
-#define GST_CUDA_UPLOAD(obj)             (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_CUDA_UPLOAD,GstCudaUpload))
+G_DECLARE_FINAL_TYPE (GstCudaUpload,
-#define GST_CUDA_UPLOAD_CLASS(klass)     (G_TYPE_CHECK_CLASS_CAST((klass), GST_TYPE_CUDA_UPLOAD,GstCudaUploadClass))
+    gst_cuda_upload, GST, CUDA_UPLOAD, GstCudaBaseTransform);
 #define GST_CUDA_UPLOAD_GET_CLASS(obj)   (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_CUDA_UPLOAD,GstCudaUploadClass))
 #define GST_IS_CUDA_UPLOAD(obj)          (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_CUDA_UPLOAD))
 #define GST_IS_CUDA_UPLOAD_CLASS(klass)  (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_CUDA_UPLOAD))
 typedef struct _GstCudaUpload GstCudaUpload;
 typedef struct _GstCudaUploadClass GstCudaUploadClass;
 struct _GstCudaUpload
 {
  GstCudaBaseTransform parent;
 };
 struct _GstCudaUploadClass
 {
  GstCudaBaseTransformClass parent_class;
 };
 GType gst_cuda_upload_get_type (void);
 G_END_DECLS
 #endif /* __GST_CUDA_UPLOAD_H__ */