examples: Add CUDA based in-place transform element example

Add a CUDA example element for plugin developers. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7004>
2025-06-05 06:58:56 +00:00 · 2024-06-07 00:01:10 +09:00 · 2024-06-07 00:01:10 +09:00 · 1b5f026119
commit 1b5f026119
parent c484de8d8b
5 changed files with 742 additions and 0 deletions
--- a/subprojects/gst-plugins-bad/tests/examples/cuda/cuda-template.c
+++ b/subprojects/gst-plugins-bad/tests/examples/cuda/cuda-template.c
@ -0,0 +1,166 @@
+/*
+ * GStreamer
+ * Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gst/gst.h>
+#include "../key-handler.h"
+
+/* Declares the statically linked "cuda_template" plugin so that main() can
+ * register it with GST_PLUGIN_STATIC_REGISTER() */
+GST_PLUGIN_STATIC_DECLARE (cuda_template);
+
+/* Main loop run by main(); quit from the keyboard handler */
+static GMainLoop *loop = NULL;
+/* The element named "filter", looked up from the pipeline in main() */
+static GstElement *filter = NULL;
+/* Mirrors the filter's "update-image" property; toggled by the 't' key */
+static gboolean update_image = TRUE;
+
+/* Prints the list of supported keyboard shortcuts, padding the key column so
+ * that all help texts line up */
+static void
+print_keyboard_help (void)
+{
+  /* *INDENT-OFF* */
+  static struct
+  {
+    const gchar *key_desc;
+    const gchar *key_help;
+  } key_controls[] = {
+    { "q", "Quit" },
+    { "t", "toggle processing mode (read-only or image-update)" },
+  };
+  /* *INDENT-ON* */
+  guint idx;
+  guint widest = 0;
+
+  gst_print ("\n\n%s\n\n", "Keyboard controls:");
+
+  /* First pass: find the widest key description */
+  for (idx = 0; idx < G_N_ELEMENTS (key_controls); idx++) {
+    guint len = g_utf8_strlen (key_controls[idx].key_desc, -1);
+
+    if (len > widest)
+      widest = len;
+  }
+
+  /* Always leave at least one blank between the key and the colon */
+  widest++;
+
+  /* Second pass: key, padding up to the common width, then the help text */
+  for (idx = 0; idx < G_N_ELEMENTS (key_controls); idx++) {
+    const gchar *desc = key_controls[idx].key_desc;
+    guint pad = widest - g_utf8_strlen (desc, -1);
+
+    gst_print ("\t%s", desc);
+    gst_print ("%-*s: ", pad, "");
+    gst_print ("%s\n", key_controls[idx].key_help);
+  }
+  gst_print ("\n");
+}
+
+/* Key handler callback: 'q'/'Q' quits the main loop and 't'/'T' flips the
+ * filter's "update-image" property. Non-ASCII input is ignored */
+static void
+keyboard_cb (gchar input, gboolean is_ascii, gpointer user_data)
+{
+  if (!is_ascii)
+    return;
+
+  if (input == 'q' || input == 'Q') {
+    g_main_loop_quit (loop);
+    return;
+  }
+
+  if (input == 't' || input == 'T') {
+    update_image = !update_image;
+    gst_println ("Toggle image update mode: %d", update_image);
+    g_object_set (filter, "update-image", update_image, NULL);
+  }
+}
+
+static gboolean
+bus_handler (GstBus * bus, GstMessage * msg, GMainLoop * loop)
+{
+  switch (GST_MESSAGE_TYPE (msg)) {
+    case GST_MESSAGE_EOS:
+      gst_println ("Got EOS");
+      g_main_loop_quit (loop);
+      break;
+    case GST_MESSAGE_ERROR:
+    {
+      GError *err = NULL;
+      gchar *name, *debug = NULL;
+
+      name = gst_object_get_path_string (msg->src);
+      gst_message_parse_error (msg, &err, &debug);
+
+      gst_printerrln ("ERROR: from element %s: %s", name, err->message);
+      if (debug != NULL)
+        gst_printerrln ("Additional debug info:\n%s", debug);
+
+      g_clear_error (&err);
+      g_free (debug);
+      g_free (name);
+
+      g_main_loop_quit (loop);
+      break;
+    }
+    default:
+      break;
+  }
+
+  return G_SOURCE_CONTINUE;
+}
+
+/* Builds a videotestsrc -> cuda-transform-ip -> autovideosink pipeline, runs
+ * it until EOS, an error, or the 'q' key, and lets the user toggle the
+ * filter's processing mode with 't'.
+ *
+ * Returns 0 on normal termination and 1 if the pipeline could not be set up */
+gint
+main (gint argc, gchar ** argv)
+{
+  GstElement *pipeline;
+  GstBus *bus;
+  GError *error = NULL;
+
+  /* Let GStreamer consume its own command line options (e.g. --gst-debug) */
+  gst_init (&argc, &argv);
+
+  GST_PLUGIN_STATIC_REGISTER (cuda_template);
+
+  pipeline = gst_parse_launch ("videotestsrc ! cudaupload ! "
+      "cuda-transform-ip name=filter ! cudadownload ! videoconvert ! "
+      "queue max-size-buffers=3 max-size-time=0 max-size-bytes=0 ! "
+      "autovideosink", &error);
+
+  if (!pipeline) {
+    gst_printerrln ("Couldn't create pipeline");
+    if (error)
+      gst_printerrln ("%s", error->message);
+    g_clear_error (&error);
+    return 1;
+  }
+
+  /* A pipeline may be returned together with a recoverable parse error */
+  if (error) {
+    gst_printerrln ("%s", error->message);
+    g_clear_error (&error);
+  }
+
+  /* Returns a new reference which is released during cleanup below */
+  filter = gst_bin_get_by_name (GST_BIN (pipeline), "filter");
+  if (!filter) {
+    gst_printerrln ("Couldn't find filter element");
+    gst_object_unref (pipeline);
+    return 1;
+  }
+
+  loop = g_main_loop_new (NULL, FALSE);
+
+  bus = gst_element_get_bus (pipeline);
+  gst_bus_add_watch (bus, (GstBusFunc) bus_handler, loop);
+
+  gst_element_set_state (pipeline, GST_STATE_PLAYING);
+
+  print_keyboard_help ();
+
+  set_key_handler ((KeyInputCallback) keyboard_cb, NULL);
+  g_main_loop_run (loop);
+  unset_key_handler ();
+
+  gst_element_set_state (pipeline, GST_STATE_NULL);
+  gst_bus_remove_watch (bus);
+  gst_object_unref (bus);
+  gst_clear_object (&filter);
+  gst_object_unref (pipeline);
+  g_clear_pointer (&loop, g_main_loop_unref);
+
+  gst_deinit ();
+
+  return 0;
+}
--- a/subprojects/gst-plugins-bad/tests/examples/cuda/meson.build
+++ b/subprojects/gst-plugins-bad/tests/examples/cuda/meson.build
@ -66,3 +66,15 @@ executable('cudamemory-sync', 'cudamemory-sync.c',
  dependencies: [gst_dep, gstvideo_dep, gstcuda_dep, gl_header_dep] + cuda_deps,
  c_args : gst_plugins_bad_args + ['-DGST_USE_UNSTABLE_API'],
  install: false)
+
+# Example application built together with the template plugin sources so the
+# plugin is linked into the executable; it is not installed
+executable('cuda-template',
+  [
+    '../key-handler.c',
+    'cuda-template.c',
+    'template-plugin/cuda-transform-ip-template.c',
+    'template-plugin/plugin.c',
+  ],
+  include_directories : [configinc] + compat_includes + cuda_inc_dir,
+  dependencies: [gst_dep, gstvideo_dep, gstcuda_dep, gl_header_dep] + cuda_deps,
+  c_args : gst_plugins_bad_args + ['-DGST_USE_UNSTABLE_API'],
+  install: false)
--- a/subprojects/gst-plugins-bad/tests/examples/cuda/template-plugin/cuda-transform-ip-template.c
+++ b/subprojects/gst-plugins-bad/tests/examples/cuda/template-plugin/cuda-transform-ip-template.c
@ -0,0 +1,481 @@
+/* GStreamer
+ * Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/* A CUDA based in-place transform example implementation.
+ *
+ * Apart from the general requirements for a GStreamer element and for CUDA
+ * programming, plugin developers should implement GstContext handling so that
+ * a single GstCudaContext can be shared in the pipeline. That requires
+ * implementing the GstElementClass::set_context() vfunc and a GstQuery
+ * handler.
+ *
+ * In addition to the GstContext handling, on a multi-GPU system the
+ * GstCudaContext might need to be updated, since an upstream element can
+ * produce CUDA memory which belongs to a different GPU.
+ *
+ * This example CUDA element demonstrates:
+ * - GstContext handling (device selection and GstCudaContext allocation)
+ * - GstCudaContext update if needed via GstBaseTransform::before_transform() vfunc
+ * - A simple CUDA operation in a GstBaseTransform subclass
+ *
+ * Note that CUDA API error handling is largely omitted to simplify the code.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "cuda-transform-ip-template.h"
+
+#ifdef G_OS_WIN32
+#include <windows.h>
+#endif
+
+#include <gst/video/video.h>
+#include <gst/cuda/gstcuda.h>
+#include <string.h>
+
+/* Debug category used by the GST_*_OBJECT() logging macros in this file */
+GST_DEBUG_CATEGORY_STATIC (gst_cuda_transform_ip_debug);
+#define GST_CAT_DEFAULT gst_cuda_transform_ip_debug
+
+/* Caps shared by both pads: "Y444" video with the CUDA memory caps feature */
+#define STATIC_CAPS \
+  GST_STATIC_CAPS ( \
+      GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, \
+      "Y444"))
+
+/* Always-present sink pad template */
+static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
+    GST_PAD_SINK, GST_PAD_ALWAYS, STATIC_CAPS);
+
+/* Always-present source pad template */
+static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
+    GST_PAD_SRC, GST_PAD_ALWAYS, STATIC_CAPS);
+
+/* Property identifiers; PROP_0 is an unused placeholder for id 0 */
+enum
+{
+  PROP_0,
+  PROP_DEVICE_ID,
+  PROP_UPDATE_IMAGE,
+};
+
+// -1 = uses any GPU. Element will accept any already existing CUDA context
+//      in the pipeline or user provided one
+// others: Explicit GPU selection
+#define DEFAULT_DEVICE_ID -1
+/* Default of the "update-image" property */
+#define DEFAULT_UPDATE_IMAGE TRUE
+
+/* Instance structure of the cuda-transform-ip element */
+struct _GstCudaTransformIp
+{
+  GstBaseTransform parent;
+
+  /* CUDA context used for host allocations and copies; obtained in start()
+   * and possibly replaced in before_transform() */
+  GstCudaContext *context;
+  /* Video info parsed from the input caps in set_caps() */
+  GstVideoInfo info;
+  /* Host buffer receiving the Y plane when update-image is FALSE */
+  guint8 *read_host_mem;
+  /* Host buffer filled with 128; uploaded to the U and V planes when
+   * update-image is TRUE */
+  guint8 *write_host_mem;
+  /* Pitch of the host buffers: info.stride[0] rounded up with
+   * GST_ROUND_UP_64() */
+  guint stride;
+  /* Size in bytes of each host buffer: stride * info.height */
+  guint size;
+
+  /* Protects context since context update can happen in streaming thread
+   * as well */
+  GRecMutex lock;
+  /* Value of the "update-image" property */
+  gboolean update_image;
+
+  /* Value of the "cuda-device-id" property */
+  gint device_id;
+};
+
+/* GObject vfuncs */
+static void gst_cuda_transform_ip_dispose (GObject * object);
+static void gst_cuda_transform_ip_finalize (GObject * object);
+static void gst_cuda_transform_ip_set_property (GObject * object,
+    guint prop_id, const GValue * value, GParamSpec * pspec);
+static void gst_cuda_transform_ip_get_property (GObject * object,
+    guint prop_id, GValue * value, GParamSpec * pspec);
+
+/* GstElement vfunc */
+static void gst_cuda_transform_ip_set_context (GstElement * element,
+    GstContext * context);
+
+/* GstBaseTransform vfuncs */
+static gboolean gst_cuda_transform_ip_start (GstBaseTransform * trans);
+static gboolean gst_cuda_transform_ip_stop (GstBaseTransform * trans);
+static gboolean gst_cuda_transform_ip_set_caps (GstBaseTransform * trans,
+    GstCaps * incaps, GstCaps * outcaps);
+static gboolean gst_cuda_transform_ip_query (GstBaseTransform * trans,
+    GstPadDirection direction, GstQuery * query);
+static void gst_cuda_transform_ip_before_transform (GstBaseTransform * trans,
+    GstBuffer * buffer);
+static GstFlowReturn gst_cuda_transform_ip_execute (GstBaseTransform * trans,
+    GstBuffer * buffer);
+
+/* Defines the GstCudaTransformIp type as a GstBaseTransform subclass and
+ * makes the parent class available as "parent_class" */
+#define gst_cuda_transform_ip_parent_class parent_class
+G_DEFINE_TYPE (GstCudaTransformIp, gst_cuda_transform_ip,
+    GST_TYPE_BASE_TRANSFORM);
+
+/* Class initializer: sets up the debug category, installs the
+ * "cuda-device-id" and "update-image" properties, registers the pad templates
+ * and element metadata, and hooks up the GObject, GstElement and
+ * GstBaseTransform virtual methods */
+static void
+gst_cuda_transform_ip_class_init (GstCudaTransformIpClass * klass)
+{
+  GObjectClass *object_class = G_OBJECT_CLASS (klass);
+  GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
+  GstBaseTransformClass *trans_class = GST_BASE_TRANSFORM_CLASS (klass);
+
+  /* GST_DEBUG_CATEGORY_STATIC() only declares the category. It has to be
+   * initialized before any GST_*_OBJECT() logging macro in this file runs,
+   * otherwise those macros would log through a NULL category */
+  GST_DEBUG_CATEGORY_INIT (gst_cuda_transform_ip_debug, "cudatransformip", 0,
+      "CUDA in-place transform template element");
+
+  object_class->dispose = gst_cuda_transform_ip_dispose;
+  object_class->finalize = gst_cuda_transform_ip_finalize;
+  object_class->set_property = gst_cuda_transform_ip_set_property;
+  object_class->get_property = gst_cuda_transform_ip_get_property;
+
+  /* Device selection can only be changed while in READY state or below */
+  g_object_class_install_property (object_class, PROP_DEVICE_ID,
+      g_param_spec_int ("cuda-device-id",
+          "CUDA Device ID", "CUDA GPU device id (-1 = auto)",
+          -1, G_MAXINT, DEFAULT_DEVICE_ID,
+          G_PARAM_READWRITE | GST_PARAM_MUTABLE_READY |
+          G_PARAM_STATIC_STRINGS));
+  g_object_class_install_property (object_class, PROP_UPDATE_IMAGE,
+      g_param_spec_boolean ("update-image", "Image Update",
+          "Update image to gray", DEFAULT_UPDATE_IMAGE,
+          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+
+  element_class->set_context =
+      GST_DEBUG_FUNCPTR (gst_cuda_transform_ip_set_context);
+
+  gst_element_class_add_static_pad_template (element_class, &sink_template);
+  gst_element_class_add_static_pad_template (element_class, &src_template);
+
+  gst_element_class_set_static_metadata (element_class,
+      "CUDA transform-ip", "Filter/Video",
+      "CUDA in-place transform template element",
+      "Seungha Yang <seungha@centricular.com>");
+
+  trans_class->start = GST_DEBUG_FUNCPTR (gst_cuda_transform_ip_start);
+  trans_class->stop = GST_DEBUG_FUNCPTR (gst_cuda_transform_ip_stop);
+  trans_class->query = GST_DEBUG_FUNCPTR (gst_cuda_transform_ip_query);
+  trans_class->before_transform =
+      GST_DEBUG_FUNCPTR (gst_cuda_transform_ip_before_transform);
+  trans_class->set_caps = GST_DEBUG_FUNCPTR (gst_cuda_transform_ip_set_caps);
+  trans_class->transform_ip = GST_DEBUG_FUNCPTR (gst_cuda_transform_ip_execute);
+}
+
+/* Instance initializer: sets up the lock and the property defaults */
+static void
+gst_cuda_transform_ip_init (GstCudaTransformIp * self)
+{
+  g_rec_mutex_init (&self->lock);
+
+  self->update_image = DEFAULT_UPDATE_IMAGE;
+  self->device_id = DEFAULT_DEVICE_ID;
+}
+
+/* GObject::dispose: drops our GstCudaContext reference, then chains up */
+static void
+gst_cuda_transform_ip_dispose (GObject * object)
+{
+  GstCudaTransformIp *filter = GST_CUDA_TRANSFORM_IP (object);
+  GObjectClass *parent = G_OBJECT_CLASS (parent_class);
+
+  gst_clear_object (&filter->context);
+
+  parent->dispose (object);
+}
+
+/* GObject::finalize: releases the lock created in the instance initializer,
+ * then chains up */
+static void
+gst_cuda_transform_ip_finalize (GObject * object)
+{
+  GstCudaTransformIp *filter = GST_CUDA_TRANSFORM_IP (object);
+  GObjectClass *parent = G_OBJECT_CLASS (parent_class);
+
+  g_rec_mutex_clear (&filter->lock);
+
+  parent->finalize (object);
+}
+
+/* GObject::set_property: stores "cuda-device-id" or "update-image" under the
+ * element lock; unknown ids produce the standard GObject warning */
+static void
+gst_cuda_transform_ip_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (object);
+
+  g_rec_mutex_lock (&self->lock);
+  if (prop_id == PROP_DEVICE_ID) {
+    self->device_id = g_value_get_int (value);
+  } else if (prop_id == PROP_UPDATE_IMAGE) {
+    self->update_image = g_value_get_boolean (value);
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+  }
+  g_rec_mutex_unlock (&self->lock);
+}
+
+/* GObject::get_property: reads "cuda-device-id" or "update-image" under the
+ * element lock; unknown ids produce the standard GObject warning */
+static void
+gst_cuda_transform_ip_get_property (GObject * object, guint prop_id,
+    GValue * value, GParamSpec * pspec)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (object);
+
+  g_rec_mutex_lock (&self->lock);
+  if (prop_id == PROP_DEVICE_ID) {
+    g_value_set_int (value, self->device_id);
+  } else if (prop_id == PROP_UPDATE_IMAGE) {
+    g_value_set_boolean (value, self->update_image);
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+  }
+  g_rec_mutex_unlock (&self->lock);
+}
+
+/* GstElement::set_context: lets the CUDA helper pick up a GstCudaContext
+ * carried by @context, then chains up to the parent class */
+static void
+gst_cuda_transform_ip_set_context (GstElement * element, GstContext * context)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (element);
+  GstElementClass *parent = GST_ELEMENT_CLASS (parent_class);
+
+  /* The util function parses the GstContext type and sets our CUDA context if
+   * the given GstContext holds a GstCudaContext with matching device-id */
+  g_rec_mutex_lock (&self->lock);
+  gst_cuda_handle_set_context (element,
+      context, self->device_id, &self->context);
+  g_rec_mutex_unlock (&self->lock);
+
+  parent->set_context (element, context);
+}
+
+/* GstBaseTransform::start: makes sure the element has a GstCudaContext.
+ * Posts a RESOURCE/NOT_FOUND element error and returns FALSE otherwise */
+static gboolean
+gst_cuda_transform_ip_start (GstBaseTransform * trans)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (trans);
+  gboolean have_context;
+
+  /* The util function queries a GstCudaContext and creates one if needed */
+  g_rec_mutex_lock (&self->lock);
+  have_context = gst_cuda_ensure_element_context (GST_ELEMENT (self),
+      self->device_id, &self->context);
+  g_rec_mutex_unlock (&self->lock);
+
+  if (have_context)
+    return TRUE;
+
+  GST_ELEMENT_ERROR (self, RESOURCE, NOT_FOUND, (NULL),
+      ("CUDA device unavailable"));
+
+  return FALSE;
+}
+
+/* Allocates the two host buffers of self->size bytes with CuMemAllocHost()
+ * inside self->context and fills the upload buffer with 128 (the value
+ * written to the U and V planes in update-image mode).
+ *
+ * If the context cannot be pushed or either allocation fails, an error is
+ * logged, anything already allocated is freed and both buffer pointers are
+ * left untouched, so nothing is written through an invalid pointer and no
+ * partially allocated state remains */
+static void
+gst_cuda_transform_ip_prepare_resource (GstCudaTransformIp * self)
+{
+  guint8 *read_mem = NULL;
+  guint8 *write_mem = NULL;
+
+  if (!gst_cuda_context_push (self->context)) {
+    GST_ERROR_OBJECT (self, "Couldn't push CUDA context");
+    return;
+  }
+
+  if (CuMemAllocHost ((void **) &read_mem, self->size) != CUDA_SUCCESS) {
+    GST_ERROR_OBJECT (self, "Couldn't allocate host memory for download");
+    read_mem = NULL;
+  } else if (CuMemAllocHost ((void **) &write_mem,
+          self->size) != CUDA_SUCCESS) {
+    GST_ERROR_OBJECT (self, "Couldn't allocate host memory for upload");
+    /* Don't keep half of the pair around: release_resource() only checks
+     * read_host_mem */
+    CuMemFreeHost (read_mem);
+    read_mem = NULL;
+    write_mem = NULL;
+  }
+
+  gst_cuda_context_pop (NULL);
+
+  if (!read_mem || !write_mem)
+    return;
+
+  memset (write_mem, 128, self->size);
+
+  self->read_host_mem = read_mem;
+  self->write_host_mem = write_mem;
+}
+
+/* Frees both host buffers inside self->context and resets the pointers.
+ * Does nothing when read_host_mem is not allocated */
+static void
+gst_cuda_transform_ip_release_resource (GstCudaTransformIp * self)
+{
+  if (!self->read_host_mem)
+    return;
+
+  gst_cuda_context_push (self->context);
+  CuMemFreeHost (self->read_host_mem);
+  CuMemFreeHost (self->write_host_mem);
+  gst_cuda_context_pop (NULL);
+
+  self->read_host_mem = NULL;
+  self->write_host_mem = NULL;
+}
+
+/* GstBaseTransform::stop: frees the host buffers and drops the CUDA context.
+ * Always returns TRUE */
+static gboolean
+gst_cuda_transform_ip_stop (GstBaseTransform * trans)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (trans);
+
+  /* self->context is also read by the context query handler and written by
+   * set_context(), both under this lock, so clear it under the lock too */
+  g_rec_mutex_lock (&self->lock);
+  gst_cuda_transform_ip_release_resource (self);
+  gst_clear_object (&self->context);
+  g_rec_mutex_unlock (&self->lock);
+
+  return TRUE;
+}
+
+/* GstBaseTransform::query: answers CONTEXT queries with our GstCudaContext
+ * when the CUDA helper can handle them; everything else goes to the parent
+ * class */
+static gboolean
+gst_cuda_transform_ip_query (GstBaseTransform * trans,
+    GstPadDirection direction, GstQuery * query)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (trans);
+
+  if (GST_QUERY_TYPE (query) == GST_QUERY_CONTEXT) {
+    gboolean handled;
+
+    g_rec_mutex_lock (&self->lock);
+    handled = gst_cuda_handle_context_query (GST_ELEMENT (self), query,
+        self->context);
+    g_rec_mutex_unlock (&self->lock);
+
+    /* Nothing more to do once the context query was answered here */
+    if (handled)
+      return TRUE;
+  }
+
+  return GST_BASE_TRANSFORM_CLASS (parent_class)->query (trans,
+      direction, query);
+}
+
+/* GstBaseTransform::set_caps: drops the buffers of the previous format,
+ * parses @incaps into self->info and allocates buffers for the new format.
+ * Returns FALSE if @incaps cannot be parsed as video caps */
+static gboolean
+gst_cuda_transform_ip_set_caps (GstBaseTransform * trans, GstCaps * incaps,
+    GstCaps * outcaps)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (trans);
+
+  gst_cuda_transform_ip_release_resource (self);
+
+  if (gst_video_info_from_caps (&self->info, incaps)) {
+    /* Resolution dependent resources: one plane with a 64-rounded pitch */
+    self->stride = GST_ROUND_UP_64 (self->info.stride[0]);
+    self->size = self->stride * self->info.height;
+
+    gst_cuda_transform_ip_prepare_resource (self);
+
+    return TRUE;
+  }
+
+  GST_ERROR_OBJECT (self, "Invalid caps %" GST_PTR_FORMAT, incaps);
+
+  return FALSE;
+}
+
+/* GstBaseTransform::before_transform: compares the context of the incoming
+ * GstCudaMemory with ours and, when they differ, adopts the memory's context
+ * and reallocates the host buffers in it */
+static void
+gst_cuda_transform_ip_before_transform (GstBaseTransform * trans,
+    GstBuffer * buffer)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (trans);
+  GstMemory *mem = gst_buffer_peek_memory (buffer, 0);
+  GstCudaMemory *cuda_mem;
+
+  g_assert (gst_is_cuda_memory (mem));
+  cuda_mem = GST_CUDA_MEMORY_CAST (mem);
+
+  /* Common case: upstream already uses our context */
+  if (cuda_mem->context == self->context)
+    return;
+
+  GST_INFO_OBJECT (self, "updating context");
+
+  g_rec_mutex_lock (&self->lock);
+  /* Buffers must be freed in the old context before it is dropped */
+  gst_cuda_transform_ip_release_resource (self);
+  gst_clear_object (&self->context);
+  self->context = gst_object_ref (cuda_mem->context);
+  gst_cuda_transform_ip_prepare_resource (self);
+  g_rec_mutex_unlock (&self->lock);
+}
+
+/* GstBaseTransform::transform_ip implementation.
+ *
+ * Depending on the "update-image" property this either
+ * - uploads write_host_mem to the U and V planes of @buffer without waiting
+ *   for the copies to finish (update mode), or
+ * - downloads the Y plane of @buffer into read_host_mem and waits for the
+ *   copy to finish (read-only mode).
+ *
+ * Returns GST_FLOW_ERROR if @buffer cannot be mapped, GST_FLOW_OK otherwise.
+ * Return values of the CUDA calls are not checked */
+static GstFlowReturn
+gst_cuda_transform_ip_execute (GstBaseTransform * trans, GstBuffer * buffer)
+{
+  GstCudaTransformIp *self = GST_CUDA_TRANSFORM_IP (trans);
+  GstMemory *mem;
+  GstCudaMemory *cmem;
+  CUstream stream;
+  GstVideoFrame frame;
+  GstMapFlags flags = GST_MAP_CUDA;
+  CUDA_MEMCPY2D params;
+  gboolean update_image;
+
+  /* Take a snapshot of the property so that the mode cannot change in the
+   * middle of processing this buffer */
+  g_rec_mutex_lock (&self->lock);
+  update_image = self->update_image;
+  g_rec_mutex_unlock (&self->lock);
+
+  /* Gets memory to access cuda stream object */
+  mem = gst_buffer_peek_memory (buffer, 0);
+  g_assert (gst_is_cuda_memory (mem));
+
+  cmem = GST_CUDA_MEMORY_CAST (mem);
+  /* NOTE: gst_cuda_stream_get_handle() is NULL-safe and will return the
+   * default stream if the GstCudaStream is NULL */
+  stream = gst_cuda_stream_get_handle (gst_cuda_memory_get_stream (cmem));
+
+  /* BEGIN-ELEMENT-SPECIFIC-PROCESSING */
+  if (update_image) {
+    /* Emulating image update process (e.g., image enhancement) */
+    flags |= GST_MAP_WRITE;
+  } else {
+    /* Emulating image analysis process (e.g., edge detection) */
+    flags |= GST_MAP_READ;
+  }
+
+  if (!gst_video_frame_map (&frame, &self->info, buffer, flags)) {
+    GST_ERROR_OBJECT (self, "Couldn't map buffer");
+    return GST_FLOW_ERROR;
+  }
+
+  memset (&params, 0, sizeof (params));
+  gst_cuda_context_push (self->context);
+  if (update_image) {
+    /* Source: host buffer prepared by prepare_resource() */
+    params.srcMemoryType = CU_MEMORYTYPE_HOST;
+    params.srcHost = self->write_host_mem;
+    params.srcPitch = self->stride;
+
+    /* Destination: plane 1 of the mapped frame */
+    params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+    params.dstDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&frame, 1);
+    params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&frame, 1);
+
+    /* The frame width is used directly as the number of bytes per row */
+    params.WidthInBytes = GST_VIDEO_FRAME_WIDTH (&frame);
+    params.Height = GST_VIDEO_FRAME_HEIGHT (&frame);
+
+    /* Upload to U plane */
+    CuMemcpy2DAsync (&params, stream);
+
+    /* Same copy parameters, only the destination plane changes */
+    params.dstDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&frame, 2);
+
+    /* Upload to V plane */
+    CuMemcpy2DAsync (&params, stream);
+  } else {
+    /* Download Y plane data */
+    params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+    params.srcDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&frame, 0);
+    params.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&frame, 0);
+
+    params.dstMemoryType = CU_MEMORYTYPE_HOST;
+    params.dstHost = self->read_host_mem;
+    params.dstPitch = self->stride;
+
+    params.WidthInBytes = GST_VIDEO_FRAME_WIDTH (&frame);
+    params.Height = GST_VIDEO_FRAME_HEIGHT (&frame);
+
+    CuMemcpy2DAsync (&params, stream);
+    /* Wait for the download so that read_host_mem can be used below */
+    CuStreamSynchronize (stream);
+
+    /* Do something */
+  }
+  gst_cuda_context_pop (NULL);
+
+  gst_video_frame_unmap (&frame);
+
+  if (update_image) {
+    /* Writable map can replace memory of the given buffer if memory was not
+     * writable when map() was called. Gets memory pointer again */
+    mem = gst_buffer_peek_memory (buffer, 0);
+
+    /* We skipped CuStreamSynchronize() above. Mark this memory as not
+     * synchronized yet */
+    GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
+  }
+
+  /* END-ELEMENT-SPECIFIC-PROCESSING */
+
+  return GST_FLOW_OK;
+}
--- a/subprojects/gst-plugins-bad/tests/examples/cuda/template-plugin/cuda-transform-ip-template.h
+++ b/subprojects/gst-plugins-bad/tests/examples/cuda/template-plugin/cuda-transform-ip-template.h
@ -0,0 +1,32 @@
+/* GStreamer
+ * Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#pragma once
+
+#include <gst/gst.h>
+#include <gst/base/gstbasetransform.h>
+
+G_BEGIN_DECLS
+
+/* Declares GstCudaTransformIp as a final type derived from GstBaseTransform,
+ * together with its gst_cuda_transform_ip_get_type() function */
+#define GST_TYPE_CUDA_TRANSFORM_IP (gst_cuda_transform_ip_get_type())
+G_DECLARE_FINAL_TYPE (GstCudaTransformIp, gst_cuda_transform_ip,
+    GST, CUDA_TRANSFORM_IP, GstBaseTransform)
+
+G_END_DECLS
+
--- a/subprojects/gst-plugins-bad/tests/examples/cuda/template-plugin/plugin.c
+++ b/subprojects/gst-plugins-bad/tests/examples/cuda/template-plugin/plugin.c
@ -0,0 +1,51 @@
+/*
+ * GStreamer
+ * Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gst/gst.h>
+
+#ifdef G_OS_WIN32
+#include <windows.h>
+#endif
+
+#include <gst/cuda/gstcuda.h>
+#include "cuda-transform-ip-template.h"
+
+/* Plugin entry point: registers the "cuda-transform-ip" element when the CUDA
+ * library can be loaded and initialized. Without CUDA no element is
+ * registered but TRUE is still returned */
+static gboolean
+plugin_init (GstPlugin * plugin)
+{
+  /* CuInit() is only attempted after the library was loaded successfully */
+  if (!gst_cuda_load_library () || CuInit (0) != CUDA_SUCCESS)
+    return TRUE;
+
+  return gst_element_register (plugin, "cuda-transform-ip", GST_RANK_NONE,
+      GST_TYPE_CUDA_TRANSFORM_IP);
+}
+
+/* Plugin definition for "cuda_template"; this is the name referenced by
+ * GST_PLUGIN_STATIC_DECLARE()/GST_PLUGIN_STATIC_REGISTER() in the example
+ * application */
+GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
+    GST_VERSION_MINOR,
+    cuda_template,
+    "CUDA template plugin",
+    plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)