diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudacompositor.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudacompositor.cpp new file mode 100644 index 0000000000..40d6e13f20 --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudacompositor.cpp @@ -0,0 +1,1642 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include "gstcudacompositor.h" +#include "gstcudaconverter.h" + +GST_DEBUG_CATEGORY_STATIC (gst_cuda_compositor_debug); +#define GST_CAT_DEFAULT gst_cuda_compositor_debug + +enum GstCudaCompositorOperator +{ + GST_CUDA_COMPOSITOR_OPERATOR_SOURCE, + GST_CUDA_COMPOSITOR_OPERATOR_OVER, +}; + +#define GST_TYPE_CUDA_COMPOSITOR_OPERATOR (gst_cuda_compositor_operator_get_type()) +static GType +gst_cuda_compositor_operator_get_type (void) +{ + static GType compositor_operator_type = 0; + static const GEnumValue compositor_operator[] = { + {GST_CUDA_COMPOSITOR_OPERATOR_SOURCE, "Source", "source"}, + {GST_CUDA_COMPOSITOR_OPERATOR_OVER, "Over", "over"}, + {0, nullptr, nullptr}, + }; + + GST_CUDA_CALL_ONCE_BEGIN { + compositor_operator_type = + g_enum_register_static ("GstCudaCompositorOperator", + compositor_operator); + } GST_CUDA_CALL_ONCE_END; + + return compositor_operator_type; +} + +enum GstCudaCompositorSizingPolicy +{ + GST_CUDA_COMPOSITOR_SIZING_POLICY_NONE, + GST_CUDA_COMPOSITOR_SIZING_POLICY_KEEP_ASPECT_RATIO, +}; + +#define GST_TYPE_CUDA_COMPOSITOR_SIZING_POLICY (gst_cuda_compositor_sizing_policy_get_type()) +static GType +gst_cuda_compositor_sizing_policy_get_type (void) +{ + static GType sizing_policy_type = 0; + + static const GEnumValue sizing_polices[] = { + {GST_CUDA_COMPOSITOR_SIZING_POLICY_NONE, + "None: Image is scaled to fill configured destination rectangle without " + "padding or keeping the aspect ratio", "none"}, + {GST_CUDA_COMPOSITOR_SIZING_POLICY_KEEP_ASPECT_RATIO, + "Keep Aspect Ratio: Image is scaled to fit destination rectangle " + "specified by GstD3D12CompositorPad:{xpos, ypos, width, height} " + "with preserved aspect ratio. 
Resulting image will be centered in " + "the destination rectangle with padding if necessary", + "keep-aspect-ratio"}, + {0, nullptr, nullptr}, + }; + + GST_CUDA_CALL_ONCE_BEGIN { + sizing_policy_type = + g_enum_register_static ("GstCudaCompositorSizingPolicy", + sizing_polices); + } GST_CUDA_CALL_ONCE_END; + + return sizing_policy_type; +} + +enum +{ + PROP_PAD_0, + PROP_PAD_XPOS, + PROP_PAD_YPOS, + PROP_PAD_WIDTH, + PROP_PAD_HEIGHT, + PROP_PAD_ALPHA, + PROP_PAD_OPERATOR, + PROP_PAD_SIZING_POLICY, +}; + +#define DEFAULT_PAD_XPOS 0 +#define DEFAULT_PAD_YPOS 0 +#define DEFAULT_PAD_WIDTH 0 +#define DEFAULT_PAD_HEIGHT 0 +#define DEFAULT_PAD_ALPHA 1.0 +#define DEFAULT_PAD_OPERATOR GST_CUDA_COMPOSITOR_OPERATOR_OVER +#define DEFAULT_PAD_SIZING_POLICY GST_CUDA_COMPOSITOR_SIZING_POLICY_NONE + +enum +{ + PROP_0, + PROP_DEVICE_ID, + PROP_IGNORE_INACTIVE_PADS, +}; + +#define DEFAULT_DEVICE_ID -1 + +/* *INDENT-OFF* */ +struct GstCudaCompositorPadPrivate +{ + ~GstCudaCompositorPadPrivate () + { + gst_clear_object (&conv); + gst_clear_buffer (&prepared_buf); + if (fallback_pool) { + gst_buffer_pool_set_active (fallback_pool, FALSE); + gst_object_unref (fallback_pool); + } + } + + GstCudaConverter *conv = nullptr; + GstBufferPool *fallback_pool = nullptr; + GstBuffer *prepared_buf = nullptr; + + gboolean config_updated = FALSE; + + std::recursive_mutex lock; + + /* properties */ + gint xpos = DEFAULT_PAD_XPOS; + gint ypos = DEFAULT_PAD_YPOS; + gint width = DEFAULT_PAD_WIDTH; + gint height = DEFAULT_PAD_HEIGHT; + gdouble alpha = DEFAULT_PAD_ALPHA; + GstCudaCompositorOperator op = DEFAULT_PAD_OPERATOR; + GstCudaCompositorSizingPolicy sizing_policy = DEFAULT_PAD_SIZING_POLICY; +}; + +struct _GstCudaCompositorPad +{ + GstVideoAggregatorConvertPad parent; + + GstCudaCompositorPadPrivate *priv; +}; + +struct GstCudaCompositorPrivate +{ + std::recursive_mutex lock; + + /* properties */ + gint device_id = DEFAULT_DEVICE_ID; +}; +/* *INDENT-ON* */ + +struct _GstCudaCompositor +{ + 
GstVideoAggregator parent; + + GstCudaContext *context; + GstCudaStream *stream; + GstCudaStream *other_stream; + + GstCudaCompositorPrivate *priv; +}; + +static void gst_cuda_compositor_pad_finalize (GObject * object); +static void gst_cuda_compositor_pad_set_property (GObject * object, + guint prop_id, const GValue * value, GParamSpec * pspec); +static void gst_cuda_compositor_pad_get_property (GObject * object, + guint prop_id, GValue * value, GParamSpec * pspec); +static gboolean +gst_cuda_compositor_pad_prepare_frame (GstVideoAggregatorPad * pad, + GstVideoAggregator * vagg, GstBuffer * buffer, + GstVideoFrame * prepared_frame); +static void gst_cuda_compositor_pad_clean_frame (GstVideoAggregatorPad * pad, + GstVideoAggregator * vagg, GstVideoFrame * prepared_frame); + +#define gst_cuda_compositor_pad_parent_class parent_pad_class +G_DEFINE_TYPE (GstCudaCompositorPad, gst_cuda_compositor_pad, + GST_TYPE_VIDEO_AGGREGATOR_PAD); + +static void +gst_cuda_compositor_pad_class_init (GstCudaCompositorPadClass * klass) +{ + auto object_class = G_OBJECT_CLASS (klass); + auto vagg_pad_class = GST_VIDEO_AGGREGATOR_PAD_CLASS (klass); + auto param_flags = (GParamFlags) + (G_PARAM_READWRITE | GST_PARAM_CONTROLLABLE | G_PARAM_STATIC_STRINGS); + + object_class->finalize = gst_cuda_compositor_pad_finalize; + object_class->set_property = gst_cuda_compositor_pad_set_property; + object_class->get_property = gst_cuda_compositor_pad_get_property; + + g_object_class_install_property (object_class, PROP_PAD_XPOS, + g_param_spec_int ("xpos", "X Position", "X position of the picture", + G_MININT, G_MAXINT, DEFAULT_PAD_XPOS, param_flags)); + g_object_class_install_property (object_class, PROP_PAD_YPOS, + g_param_spec_int ("ypos", "Y Position", "Y position of the picture", + G_MININT, G_MAXINT, DEFAULT_PAD_YPOS, param_flags)); + g_object_class_install_property (object_class, PROP_PAD_WIDTH, + g_param_spec_int ("width", "Width", "Width of the picture", + G_MININT, G_MAXINT, 
DEFAULT_PAD_WIDTH, param_flags)); + g_object_class_install_property (object_class, PROP_PAD_HEIGHT, + g_param_spec_int ("height", "Height", "Height of the picture", + G_MININT, G_MAXINT, DEFAULT_PAD_HEIGHT, param_flags)); + g_object_class_install_property (object_class, PROP_PAD_ALPHA, + g_param_spec_double ("alpha", "Alpha", "Alpha of the picture", 0.0, 1.0, + DEFAULT_PAD_ALPHA, param_flags)); + g_object_class_install_property (object_class, PROP_PAD_OPERATOR, + g_param_spec_enum ("operator", "Operator", + "Blending operator to use for blending this pad over the previous ones", + GST_TYPE_CUDA_COMPOSITOR_OPERATOR, DEFAULT_PAD_OPERATOR, + param_flags)); + g_object_class_install_property (object_class, PROP_PAD_SIZING_POLICY, + g_param_spec_enum ("sizing-policy", "Sizing policy", + "Sizing policy to use for image scaling", + GST_TYPE_CUDA_COMPOSITOR_SIZING_POLICY, DEFAULT_PAD_SIZING_POLICY, + param_flags)); + + vagg_pad_class->prepare_frame = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_pad_prepare_frame); + vagg_pad_class->clean_frame = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_pad_clean_frame); + + gst_type_mark_as_plugin_api (GST_TYPE_CUDA_COMPOSITOR_OPERATOR, + (GstPluginAPIFlags) 0); + gst_type_mark_as_plugin_api (GST_TYPE_CUDA_COMPOSITOR_SIZING_POLICY, + (GstPluginAPIFlags) 0); +} + +static void +gst_cuda_compositor_pad_init (GstCudaCompositorPad * self) +{ + self->priv = new GstCudaCompositorPadPrivate (); +} + +static void +gst_cuda_compositor_pad_finalize (GObject * object) +{ + auto self = GST_CUDA_COMPOSITOR_PAD (object); + + delete self->priv; + + G_OBJECT_CLASS (parent_pad_class)->finalize (object); +} + +static void +pad_update_position (GstCudaCompositorPad * self, + gint * old, const GValue * value) +{ + auto priv = self->priv; + auto tmp = g_value_get_int (value); + + if (*old != tmp) { + *old = tmp; + priv->config_updated = TRUE; + } +} + +static void +gst_cuda_compositor_pad_set_property (GObject * object, guint prop_id, + const GValue * value, 
GParamSpec * pspec) +{ + auto self = GST_CUDA_COMPOSITOR_PAD (object); + auto priv = self->priv; + + std::lock_guard < std::recursive_mutex > lk (priv->lock); + switch (prop_id) { + case PROP_PAD_XPOS: + pad_update_position (self, &priv->xpos, value); + break; + case PROP_PAD_YPOS: + pad_update_position (self, &priv->ypos, value); + break; + case PROP_PAD_WIDTH: + pad_update_position (self, &priv->width, value); + break; + case PROP_PAD_HEIGHT: + pad_update_position (self, &priv->height, value); + break; + case PROP_PAD_ALPHA: + { + gdouble alpha = g_value_get_double (value); + if (priv->alpha != alpha) { + priv->config_updated = TRUE; + priv->alpha = alpha; + } + break; + } + case PROP_PAD_OPERATOR: + { + auto op = (GstCudaCompositorOperator) g_value_get_enum (value); + if (op != priv->op) { + priv->op = op; + priv->config_updated = TRUE; + } + break; + } + case PROP_PAD_SIZING_POLICY: + { + auto policy = (GstCudaCompositorSizingPolicy) g_value_get_enum (value); + if (priv->sizing_policy != policy) { + priv->sizing_policy = policy; + priv->config_updated = TRUE; + } + break; + } + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +gst_cuda_compositor_pad_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + auto self = GST_CUDA_COMPOSITOR_PAD (object); + auto priv = self->priv; + + std::lock_guard < std::recursive_mutex > lk (priv->lock); + switch (prop_id) { + case PROP_PAD_XPOS: + g_value_set_int (value, priv->xpos); + break; + case PROP_PAD_YPOS: + g_value_set_int (value, priv->ypos); + break; + case PROP_PAD_WIDTH: + g_value_set_int (value, priv->width); + break; + case PROP_PAD_HEIGHT: + g_value_set_int (value, priv->height); + break; + case PROP_PAD_ALPHA: + g_value_set_double (value, priv->alpha); + break; + case PROP_PAD_OPERATOR: + g_value_set_enum (value, priv->op); + break; + case PROP_PAD_SIZING_POLICY: + g_value_set_enum (value, priv->sizing_policy); + break; + 
default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+      break;
+  }
+}
+
+/* Computes the size and the extra offset at which this pad's picture is
+ * drawn in the output.
+ *
+ * @self: the pad whose "width"/"height"/"sizing-policy" settings are read
+ * @out_par_n, @out_par_d: pixel aspect ratio of the output video
+ * @width, @height: (out) resulting picture size; both are set to 0 when the
+ *   pad has no format yet, when the size is degenerate or when the display
+ *   aspect ratio cannot be calculated
+ * @x_offset, @y_offset: (out) offset of the picture inside the configured
+ *   rectangle; non-zero only for the keep-aspect-ratio policy, where the
+ *   picture is centered with padding
+ *
+ * Reads priv->width, priv->height and priv->sizing_policy without taking
+ * priv->lock itself. */
+static void
+gst_cuda_compositor_pad_get_output_size (GstCudaCompositorPad * self,
+    gint out_par_n, gint out_par_d, gint * width, gint * height,
+    gint * x_offset, gint * y_offset)
+{
+  auto vagg_pad = GST_VIDEO_AGGREGATOR_PAD (self);
+  auto priv = self->priv;
+  gint pad_width, pad_height;
+  guint dar_n, dar_d;
+
+  *x_offset = 0;
+  *y_offset = 0;
+  *width = 0;
+  *height = 0;
+
+  if (!vagg_pad->info.finfo
+      || vagg_pad->info.finfo->format == GST_VIDEO_FORMAT_UNKNOWN) {
+    GST_DEBUG_OBJECT (self, "Have no caps yet");
+    return;
+  }
+
+  /* A non-positive property value means "use the input video size" */
+  pad_width = priv->width <= 0 ?
+      GST_VIDEO_INFO_WIDTH (&vagg_pad->info) : priv->width;
+  pad_height = priv->height <= 0 ?
+      GST_VIDEO_INFO_HEIGHT (&vagg_pad->info) : priv->height;
+
+  if (pad_width == 0 || pad_height == 0)
+    return;
+
+  if (!gst_video_calculate_display_ratio (&dar_n, &dar_d, pad_width, pad_height,
+          GST_VIDEO_INFO_PAR_N (&vagg_pad->info),
+          GST_VIDEO_INFO_PAR_D (&vagg_pad->info), out_par_n, out_par_d)) {
+    GST_WARNING_OBJECT (self, "Cannot calculate display aspect ratio");
+    return;
+  }
+
+  /* The log object must be a GObject: pass the pad, not its C++ private
+   * struct */
+  GST_TRACE_OBJECT (self, "scaling %ux%u by %u/%u (%u/%u / %u/%u)",
+      pad_width, pad_height, dar_n, dar_d,
+      GST_VIDEO_INFO_PAR_N (&vagg_pad->info),
+      GST_VIDEO_INFO_PAR_D (&vagg_pad->info), out_par_n, out_par_d);
+
+  switch (priv->sizing_policy) {
+    case GST_CUDA_COMPOSITOR_SIZING_POLICY_NONE:
+      /* Pick either height or width, whichever is an integer multiple of the
+       * display aspect ratio. However, prefer preserving the height to account
+       * for interlaced video.
+       */
+      if (pad_height % dar_n == 0) {
+        pad_width = gst_util_uint64_scale_int (pad_height, dar_n, dar_d);
+      } else if (pad_width % dar_d == 0) {
+        pad_height = gst_util_uint64_scale_int (pad_width, dar_d, dar_n);
+      } else {
+        pad_width = gst_util_uint64_scale_int (pad_height, dar_n, dar_d);
+      }
+      break;
+    case GST_CUDA_COMPOSITOR_SIZING_POLICY_KEEP_ASPECT_RATIO:
+    {
+      gint from_dar_n, from_dar_d, to_dar_n, to_dar_d, num, den;
+
+      /* Calculate DAR again with actual video size */
+      if (!gst_util_fraction_multiply (GST_VIDEO_INFO_WIDTH (&vagg_pad->info),
+              GST_VIDEO_INFO_HEIGHT (&vagg_pad->info),
+              GST_VIDEO_INFO_PAR_N (&vagg_pad->info),
+              GST_VIDEO_INFO_PAR_D (&vagg_pad->info), &from_dar_n,
+              &from_dar_d)) {
+        from_dar_n = from_dar_d = -1;
+      }
+
+      if (!gst_util_fraction_multiply (pad_width, pad_height,
+              out_par_n, out_par_d, &to_dar_n, &to_dar_d)) {
+        to_dar_n = to_dar_d = -1;
+      }
+
+      if (from_dar_n != to_dar_n || from_dar_d != to_dar_d) {
+        /* Calculate new output resolution */
+        if (from_dar_n != -1 && from_dar_d != -1
+            && gst_util_fraction_multiply (from_dar_n, from_dar_d,
+                out_par_d, out_par_n, &num, &den)) {
+          GstVideoRectangle src_rect, dst_rect, rst_rect;
+
+          src_rect.h = gst_util_uint64_scale_int (pad_width, den, num);
+          if (src_rect.h == 0) {
+            pad_width = 0;
+            pad_height = 0;
+            break;
+          }
+
+          src_rect.x = src_rect.y = 0;
+          src_rect.w = pad_width;
+
+          dst_rect.x = dst_rect.y = 0;
+          dst_rect.w = pad_width;
+          dst_rect.h = pad_height;
+
+          /* Scale rect to be centered in destination rect */
+          gst_video_center_rect (&src_rect, &dst_rect, &rst_rect, TRUE);
+
+          /* Last argument is the y-offset, i.e. rst_rect.y (not the height) */
+          GST_LOG_OBJECT (self,
+              "Re-calculated size %dx%d -> %dx%d (x-offset %d, y-offset %d)",
+              pad_width, pad_height, rst_rect.w, rst_rect.h, rst_rect.x,
+              rst_rect.y);
+
+          *x_offset = rst_rect.x;
+          *y_offset = rst_rect.y;
+          pad_width = rst_rect.w;
+          pad_height = rst_rect.h;
+        } else {
+          GST_WARNING_OBJECT (self, "Failed to calculate output size");
+
+          *x_offset = 0;
+          *y_offset = 0;
+          pad_width = 0;
+
pad_height = 0; + } + } + break; + } + } + + *width = pad_width; + *height = pad_height; +} + +static GstVideoRectangle +clamp_rectangle (gint x, gint y, gint w, gint h, gint outer_width, + gint outer_height) +{ + gint x2 = x + w; + gint y2 = y + h; + GstVideoRectangle clamped; + + /* Clamp the x/y coordinates of this frame to the output boundaries to cover + * the case where (say, with negative xpos/ypos or w/h greater than the output + * size) the non-obscured portion of the frame could be outside the bounds of + * the video itself and hence not visible at all */ + clamped.x = CLAMP (x, 0, outer_width); + clamped.y = CLAMP (y, 0, outer_height); + clamped.w = CLAMP (x2, 0, outer_width) - clamped.x; + clamped.h = CLAMP (y2, 0, outer_height) - clamped.y; + + return clamped; +} + +static gboolean +gst_cuda_compositor_pad_check_frame_obscured (GstVideoAggregatorPad * pad, + GstVideoAggregator * vagg) +{ + auto self = GST_CUDA_COMPOSITOR_PAD (pad); + auto priv = self->priv; + gint width, height; + GstVideoInfo *info = &vagg->info; + /* The rectangle representing this frame, clamped to the video's boundaries. + * Due to the clamping, this is different from the frame width/height above. */ + GstVideoRectangle frame_rect; + gint x_offset, y_offset; + + /* There's three types of width/height here: + * 1. GST_VIDEO_FRAME_WIDTH/HEIGHT: + * The frame width/height (same as pad->info.height/width; + * see gst_video_frame_map()) + * 2. 
cpad->width/height: + * The optional pad property for scaling the frame (if zero, the video is + * left unscaled) + */ + + if (priv->alpha == 0) + return TRUE; + + gst_cuda_compositor_pad_get_output_size (self, GST_VIDEO_INFO_PAR_N (info), + GST_VIDEO_INFO_PAR_D (info), &width, &height, &x_offset, &y_offset); + + frame_rect = clamp_rectangle (priv->xpos + x_offset, priv->ypos + y_offset, + width, height, GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info)); + + if (frame_rect.w == 0 || frame_rect.h == 0) { + GST_DEBUG_OBJECT (pad, "Resulting frame is zero-width or zero-height " + "(w: %i, h: %i), skipping", frame_rect.w, frame_rect.h); + return TRUE; + } + + return FALSE; +} + +static GstBuffer * +gst_cuda_compositor_upload_frame (GstCudaCompositor * self, + GstVideoAggregatorPad * pad, GstBuffer * buffer) +{ + auto cpad = GST_CUDA_COMPOSITOR_PAD (pad); + auto priv = cpad->priv; + GstVideoFrame src, dst; + + auto mem = gst_buffer_peek_memory (buffer, 0); + if (gst_is_cuda_memory (mem)) { + auto cmem = GST_CUDA_MEMORY_CAST (mem); + if (cmem->context == self->context) + return gst_buffer_ref (buffer); + } + + if (!priv->fallback_pool) { + priv->fallback_pool = gst_cuda_buffer_pool_new (self->context); + auto config = gst_buffer_pool_get_config (priv->fallback_pool); + + if (self->stream) + gst_buffer_pool_config_set_cuda_stream (config, self->stream); + + auto caps = gst_video_info_to_caps (&pad->info); + gst_buffer_pool_config_set_params (config, caps, pad->info.size, 0, 0); + gst_caps_unref (caps); + if (!gst_buffer_pool_set_config (priv->fallback_pool, config)) { + GST_ERROR_OBJECT (pad, "Set config failed"); + gst_clear_object (&priv->fallback_pool); + return nullptr; + } + + if (!gst_buffer_pool_set_active (priv->fallback_pool, TRUE)) { + GST_ERROR_OBJECT (pad, "Set active failed"); + gst_clear_object (&priv->fallback_pool); + return nullptr; + } + } + + GstBuffer *outbuf = nullptr; + gst_buffer_pool_acquire_buffer (priv->fallback_pool, &outbuf, nullptr); 
+ if (!outbuf) { + GST_ERROR_OBJECT (self, "Couldn't acquire buffer"); + return nullptr; + } + + if (!gst_video_frame_map (&src, &pad->info, buffer, GST_MAP_READ)) { + GST_ERROR_OBJECT (pad, "Couldn't map src frame"); + gst_buffer_unref (outbuf); + return nullptr; + } + + if (!gst_video_frame_map (&dst, &pad->info, outbuf, GST_MAP_WRITE)) { + GST_ERROR_OBJECT (pad, "Couldn't map dst frame"); + gst_video_frame_unmap (&src); + gst_buffer_unref (outbuf); + return nullptr; + } + + auto ret = gst_video_frame_copy (&dst, &src); + gst_video_frame_unmap (&dst); + gst_video_frame_unmap (&src); + + if (!ret) { + GST_ERROR_OBJECT (pad, "Couldn't copy frame"); + gst_buffer_unref (outbuf); + return nullptr; + } + + return outbuf; +} + +static gboolean +gst_cuda_compositor_pad_prepare_frame (GstVideoAggregatorPad * pad, + GstVideoAggregator * vagg, GstBuffer * buffer, + GstVideoFrame * prepared_frame) +{ + auto self = GST_CUDA_COMPOSITOR_PAD (pad); + auto priv = self->priv; + + std::lock_guard < std::recursive_mutex > lk (priv->lock); + if (gst_cuda_compositor_pad_check_frame_obscured (pad, vagg)) + return TRUE; + + buffer = gst_cuda_compositor_upload_frame (GST_CUDA_COMPOSITOR (vagg), + pad, buffer); + if (!buffer) + return FALSE; + + if (!gst_video_frame_map (prepared_frame, + &pad->info, buffer, (GstMapFlags) (GST_MAP_READ | GST_MAP_CUDA))) { + GST_ERROR_OBJECT (self, "Couldn't map frame"); + gst_buffer_unref (buffer); + } + + prepared_frame->buffer = buffer; + priv->prepared_buf = gst_buffer_ref (buffer); + + return TRUE; +} + +static void +gst_cuda_compositor_pad_clean_frame (GstVideoAggregatorPad * pad, + GstVideoAggregator * vagg, GstVideoFrame * prepared_frame) +{ + auto self = GST_CUDA_COMPOSITOR_PAD (pad); + auto priv = self->priv; + + if (prepared_frame->buffer) + gst_video_frame_unmap (prepared_frame); + + memset (prepared_frame, 0, sizeof (GstVideoFrame)); + gst_clear_buffer (&priv->prepared_buf); +} + +static gboolean +gst_cuda_compositor_pad_setup_converter 
(GstVideoAggregatorPad * pad, + GstVideoAggregator * vagg) +{ + auto self = GST_CUDA_COMPOSITOR (vagg); + auto cpad = GST_CUDA_COMPOSITOR_PAD (pad); + auto priv = cpad->priv; + gint width, height; + GstVideoInfo *info = &vagg->info; + GstVideoRectangle frame_rect; + gint x_offset, y_offset; + + std::lock_guard < std::recursive_mutex > lk (priv->lock); + if (!priv->conv) { + priv->conv = gst_cuda_converter_new (&pad->info, &vagg->info, self->context, + nullptr); + if (!priv->conv) { + GST_ERROR_OBJECT (self, "Couldn't create converter"); + return FALSE; + } + + priv->config_updated = TRUE; + } + + if (!priv->config_updated) + return TRUE; + + gst_cuda_compositor_pad_get_output_size (cpad, GST_VIDEO_INFO_PAR_N (info), + GST_VIDEO_INFO_PAR_D (info), &width, &height, &x_offset, &y_offset); + + frame_rect = clamp_rectangle (priv->xpos + x_offset, priv->ypos + y_offset, + width, height, GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info)); + +#ifndef GST_DISABLE_GST_DEBUG + guint zorder = 0; + g_object_get (pad, "zorder", &zorder, nullptr); + + GST_LOG_OBJECT (pad, "Update position, pad-xpos %d, pad-ypos %d, " + "pad-zorder %d, pad-width %d, pad-height %d, in-resolution %dx%d, " + "out-resoution %dx%d, dst-{x,y,width,height} %d-%d-%d-%d", + priv->xpos, priv->ypos, zorder, priv->width, priv->height, + GST_VIDEO_INFO_WIDTH (&pad->info), GST_VIDEO_INFO_HEIGHT (&pad->info), + GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), + frame_rect.x, frame_rect.y, frame_rect.w, frame_rect.h); +#endif + + g_object_set (priv->conv, "dest-x", frame_rect.x, + "dest-y", frame_rect.y, "dest-width", frame_rect.w, + "dest-height", frame_rect.h, "alpha", priv->alpha, + "blend", priv->op == GST_CUDA_COMPOSITOR_OPERATOR_SOURCE ? 
FALSE : TRUE, + nullptr); + priv->config_updated = FALSE; + + return TRUE; +} + +#define GST_CUDA_COMPOSITOR_FORMATS \ + "{ I420, YV12, NV12, NV21, P010_10LE, P012_LE, P016_LE, I420_10LE, I420_12LE, Y444, " \ + "Y444_10LE, Y444_12LE, Y444_16LE, BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, " \ + "BGR, BGR10A2_LE, RGB10A2_LE, Y42B, I422_10LE, I422_12LE, RGBP, BGRP, GBR, " \ + "GBRA, GBR_10LE, GBR_12LE, GBR_16LE, VUYA }" + +static GstStaticPadTemplate sink_template = +GST_STATIC_PAD_TEMPLATE ("sink_%u", GST_PAD_SINK, GST_PAD_REQUEST, + GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE_WITH_FEATURES + (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, GST_CUDA_COMPOSITOR_FORMATS))); + +static GstStaticPadTemplate src_template = +GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, GST_PAD_ALWAYS, + GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE_WITH_FEATURES + (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, GST_CUDA_COMPOSITOR_FORMATS))); + +static void gst_cuda_compositor_child_proxy_init (gpointer g_iface, + gpointer iface_data); +static void gst_cuda_compositor_finalize (GObject * object); +static void gst_cuda_compositor_set_property (GObject * object, + guint prop_id, const GValue * value, GParamSpec * pspec); +static void gst_cuda_compositor_get_property (GObject * object, + guint prop_id, GValue * value, GParamSpec * pspec); + +static GstPad *gst_cuda_compositor_request_new_pad (GstElement * element, + GstPadTemplate * templ, const gchar * name, const GstCaps * caps); +static void gst_cuda_compositor_release_pad (GstElement * element, + GstPad * pad); +static void gst_cuda_compositor_set_context (GstElement * element, + GstContext * context); + +static gboolean gst_cuda_compositor_start (GstAggregator * agg); +static gboolean gst_cuda_compositor_stop (GstAggregator * agg); +static gboolean gst_cuda_compositor_sink_query (GstAggregator * agg, + GstAggregatorPad * pad, GstQuery * query); +static gboolean gst_cuda_compositor_src_query (GstAggregator * agg, + GstQuery * query); +static GstCaps 
*gst_cuda_compositor_fixate_src_caps (GstAggregator * agg, + GstCaps * caps); +static gboolean gst_cuda_compositor_negotiated_src_caps (GstAggregator * agg, + GstCaps * caps); +static gboolean +gst_cuda_compositor_propose_allocation (GstAggregator * agg, + GstAggregatorPad * pad, GstQuery * decide_query, GstQuery * query); +static gboolean gst_cuda_compositor_decide_allocation (GstAggregator * agg, + GstQuery * query); +static GstFlowReturn +gst_cuda_compositor_aggregate_frames (GstVideoAggregator * vagg, + GstBuffer * outbuf); + +#define gst_cuda_compositor_parent_class parent_class +G_DEFINE_TYPE_WITH_CODE (GstCudaCompositor, gst_cuda_compositor, + GST_TYPE_VIDEO_AGGREGATOR, G_IMPLEMENT_INTERFACE (GST_TYPE_CHILD_PROXY, + gst_cuda_compositor_child_proxy_init)); + +static void +gst_cuda_compositor_class_init (GstCudaCompositorClass * klass) +{ + auto object_class = G_OBJECT_CLASS (klass); + auto element_class = GST_ELEMENT_CLASS (klass); + auto agg_class = GST_AGGREGATOR_CLASS (klass); + auto vagg_class = GST_VIDEO_AGGREGATOR_CLASS (klass); + + object_class->finalize = gst_cuda_compositor_finalize; + object_class->set_property = gst_cuda_compositor_set_property; + object_class->get_property = gst_cuda_compositor_get_property; + + g_object_class_install_property (object_class, PROP_DEVICE_ID, + g_param_spec_int ("cuda-device-id", "Cuda Device ID", + "Set the GPU device to use for operations (-1 = auto)", + -1, G_MAXINT, DEFAULT_DEVICE_ID, + (GParamFlags) (G_PARAM_READWRITE | GST_PARAM_MUTABLE_READY | + G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property (object_class, + PROP_IGNORE_INACTIVE_PADS, g_param_spec_boolean ("ignore-inactive-pads", + "Ignore inactive pads", + "Avoid timing out waiting for inactive pads", FALSE, + (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + element_class->request_new_pad = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_request_new_pad); + element_class->release_pad = + GST_DEBUG_FUNCPTR 
(gst_cuda_compositor_release_pad); + element_class->set_context = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_set_context); + + agg_class->start = GST_DEBUG_FUNCPTR (gst_cuda_compositor_start); + agg_class->stop = GST_DEBUG_FUNCPTR (gst_cuda_compositor_stop); + agg_class->sink_query = GST_DEBUG_FUNCPTR (gst_cuda_compositor_sink_query); + agg_class->src_query = GST_DEBUG_FUNCPTR (gst_cuda_compositor_src_query); + agg_class->fixate_src_caps = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_fixate_src_caps); + agg_class->negotiated_src_caps = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_negotiated_src_caps); + agg_class->propose_allocation = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_propose_allocation); + agg_class->decide_allocation = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_decide_allocation); + + vagg_class->aggregate_frames = + GST_DEBUG_FUNCPTR (gst_cuda_compositor_aggregate_frames); + + gst_element_class_add_static_pad_template_with_gtype (element_class, + &sink_template, GST_TYPE_CUDA_COMPOSITOR_PAD); + gst_element_class_add_static_pad_template_with_gtype (element_class, + &src_template, GST_TYPE_AGGREGATOR_PAD); + + gst_element_class_set_static_metadata (element_class, "CUDA Compositor", + "Filter/Editor/Video/Compositor/Hardware", "A CUDA compositor", + "Seungha Yang "); + + gst_type_mark_as_plugin_api (GST_TYPE_CUDA_COMPOSITOR_PAD, + (GstPluginAPIFlags) 0); + + GST_DEBUG_CATEGORY_INIT (gst_cuda_compositor_debug, + "cudacompositor", 0, "cudacompositor"); +} + +static void +gst_cuda_compositor_init (GstCudaCompositor * self) +{ + self->priv = new GstCudaCompositorPrivate (); +} + +static void +gst_cuda_compositor_finalize (GObject * object) +{ + auto self = GST_CUDA_COMPOSITOR (object); + + delete self->priv; + + gst_clear_cuda_stream (&self->other_stream); + gst_clear_cuda_stream (&self->stream); + gst_clear_object (&self->context); + + G_OBJECT_CLASS (parent_class)->finalize (object); +} + +static void +gst_cuda_compositor_set_property (GObject * object, + guint prop_id, 
const GValue * value, GParamSpec * pspec) +{ + auto self = GST_CUDA_COMPOSITOR (object); + auto priv = self->priv; + + std::lock_guard < std::recursive_mutex > lk (priv->lock); + switch (prop_id) { + case PROP_DEVICE_ID: + priv->device_id = g_value_get_int (value); + break; + case PROP_IGNORE_INACTIVE_PADS: + gst_aggregator_set_ignore_inactive_pads (GST_AGGREGATOR (object), + g_value_get_boolean (value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +gst_cuda_compositor_get_property (GObject * object, + guint prop_id, GValue * value, GParamSpec * pspec) +{ + auto self = GST_CUDA_COMPOSITOR (object); + auto priv = self->priv; + + std::lock_guard < std::recursive_mutex > lk (priv->lock); + switch (prop_id) { + case PROP_DEVICE_ID: + g_value_set_int (value, priv->device_id); + break; + case PROP_IGNORE_INACTIVE_PADS: + g_value_set_boolean (value, + gst_aggregator_get_ignore_inactive_pads (GST_AGGREGATOR (object))); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static GObject * +gst_cuda_compositor_child_proxy_get_child_by_index (GstChildProxy * proxy, + guint index) +{ + auto self = GST_CUDA_COMPOSITOR (proxy); + GObject *obj = nullptr; + + GST_OBJECT_LOCK (self); + obj = (GObject *) g_list_nth_data (GST_ELEMENT_CAST (self)->sinkpads, index); + if (obj) + gst_object_ref (obj); + GST_OBJECT_UNLOCK (self); + + return obj; +} + +static guint +gst_cuda_compositor_child_proxy_get_children_count (GstChildProxy * proxy) +{ + auto self = GST_CUDA_COMPOSITOR (proxy); + guint count = 0; + + GST_OBJECT_LOCK (self); + count = GST_ELEMENT_CAST (self)->numsinkpads; + GST_OBJECT_UNLOCK (self); + GST_INFO_OBJECT (self, "Children Count: %d", count); + + return count; +} + +static void +gst_cuda_compositor_child_proxy_init (gpointer g_iface, gpointer iface_data) +{ + GstChildProxyInterface *iface = (GstChildProxyInterface *) g_iface; + + iface->get_child_by_index = 
+ gst_cuda_compositor_child_proxy_get_child_by_index; + iface->get_children_count = + gst_cuda_compositor_child_proxy_get_children_count; +} + +static GstPad * +gst_cuda_compositor_request_new_pad (GstElement * element, + GstPadTemplate * templ, const gchar * name, const GstCaps * caps) +{ + GstPad *pad; + + pad = GST_ELEMENT_CLASS (parent_class)->request_new_pad (element, + templ, name, caps); + + if (!pad) { + GST_DEBUG_OBJECT (element, "could not create/add pad"); + return nullptr; + } + + gst_child_proxy_child_added (GST_CHILD_PROXY (element), G_OBJECT (pad), + GST_OBJECT_NAME (pad)); + + GST_DEBUG_OBJECT (element, "Created new pad %s:%s", GST_DEBUG_PAD_NAME (pad)); + + return pad; +} + +static void +gst_cuda_compositor_release_pad (GstElement * element, GstPad * pad) +{ + auto self = GST_CUDA_COMPOSITOR (element); + + GST_DEBUG_OBJECT (self, "Releasing pad %s:%s", GST_DEBUG_PAD_NAME (pad)); + + gst_child_proxy_child_removed (GST_CHILD_PROXY (self), G_OBJECT (pad), + GST_OBJECT_NAME (pad)); + + GST_ELEMENT_CLASS (parent_class)->release_pad (element, pad); +} + +static void +gst_cuda_compositor_set_context (GstElement * element, GstContext * context) +{ + auto self = GST_CUDA_COMPOSITOR (element); + auto priv = self->priv; + + { + std::lock_guard < std::recursive_mutex > lk (priv->lock); + gst_cuda_handle_set_context (element, context, priv->device_id, + &self->context); + } + + GST_ELEMENT_CLASS (parent_class)->set_context (element, context); +} + +static gboolean +gst_cuda_compositor_start (GstAggregator * agg) +{ + auto self = GST_CUDA_COMPOSITOR (agg); + auto priv = self->priv; + + { + std::lock_guard < std::recursive_mutex > lk (priv->lock); + if (!gst_cuda_ensure_element_context (GST_ELEMENT_CAST (self), + priv->device_id, &self->context)) { + GST_ERROR_OBJECT (self, "Failed to get context"); + return FALSE; + } + } + + self->stream = gst_cuda_stream_new (self->context); + + return GST_AGGREGATOR_CLASS (parent_class)->start (agg); +} + +static gboolean 
+gst_cuda_compositor_stop (GstAggregator * agg) +{ + auto self = GST_CUDA_COMPOSITOR (agg); + auto priv = self->priv; + + { + std::lock_guard < std::recursive_mutex > lk (priv->lock); + gst_clear_cuda_stream (&self->other_stream); + gst_clear_cuda_stream (&self->stream); + gst_clear_object (&self->context); + } + + return GST_AGGREGATOR_CLASS (parent_class)->stop (agg); +} + +static GstCaps * +gst_cuda_compositor_sink_getcaps (GstPad * pad, GstCaps * filter) +{ + GstCaps *sinkcaps; + GstCaps *template_caps; + GstCaps *filtered_caps; + GstCaps *returned_caps; + + template_caps = gst_pad_get_pad_template_caps (pad); + + sinkcaps = gst_pad_get_current_caps (pad); + if (sinkcaps == nullptr) { + sinkcaps = gst_caps_ref (template_caps); + } else { + sinkcaps = gst_caps_merge (sinkcaps, gst_caps_ref (template_caps)); + } + + if (filter) { + filtered_caps = gst_caps_intersect (sinkcaps, filter); + gst_caps_unref (sinkcaps); + } else { + filtered_caps = sinkcaps; /* pass ownership */ + } + + returned_caps = gst_caps_intersect (filtered_caps, template_caps); + + gst_caps_unref (template_caps); + gst_caps_unref (filtered_caps); + + GST_DEBUG_OBJECT (pad, "returning %" GST_PTR_FORMAT, returned_caps); + + return returned_caps; +} + +static gboolean +gst_cuda_compositor_sink_acceptcaps (GstPad * pad, GstCaps * caps) +{ + gboolean ret; + GstCaps *template_caps; + + GST_DEBUG_OBJECT (pad, "try accept caps of %" GST_PTR_FORMAT, caps); + + template_caps = gst_pad_get_pad_template_caps (pad); + template_caps = gst_caps_make_writable (template_caps); + + ret = gst_caps_can_intersect (caps, template_caps); + GST_DEBUG_OBJECT (pad, "%saccepted caps %" GST_PTR_FORMAT, + (ret ? 
"" : "not "), caps); + gst_caps_unref (template_caps); + + return ret; +} + +static gboolean +gst_cuda_compositor_sink_query (GstAggregator * agg, + GstAggregatorPad * pad, GstQuery * query) +{ + auto self = GST_CUDA_COMPOSITOR (agg); + auto priv = self->priv; + + switch (GST_QUERY_TYPE (query)) { + case GST_QUERY_CONTEXT: + { + std::lock_guard < std::recursive_mutex > lk (priv->lock); + if (gst_cuda_handle_context_query (GST_ELEMENT (agg), query, + self->context)) { + return TRUE; + } + break; + } + case GST_QUERY_CAPS: + { + GstCaps *filter, *caps; + + gst_query_parse_caps (query, &filter); + caps = gst_cuda_compositor_sink_getcaps (GST_PAD (pad), filter); + gst_query_set_caps_result (query, caps); + gst_caps_unref (caps); + return TRUE; + } + case GST_QUERY_ACCEPT_CAPS: + { + GstCaps *caps; + gboolean ret; + + gst_query_parse_accept_caps (query, &caps); + ret = gst_cuda_compositor_sink_acceptcaps (GST_PAD (pad), caps); + gst_query_set_accept_caps_result (query, ret); + return TRUE; + } + default: + break; + } + + return GST_AGGREGATOR_CLASS (parent_class)->sink_query (agg, pad, query); +} + +static gboolean +gst_cuda_compositor_src_query (GstAggregator * agg, GstQuery * query) +{ + auto self = GST_CUDA_COMPOSITOR (agg); + + switch (GST_QUERY_TYPE (query)) { + case GST_QUERY_CONTEXT: + if (gst_cuda_handle_context_query (GST_ELEMENT (agg), query, + self->context)) { + return TRUE; + } + break; + default: + break; + } + + return GST_AGGREGATOR_CLASS (parent_class)->src_query (agg, query); +} + +static GstCaps * +gst_cuda_compositor_fixate_src_caps (GstAggregator * agg, GstCaps * caps) +{ + auto vagg = GST_VIDEO_AGGREGATOR (agg); + GList *l; + gint best_width = -1, best_height = -1; + gint best_fps_n = -1, best_fps_d = -1; + gint par_n, par_d; + gdouble best_fps = 0.; + GstCaps *ret = nullptr; + GstStructure *s; + + ret = gst_caps_make_writable (caps); + + /* we need this to calculate how large to make the output frame */ + s = gst_caps_get_structure (ret, 0); + if 
(gst_structure_has_field (s, "pixel-aspect-ratio")) { + gst_structure_fixate_field_nearest_fraction (s, "pixel-aspect-ratio", 1, 1); + gst_structure_get_fraction (s, "pixel-aspect-ratio", &par_n, &par_d); + } else { + par_n = par_d = 1; + } + + GST_OBJECT_LOCK (vagg); + for (l = GST_ELEMENT (vagg)->sinkpads; l; l = l->next) { + auto vaggpad = GST_VIDEO_AGGREGATOR_PAD (l->data); + auto cpad = GST_CUDA_COMPOSITOR_PAD (vaggpad); + auto priv = cpad->priv; + gint this_width, this_height; + gint width, height; + gint fps_n, fps_d; + gdouble cur_fps; + gint x_offset; + gint y_offset; + + fps_n = GST_VIDEO_INFO_FPS_N (&vaggpad->info); + fps_d = GST_VIDEO_INFO_FPS_D (&vaggpad->info); + gst_cuda_compositor_pad_get_output_size (cpad, + par_n, par_d, &width, &height, &x_offset, &y_offset); + + if (width == 0 || height == 0) + continue; + + /* {x,y}_offset represent padding size of each top and left area. + * To calculate total resolution, count bottom and right padding area + * as well here */ + this_width = width + MAX (priv->xpos + 2 * x_offset, 0); + this_height = height + MAX (priv->ypos + 2 * y_offset, 0); + + if (best_width < this_width) + best_width = this_width; + if (best_height < this_height) + best_height = this_height; + + if (fps_d == 0) + cur_fps = 0.0; + else + gst_util_fraction_to_double (fps_n, fps_d, &cur_fps); + + if (best_fps < cur_fps) { + best_fps = cur_fps; + best_fps_n = fps_n; + best_fps_d = fps_d; + } + } + GST_OBJECT_UNLOCK (vagg); + + if (best_fps_n <= 0 || best_fps_d <= 0 || best_fps == 0.0) { + best_fps_n = 25; + best_fps_d = 1; + best_fps = 25.0; + } + + if (best_width <= 0 || best_height <= 0) { + best_width = 320; + best_height = 240; + } + + gst_structure_fixate_field_nearest_int (s, "width", best_width); + gst_structure_fixate_field_nearest_int (s, "height", best_height); + gst_structure_fixate_field_nearest_fraction (s, "framerate", best_fps_n, + best_fps_d); + ret = gst_caps_fixate (ret); + + GST_LOG_OBJECT (agg, "Fixated caps %" 
GST_PTR_FORMAT, ret); + + return ret; +} + +static gboolean +gst_cuda_compositor_clear_pad_context (GstCudaCompositor * self, + GstCudaCompositorPad * cpad, gpointer user_data) +{ + auto priv = cpad->priv; + + gst_clear_object (&priv->conv); + + return TRUE; +} + +static gboolean +gst_cuda_compositor_negotiated_src_caps (GstAggregator * agg, GstCaps * caps) +{ + gst_element_foreach_sink_pad (GST_ELEMENT_CAST (agg), + (GstElementForeachPadFunc) gst_cuda_compositor_clear_pad_context, + nullptr); + + return GST_AGGREGATOR_CLASS (parent_class)->negotiated_src_caps (agg, caps); +} + +static gboolean +gst_cuda_compositor_propose_allocation (GstAggregator * agg, + GstAggregatorPad * pad, GstQuery * decide_query, GstQuery * query) +{ + auto self = GST_CUDA_COMPOSITOR (agg); + GstVideoInfo info; + GstCaps *caps; + + gst_query_parse_allocation (query, &caps, nullptr); + + if (!caps) + return FALSE; + + if (!gst_video_info_from_caps (&info, caps)) + return FALSE; + + if (gst_query_get_n_allocation_pools (query) == 0) { + auto pool = gst_cuda_buffer_pool_new (self->context); + + if (!pool) { + GST_ERROR_OBJECT (self, "Failed to create buffer pool"); + return FALSE; + } + + auto config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_add_option (config, + GST_BUFFER_POOL_OPTION_VIDEO_META); + + if (self->other_stream) + gst_buffer_pool_config_set_cuda_stream (config, self->other_stream); + else if (self->stream) + gst_buffer_pool_config_set_cuda_stream (config, self->stream); + + guint size = GST_VIDEO_INFO_SIZE (&info); + gst_buffer_pool_config_set_params (config, caps, size, 0, 0); + + if (!gst_buffer_pool_set_config (pool, config)) { + GST_ERROR_OBJECT (pool, "Couldn't set config"); + gst_object_unref (pool); + + return FALSE; + } + + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, + nullptr, &size, nullptr, nullptr); + gst_structure_free (config); + + gst_query_add_allocation_pool (query, pool, size, 0, 0); + 
gst_object_unref (pool); + } + + gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, nullptr); + + return TRUE; +} + +static gboolean +gst_cuda_compositor_decide_allocation (GstAggregator * agg, GstQuery * query) +{ + auto self = GST_CUDA_COMPOSITOR (agg); + GstCaps *caps; + GstBufferPool *pool = nullptr; + guint n, size, min, max; + GstVideoInfo info; + + gst_query_parse_allocation (query, &caps, nullptr); + + if (!caps) { + GST_DEBUG_OBJECT (self, "No output caps"); + return FALSE; + } + + if (!gst_video_info_from_caps (&info, caps)) { + GST_ERROR_OBJECT (self, "Invalid caps"); + return FALSE; + } + + n = gst_query_get_n_allocation_pools (query); + if (n > 0) + gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max); + + /* create our own pool */ + if (pool) { + if (!GST_IS_CUDA_BUFFER_POOL (pool)) { + GST_DEBUG_OBJECT (self, + "Downstream pool is not cuda, will create new one"); + gst_clear_object (&pool); + } else { + auto cpool = GST_CUDA_BUFFER_POOL (pool); + if (cpool->context != self->context) { + GST_DEBUG_OBJECT (self, "Different context, will create new one"); + gst_clear_object (&pool); + } + } + } + + size = (guint) info.size; + + if (!pool) { + pool = gst_cuda_buffer_pool_new (self->context); + min = 0; + max = 0; + } + + auto config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); + gst_buffer_pool_config_set_params (config, caps, size, min, max); + gst_clear_cuda_stream (&self->other_stream); + self->other_stream = gst_buffer_pool_config_get_cuda_stream (config); + if (self->other_stream) { + GST_DEBUG_OBJECT (self, "Downstream provided CUDA stream"); + } else if (self->stream) { + GST_DEBUG_OBJECT (self, "Set our stream to decided buffer pool"); + gst_buffer_pool_config_set_cuda_stream (config, self->stream); + } + + if (!gst_buffer_pool_set_config (pool, config)) { + GST_ERROR_OBJECT (self, "Set config failed"); + gst_object_unref (pool); + return 
FALSE; + } + + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + + if (n > 0) + gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); + else + gst_query_add_allocation_pool (query, pool, size, min, max); + + gst_object_unref (pool); + + return TRUE; +} + +static gboolean +gst_cuda_compositor_draw_background (GstCudaCompositor * self, + GstVideoFrame * frame, CUstream stream) +{ + CUresult ret; + CUdeviceptr data; + guint width, height, stride; + guint16 uv_val; + auto format = GST_VIDEO_FRAME_FORMAT (frame); + switch (format) { + case GST_VIDEO_FORMAT_I420: + case GST_VIDEO_FORMAT_YV12: + case GST_VIDEO_FORMAT_Y42B: + case GST_VIDEO_FORMAT_Y444: + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, 0); + width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); + height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0); + + ret = CuMemsetD2D8Async (data, stride, 0, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + + for (guint i = 1; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) { + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i); + width = GST_VIDEO_FRAME_COMP_WIDTH (frame, i); + height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, i); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); + + ret = CuMemsetD2D8Async (data, stride, 128, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + } + break; + case GST_VIDEO_FORMAT_NV12: + case GST_VIDEO_FORMAT_NV21: + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, 0); + width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); + height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0); + + ret = CuMemsetD2D8Async (data, stride, 0, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, 1); + ret = CuMemsetD2D8Async (data, 
stride, 128, width, height / 2, stream); + if (!gst_cuda_result (ret)) + return FALSE; + break; + case GST_VIDEO_FORMAT_P010_10LE: + case GST_VIDEO_FORMAT_P012_LE: + case GST_VIDEO_FORMAT_P016_LE: + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, 0); + width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); + height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0); + + ret = CuMemsetD2D16Async (data, stride, 0, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, 1); + ret = CuMemsetD2D16Async (data, + stride, G_MAXUINT16 / 2, width, height / 2, stream); + if (!gst_cuda_result (ret)) + return FALSE; + break; + case GST_VIDEO_FORMAT_I420_10LE: + case GST_VIDEO_FORMAT_I420_12LE: + case GST_VIDEO_FORMAT_I422_10LE: + case GST_VIDEO_FORMAT_I422_12LE: + case GST_VIDEO_FORMAT_Y444_10LE: + case GST_VIDEO_FORMAT_Y444_12LE: + case GST_VIDEO_FORMAT_Y444_16LE: + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, 0); + width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); + height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0); + + ret = CuMemsetD2D16Async (data, stride, 0, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + + uv_val = (((guint) 1 << GST_VIDEO_FRAME_COMP_DEPTH (frame, 0)) / 2); + for (guint i = 1; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) { + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i); + width = GST_VIDEO_FRAME_COMP_WIDTH (frame, i); + height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, i); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); + + ret = CuMemsetD2D16Async (data, stride, uv_val, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + } + break; + case GST_VIDEO_FORMAT_RGBA: + case GST_VIDEO_FORMAT_BGRA: + case GST_VIDEO_FORMAT_RGBx: + case GST_VIDEO_FORMAT_BGRx: + case GST_VIDEO_FORMAT_ARGB: + case GST_VIDEO_FORMAT_ABGR: + case 
GST_VIDEO_FORMAT_RGB10A2_LE: + case GST_VIDEO_FORMAT_BGR10A2_LE: + case GST_VIDEO_FORMAT_VUYA: + { + guint32 packed = 0; + if (format == GST_VIDEO_FORMAT_ARGB || format == GST_VIDEO_FORMAT_ABGR) { + packed = 0xff; + } else if (format == GST_VIDEO_FORMAT_RGB10A2_LE || + format == GST_VIDEO_FORMAT_BGR10A2_LE) { + packed = ((guint32) 0x3) << 30; + } else if (format == GST_VIDEO_FORMAT_VUYA) { + packed = (((guint32) 0xff) << 24) | (((guint32) 0x80) << 8) | + ((guint32) 0x80); + } else { + packed = ((guint32) 0xff) << 24; + } + + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, 0); + width = GST_VIDEO_FRAME_WIDTH (frame); + height = GST_VIDEO_FRAME_HEIGHT (frame); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0); + + ret = CuMemsetD2D32Async (data, stride, packed, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + break; + } + case GST_VIDEO_FORMAT_RGB: + case GST_VIDEO_FORMAT_BGR: + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, 0); + width = GST_VIDEO_FRAME_WIDTH (frame) * 3; + height = GST_VIDEO_FRAME_HEIGHT (frame); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0); + + ret = CuMemsetD2D8Async (data, stride, 0, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + break; + case GST_VIDEO_FORMAT_RGBP: + case GST_VIDEO_FORMAT_BGRP: + case GST_VIDEO_FORMAT_GBR: + case GST_VIDEO_FORMAT_GBRA: + for (guint i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) { + guint8 val = 0; + if (format == GST_VIDEO_FORMAT_GBRA && i == 3) + val = 255; + + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i); + width = GST_VIDEO_FRAME_COMP_WIDTH (frame, i); + height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, i); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); + + ret = CuMemsetD2D8Async (data, stride, val, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + } + break; + case GST_VIDEO_FORMAT_GBR_10LE: + case GST_VIDEO_FORMAT_GBR_12LE: + case GST_VIDEO_FORMAT_GBR_16LE: + for (guint i = 0; i < 
GST_VIDEO_FRAME_N_PLANES (frame); i++) { + data = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i); + width = GST_VIDEO_FRAME_COMP_WIDTH (frame, i); + height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, i); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); + + ret = CuMemsetD2D16Async (data, stride, 0, width, height, stream); + if (!gst_cuda_result (ret)) + return FALSE; + } + break; + default: + g_assert_not_reached (); + return FALSE; + } + + return TRUE; +} + +static GstFlowReturn +gst_cuda_compositor_aggregate_frames (GstVideoAggregator * vagg, + GstBuffer * outbuf) +{ + auto self = GST_CUDA_COMPOSITOR (vagg); + GList *iter; + GstFlowReturn ret = GST_FLOW_OK; + GstVideoFrame frame; + GstCudaMemory *cmem; + GstCudaStream *stream; + + GST_LOG_OBJECT (self, "aggregate"); + + if (!gst_cuda_context_push (self->context)) { + GST_ERROR_OBJECT (self, "Couldn't push context"); + return GST_FLOW_ERROR; + } + + if (!gst_video_frame_map (&frame, &vagg->info, outbuf, + (GstMapFlags) (GST_MAP_WRITE | GST_MAP_CUDA))) { + GST_ERROR_OBJECT (self, "Couldn't map output frame"); + gst_cuda_context_pop (nullptr); + return GST_FLOW_ERROR; + } + + cmem = (GstCudaMemory *) gst_buffer_peek_memory (outbuf, 0); + stream = gst_cuda_memory_get_stream (cmem); + auto stream_handle = gst_cuda_stream_get_handle (stream); + + if (!gst_cuda_compositor_draw_background (self, &frame, stream_handle)) { + GST_ERROR_OBJECT (self, "Couldn't draw background"); + ret = GST_FLOW_ERROR; + goto out; + } + + GST_OBJECT_LOCK (self); + for (iter = GST_ELEMENT (vagg)->sinkpads; iter; iter = g_list_next (iter)) { + auto pad = GST_VIDEO_AGGREGATOR_PAD (iter->data); + auto cpad = GST_CUDA_COMPOSITOR_PAD (pad); + auto pad_priv = cpad->priv; + auto in_frame = gst_video_aggregator_pad_get_prepared_frame (pad); + + if (!in_frame) + continue; + + if (!gst_cuda_compositor_pad_setup_converter (pad, vagg)) { + GST_ERROR_OBJECT (self, "Couldn't setup converter"); + ret = GST_FLOW_ERROR; + break; + } + + auto in_cmem = 
(GstCudaMemory *) + gst_buffer_peek_memory (in_frame->buffer, 0); + auto in_stream = gst_cuda_memory_get_stream (in_cmem); + if (in_stream != stream) + gst_cuda_memory_sync (in_cmem); + + if (!gst_cuda_converter_convert_frame (pad_priv->conv, in_frame, + &frame, stream_handle, nullptr)) { + GST_ERROR_OBJECT (pad, "Couldn't convert frame"); + ret = GST_FLOW_ERROR; + break; + } + } + GST_OBJECT_UNLOCK (self); + + if (ret == GST_FLOW_OK) + CuStreamSynchronize (stream_handle); + +out: + gst_video_frame_unmap (&frame); + gst_cuda_context_pop (nullptr); + + return ret; +} diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudacompositor.h b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudacompositor.h new file mode 100644 index 0000000000..773030f050 --- /dev/null +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudacompositor.h @@ -0,0 +1,38 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#pragma once + +#include +#include +#include +#include + +G_BEGIN_DECLS + +#define GST_TYPE_CUDA_COMPOSITOR_PAD (gst_cuda_compositor_pad_get_type()) +G_DECLARE_FINAL_TYPE (GstCudaCompositorPad, gst_cuda_compositor_pad, + GST, CUDA_COMPOSITOR_PAD, GstVideoAggregatorPad) + +#define GST_TYPE_CUDA_COMPOSITOR (gst_cuda_compositor_get_type()) +G_DECLARE_FINAL_TYPE (GstCudaCompositor, gst_cuda_compositor, + GST, CUDA_COMPOSITOR, GstVideoAggregator) + +G_END_DECLS + diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build index b7d69ab6cf..b44d5ddd28 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build +++ b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build @@ -1,5 +1,6 @@ nvcodec_sources = [ 'gstcudabasetransform.c', + 'gstcudacompositor.cpp', 'gstcudaconverter.cpp', 'gstcudaconvertscale.c', 'gstcudaipc.cpp', diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c b/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c index a2f50c00ad..cafec0f207 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c @@ -57,6 +57,7 @@ #include "gstcudaipcsink.h" #include "gstcudaipcsrc.h" #include "gstnvcodecutils.h" +#include "gstcudacompositor.h" #include @@ -350,6 +351,8 @@ plugin_init (GstPlugin * plugin) GST_TYPE_CUDA_SCALE); gst_element_register (plugin, "cudaconvertscale", GST_RANK_NONE, GST_TYPE_CUDA_CONVERT_SCALE); + gst_element_register (plugin, "cudacompositor", GST_RANK_NONE, + GST_TYPE_CUDA_COMPOSITOR); } gst_element_register (plugin, "cudaipcsink", GST_RANK_NONE, GST_TYPE_CUDA_IPC_SINK);