/* GStreamer * Copyright (C) 2022 Seungha Yang * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ /** * SECTION:element-nvav1dec * @title: nvav1dec * * GstCodecs based NVIDIA AV1 video decoder * * ## Example launch line * ``` * gst-launch-1.0 filesrc location=/path/to/av1/file ! parsebin ! nvav1dec ! videoconvert ! autovideosink * ``` * * Since: 1.22 * */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "gstnvav1dec.h" #include "gstnvdecoder.h" #include GST_DEBUG_CATEGORY_STATIC (gst_nv_av1_dec_debug); #define GST_CAT_DEFAULT gst_nv_av1_dec_debug typedef struct _GstNvAV1Dec { GstAV1Decoder parent; GstCudaContext *context; GstNvDecoder *decoder; GstAV1SequenceHeaderOBU seq_hdr; CUVIDPICPARAMS params; /* slice buffer which will be passed to CUVIDPICPARAMS::pBitstreamData */ guint8 *bitstream_buffer; /* allocated memory size of bitstream_buffer */ gsize bitstream_buffer_alloc_size; /* current offset of bitstream_buffer (per frame) */ gsize bitstream_buffer_offset; guint *tile_offsets; guint tile_offsets_alloc_len; guint num_tiles; guint max_width; guint max_height; guint bitdepth; guint8 film_grain_params_present; guint num_output_surfaces; guint init_max_width; guint init_max_height; } GstNvAV1Dec; typedef struct _GstNvAV1DecClass { GstAV1DecoderClass parent_class; guint cuda_device_id; guint max_width; guint max_height; } GstNvAV1DecClass; enum { PROP_0, PROP_CUDA_DEVICE_ID, PROP_NUM_OUTPUT_SURFACES, PROP_INIT_MAX_WIDTH, PROP_INIT_MAX_HEIGHT, }; #define DEFAULT_NUM_OUTPUT_SURFACES 0 static GTypeClass *parent_class = nullptr; #define GST_NV_AV1_DEC(object) ((GstNvAV1Dec *) (object)) #define GST_NV_AV1_DEC_GET_CLASS(object) \ (G_TYPE_INSTANCE_GET_CLASS ((object),G_TYPE_FROM_INSTANCE (object),GstNvAV1DecClass)) static void gst_nv_av1_dec_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec); static void gst_nv_av1_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); static void gst_nv_av1_dec_set_context (GstElement * element, GstContext * context); static gboolean gst_nv_av1_dec_open (GstVideoDecoder * decoder); static gboolean gst_nv_av1_dec_close (GstVideoDecoder * decoder); static gboolean gst_nv_av1_dec_stop (GstVideoDecoder * decoder); static gboolean gst_nv_av1_dec_negotiate (GstVideoDecoder * decoder); static gboolean gst_nv_av1_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query); static gboolean gst_nv_av1_dec_src_query (GstVideoDecoder * decoder, GstQuery * query); static gboolean gst_nv_av1_dec_sink_event (GstVideoDecoder * decoder, GstEvent * event); static GstFlowReturn gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder, const GstAV1SequenceHeaderOBU * seq_hdr, gint max_dpb_size); static GstFlowReturn gst_nv_av1_dec_new_picture (GstAV1Decoder * decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture); static GstAV1Picture *gst_nv_av1_dec_duplicate_picture (GstAV1Decoder * decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture); static GstFlowReturn gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, GstAV1Picture * picture, GstAV1Dpb * dpb); static GstFlowReturn gst_nv_av1_dec_decode_tile (GstAV1Decoder * decoder, GstAV1Picture * picture, GstAV1Tile * tile); static GstFlowReturn gst_nv_av1_dec_end_picture (GstAV1Decoder * decoder, GstAV1Picture * picture); static GstFlowReturn gst_nv_av1_dec_output_picture (GstAV1Decoder * decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture); static guint gst_nv_av1_dec_get_preferred_output_delay (GstAV1Decoder * decoder, gboolean is_live); static void gst_nv_av1_dec_class_init (GstNvAV1DecClass * klass, GstNvDecoderClassData * cdata) { GObjectClass *object_class = G_OBJECT_CLASS (klass); GstElementClass *element_class = GST_ELEMENT_CLASS (klass); GstVideoDecoderClass *decoder_class = GST_VIDEO_DECODER_CLASS (klass); GstAV1DecoderClass *av1decoder_class = GST_AV1_DECODER_CLASS (klass); object_class->set_property = gst_nv_av1_dec_set_property; object_class->get_property = gst_nv_av1_dec_get_property; g_object_class_install_property (object_class, PROP_CUDA_DEVICE_ID, g_param_spec_uint ("cuda-device-id", "CUDA device id", "Assigned CUDA device id", 0, G_MAXINT, 0, (GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS))); /** * GstNvAV1Dec:num-output-surfaces: * * The number of output surfaces (0 = auto). This property will be used to * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter * in case of CUDA output mode * * Since: 1.24 */ g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES, g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces", "Maximum number of output surfaces simultaneously mapped in CUDA " "output mode (0 = auto)", 0, 64, DEFAULT_NUM_OUTPUT_SURFACES, (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /** * GstNvAV1Dec:init-max-width: * * Initial CUVIDDECODECREATEINFO.ulMaxWidth value * * Since: 1.24 */ g_object_class_install_property (object_class, PROP_INIT_MAX_WIDTH, g_param_spec_uint ("init-max-width", "Initial Maximum Width", "Expected maximum coded width of stream. This value is used to " "pre-allocate higher dimension of output surfaces than " "that of input stream, in order to help decoder reconfiguration", 0, cdata->max_width, 0, (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); /** * GstNvAV1Dec:init-max-height: * * Initial CUVIDDECODECREATEINFO.ulMaxHeight value * * Since: 1.24 */ g_object_class_install_property (object_class, PROP_INIT_MAX_HEIGHT, g_param_spec_uint ("init-max-height", "Initial Maximum Height", "Expected maximum coded height of stream. This value is used to " "pre-allocate higher dimension of output surfaces than " "that of input stream, in order to help decoder reconfiguration", 0, cdata->max_height, 0, (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_set_context); parent_class = (GTypeClass *) g_type_class_peek_parent (klass); gst_element_class_set_static_metadata (element_class, "NVDEC AV1 Decoder", "Codec/Decoder/Video/Hardware", "NVIDIA AV1 video decoder", "Seungha Yang "); gst_element_class_add_pad_template (element_class, gst_pad_template_new ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, cdata->sink_caps)); gst_element_class_add_pad_template (element_class, gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS, cdata->src_caps)); decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_open); decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_close); decoder_class->stop = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_stop); decoder_class->negotiate = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_negotiate); decoder_class->decide_allocation = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_decide_allocation); decoder_class->src_query = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_src_query); decoder_class->sink_event = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_sink_event); av1decoder_class->new_sequence = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_new_sequence); av1decoder_class->new_picture = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_new_picture); av1decoder_class->duplicate_picture = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_duplicate_picture); av1decoder_class->start_picture = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_start_picture); av1decoder_class->decode_tile = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_decode_tile); av1decoder_class->end_picture = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_end_picture); av1decoder_class->output_picture = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_output_picture); av1decoder_class->get_preferred_output_delay = GST_DEBUG_FUNCPTR (gst_nv_av1_dec_get_preferred_output_delay); klass->cuda_device_id = cdata->cuda_device_id; klass->max_width = cdata->max_width; klass->max_height = cdata->max_height; gst_caps_unref (cdata->sink_caps); gst_caps_unref (cdata->src_caps); g_free (cdata); } static void gst_nv_av1_dec_init (GstNvAV1Dec * self) { self->num_output_surfaces = DEFAULT_NUM_OUTPUT_SURFACES; } static void gst_nv_av1_dec_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstNvAV1Dec *self = GST_NV_AV1_DEC (object); switch (prop_id) { case PROP_NUM_OUTPUT_SURFACES: self->num_output_surfaces = g_value_get_uint (value); break; case PROP_INIT_MAX_WIDTH: self->init_max_width = g_value_get_uint (value); break; case PROP_INIT_MAX_HEIGHT: self->init_max_height = g_value_get_uint (value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static void gst_nv_av1_dec_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { GstNvAV1Dec *self = GST_NV_AV1_DEC (object); GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (object); switch (prop_id) { case PROP_CUDA_DEVICE_ID: g_value_set_uint (value, klass->cuda_device_id); break; case PROP_NUM_OUTPUT_SURFACES: g_value_set_uint (value, self->num_output_surfaces); break; case PROP_INIT_MAX_WIDTH: g_value_set_uint (value, self->init_max_width); break; case PROP_INIT_MAX_HEIGHT: g_value_set_uint (value, self->init_max_height); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static void gst_nv_av1_dec_set_context (GstElement * element, GstContext * context) { GstNvAV1Dec *self = GST_NV_AV1_DEC (element); GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (self); GST_DEBUG_OBJECT (self, "set context %s", gst_context_get_context_type (context)); if (gst_cuda_handle_set_context (element, context, klass->cuda_device_id, &self->context)) { goto done; } if (self->decoder) gst_nv_decoder_handle_set_context (self->decoder, element, context); done: GST_ELEMENT_CLASS (parent_class)->set_context (element, context); } static gboolean gst_nv_av1_dec_open (GstVideoDecoder * decoder) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (self); if (!gst_cuda_ensure_element_context (GST_ELEMENT (self), klass->cuda_device_id, &self->context)) { GST_ERROR_OBJECT (self, "Required element data is unavailable"); return FALSE; } self->decoder = gst_nv_decoder_new (self->context); if (!self->decoder) { GST_ERROR_OBJECT (self, "Failed to create decoder object"); gst_clear_object (&self->context); return FALSE; } return TRUE; } static void gst_nv_av1_dec_reset_bitstream_params (GstNvAV1Dec * self) { self->bitstream_buffer_offset = 0; self->num_tiles = 0; self->params.nBitstreamDataLen = 0; self->params.pBitstreamData = nullptr; self->params.nNumSlices = 0; self->params.pSliceDataOffsets = nullptr; } static gboolean gst_nv_av1_dec_close (GstVideoDecoder * decoder) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); gst_clear_object (&self->decoder); gst_clear_object (&self->context); gst_nv_av1_dec_reset_bitstream_params (self); g_free (self->bitstream_buffer); self->bitstream_buffer = nullptr; g_free (self->tile_offsets); self->tile_offsets = nullptr; self->bitstream_buffer_alloc_size = 0; self->tile_offsets_alloc_len = 0; return TRUE; } static gboolean gst_nv_av1_dec_stop (GstVideoDecoder * decoder) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); gboolean ret; ret = GST_VIDEO_DECODER_CLASS (parent_class)->stop (decoder); if (self->decoder) gst_nv_decoder_reset (self->decoder); return ret; } static gboolean gst_nv_av1_dec_negotiate (GstVideoDecoder * decoder) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); GstAV1Decoder *av1dec = GST_AV1_DECODER (decoder); GST_DEBUG_OBJECT (self, "negotiate"); gst_nv_decoder_negotiate (self->decoder, decoder, av1dec->input_state); return GST_VIDEO_DECODER_CLASS (parent_class)->negotiate (decoder); } static gboolean gst_nv_av1_dec_decide_allocation (GstVideoDecoder * decoder, GstQuery * query) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); if (!gst_nv_decoder_decide_allocation (self->decoder, decoder, query)) { GST_WARNING_OBJECT (self, "Failed to handle decide allocation"); return FALSE; } return GST_VIDEO_DECODER_CLASS (parent_class)->decide_allocation (decoder, query); } static gboolean gst_nv_av1_dec_src_query (GstVideoDecoder * decoder, GstQuery * query) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); switch (GST_QUERY_TYPE (query)) { case GST_QUERY_CONTEXT: if (gst_cuda_handle_context_query (GST_ELEMENT (decoder), query, self->context)) { return TRUE; } else if (self->decoder && gst_nv_decoder_handle_context_query (self->decoder, decoder, query)) { return TRUE; } break; default: break; } return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query); } static gboolean gst_nv_av1_dec_sink_event (GstVideoDecoder * decoder, GstEvent * event) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); if (!self->decoder) goto done; switch (GST_EVENT_TYPE (event)) { case GST_EVENT_FLUSH_START: gst_nv_decoder_set_flushing (self->decoder, TRUE); break; case GST_EVENT_FLUSH_STOP: gst_nv_decoder_set_flushing (self->decoder, FALSE); break; default: break; } done: return GST_VIDEO_DECODER_CLASS (parent_class)->sink_event (decoder, event); } static GstFlowReturn gst_nv_av1_dec_new_sequence (GstAV1Decoder * decoder, const GstAV1SequenceHeaderOBU * seq_hdr, gint max_dpb_size) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); GstNvAV1DecClass *klass = GST_NV_AV1_DEC_GET_CLASS (self); gboolean modified = FALSE; guint max_width, max_height; GST_LOG_OBJECT (self, "new sequence"); if (seq_hdr->seq_profile != GST_AV1_PROFILE_0) { GST_WARNING_OBJECT (self, "Unsupported profile %d", seq_hdr->seq_profile); return GST_FLOW_NOT_NEGOTIATED; } if (seq_hdr->num_planes != 3) { GST_WARNING_OBJECT (self, "Monochrome is not supported"); return GST_FLOW_NOT_NEGOTIATED; } self->seq_hdr = *seq_hdr; if (self->bitdepth != seq_hdr->bit_depth) { GST_INFO_OBJECT (self, "Bitdepth changed %d -> %d", self->bitdepth, seq_hdr->bit_depth); self->bitdepth = seq_hdr->bit_depth; modified = TRUE; } max_width = seq_hdr->max_frame_width_minus_1 + 1; max_height = seq_hdr->max_frame_height_minus_1 + 1; if (self->max_width != max_width || self->max_height != max_height) { GST_INFO_OBJECT (self, "Resolution changed %dx%d -> %dx%d", self->max_width, self->max_height, max_width, max_height); self->max_width = max_width; self->max_height = max_height; modified = TRUE; } if (self->film_grain_params_present != seq_hdr->film_grain_params_present) { GST_INFO_OBJECT (self, "Film grain present changed %d -> %d", self->film_grain_params_present, seq_hdr->film_grain_params_present); self->film_grain_params_present = seq_hdr->film_grain_params_present; modified = TRUE; } if (modified || !gst_nv_decoder_is_configured (self->decoder)) { GstVideoInfo info; GstVideoFormat out_format = GST_VIDEO_FORMAT_UNKNOWN; if (self->bitdepth == 8) { out_format = GST_VIDEO_FORMAT_NV12; } else if (self->bitdepth == 10) { out_format = GST_VIDEO_FORMAT_P010_10LE; } else { GST_WARNING_OBJECT (self, "Invalid bit-depth %d", seq_hdr->bit_depth); return GST_FLOW_NOT_NEGOTIATED; } gst_video_info_set_format (&info, out_format, GST_ROUND_UP_2 (self->max_width), GST_ROUND_UP_2 (self->max_height)); max_width = gst_nv_decoder_get_max_output_size (self->max_width, self->init_max_width, klass->max_width); max_height = gst_nv_decoder_get_max_output_size (self->max_height, self->init_max_height, klass->max_height); if (!gst_nv_decoder_configure (self->decoder, cudaVideoCodec_AV1, &info, self->max_width, self->max_height, self->bitdepth, max_dpb_size, self->film_grain_params_present ? TRUE : FALSE, self->num_output_surfaces, max_width, max_height)) { GST_ERROR_OBJECT (self, "Failed to create decoder"); return GST_FLOW_NOT_NEGOTIATED; } if (!gst_video_decoder_negotiate (GST_VIDEO_DECODER (self))) { GST_ERROR_OBJECT (self, "Failed to negotiate with downstream"); return GST_FLOW_NOT_NEGOTIATED; } } return GST_FLOW_OK; } static GstFlowReturn gst_nv_av1_dec_new_picture (GstAV1Decoder * decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); GstNvDecSurface *surface; GstFlowReturn ret; ret = gst_nv_decoder_acquire_surface (self->decoder, &surface); if (ret != GST_FLOW_OK) return ret; GST_LOG_OBJECT (self, "New decoder surface %p (index %d)", surface, surface->index); gst_av1_picture_set_user_data (picture, surface, (GDestroyNotify) gst_nv_dec_surface_unref); return GST_FLOW_OK; } static GstNvDecSurface * gst_nv_av1_dec_get_decoder_surface_from_picture (GstNvAV1Dec * self, GstAV1Picture * picture) { GstNvDecSurface *surface; surface = (GstNvDecSurface *) gst_av1_picture_get_user_data (picture); if (!surface) GST_DEBUG_OBJECT (self, "current picture does not have decoder surface"); return surface; } static GstAV1Picture * gst_nv_av1_dec_duplicate_picture (GstAV1Decoder * decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); GstNvDecSurface *surface; GstAV1Picture *new_picture; surface = gst_nv_av1_dec_get_decoder_surface_from_picture (self, picture); if (!surface) { GST_ERROR_OBJECT (self, "Parent picture does not have decoder surface"); return nullptr; } new_picture = gst_av1_picture_new (); new_picture->frame_hdr = picture->frame_hdr; gst_av1_picture_set_user_data (new_picture, gst_nv_dec_surface_ref (surface), (GDestroyNotify) gst_nv_dec_surface_unref); return new_picture; } static inline guint8 gst_nv_av1_dec_get_lr_unit_size (guint size) { switch (size) { case 32: return 0; case 64: return 1; case 128: return 2; case 256: return 3; default: break; } return 3; } static GstFlowReturn gst_nv_av1_dec_start_picture (GstAV1Decoder * decoder, GstAV1Picture * picture, GstAV1Dpb * dpb) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); CUVIDPICPARAMS *params = &self->params; CUVIDAV1PICPARAMS *av1_params = ¶ms->CodecSpecific.av1; const GstAV1SequenceHeaderOBU *seq_hdr = &self->seq_hdr; const GstAV1FrameHeaderOBU *frame_hdr = &picture->frame_hdr; const GstAV1GlobalMotionParams *gmp = &frame_hdr->global_motion_params; const GstAV1QuantizationParams *qp = &frame_hdr->quantization_params; const GstAV1TileInfo *ti = &frame_hdr->tile_info; const GstAV1CDEFParams *cp = &frame_hdr->cdef_params; const GstAV1SegmenationParams *sp = &frame_hdr->segmentation_params; const GstAV1LoopFilterParams *lp = &frame_hdr->loop_filter_params; const GstAV1LoopRestorationParams *lrp = &frame_hdr->loop_restoration_params; const GstAV1FilmGrainParams *fgp = &frame_hdr->film_grain_params; GstNvDecSurface *surface; GstNvDecSurface *other_surface; GstAV1Picture *other_pic; guint i, j; surface = gst_nv_av1_dec_get_decoder_surface_from_picture (self, picture); if (!surface) { GST_ERROR_OBJECT (self, "Decoder frame is unavailable"); return GST_FLOW_ERROR; } memset (params, 0, sizeof (CUVIDPICPARAMS)); params->PicWidthInMbs = GST_ROUND_UP_16 (frame_hdr->frame_width) >> 4; params->FrameHeightInMbs = GST_ROUND_UP_16 (frame_hdr->frame_height) >> 4; params->CurrPicIdx = surface->index; params->intra_pic_flag = frame_hdr->frame_is_intra; av1_params->width = frame_hdr->frame_width; av1_params->height = frame_hdr->frame_height; av1_params->frame_offset = frame_hdr->order_hint; av1_params->decodePicIdx = surface->decode_frame_index; /* sequence header */ av1_params->profile = seq_hdr->seq_profile; av1_params->use_128x128_superblock = seq_hdr->use_128x128_superblock; av1_params->subsampling_x = seq_hdr->color_config.subsampling_x; av1_params->subsampling_y = seq_hdr->color_config.subsampling_y; av1_params->mono_chrome = seq_hdr->color_config.mono_chrome; av1_params->bit_depth_minus8 = seq_hdr->bit_depth - 8; av1_params->enable_filter_intra = seq_hdr->enable_filter_intra; av1_params->enable_intra_edge_filter = seq_hdr->enable_intra_edge_filter; av1_params->enable_interintra_compound = seq_hdr->enable_interintra_compound; av1_params->enable_masked_compound = seq_hdr->enable_masked_compound; av1_params->enable_dual_filter = seq_hdr->enable_dual_filter; av1_params->enable_order_hint = seq_hdr->enable_order_hint; av1_params->order_hint_bits_minus1 = seq_hdr->order_hint_bits_minus_1; av1_params->enable_jnt_comp = seq_hdr->enable_jnt_comp; av1_params->enable_superres = seq_hdr->enable_superres; av1_params->enable_cdef = seq_hdr->enable_cdef; av1_params->enable_restoration = seq_hdr->enable_restoration; av1_params->enable_fgs = seq_hdr->film_grain_params_present; /* frame header */ av1_params->frame_type = frame_hdr->frame_type; av1_params->show_frame = frame_hdr->show_frame; av1_params->disable_cdf_update = frame_hdr->disable_cdf_update; av1_params->allow_screen_content_tools = frame_hdr->allow_screen_content_tools; if (frame_hdr->force_integer_mv || frame_hdr->frame_is_intra) av1_params->force_integer_mv = 1; else av1_params->force_integer_mv = 0; if (frame_hdr->use_superres) { av1_params->coded_denom = frame_hdr->superres_denom - GST_AV1_SUPERRES_DENOM_MIN; } else { av1_params->coded_denom = 0; } av1_params->allow_intrabc = frame_hdr->allow_intrabc; av1_params->allow_high_precision_mv = frame_hdr->allow_high_precision_mv; av1_params->interp_filter = frame_hdr->interpolation_filter; av1_params->switchable_motion_mode = frame_hdr->is_motion_mode_switchable; av1_params->use_ref_frame_mvs = frame_hdr->use_ref_frame_mvs; av1_params->disable_frame_end_update_cdf = frame_hdr->disable_frame_end_update_cdf; av1_params->delta_q_present = qp->delta_q_present; av1_params->delta_q_res = qp->delta_q_res; av1_params->using_qmatrix = qp->using_qmatrix; av1_params->coded_lossless = frame_hdr->coded_lossless; av1_params->use_superres = frame_hdr->use_superres; av1_params->tx_mode = frame_hdr->tx_mode; av1_params->reference_mode = frame_hdr->reference_select; av1_params->allow_warped_motion = frame_hdr->allow_warped_motion; av1_params->reduced_tx_set = frame_hdr->reduced_tx_set; av1_params->skip_mode = frame_hdr->skip_mode_present; /* tiling info */ av1_params->num_tile_cols = ti->tile_cols; av1_params->num_tile_rows = ti->tile_rows; av1_params->context_update_tile_id = ti->context_update_tile_id; for (i = 0; i < ti->tile_cols; i++) av1_params->tile_widths[i] = ti->width_in_sbs_minus_1[i] + 1; for (i = 0; i < ti->tile_rows; i++) av1_params->tile_heights[i] = ti->height_in_sbs_minus_1[i] + 1; /* CDEF */ av1_params->cdef_damping_minus_3 = cp->cdef_damping - 3; av1_params->cdef_bits = cp->cdef_bits; for (i = 0; i < GST_AV1_CDEF_MAX; i++) { guint8 primary; guint8 secondary; primary = cp->cdef_y_pri_strength[i]; secondary = cp->cdef_y_sec_strength[i]; if (secondary == 4) secondary--; av1_params->cdef_y_strength[i] = (primary & 0x0f) | (secondary << 4); primary = cp->cdef_uv_pri_strength[i]; secondary = cp->cdef_uv_sec_strength[i]; if (secondary == 4) secondary--; av1_params->cdef_uv_strength[i] = (primary & 0x0f) | (secondary << 4); } /* SkipModeFrames */ if (frame_hdr->skip_mode_present) { av1_params->SkipModeFrame0 = frame_hdr->skip_mode_frame[0]; av1_params->SkipModeFrame1 = frame_hdr->skip_mode_frame[1]; } /* qp information */ av1_params->base_qindex = qp->base_q_idx; av1_params->qp_y_dc_delta_q = qp->delta_q_y_dc; av1_params->qp_u_dc_delta_q = qp->delta_q_u_dc; av1_params->qp_u_ac_delta_q = qp->delta_q_u_ac; av1_params->qp_v_dc_delta_q = qp->delta_q_v_dc; av1_params->qp_v_ac_delta_q = qp->delta_q_v_ac; av1_params->qm_y = qp->qm_y; av1_params->qm_u = qp->qm_u; av1_params->qm_v = qp->qm_v; /* segmentation */ av1_params->segmentation_enabled = sp->segmentation_enabled; av1_params->segmentation_update_map = sp->segmentation_update_map; av1_params->segmentation_update_data = sp->segmentation_update_data; av1_params->segmentation_temporal_update = sp->segmentation_temporal_update; for (i = 0; i < GST_AV1_MAX_SEGMENTS; i++) { for (j = 0; j < GST_AV1_SEG_LVL_MAX; j++) { av1_params->segmentation_feature_data[i][j] = sp->feature_data[i][j]; av1_params->segmentation_feature_mask[i] |= sp->feature_enabled[i][j] << j; } } /* loopfilter */ av1_params->loop_filter_level[0] = lp->loop_filter_level[0]; av1_params->loop_filter_level[1] = lp->loop_filter_level[1]; av1_params->loop_filter_level_u = lp->loop_filter_level[2]; av1_params->loop_filter_level_v = lp->loop_filter_level[3]; av1_params->loop_filter_sharpness = lp->loop_filter_sharpness; for (i = 0; i < GST_AV1_TOTAL_REFS_PER_FRAME; i++) { av1_params->loop_filter_ref_deltas[i] = lp->loop_filter_ref_deltas[i]; } av1_params->loop_filter_mode_deltas[0] = lp->loop_filter_mode_deltas[0]; av1_params->loop_filter_mode_deltas[1] = lp->loop_filter_mode_deltas[1]; av1_params->loop_filter_delta_enabled = lp->loop_filter_delta_enabled; av1_params->loop_filter_delta_update = lp->loop_filter_delta_update; av1_params->delta_lf_present = lp->delta_lf_present; av1_params->delta_lf_res = lp->delta_lf_res; av1_params->delta_lf_multi = lp->delta_lf_multi; /* restoration */ for (i = 0; i < 3; i++) { av1_params->lr_unit_size[i] = gst_nv_av1_dec_get_lr_unit_size (lrp->loop_restoration_size[i]); } av1_params->lr_type[0] = lrp->frame_restoration_type[0]; av1_params->lr_type[1] = lrp->frame_restoration_type[1]; av1_params->lr_type[2] = lrp->frame_restoration_type[2]; /* reference frames */ for (i = 0; i < GST_AV1_TOTAL_REFS_PER_FRAME; i++) { guint8 ref_idx = 0xff; other_pic = dpb->pic_list[i]; if (other_pic) { other_surface = gst_nv_av1_dec_get_decoder_surface_from_picture (self, other_pic); if (!other_surface) { GST_ERROR_OBJECT (self, "reference frame is unavailable"); return GST_FLOW_ERROR; } ref_idx = other_surface->decode_frame_index; } av1_params->ref_frame_map[i] = ref_idx; } if (frame_hdr->primary_ref_frame == GST_AV1_PRIMARY_REF_NONE) { av1_params->primary_ref_frame = 0xff; } else { guint8 primary_ref_idx; g_assert (frame_hdr->primary_ref_frame < 8); primary_ref_idx = frame_hdr->ref_frame_idx[frame_hdr->primary_ref_frame]; av1_params->primary_ref_frame = av1_params->ref_frame_map[primary_ref_idx]; } av1_params->temporal_layer_id = picture->temporal_id; av1_params->spatial_layer_id = picture->spatial_id; /* ref frame list and global motion */ for (i = 0; i < GST_AV1_REFS_PER_FRAME; i++) { gint8 ref_idx = frame_hdr->ref_frame_idx[i]; other_pic = nullptr; if (ref_idx >= 0) other_pic = dpb->pic_list[ref_idx]; if (other_pic) { other_surface = gst_nv_av1_dec_get_decoder_surface_from_picture (self, other_pic); av1_params->ref_frame[i].index = other_surface->decode_frame_index; av1_params->ref_frame[i].width = other_pic->frame_hdr.frame_width; av1_params->ref_frame[i].height = other_pic->frame_hdr.frame_height; } else { av1_params->ref_frame[i].index = 0xff; } av1_params->global_motion[i].invalid = gmp->invalid[i]; av1_params->global_motion[i].wmtype = gmp->gm_type[GST_AV1_REF_LAST_FRAME + i]; for (j = 0; j < 6; j++) { av1_params->global_motion[i].wmmat[j] = gmp->gm_params[GST_AV1_REF_LAST_FRAME + i][j]; } } /* film grain params */ if (seq_hdr->film_grain_params_present) { av1_params->apply_grain = fgp->apply_grain; av1_params->overlap_flag = fgp->overlap_flag; av1_params->scaling_shift_minus8 = fgp->grain_scaling_minus_8; av1_params->chroma_scaling_from_luma = fgp->chroma_scaling_from_luma; av1_params->ar_coeff_lag = fgp->ar_coeff_lag; av1_params->ar_coeff_shift_minus6 = fgp->ar_coeff_shift_minus_6; av1_params->grain_scale_shift = fgp->grain_scale_shift; av1_params->clip_to_restricted_range = fgp->clip_to_restricted_range; av1_params->num_y_points = fgp->num_y_points; for (i = 0; i < fgp->num_y_points && i < 14; i++) { av1_params->scaling_points_y[i][0] = fgp->point_y_value[i]; av1_params->scaling_points_y[i][1] = fgp->point_y_scaling[i]; } av1_params->num_cb_points = fgp->num_cb_points; for (i = 0; i < fgp->num_cb_points && i < 10; i++) { av1_params->scaling_points_cb[i][0] = fgp->point_cb_value[i]; av1_params->scaling_points_cb[i][1] = fgp->point_cb_scaling[i]; } av1_params->num_cr_points = fgp->num_cr_points; for (i = 0; i < fgp->num_cr_points && i < 10; i++) { av1_params->scaling_points_cr[i][0] = fgp->point_cr_value[i]; av1_params->scaling_points_cr[i][1] = fgp->point_cr_scaling[i]; } av1_params->random_seed = fgp->grain_seed; for (i = 0; i < 24; i++) { av1_params->ar_coeffs_y[i] = (short) fgp->ar_coeffs_y_plus_128[i] - 128; } for (i = 0; i < 25; i++) { av1_params->ar_coeffs_cb[i] = (short) fgp->ar_coeffs_cb_plus_128[i] - 128; av1_params->ar_coeffs_cr[i] = (short) fgp->ar_coeffs_cr_plus_128[i] - 128; } av1_params->cb_mult = fgp->cb_mult; av1_params->cb_luma_mult = fgp->cb_luma_mult; av1_params->cb_offset = fgp->cb_offset; av1_params->cr_mult = fgp->cr_mult; av1_params->cr_luma_mult = fgp->cr_luma_mult; av1_params->cr_offset = fgp->cr_offset; } gst_nv_av1_dec_reset_bitstream_params (self); return GST_FLOW_OK; } static GstFlowReturn gst_nv_av1_dec_decode_tile (GstAV1Decoder * decoder, GstAV1Picture * picture, GstAV1Tile * tile) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); GstAV1TileGroupOBU *tile_group = &tile->tile_group; guint i; guint buffer_size; if (tile_group->num_tiles * 2 > self->tile_offsets_alloc_len) { self->tile_offsets_alloc_len = tile_group->num_tiles * 2; self->tile_offsets = (guint *) g_realloc_n (self->tile_offsets, self->tile_offsets_alloc_len, sizeof (guint)); } self->num_tiles = tile_group->num_tiles; for (i = tile_group->tg_start; i <= tile_group->tg_end; i++) { guint offset = self->bitstream_buffer_offset + tile_group->entry[i].tile_offset; self->tile_offsets[i * 2] = offset; self->tile_offsets[i * 2 + 1] = offset + tile_group->entry[i].tile_size; } buffer_size = self->bitstream_buffer_offset + tile->obu.obu_size; if (buffer_size > self->bitstream_buffer_alloc_size) { guint alloc_size = buffer_size * 2; self->bitstream_buffer = (guint8 *) g_realloc (self->bitstream_buffer, alloc_size); self->bitstream_buffer_alloc_size = alloc_size; } memcpy (self->bitstream_buffer + self->bitstream_buffer_offset, tile->obu.data, tile->obu.obu_size); self->bitstream_buffer_offset += tile->obu.obu_size; return GST_FLOW_OK; } static GstFlowReturn gst_nv_av1_dec_end_picture (GstAV1Decoder * decoder, GstAV1Picture * picture) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); gboolean ret; CUVIDPICPARAMS *params = &self->params; params->nBitstreamDataLen = self->bitstream_buffer_offset; params->pBitstreamData = self->bitstream_buffer; params->nNumSlices = self->num_tiles; params->pSliceDataOffsets = self->tile_offsets; ret = gst_nv_decoder_decode (self->decoder, params); if (!ret) { GST_ERROR_OBJECT (self, "Failed to decode picture"); return GST_FLOW_ERROR; } return GST_FLOW_OK; } static GstFlowReturn gst_nv_av1_dec_output_picture (GstAV1Decoder * decoder, GstVideoCodecFrame * frame, GstAV1Picture * picture) { GstNvAV1Dec *self = GST_NV_AV1_DEC (decoder); GstVideoDecoder *vdec = GST_VIDEO_DECODER (decoder); GstNvDecSurface *surface; GstFlowReturn ret = GST_FLOW_ERROR; GST_LOG_OBJECT (self, "Outputting picture %p", picture); surface = (GstNvDecSurface *) gst_av1_picture_get_user_data (picture); if (!surface) { GST_ERROR_OBJECT (self, "No decoder frame in picture %p", picture); goto error; } ret = gst_nv_decoder_finish_surface (self->decoder, vdec, picture->discont_state, surface, &frame->output_buffer); if (ret != GST_FLOW_OK) goto error; gst_av1_picture_unref (picture); return gst_video_decoder_finish_frame (vdec, frame); error: gst_video_decoder_drop_frame (vdec, frame); gst_av1_picture_unref (picture); return ret; } static guint gst_nv_av1_dec_get_preferred_output_delay (GstAV1Decoder * decoder, gboolean is_live) { /* Prefer to zero latency for live pipeline */ if (is_live) return 0; return 2; } void gst_nv_av1_dec_register (GstPlugin * plugin, guint device_id, guint rank, GstCaps * sink_caps, GstCaps * src_caps) { GType type; gchar *type_name; gchar *feature_name; guint index = 0; const GValue *value; GstStructure *s; GTypeInfo type_info = { sizeof (GstNvAV1DecClass), nullptr, nullptr, (GClassInitFunc) gst_nv_av1_dec_class_init, nullptr, nullptr, sizeof (GstNvAV1Dec), 0, (GInstanceInitFunc) gst_nv_av1_dec_init, }; GstNvDecoderClassData *cdata; GST_DEBUG_CATEGORY_INIT (gst_nv_av1_dec_debug, "nvav1dec", 0, "nvav1dec"); cdata = g_new0 (GstNvDecoderClassData, 1); s = gst_caps_get_structure (sink_caps, 0); value = gst_structure_get_value (s, "width"); cdata->max_width = (guint) gst_value_get_int_range_max (value); value = gst_structure_get_value (s, "height"); cdata->max_height = (guint) gst_value_get_int_range_max (value); cdata->sink_caps = gst_caps_ref (sink_caps); cdata->src_caps = gst_caps_ref (src_caps); cdata->cuda_device_id = device_id; type_info.class_data = cdata; type_name = g_strdup ("GstNvAV1Dec"); feature_name = g_strdup ("nvav1dec"); while (g_type_from_name (type_name)) { index++; g_free (type_name); g_free (feature_name); type_name = g_strdup_printf ("GstNvAV1Device%dDec", index); feature_name = g_strdup_printf ("nvav1device%ddec", index); } type = g_type_register_static (GST_TYPE_AV1_DECODER, type_name, &type_info, (GTypeFlags) 0); /* make lower rank than default device */ if (rank > 0 && index != 0) rank--; if (index != 0) gst_element_type_set_skip_documentation (type); if (!gst_element_register (plugin, feature_name, rank, type)) GST_WARNING ("Failed to register plugin '%s'", type_name); g_free (type_name); g_free (feature_name); }