gstreamer/subprojects/gst-plugins-bad/gst-libs/gst/codecs/gstav1decoder.c
Nicolas Dufresne 049655c824 codecs: av1: Fix state when we show existing keyframe
Showing an existing keyframe has a special meaning in AV1: all the reference
frames are refreshed with the original keyframe information. The refresh
process (7.20) is implemented by saving data from the frame_header into the
state. To fix this special case, load all the relevant information into the
frame_header.

As nothing happens between this and the loading of the keyframe
into the state, this patch also removes the separate API function, using it
internally instead.

Fixes #1090

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1971>
2022-03-19 11:56:33 +00:00

649 lines
18 KiB
C

/* GStreamer
* Copyright (C) 2020 He Junyan <junyan.he@intel.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/**
* SECTION:gstav1decoder
* @title: GstAV1Decoder
* @short_description: Base class to implement stateless AV1 decoders
* @sources:
* - gstav1picture.h
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "gstav1decoder.h"
/* Debug category of this base class. It is initialized as "av1decoder" in
 * the type registration code below, and GST_CAT_DEFAULT makes it the category
 * used by the GST_*_OBJECT logging macros in this file. */
GST_DEBUG_CATEGORY (gst_av1_decoder_debug);
#define GST_CAT_DEFAULT gst_av1_decoder_debug
/* Private per-instance state of the AV1 decoder base class. */
struct _GstAV1DecoderPrivate
{
/* Maximum frame size: taken from the input caps in set_format() and
 * overwritten from the sequence header in process_sequence() */
gint max_width;
gint max_height;
/* Profile of the last sequence header accepted by the subclass */
GstAV1Profile profile;
/* Bitstream parser, created in start() and freed in stop() */
GstAV1Parser *parser;
/* Decoded picture buffer, created in start() and freed in stop() */
GstAV1Dpb *dpb;
/* Picture being built while handle_frame() runs, NULL otherwise */
GstAV1Picture *current_picture;
/* Frame given to handle_frame(); stored without taking a reference */
GstVideoCodecFrame *current_frame;
};
/* Registers GstAV1Decoder as an abstract type derived from GstVideoDecoder,
 * adds the private instance data and initializes the "av1decoder" debug
 * category. */
#define parent_class gst_av1_decoder_parent_class
G_DEFINE_ABSTRACT_TYPE_WITH_CODE (GstAV1Decoder, gst_av1_decoder,
GST_TYPE_VIDEO_DECODER,
G_ADD_PRIVATE (GstAV1Decoder);
GST_DEBUG_CATEGORY_INIT (gst_av1_decoder_debug, "av1decoder", 0,
"AV1 Video Decoder"));
/* Returns the index of the highest bit set in @x, i.e. floor (log2 (x)).
 * Returns -1 when @x is 0. */
static gint
_floor_log2 (guint32 x)
{
  gint bits;

  /* Count how many right shifts are needed to clear the value */
  for (bits = 0; x != 0; x >>= 1)
    bits++;

  return bits - 1;
}
/* Forward declarations of the GstVideoDecoder vfunc implementations and of
 * the default duplicate_picture handler; all of them are installed in
 * gst_av1_decoder_class_init(). */
static gboolean gst_av1_decoder_start (GstVideoDecoder * decoder);
static gboolean gst_av1_decoder_stop (GstVideoDecoder * decoder);
static gboolean gst_av1_decoder_set_format (GstVideoDecoder * decoder,
GstVideoCodecState * state);
static GstFlowReturn gst_av1_decoder_finish (GstVideoDecoder * decoder);
static gboolean gst_av1_decoder_flush (GstVideoDecoder * decoder);
static GstFlowReturn gst_av1_decoder_drain (GstVideoDecoder * decoder);
static GstFlowReturn gst_av1_decoder_handle_frame (GstVideoDecoder * decoder,
GstVideoCodecFrame * frame);
static GstAV1Picture *gst_av1_decoder_duplicate_picture_default (GstAV1Decoder *
decoder, GstAV1Picture * picture);
/* Class initializer: installs the default duplicate_picture handler and
 * hooks this base class' implementations into the GstVideoDecoder vfuncs. */
static void
gst_av1_decoder_class_init (GstAV1DecoderClass * klass)
{
  GstVideoDecoderClass *vdec_class = GST_VIDEO_DECODER_CLASS (klass);

  /* AV1 specific vfunc with a default implementation */
  klass->duplicate_picture =
      GST_DEBUG_FUNCPTR (gst_av1_decoder_duplicate_picture_default);

  /* Element life cycle */
  vdec_class->start = GST_DEBUG_FUNCPTR (gst_av1_decoder_start);
  vdec_class->stop = GST_DEBUG_FUNCPTR (gst_av1_decoder_stop);
  vdec_class->set_format = GST_DEBUG_FUNCPTR (gst_av1_decoder_set_format);

  /* Data flow */
  vdec_class->handle_frame = GST_DEBUG_FUNCPTR (gst_av1_decoder_handle_frame);
  vdec_class->flush = GST_DEBUG_FUNCPTR (gst_av1_decoder_flush);
  vdec_class->drain = GST_DEBUG_FUNCPTR (gst_av1_decoder_drain);
  vdec_class->finish = GST_DEBUG_FUNCPTR (gst_av1_decoder_finish);
}
/* Instance initializer: binds the private data and marks the decoder as
 * working on packetized input. */
static void
gst_av1_decoder_init (GstAV1Decoder * self)
{
  GstVideoDecoder *vdec = GST_VIDEO_DECODER (self);

  self->priv = gst_av1_decoder_get_instance_private (self);
  gst_video_decoder_set_packetized (vdec, TRUE);
}
/* Puts the decoding state back to its initial values: drops the picture in
 * progress, forgets the current frame, clears the stored profile and maximum
 * size, and resets the parser and the DPB when they exist. */
static void
gst_av1_decoder_reset (GstAV1Decoder * self)
{
  GstAV1DecoderPrivate *state = self->priv;

  /* Per-frame state */
  gst_clear_av1_picture (&state->current_picture);
  state->current_frame = NULL;

  /* Per-sequence state */
  state->profile = GST_AV1_PROFILE_UNDEFINED;
  state->max_height = 0;
  state->max_width = 0;

  /* Helper objects are only present between start() and stop() */
  if (state->parser != NULL)
    gst_av1_parser_reset (state->parser, FALSE);

  if (state->dpb != NULL)
    gst_av1_dpb_clear (state->dpb);
}
/* GstVideoDecoder::start implementation: allocates the parser and the DPB,
 * then resets the decoding state. Always returns TRUE. */
static gboolean
gst_av1_decoder_start (GstVideoDecoder * decoder)
{
  GstAV1Decoder *self = GST_AV1_DECODER (decoder);

  self->priv->dpb = gst_av1_dpb_new ();
  self->priv->parser = gst_av1_parser_new ();

  gst_av1_decoder_reset (self);

  return TRUE;
}
/* GstVideoDecoder::stop implementation: resets the decoding state and
 * releases the input state, the parser and the DPB. Always returns TRUE. */
static gboolean
gst_av1_decoder_stop (GstVideoDecoder * decoder)
{
  GstAV1Decoder *self = GST_AV1_DECODER (decoder);
  GstAV1DecoderPrivate *state = self->priv;

  /* Reset first, while the parser and the DPB still exist */
  gst_av1_decoder_reset (self);

  g_clear_pointer (&self->input_state, gst_video_codec_state_unref);

  g_clear_pointer (&state->parser, gst_av1_parser_free);
  g_clear_pointer (&state->dpb, gst_av1_dpb_free);

  return TRUE;
}
/* GstVideoDecoder::set_format implementation: replaces the stored input
 * state with a new reference to @state and takes the width and height of its
 * video info as the maximum frame size. Always returns TRUE. */
static gboolean
gst_av1_decoder_set_format (GstVideoDecoder * decoder,
    GstVideoCodecState * state)
{
  GstAV1Decoder *self = GST_AV1_DECODER (decoder);
  GstAV1DecoderPrivate *priv = self->priv;

  GST_DEBUG_OBJECT (decoder, "Set format");

  /* Drop the previous input state, if any, before keeping the new one */
  g_clear_pointer (&self->input_state, gst_video_codec_state_unref);
  self->input_state = gst_video_codec_state_ref (state);

  priv->max_width = GST_VIDEO_INFO_WIDTH (&state->info);
  priv->max_height = GST_VIDEO_INFO_HEIGHT (&state->info);

  return TRUE;
}
/* GstVideoDecoder::finish implementation: resets the decoding state.
 * Always returns GST_FLOW_OK. */
static GstFlowReturn
gst_av1_decoder_finish (GstVideoDecoder * decoder)
{
  GstAV1Decoder *self = GST_AV1_DECODER (decoder);

  GST_DEBUG_OBJECT (decoder, "finish");

  gst_av1_decoder_reset (self);

  return GST_FLOW_OK;
}
/* GstVideoDecoder::flush implementation: resets the decoding state.
 * Always returns TRUE. */
static gboolean
gst_av1_decoder_flush (GstVideoDecoder * decoder)
{
  GstAV1Decoder *self = GST_AV1_DECODER (decoder);

  GST_DEBUG_OBJECT (decoder, "flush");

  gst_av1_decoder_reset (self);

  return TRUE;
}
/* GstVideoDecoder::drain implementation: resets the decoding state.
 * Always returns GST_FLOW_OK. */
static GstFlowReturn
gst_av1_decoder_drain (GstVideoDecoder * decoder)
{
  GstAV1Decoder *self = GST_AV1_DECODER (decoder);

  GST_DEBUG_OBJECT (decoder, "drain");

  gst_av1_decoder_reset (self);

  return GST_FLOW_OK;
}
/* Default duplicate_picture handler: returns a newly allocated picture.
 * Neither @decoder nor @picture is read, so nothing is copied from the
 * picture being duplicated. */
static GstAV1Picture *
gst_av1_decoder_duplicate_picture_default (GstAV1Decoder * decoder,
    GstAV1Picture * picture)
{
  return gst_av1_picture_new ();
}
/* Maps an OBU type to a human readable name for log messages. Types without
 * a dedicated name yield "unknown". */
static const gchar *
get_obu_name (GstAV1OBUType type)
{
  const gchar *name;

  switch (type) {
    case GST_AV1_OBU_SEQUENCE_HEADER:
      name = "sequence header";
      break;
    case GST_AV1_OBU_TEMPORAL_DELIMITER:
      name = "temporal delimiter";
      break;
    case GST_AV1_OBU_FRAME_HEADER:
      name = "frame header";
      break;
    case GST_AV1_OBU_TILE_GROUP:
      name = "tile group";
      break;
    case GST_AV1_OBU_METADATA:
      name = "metadata";
      break;
    case GST_AV1_OBU_FRAME:
      name = "frame";
      break;
    case GST_AV1_OBU_REDUNDANT_FRAME_HEADER:
      name = "redundant frame header";
      break;
    case GST_AV1_OBU_TILE_LIST:
      name = "tile list";
      break;
    case GST_AV1_OBU_PADDING:
      name = "padding";
      break;
    default:
      name = "unknown";
      break;
  }

  return name;
}
/* Returns the profile number as a string ("0", "1" or "2"), or NULL for any
 * other profile value. */
static const gchar *
gst_av1_decoder_profile_to_string (GstAV1Profile profile)
{
  if (profile == GST_AV1_PROFILE_0)
    return "0";

  if (profile == GST_AV1_PROFILE_1)
    return "1";

  if (profile == GST_AV1_PROFILE_2)
    return "2";

  return NULL;
}
/* Handles a sequence header OBU.
 *
 * The header is parsed and compared against the one the parser held before.
 * When they are identical nothing else happens. Otherwise the subclass is
 * notified through the new_sequence vfunc and, if it accepts, the stored
 * profile and maximum size are updated and the DPB is cleared.
 *
 * Returns GST_FLOW_OK on success, GST_FLOW_ERROR when parsing fails, or the
 * flow return of the subclass when it refuses the sequence. */
static GstFlowReturn
gst_av1_decoder_process_sequence (GstAV1Decoder * self, GstAV1OBU * obu)
{
  GstAV1DecoderPrivate *priv = self->priv;
  GstAV1DecoderClass *klass = GST_AV1_DECODER_GET_CLASS (self);
  GstAV1SequenceHeaderOBU previous = { 0, };
  GstAV1SequenceHeaderOBU parsed;
  GstFlowReturn flow;
  gint new_width, new_height;

  /* Copy the parser's header before parsing; presumably parsing replaces
   * it -- confirm against the parser implementation */
  if (priv->parser->seq_header)
    previous = *priv->parser->seq_header;

  if (gst_av1_parser_parse_sequence_header_obu (priv->parser, obu,
          &parsed) != GST_AV1_PARSER_OK) {
    GST_WARNING_OBJECT (self, "Parsing sequence failed.");
    return GST_FLOW_ERROR;
  }

  if (memcmp (&previous, &parsed, sizeof (GstAV1SequenceHeaderOBU)) == 0) {
    GST_DEBUG_OBJECT (self, "Get same sequence header.");
    return GST_FLOW_OK;
  }

  g_assert (klass->new_sequence);

  new_width = parsed.max_frame_width_minus_1 + 1;
  new_height = parsed.max_frame_height_minus_1 + 1;

  GST_DEBUG_OBJECT (self,
      "Sequence updated, profile %s -> %s, max resolution: %dx%d -> %dx%d",
      gst_av1_decoder_profile_to_string (priv->profile),
      gst_av1_decoder_profile_to_string (parsed.seq_profile),
      priv->max_width, priv->max_height, new_width, new_height);

  flow = klass->new_sequence (self, &parsed);
  if (flow != GST_FLOW_OK) {
    GST_ERROR_OBJECT (self, "subclass does not want accept new sequence");
    return flow;
  }

  /* The subclass accepted the sequence: commit it to our state */
  priv->profile = parsed.seq_profile;
  priv->max_width = new_width;
  priv->max_height = new_height;
  gst_av1_dpb_clear (priv->dpb);

  return GST_FLOW_OK;
}
/* Passes a parsed tile group, together with the OBU it came from, to the
 * decode_tile vfunc for the current picture.
 *
 * Returns GST_FLOW_ERROR when there is no current picture or when the current
 * picture only shows an existing frame; otherwise returns what the subclass
 * returned. */
static GstFlowReturn
gst_av1_decoder_decode_tile_group (GstAV1Decoder * self,
    GstAV1TileGroupOBU * tile_group, GstAV1OBU * obu)
{
  GstAV1DecoderClass *klass = GST_AV1_DECODER_GET_CLASS (self);
  GstAV1Picture *cur = self->priv->current_picture;
  GstAV1Tile tile;
  GstFlowReturn flow;

  if (cur == NULL) {
    GST_ERROR_OBJECT (self, "No picture has created for current frame");
    return GST_FLOW_ERROR;
  }

  if (cur->frame_hdr.show_existing_frame) {
    GST_ERROR_OBJECT (self, "Current picture is showing the existing frame.");
    return GST_FLOW_ERROR;
  }

  /* The tile carries copies of both the tile group and its OBU */
  tile.tile_group = *tile_group;
  tile.obu = *obu;

  g_assert (klass->decode_tile);
  flow = klass->decode_tile (self, cur, &tile);
  if (flow != GST_FLOW_OK)
    GST_WARNING_OBJECT (self, "Decode tile error");

  return flow;
}
/* Creates the picture of the current frame from a parsed frame header.
 *
 * When @frame_header has show_existing_frame set, the DPB picture selected by
 * frame_to_show_map_idx is duplicated through the duplicate_picture vfunc.
 * Otherwise a new picture is allocated, filled from @frame_header and handed
 * to the new_picture and start_picture vfuncs when the subclass provides
 * them.
 *
 * On success the picture is stored in priv->current_picture and GST_FLOW_OK
 * is returned. On failure GST_FLOW_ERROR or the flow return of the failing
 * vfunc is returned. */
static GstFlowReturn
gst_av1_decoder_decode_frame_header (GstAV1Decoder * self,
    GstAV1FrameHeaderOBU * frame_header)
{
  GstAV1DecoderPrivate *priv = self->priv;
  GstAV1DecoderClass *klass = GST_AV1_DECODER_GET_CLASS (self);
  GstAV1Picture *picture = NULL;
  GstFlowReturn ret = GST_FLOW_OK;

  g_assert (priv->current_frame);

  /* Only one picture may be created per input frame */
  if (priv->current_picture != NULL) {
    GST_ERROR_OBJECT (self, "Already have picture for current frame");
    return GST_FLOW_ERROR;
  }

  if (frame_header->show_existing_frame) {
    GstAV1Picture *ref_picture;

    ref_picture = priv->dpb->pic_list[frame_header->frame_to_show_map_idx];
    if (!ref_picture) {
      GST_WARNING_OBJECT (self, "Failed to find the frame index %d to show.",
          frame_header->frame_to_show_map_idx);
      return GST_FLOW_ERROR;
    }

    /* FIXME: duplicate picture might be optional feature like that of VP9
     * decoder baseclass */
    g_assert (klass->duplicate_picture);
    picture = klass->duplicate_picture (self, ref_picture);
    if (!picture) {
      GST_ERROR_OBJECT (self, "subclass didn't provide duplicated picture");
      return GST_FLOW_ERROR;
    }

    picture->system_frame_number = priv->current_frame->system_frame_number;
    picture->frame_hdr = *frame_header;
    priv->current_picture = picture;
  } else {
    picture = gst_av1_picture_new ();
    picture->frame_hdr = *frame_header;
    picture->display_frame_id = frame_header->display_frame_id;
    picture->show_frame = frame_header->show_frame;
    picture->showable_frame = frame_header->showable_frame;
    picture->apply_grain = frame_header->film_grain_params.apply_grain;
    picture->system_frame_number = priv->current_frame->system_frame_number;

    /* A frame that is neither shown nor showable is flagged decode-only */
    if (!frame_header->show_frame && !frame_header->showable_frame)
      GST_VIDEO_CODEC_FRAME_FLAG_SET (priv->current_frame,
          GST_VIDEO_CODEC_FRAME_FLAG_DECODE_ONLY);

    if (klass->new_picture) {
      ret = klass->new_picture (self, priv->current_frame, picture);
      if (ret != GST_FLOW_OK) {
        GST_WARNING_OBJECT (self, "new picture error");
        /* The picture is not stored in priv->current_picture yet, so the
         * caller has no way to release it: drop our reference here instead
         * of leaking it. */
        gst_av1_picture_unref (picture);
        return ret;
      }
    }

    /* From here on the picture is released through priv->current_picture */
    priv->current_picture = picture;

    if (klass->start_picture) {
      ret = klass->start_picture (self, picture, priv->dpb);
      if (ret != GST_FLOW_OK) {
        GST_WARNING_OBJECT (self, "start picture error");
        return ret;
      }
    }
  }

  g_assert (priv->current_picture != NULL);

  return GST_FLOW_OK;
}
/* Handles a frame header OBU: parses it and creates the current picture from
 * the result. Returns GST_FLOW_ERROR when parsing fails, otherwise the result
 * of gst_av1_decoder_decode_frame_header(). */
static GstFlowReturn
gst_av1_decoder_process_frame_header (GstAV1Decoder * self, GstAV1OBU * obu)
{
  GstAV1Parser *parser = self->priv->parser;
  GstAV1FrameHeaderOBU hdr;

  if (gst_av1_parser_parse_frame_header_obu (parser, obu, &hdr) !=
      GST_AV1_PARSER_OK) {
    GST_WARNING_OBJECT (self, "Parsing frame header failed.");
    return GST_FLOW_ERROR;
  }

  return gst_av1_decoder_decode_frame_header (self, &hdr);
}
/* Handles a tile group OBU: parses it and forwards it for decoding. Returns
 * GST_FLOW_ERROR when parsing fails, otherwise the result of
 * gst_av1_decoder_decode_tile_group(). */
static GstFlowReturn
gst_av1_decoder_process_tile_group (GstAV1Decoder * self, GstAV1OBU * obu)
{
  GstAV1Parser *parser = self->priv->parser;
  GstAV1TileGroupOBU group;

  if (gst_av1_parser_parse_tile_group_obu (parser, obu, &group) !=
      GST_AV1_PARSER_OK) {
    GST_WARNING_OBJECT (self, "Parsing tile group failed.");
    return GST_FLOW_ERROR;
  }

  return gst_av1_decoder_decode_tile_group (self, &group, obu);
}
/* Handles a frame OBU, which carries a frame header and a tile group: the
 * header is processed first and, only if that succeeds, the tile group is
 * decoded. Returns GST_FLOW_ERROR when parsing fails, otherwise the result of
 * the first step that did not return GST_FLOW_OK, or GST_FLOW_OK. */
static GstFlowReturn
gst_av1_decoder_process_frame (GstAV1Decoder * self, GstAV1OBU * obu)
{
  GstAV1Parser *parser = self->priv->parser;
  GstAV1FrameOBU parsed;
  GstFlowReturn flow;

  if (gst_av1_parser_parse_frame_obu (parser, obu, &parsed) !=
      GST_AV1_PARSER_OK) {
    GST_WARNING_OBJECT (self, "Parsing frame failed.");
    return GST_FLOW_ERROR;
  }

  flow = gst_av1_decoder_decode_frame_header (self, &parsed.frame_header);
  if (flow == GST_FLOW_OK)
    flow = gst_av1_decoder_decode_tile_group (self, &parsed.tile_group, obu);

  return flow;
}
/* Handles a temporal delimiter OBU by letting the parser process it. Returns
 * GST_FLOW_OK when the parser succeeds and GST_FLOW_ERROR otherwise. */
static GstFlowReturn
gst_av1_decoder_temporal_delimiter (GstAV1Decoder * self, GstAV1OBU * obu)
{
  GstAV1ParserResult res;

  res = gst_av1_parser_parse_temporal_delimiter_obu (self->priv->parser, obu);

  return (res == GST_AV1_PARSER_OK) ? GST_FLOW_OK : GST_FLOW_ERROR;
}
/* Dispatches one identified OBU to the handler of its type.
 *
 * Metadata, redundant frame header, tile list and padding OBUs are skipped,
 * and unrecognized types only produce a warning; both cases return
 * GST_FLOW_OK. For the handled types the handler's flow return is passed
 * on, with a warning logged when it is not GST_FLOW_OK. */
static GstFlowReturn
gst_av1_decoder_decode_one_obu (GstAV1Decoder * self, GstAV1OBU * obu)
{
  GstFlowReturn flow = GST_FLOW_OK;

  GST_LOG_OBJECT (self, "Decode obu %s", get_obu_name (obu->obu_type));

  switch (obu->obu_type) {
    case GST_AV1_OBU_TEMPORAL_DELIMITER:
      flow = gst_av1_decoder_temporal_delimiter (self, obu);
      break;
    case GST_AV1_OBU_SEQUENCE_HEADER:
      flow = gst_av1_decoder_process_sequence (self, obu);
      break;
    case GST_AV1_OBU_FRAME_HEADER:
      flow = gst_av1_decoder_process_frame_header (self, obu);
      break;
    case GST_AV1_OBU_TILE_GROUP:
      flow = gst_av1_decoder_process_tile_group (self, obu);
      break;
    case GST_AV1_OBU_FRAME:
      flow = gst_av1_decoder_process_frame (self, obu);
      break;
    case GST_AV1_OBU_METADATA:
    case GST_AV1_OBU_REDUNDANT_FRAME_HEADER:
    case GST_AV1_OBU_TILE_LIST:
    case GST_AV1_OBU_PADDING:
      /* TODO: these may need to be handled. */
      break;
    default:
      GST_WARNING_OBJECT (self, "an unrecognized obu type %d", obu->obu_type);
      break;
  }

  if (flow == GST_FLOW_OK)
    return GST_FLOW_OK;

  GST_WARNING_OBJECT (self, "Failed to handle %s OBU",
      get_obu_name (obu->obu_type));

  return flow;
}
/* Commits the current picture to the reference state: the parser's reference
 * frames are updated from the picture's frame header and a new reference to
 * the picture is added to the DPB.
 *
 * Nothing is done for a show_existing_frame picture unless its frame type is
 * a key frame. A parser failure is logged and leaves the DPB untouched. */
static void
gst_av1_decoder_update_state (GstAV1Decoder * self)
{
  GstAV1DecoderPrivate *priv = self->priv;
  GstAV1Picture *cur = priv->current_picture;
  GstAV1FrameHeaderOBU *hdr;

  g_assert (cur);
  hdr = &cur->frame_hdr;

  /* This is a show_existing_frame case, only update key frame */
  if (hdr->show_existing_frame && hdr->frame_type != GST_AV1_KEY_FRAME)
    return;

  if (gst_av1_parser_reference_frame_update (priv->parser, hdr) !=
      GST_AV1_PARSER_OK) {
    GST_ERROR_OBJECT (self, "failed to update the reference.");
    return;
  }

  gst_av1_dpb_add (priv->dpb, gst_av1_picture_ref (cur));
}
/* GstVideoDecoder::handle_frame implementation.
 *
 * Maps the input buffer of @frame, identifies and decodes every OBU in it,
 * calls the end_picture vfunc (except for show_existing_frame pictures) and
 * updates the reference state. The resulting picture is then either output
 * through the output_picture vfunc, released when a higher spatial layer is
 * expected, or finished as decode-only. On any failure the picture is
 * released and the frame is dropped.
 *
 * Returns the resulting flow return; GST_FLOW_ERROR is additionally reported
 * through GST_VIDEO_DECODER_ERROR. */
static GstFlowReturn
gst_av1_decoder_handle_frame (GstVideoDecoder * decoder,
    GstVideoCodecFrame * frame)
{
  GstAV1Decoder *self = GST_AV1_DECODER (decoder);
  GstAV1DecoderPrivate *priv = self->priv;
  GstAV1DecoderClass *klass = GST_AV1_DECODER_GET_CLASS (self);
  GstBuffer *in_buf = frame->input_buffer;
  /* Cached up front: by the time an error is reported below, the frame has
   * been handed to output_picture(), finish_frame(), release_frame() or
   * drop_frame() and must not be dereferenced any more. */
  guint32 frame_number = frame->system_frame_number;
  GstMapInfo map;
  GstFlowReturn ret = GST_FLOW_OK;
  guint32 total_consumed, consumed;
  GstAV1OBU obu;
  GstAV1ParserResult res;

  GST_LOG_OBJECT (self, "handle frame id %d, buf %" GST_PTR_FORMAT,
      frame->system_frame_number, in_buf);

  priv->current_frame = frame;
  g_assert (!priv->current_picture);

  if (!gst_buffer_map (in_buf, &map, GST_MAP_READ)) {
    priv->current_frame = NULL;
    GST_ERROR_OBJECT (self, "can not map input buffer");
    return GST_FLOW_ERROR;
  }

  total_consumed = 0;
  while (total_consumed < map.size) {
    /* Only offer the parser the bytes left after the OBUs already consumed;
     * passing the full buffer size together with an advanced data pointer
     * would allow reads past the end of the mapped memory. */
    res = gst_av1_parser_identify_one_obu (priv->parser,
        map.data + total_consumed, map.size - total_consumed, &obu, &consumed);
    if (res != GST_AV1_PARSER_OK) {
      ret = GST_FLOW_ERROR;
      goto out;
    }

    ret = gst_av1_decoder_decode_one_obu (self, &obu);
    if (ret != GST_FLOW_OK) {
      goto out;
    }

    total_consumed += consumed;
  }

  if (!priv->current_picture) {
    GST_ERROR_OBJECT (self, "No valid picture after exhaust input frame");
    ret = GST_FLOW_ERROR;
    goto out;
  }

  /* A picture that only shows an existing frame was never started, so there
   * is nothing to end */
  if (!priv->current_picture->frame_hdr.show_existing_frame) {
    if (klass->end_picture) {
      ret = klass->end_picture (self, priv->current_picture);
      if (ret != GST_FLOW_OK) {
        GST_WARNING_OBJECT (self, "end picture error");
        goto out;
      }
    }
  }

  gst_av1_decoder_update_state (self);

out:
  gst_buffer_unmap (in_buf, &map);

  if (ret == GST_FLOW_OK) {
    if (priv->current_picture->frame_hdr.show_frame ||
        priv->current_picture->frame_hdr.show_existing_frame) {
      /* Only output one frame with the highest spatial id from each TU
       * when there are multiple spatial layers.
       */
      if (priv->parser->state.operating_point_idc &&
          obu.header.obu_spatial_id <
          _floor_log2 (priv->parser->state.operating_point_idc >> 8)) {
        gst_av1_picture_unref (priv->current_picture);
        gst_video_decoder_release_frame (decoder, frame);
      } else {
        g_assert (klass->output_picture);
        /* transfer ownership of frame and picture */
        ret = klass->output_picture (self, frame, priv->current_picture);
      }
    } else {
      GST_LOG_OBJECT (self, "Decode only picture %p", priv->current_picture);
      GST_VIDEO_CODEC_FRAME_SET_DECODE_ONLY (frame);
      gst_av1_picture_unref (priv->current_picture);
      ret = gst_video_decoder_finish_frame (GST_VIDEO_DECODER (self), frame);
    }
  } else {
    if (priv->current_picture)
      gst_av1_picture_unref (priv->current_picture);

    gst_video_decoder_drop_frame (decoder, frame);
  }

  /* Our references are gone in every branch above */
  priv->current_picture = NULL;
  priv->current_frame = NULL;

  if (ret == GST_FLOW_ERROR) {
    GST_VIDEO_DECODER_ERROR (decoder, 1, STREAM, DECODE,
        ("Failed to handle the frame %d", frame_number), NULL, ret);
  }

  return ret;
}