/* GStreamer * Copyright (C) 2020 He Junyan * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ /* * SECTION:element-av1parse * @title: av1parse * @short_description: An AV1 stream parse. * * The minimal unit should be the BYTE. * There are four types of AV1 alignment in the AV1 stream. * * alignment: byte, obu, frame, tu * * 1. Aligned to byte. The basic and default one for input. * 2. Aligned to obu(Open Bitstream Units). * 3. Aligned to frame. The default one for output. This ensures that * each buffer contains only one frame or frame header with the * show_existing flag for the base or sub layer. It is useful for * the decoder. * 4. Aligned to tu(Temporal Unit). A temporal unit consists of all the * OBUs that are associated with a specific, distinct time instant. * When scalability is disabled, it contains just exact one showing * frame(may contain several unshowing frames). When scalability is * enabled, it contains frames depending on the layer number. It should * begin with a temporal delimiter obu. It may be useful for mux/demux * to index the data of some timestamp. * * The annex B define a special format for the temporal unit. The size of * each temporal unit is extract out to the header of the buffer, and no * size field inside the each obu. 
There are two stream formats:
 *
 *  stream-format: obu-stream, annexb
 *
 *  1. obu-stream. The basic and default one.
 *  2. annexb. A special stream of temporal unit. It also implies that the
 *     alignment should be TU.
 *
 * This AV1 parse implements the conversion between the alignments and the
 * stream-formats. If the input and output have the same alignment and the
 * same stream-format, it will check and bypass the data.
 *
 * ## Example launch line to generate annex B format AV1 stream:
 * ```
 * gst-launch-1.0 filesrc location=sample.av1 ! ivfparse ! av1parse ! \
 *   video/x-av1,alignment=\(string\)tu,stream-format=\(string\)annexb ! \
 *   matroskamux ! filesink location=trans.mkv
 * ```
 *
 * Since: 1.20
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include
#include
#include
#include "gstvideoparserselements.h"
#include "gstav1parse.h"

#include

#define GST_AV1_MAX_LEB_128_SIZE 8

GST_DEBUG_CATEGORY (av1_parse_debug);
#define GST_CAT_DEFAULT av1_parse_debug

/* We combine the stream format and the alignment together. When
   stream format is annexb, the alignment must be TU.
*/
/* Combined stream-format/alignment value; used for both the input side
 * (in_align) and the output side (align) of the element. */
typedef enum
{
  GST_AV1_PARSE_ALIGN_ERROR = -1,
  GST_AV1_PARSE_ALIGN_NONE = 0,
  GST_AV1_PARSE_ALIGN_BYTE,
  GST_AV1_PARSE_ALIGN_OBU,
  GST_AV1_PARSE_ALIGN_FRAME,
  GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT,
  GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B,
} GstAV1ParseAligment;

/* Instance data of the av1parse element. */
struct _GstAV1Parse
{
  GstBaseParse parent;

  /* Stream properties; taken from the sink caps and/or the sequence
   * header and exported on the src caps. */
  gint width;
  gint height;
  gint subsampling_x;
  gint subsampling_y;
  gboolean mono_chrome;
  guint8 bit_depth;
  gchar *colorimetry;
  GstAV1Profile profile;

  /* Input alignment, set in gst_av1_parse_set_sink_caps(). */
  GstAV1ParseAligment in_align;
  gboolean detect_annex_b;
  /* Output alignment, chosen in gst_av1_parse_negotiate(). */
  GstAV1ParseAligment align;

  GstAV1Parser *parser;
  /* Data waiting to be pushed downstream by gst_av1_parse_push_data(). */
  GstAdapter *cache_out;
  guint last_parsed_offset;
  /* OBUs of the frame currently being collected for annex B output. */
  GstAdapter *frame_cache;
  guint highest_spatial_id;
  /* -1 means "no shown frame seen yet", see
   * gst_av1_parse_reset_obu_data_state(). */
  gint last_shown_frame_temporal_id;
  gint last_shown_frame_spatial_id;
  gboolean within_one_frame;
  /* Set whenever a value exported on the src caps changed. */
  gboolean update_caps;
  /* Pending flags, applied to the next output buffer in
   * gst_av1_parse_push_data(). */
  gboolean discont;
  gboolean header;
  gboolean keyframe;
  gboolean show_frame;
};

static GstStaticPadTemplate sinktemplate = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-av1"));

static GstStaticPadTemplate srctemplate = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-av1, parsed = (boolean) true, "
        "stream-format=(string) { obu-stream, annexb }, "
        "alignment=(string) { obu, tu, frame }"));

#define parent_class gst_av1_parse_parent_class
G_DEFINE_TYPE (GstAV1Parse, gst_av1_parse, GST_TYPE_BASE_PARSE);

GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (av1parse, "av1parse", GST_RANK_SECONDARY,
    GST_TYPE_AV1_PARSE, videoparsers_element_init (plugin));

/* Remove the "parsed" field from every structure of @caps; when @all is
 * TRUE also remove "alignment" and "stream-format". @caps is modified in
 * place. */
static void
remove_fields (GstCaps * caps, gboolean all)
{
  guint i, n;

  n = gst_caps_get_size (caps);
  for (i = 0; i < n; i++) {
    GstStructure *s = gst_caps_get_structure (caps, i);

    if (all) {
      gst_structure_remove_field (s, "alignment");
      gst_structure_remove_field (s, "stream-format");
    }
    gst_structure_remove_field (s, "parsed");
  }
}

/* Map an OBU type to a human readable name for log output. */
static const gchar *
_obu_name (GstAV1OBUType type)
{
  switch (type) {
    case GST_AV1_OBU_SEQUENCE_HEADER:
      return "sequence header";
    case GST_AV1_OBU_TEMPORAL_DELIMITER:
return "temporal delimiter"; case GST_AV1_OBU_FRAME_HEADER: return "frame header"; case GST_AV1_OBU_TILE_GROUP: return "tile group"; case GST_AV1_OBU_METADATA: return "metadata"; case GST_AV1_OBU_FRAME: return "frame"; case GST_AV1_OBU_REDUNDANT_FRAME_HEADER: return "redundant frame header"; case GST_AV1_OBU_TILE_LIST: return "tile list"; case GST_AV1_OBU_PADDING: return "padding"; default: return "unknown"; } return NULL; } static guint32 _read_leb128 (guint8 * data, GstAV1ParserResult * retval, guint32 * comsumed) { guint8 leb128_byte = 0; guint64 value = 0; gint i; gboolean result; GstBitReader br; guint32 cur_pos; gst_bit_reader_init (&br, data, 8); cur_pos = gst_bit_reader_get_pos (&br); for (i = 0; i < 8; i++) { leb128_byte = 0; result = gst_bit_reader_get_bits_uint8 (&br, &leb128_byte, 8); if (result == FALSE) { *retval = GST_AV1_PARSER_BITSTREAM_ERROR; return 0; } value |= (((gint) leb128_byte & 0x7f) << (i * 7)); if (!(leb128_byte & 0x80)) break; } *comsumed = (gst_bit_reader_get_pos (&br) - cur_pos) / 8; /* check for bitstream conformance see chapter4.10.5 */ if (value < G_MAXUINT32) { *retval = GST_AV1_PARSER_OK; return (guint32) value; } else { GST_WARNING ("invalid leb128"); *retval = GST_AV1_PARSER_BITSTREAM_ERROR; return 0; } } static gsize _leb_size_in_bytes (guint64 value) { gsize size = 0; do { ++size; } while ((value >>= 7) != 0); return size; } static gboolean _write_leb128 (guint8 * data, guint * len, guint64 value) { guint leb_size = _leb_size_in_bytes (value); guint i; if (value > G_MAXUINT32 || leb_size > GST_AV1_MAX_LEB_128_SIZE) return FALSE; for (i = 0; i < leb_size; ++i) { guint8 byte = value & 0x7f; value >>= 7; /* Signal that more bytes follow. 
*/
    if (value != 0)
      byte |= 0x80;

    *(data + i) = byte;
  }

  *len = leb_size;
  return TRUE;
}

static gboolean gst_av1_parse_start (GstBaseParse * parse);
static gboolean gst_av1_parse_stop (GstBaseParse * parse);
static GstFlowReturn gst_av1_parse_handle_frame (GstBaseParse * parse,
    GstBaseParseFrame * frame, gint * skipsize);
static gboolean gst_av1_parse_set_sink_caps (GstBaseParse * parse,
    GstCaps * caps);
static GstCaps *gst_av1_parse_get_sink_caps (GstBaseParse * parse,
    GstCaps * filter);

/* Clear the parse state related to data kind OBUs. */
static void
gst_av1_parse_reset_obu_data_state (GstAV1Parse * self)
{
  /* -1 marks "no shown frame recorded". */
  self->last_shown_frame_temporal_id = -1;
  self->last_shown_frame_spatial_id = -1;
  self->within_one_frame = FALSE;
}

/* Bring the whole element state back to its initial values: stream
 * properties, alignments, pending buffer flags, the colorimetry string, the
 * parser instance and both adapters. */
static void
gst_av1_parse_reset (GstAV1Parse * self)
{
  self->width = 0;
  self->height = 0;
  self->subsampling_x = -1;
  self->subsampling_y = -1;
  self->mono_chrome = FALSE;
  self->profile = GST_AV1_PROFILE_UNDEFINED;
  self->bit_depth = 0;
  self->align = GST_AV1_PARSE_ALIGN_NONE;
  self->in_align = GST_AV1_PARSE_ALIGN_NONE;
  self->detect_annex_b = FALSE;
  /* The first buffer after a reset is flagged DISCONT. */
  self->discont = TRUE;
  self->header = FALSE;
  self->keyframe = FALSE;
  self->show_frame = FALSE;
  self->last_parsed_offset = 0;
  self->highest_spatial_id = 0;
  gst_av1_parse_reset_obu_data_state (self);
  g_clear_pointer (&self->colorimetry, g_free);
  g_clear_pointer (&self->parser, gst_av1_parser_free);
  gst_adapter_clear (self->cache_out);
  gst_adapter_clear (self->frame_cache);
}

/* Instance init: turn off PTS interpolation and timestamp inference in the
 * base class, set the accept-intersect/accept-template flags on the sink
 * pad and create the two adapters. */
static void
gst_av1_parse_init (GstAV1Parse * self)
{
  gst_base_parse_set_pts_interpolation (GST_BASE_PARSE (self), FALSE);
  gst_base_parse_set_infer_ts (GST_BASE_PARSE (self), FALSE);

  GST_PAD_SET_ACCEPT_INTERSECT (GST_BASE_PARSE_SINK_PAD (self));
  GST_PAD_SET_ACCEPT_TEMPLATE (GST_BASE_PARSE_SINK_PAD (self));

  self->cache_out = gst_adapter_new ();
  self->frame_cache = gst_adapter_new ();
}

/* GObject finalize: reset the state, drop the adapters and chain up. */
static void
gst_av1_parse_finalize (GObject * object)
{
  GstAV1Parse *self = GST_AV1_PARSE (object);

  gst_av1_parse_reset (self);
  g_object_unref
(self->cache_out);
  g_object_unref (self->frame_cache);

  G_OBJECT_CLASS (parent_class)->finalize (object);
}

/* Class init: install the GObject and GstBaseParse virtual methods, the pad
 * templates, the element metadata and the debug category. */
static void
gst_av1_parse_class_init (GstAV1ParseClass * klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstBaseParseClass *parse_class = GST_BASE_PARSE_CLASS (klass);
  GstElementClass *element_class = GST_ELEMENT_CLASS (klass);

  gobject_class->finalize = gst_av1_parse_finalize;
  parse_class->start = GST_DEBUG_FUNCPTR (gst_av1_parse_start);
  parse_class->stop = GST_DEBUG_FUNCPTR (gst_av1_parse_stop);
  parse_class->handle_frame = GST_DEBUG_FUNCPTR (gst_av1_parse_handle_frame);
  parse_class->set_sink_caps = GST_DEBUG_FUNCPTR (gst_av1_parse_set_sink_caps);
  parse_class->get_sink_caps = GST_DEBUG_FUNCPTR (gst_av1_parse_get_sink_caps);

  gst_element_class_add_static_pad_template (element_class, &srctemplate);
  gst_element_class_add_static_pad_template (element_class, &sinktemplate);

  gst_element_class_set_static_metadata (element_class, "AV1 parser",
      "Codec/Parser/Converter/Video",
      "Parses AV1 streams", "He Junyan ");

  GST_DEBUG_CATEGORY_INIT (av1_parse_debug, "av1parse", 0, "av1 parser");
}

/* GstBaseParse::start: reset all state and create a fresh AV1 parser. */
static gboolean
gst_av1_parse_start (GstBaseParse * parse)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);

  GST_DEBUG_OBJECT (self, "start");

  gst_av1_parse_reset (self);
  self->parser = gst_av1_parser_new ();

  /* At least the OBU header.
*/
  gst_base_parse_set_min_frame_size (parse, 1);

  return TRUE;
}

/* GstBaseParse::stop: release the AV1 parser instance. */
static gboolean
gst_av1_parse_stop (GstBaseParse * parse)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);

  GST_DEBUG_OBJECT (self, "stop");
  g_clear_pointer (&self->parser, gst_av1_parser_free);

  return TRUE;
}

/* Caps "profile" string for @profile, or NULL when it is not one of
 * profile 0, 1 or 2. */
static const gchar *
gst_av1_parse_profile_to_string (GstAV1Profile profile)
{
  switch (profile) {
    case GST_AV1_PROFILE_0:
      return "main";
    case GST_AV1_PROFILE_1:
      return "high";
    case GST_AV1_PROFILE_2:
      return "professional";
    default:
      break;
  }

  return NULL;
}

/* Inverse of gst_av1_parse_profile_to_string(); NULL or unknown strings map
 * to GST_AV1_PROFILE_UNDEFINED. */
static GstAV1Profile
gst_av1_parse_profile_from_string (const gchar * profile)
{
  if (!profile)
    return GST_AV1_PROFILE_UNDEFINED;

  if (g_strcmp0 (profile, "main") == 0)
    return GST_AV1_PROFILE_0;
  else if (g_strcmp0 (profile, "high") == 0)
    return GST_AV1_PROFILE_1;
  else if (g_strcmp0 (profile, "professional") == 0)
    return GST_AV1_PROFILE_2;

  return GST_AV1_PROFILE_UNDEFINED;
}

/* Caps "stream-format" string implied by @align: "annexb" for the annex B
 * TU alignment, "obu-stream" for byte/obu/frame/tu. Logs a warning and
 * returns NULL for any other value. */
static const gchar *
gst_av1_parse_alignment_to_steam_format_string (GstAV1ParseAligment align)
{
  switch (align) {
    case GST_AV1_PARSE_ALIGN_BYTE:
      return "obu-stream";
    case GST_AV1_PARSE_ALIGN_OBU:
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT:
    case GST_AV1_PARSE_ALIGN_FRAME:
      return "obu-stream";
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B:
      return "annexb";
    default:
      GST_WARNING ("Unrecognized steam format");
      break;
  }

  return NULL;
}

/* Caps "alignment" string for @align; both TU variants map to "tu". Logs a
 * warning and returns NULL for any other value. */
static const gchar *
gst_av1_parse_alignment_to_string (GstAV1ParseAligment align)
{
  switch (align) {
    case GST_AV1_PARSE_ALIGN_BYTE:
      return "byte";
    case GST_AV1_PARSE_ALIGN_OBU:
      return "obu";
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT:
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B:
      return "tu";
    case GST_AV1_PARSE_ALIGN_FRAME:
      return "frame";
    default:
      GST_WARNING ("Unrecognized alignment");
      break;
  }

  return NULL;
}

/* Combine the "alignment" and "stream-format" caps strings (either may be
 * NULL) into one GstAV1ParseAligment value. */
static GstAV1ParseAligment
gst_av1_parse_alignment_from_string (const gchar * align,
    const gchar * stream_format)
{
  if (!align && !stream_format)
    return GST_AV1_PARSE_ALIGN_NONE;

  if (stream_format) {
    if (g_strcmp0 (stream_format, "annexb") == 0) {
      if
(align && g_strcmp0 (align, "tu") != 0) {
        /* annex b stream must align to TU. */
        return GST_AV1_PARSE_ALIGN_ERROR;
      } else {
        return GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B;
      }
    } else if (g_strcmp0 (stream_format, "obu-stream") != 0) {
      /* unrecognized */
      return GST_AV1_PARSE_ALIGN_NONE;
    }

    /* stream-format is obu-stream, depends on align */
  }

  if (align) {
    if (g_strcmp0 (align, "byte") == 0) {
      return GST_AV1_PARSE_ALIGN_BYTE;
    } else if (g_strcmp0 (align, "obu") == 0) {
      return GST_AV1_PARSE_ALIGN_OBU;
    } else if (g_strcmp0 (align, "tu") == 0) {
      return GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT;
    } else if (g_strcmp0 (align, "frame") == 0) {
      return GST_AV1_PARSE_ALIGN_FRAME;
    } else {
      /* unrecognized */
      return GST_AV1_PARSE_ALIGN_NONE;
    }
  }

  /* obu-stream without an alignment field. */
  return GST_AV1_PARSE_ALIGN_NONE;
}

/* Return TRUE when some structure of @caps carries both an "alignment" and
 * a "stream-format" field (plain string or list of strings) that contain
 * the pair of strings corresponding to @alignment. Structures lacking
 * either field are skipped. */
static gboolean
gst_av1_parse_caps_has_alignment (GstCaps * caps,
    GstAV1ParseAligment alignment)
{
  guint i, j, caps_size;
  const gchar *cmp_align_str = NULL;
  const gchar *cmp_stream_str = NULL;

  GST_DEBUG ("Try to find alignment %d in caps: %" GST_PTR_FORMAT,
      alignment, caps);

  caps_size = gst_caps_get_size (caps);
  if (caps_size == 0)
    return FALSE;

  /* Translate the enum value into the two strings to look for. */
  switch (alignment) {
    case GST_AV1_PARSE_ALIGN_BYTE:
      cmp_align_str = "byte";
      cmp_stream_str = "obu-stream";
      break;
    case GST_AV1_PARSE_ALIGN_OBU:
      cmp_align_str = "obu";
      cmp_stream_str = "obu-stream";
      break;
    case GST_AV1_PARSE_ALIGN_FRAME:
      cmp_align_str = "frame";
      cmp_stream_str = "obu-stream";
      break;
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT:
      cmp_align_str = "tu";
      cmp_stream_str = "obu-stream";
      break;
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B:
      cmp_align_str = "tu";
      cmp_stream_str = "annexb";
      break;
    default:
      return FALSE;
  }

  for (i = 0; i < caps_size; i++) {
    GstStructure *s = gst_caps_get_structure (caps, i);
    const GValue *alignment_value = gst_structure_get_value (s, "alignment");
    const GValue *stream_value = gst_structure_get_value (s, "stream-format");

    if (!alignment_value || !stream_value)
      continue;

    if (G_VALUE_HOLDS_STRING (alignment_value)) {
      const gchar *align_str =
g_value_get_string (alignment_value);

      if (g_strcmp0 (align_str, cmp_align_str) != 0)
        continue;
    } else if (GST_VALUE_HOLDS_LIST (alignment_value)) {
      guint num_values = gst_value_list_get_size (alignment_value);

      for (j = 0; j < num_values; j++) {
        const GValue *v = gst_value_list_get_value (alignment_value, j);
        const gchar *align_str = g_value_get_string (v);

        if (g_strcmp0 (align_str, cmp_align_str) == 0)
          break;
      }

      /* Loop ran to the end: the list has no matching entry. */
      if (j == num_values)
        continue;
    }

    if (G_VALUE_HOLDS_STRING (stream_value)) {
      const gchar *stream_str = g_value_get_string (stream_value);

      if (g_strcmp0 (stream_str, cmp_stream_str) != 0)
        continue;
    } else if (GST_VALUE_HOLDS_LIST (stream_value)) {
      guint num_values = gst_value_list_get_size (stream_value);

      for (j = 0; j < num_values; j++) {
        const GValue *v = gst_value_list_get_value (stream_value, j);
        const gchar *stream_str = g_value_get_string (v);

        if (g_strcmp0 (stream_str, cmp_stream_str) == 0)
          break;
      }

      /* Loop ran to the end: the list has no matching entry. */
      if (j == num_values)
        continue;
    }

    return TRUE;
  }

  return FALSE;
}

/* Derive the alignment from the "alignment" and "stream-format" string
 * fields of the first structure of @caps. Returns
 * GST_AV1_PARSE_ALIGN_NONE for NULL or empty caps. */
static GstAV1ParseAligment
gst_av1_parse_alignment_from_caps (GstCaps * caps)
{
  GstAV1ParseAligment align;

  align = GST_AV1_PARSE_ALIGN_NONE;

  GST_DEBUG ("parsing caps: %" GST_PTR_FORMAT, caps);

  if (caps && gst_caps_get_size (caps) > 0) {
    GstStructure *s = gst_caps_get_structure (caps, 0);
    const gchar *str_align = NULL;
    const gchar *str_stream = NULL;

    str_align = gst_structure_get_string (s, "alignment");
    str_stream = gst_structure_get_string (s, "stream-format");

    align = gst_av1_parse_alignment_from_string (str_align, str_stream);
  }

  return align;
}

/* Build the src pad caps from the sink caps plus the parsed stream
 * properties and set them on the src pad when they differ from the current
 * ones. Does nothing unless update_caps is set or the src pad has no caps
 * yet. @caps may be NULL, in which case the current sink pad caps are
 * used. */
static void
gst_av1_parse_update_src_caps (GstAV1Parse * self, GstCaps * caps)
{
  GstCaps *sink_caps, *src_caps;
  GstCaps *final_caps = NULL;
  GstStructure *s = NULL;
  gint width, height;
  gint par_n = 0, par_d = 0;
  gint fps_n = 0, fps_d = 0;
  const gchar *profile = NULL;

  if (G_UNLIKELY (!gst_pad_has_current_caps (GST_BASE_PARSE_SRC_PAD (self))))
    self->update_caps = TRUE;

  if (!self->update_caps)
    return;

  /* if this is being called from the first _setcaps
call, caps on the sinkpad * aren't set yet and so they need to be passed as an argument */ if (caps) sink_caps = gst_caps_ref (caps); else sink_caps = gst_pad_get_current_caps (GST_BASE_PARSE_SINK_PAD (self)); /* carry over input caps as much as possible; override with our own stuff */ if (!sink_caps) sink_caps = gst_caps_new_empty_simple ("video/x-av1"); else s = gst_caps_get_structure (sink_caps, 0); final_caps = gst_caps_copy (sink_caps); if (s && gst_structure_has_field (s, "width") && gst_structure_has_field (s, "height")) { gst_structure_get_int (s, "width", &width); gst_structure_get_int (s, "height", &height); } else { width = self->width; height = self->height; } if (width > 0 && height > 0) gst_caps_set_simple (final_caps, "width", G_TYPE_INT, width, "height", G_TYPE_INT, height, NULL); if (s && gst_structure_get_fraction (s, "pixel-aspect-ratio", &par_n, &par_d)) { if (par_n != 0 && par_d != 0) { gst_caps_set_simple (final_caps, "pixel-aspect-ratio", GST_TYPE_FRACTION, par_n, par_d, NULL); } } if (s && gst_structure_has_field (s, "framerate")) { gst_structure_get_fraction (s, "framerate", &fps_n, &fps_d); } if (fps_n > 0 && fps_d > 0) { gst_caps_set_simple (final_caps, "framerate", GST_TYPE_FRACTION, fps_n, fps_d, NULL); gst_base_parse_set_frame_rate (GST_BASE_PARSE (self), fps_n, fps_d, 0, 0); } /* When not RGB, the chroma format is needed. 
*/
  if (self->colorimetry == NULL ||
      (g_strcmp0 (self->colorimetry, GST_VIDEO_COLORIMETRY_SRGB) != 0)) {
    const gchar *chroma_format = NULL;

    /* Stays NULL for any other subsampling combination (including the -1
     * reset values), in which case no chroma-format field is set. */
    if (self->subsampling_x == 1 && self->subsampling_y == 1) {
      if (!self->mono_chrome) {
        chroma_format = "4:2:0";
      } else {
        chroma_format = "4:0:0";
      }
    } else if (self->subsampling_x == 1 && self->subsampling_y == 0) {
      chroma_format = "4:2:2";
    } else if (self->subsampling_x == 0 && self->subsampling_y == 0) {
      chroma_format = "4:4:4";
    }

    if (chroma_format)
      gst_caps_set_simple (final_caps,
          "chroma-format", G_TYPE_STRING, chroma_format, NULL);
  }

  /* The same bit depth is exported for luma and chroma. */
  if (self->bit_depth)
    gst_caps_set_simple (final_caps,
        "bit-depth-luma", G_TYPE_UINT, self->bit_depth,
        "bit-depth-chroma", G_TYPE_UINT, self->bit_depth, NULL);

  /* A colorimetry already present in the sink caps is kept as is. */
  if (self->colorimetry && (!s || !gst_structure_has_field (s, "colorimetry")))
    gst_caps_set_simple (final_caps,
        "colorimetry", G_TYPE_STRING, self->colorimetry, NULL);

  /* The output alignment must have been selected before getting here. */
  g_assert (self->align > GST_AV1_PARSE_ALIGN_NONE);
  gst_caps_set_simple (final_caps, "parsed", G_TYPE_BOOLEAN, TRUE,
      "stream-format", G_TYPE_STRING,
      gst_av1_parse_alignment_to_steam_format_string (self->align),
      "alignment", G_TYPE_STRING,
      gst_av1_parse_alignment_to_string (self->align), NULL);

  profile = gst_av1_parse_profile_to_string (self->profile);
  if (profile)
    gst_caps_set_simple (final_caps, "profile", G_TYPE_STRING, profile, NULL);

  /* Only set the caps when they actually changed. */
  src_caps = gst_pad_get_current_caps (GST_BASE_PARSE_SRC_PAD (self));

  if (!(src_caps && gst_caps_is_strictly_equal (src_caps, final_caps))) {
    GST_DEBUG_OBJECT (self, "Update src caps %" GST_PTR_FORMAT, final_caps);
    gst_pad_set_caps (GST_BASE_PARSE_SRC_PAD (self), final_caps);
  }

  gst_clear_caps (&src_caps);
  gst_caps_unref (final_caps);
  gst_caps_unref (sink_caps);

  self->update_caps = FALSE;
}

/* check downstream caps to configure format and alignment */
static void
gst_av1_parse_negotiate (GstAV1Parse * self, GstCaps * in_caps)
{
  GstCaps *caps;
  GstAV1ParseAligment align;

  caps = gst_pad_get_allowed_caps (GST_BASE_PARSE_SRC_PAD (self));
GST_DEBUG_OBJECT (self, "allowed caps: %" GST_PTR_FORMAT, caps); /* concentrate on leading structure, since decodebin parser * capsfilter always includes parser template caps */ if (caps) { caps = gst_caps_truncate (caps); GST_DEBUG_OBJECT (self, "negotiating with caps: %" GST_PTR_FORMAT, caps); } /* prefer TU as default */ if (gst_av1_parse_caps_has_alignment (caps, GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT)) { self->align = GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT; goto done; } /* Both upsteam and downstream support, best */ if (in_caps && caps) { if (gst_caps_can_intersect (in_caps, caps)) { GstCaps *common_caps = NULL; common_caps = gst_caps_intersect (in_caps, caps); align = gst_av1_parse_alignment_from_caps (common_caps); gst_clear_caps (&common_caps); if (align != GST_AV1_PARSE_ALIGN_NONE && align != GST_AV1_PARSE_ALIGN_ERROR) { self->align = align; goto done; } } } /* Select first one of downstream support */ if (caps && !gst_caps_is_empty (caps)) { /* fixate to avoid ambiguity with lists when parsing */ caps = gst_caps_fixate (caps); align = gst_av1_parse_alignment_from_caps (caps); if (align != GST_AV1_PARSE_ALIGN_NONE && align != GST_AV1_PARSE_ALIGN_ERROR) { self->align = align; goto done; } } /* default */ self->align = GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT; done: GST_INFO_OBJECT (self, "selected alignment %s", gst_av1_parse_alignment_to_string (self->align)); gst_clear_caps (&caps); } static GstCaps * gst_av1_parse_get_sink_caps (GstBaseParse * parse, GstCaps * filter) { GstCaps *peercaps, *templ; GstCaps *res, *tmp, *pcopy; templ = gst_pad_get_pad_template_caps (GST_BASE_PARSE_SINK_PAD (parse)); if (filter) { GstCaps *fcopy = gst_caps_copy (filter); /* Remove the fields we convert */ remove_fields (fcopy, TRUE); peercaps = gst_pad_peer_query_caps (GST_BASE_PARSE_SRC_PAD (parse), fcopy); gst_caps_unref (fcopy); } else { peercaps = gst_pad_peer_query_caps (GST_BASE_PARSE_SRC_PAD (parse), NULL); } pcopy = gst_caps_copy (peercaps); remove_fields (pcopy, TRUE); res = 
gst_caps_intersect_full (pcopy, templ, GST_CAPS_INTERSECT_FIRST);
  gst_caps_unref (pcopy);
  gst_caps_unref (templ);

  if (filter) {
    /* NOTE(review): this declaration shadows the function-level tmp. */
    GstCaps *tmp = gst_caps_intersect_full (res, filter,
        GST_CAPS_INTERSECT_FIRST);
    gst_caps_unref (res);
    res = tmp;
  }

  /* Try if we can put the downstream caps first */
  pcopy = gst_caps_copy (peercaps);
  /* Only "parsed" is removed this time; alignment and stream-format are
   * kept. */
  remove_fields (pcopy, FALSE);
  tmp = gst_caps_intersect_full (pcopy, res, GST_CAPS_INTERSECT_FIRST);
  gst_caps_unref (pcopy);
  if (!gst_caps_is_empty (tmp))
    res = gst_caps_merge (tmp, res);
  else
    gst_caps_unref (tmp);

  gst_caps_unref (peercaps);

  return res;
}

/* GstBaseParse::set_sink_caps: read width, height, profile and the input
 * alignment from @caps, negotiate the output alignment with downstream and
 * reset the AV1 parser for the input format. Returns FALSE when the
 * stream-format and alignment in @caps contradict each other. */
static gboolean
gst_av1_parse_set_sink_caps (GstBaseParse * parse, GstCaps * caps)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);
  GstStructure *str;
  GstAV1ParseAligment align;
  GstCaps *in_caps = NULL;
  const gchar *profile;

  str = gst_caps_get_structure (caps, 0);

  /* accept upstream info if provided */
  gst_structure_get_int (str, "width", &self->width);
  gst_structure_get_int (str, "height", &self->height);
  profile = gst_structure_get_string (str, "profile");
  if (profile)
    self->profile = gst_av1_parse_profile_from_string (profile);

  /* get upstream align from caps */
  align = gst_av1_parse_alignment_from_caps (caps);
  if (align == GST_AV1_PARSE_ALIGN_ERROR) {
    GST_ERROR_OBJECT (self, "Sink caps %" GST_PTR_FORMAT " set stream-format"
        " and alignment conflict.", caps);
    return FALSE;
  }

  in_caps = gst_caps_copy (caps);
  /* default */
  if (align == GST_AV1_PARSE_ALIGN_NONE) {
    align = GST_AV1_PARSE_ALIGN_BYTE;
    gst_caps_set_simple (in_caps, "alignment", G_TYPE_STRING,
        gst_av1_parse_alignment_to_string (align),
        "stream-format", G_TYPE_STRING, "obu-stream", NULL);
  }

  /* negotiate with downstream, set output align */
  gst_av1_parse_negotiate (self, in_caps);

  self->update_caps = TRUE;

  /* if all of decoder's capability related values are provided
   * by upstream, update src caps now */
  if (self->width > 0 && self->height > 0 && profile)
    gst_av1_parse_update_src_caps (self, in_caps);

  gst_caps_unref (in_caps);
self->in_align = align;

  /* TU-aligned obu-stream input enables annex B detection. */
  if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT)
    self->detect_annex_b = TRUE;

  /* The second argument tells the parser whether the input is annex B. */
  if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) {
    gst_av1_parser_reset (self->parser, TRUE);
  } else {
    gst_av1_parser_reset (self->parser, FALSE);
  }

  return TRUE;
}

/* Take everything collected in cache_out, wrap it into one output buffer
 * carrying the pending flags and finish @finish_sz bytes of @frame with it.
 * For annex B output the remaining frame_cache data and then the whole
 * temporal unit are first prefixed with their leb128 sizes.
 * Returns GST_FLOW_OK without finishing a frame when cache_out is empty,
 * otherwise the result of gst_base_parse_finish_frame(). */
static GstFlowReturn
gst_av1_parse_push_data (GstAV1Parse * self, GstBaseParseFrame * frame,
    guint32 finish_sz, gboolean frame_finished)
{
  gsize sz;
  GstBuffer *buf, *header_buf;
  GstBuffer *buffer = frame->buffer;
  GstFlowReturn ret = GST_FLOW_OK;

  /* Need to generate the final TU annex-b format */
  if (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) {
    guint8 size_data[GST_AV1_MAX_LEB_128_SIZE];
    guint size_len = 0;
    guint len;

    /* When push a TU, it must also be a frame end. */
    g_assert (frame_finished);

    /* Still some left in the frame cache */
    len = gst_adapter_available (self->frame_cache);
    if (len) {
      buf = gst_adapter_take_buffer (self->frame_cache, len);

      /* frame_unit_size */
      _write_leb128 (size_data, &size_len, len);
      header_buf = gst_buffer_new_memdup (size_data, size_len);
      GST_BUFFER_PTS (header_buf) = GST_BUFFER_PTS (buf);
      GST_BUFFER_DTS (header_buf) = GST_BUFFER_DTS (buf);
      GST_BUFFER_DURATION (header_buf) = GST_BUFFER_DURATION (buf);

      /* The size header goes in front of the data it describes. */
      gst_adapter_push (self->cache_out, header_buf);
      gst_adapter_push (self->cache_out, buf);
    }

    len = gst_adapter_available (self->cache_out);
    if (len) {
      buf = gst_adapter_take_buffer (self->cache_out, len);

      /* temporal_unit_size */
      _write_leb128 (size_data, &size_len, len);
      header_buf = gst_buffer_new_memdup (size_data, size_len);
      GST_BUFFER_PTS (header_buf) = GST_BUFFER_PTS (buf);
      GST_BUFFER_DTS (header_buf) = GST_BUFFER_DTS (buf);
      GST_BUFFER_DURATION (header_buf) = GST_BUFFER_DURATION (buf);

      gst_adapter_push (self->cache_out, header_buf);
      gst_adapter_push (self->cache_out, buf);
    }
  }

  sz = gst_adapter_available (self->cache_out);
  if (sz) {
    buf = gst_adapter_take_buffer (self->cache_out, sz);
    gst_buffer_copy_into (buf, buffer,
GST_BUFFER_COPY_METADATA, 0, -1); if (self->discont) { GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DISCONT); self->discont = FALSE; } else { GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_DISCONT); } if (self->header) { GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_HEADER); self->header = FALSE; } else { GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_HEADER); } if (self->keyframe) { GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_DELTA_UNIT); self->keyframe = FALSE; } else { GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DELTA_UNIT); } if (frame_finished) { GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_MARKER); } else { GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_MARKER); } if (self->align == GST_AV1_PARSE_ALIGN_FRAME) { if (!self->show_frame) { GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DECODE_ONLY); } else { GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_DECODE_ONLY); } } else { GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_DECODE_ONLY); } gst_buffer_replace (&frame->out_buffer, buf); gst_buffer_unref (buf); gst_av1_parse_update_src_caps (self, NULL); GST_LOG_OBJECT (self, "comsumed %d, output one buffer with size %" G_GSSIZE_FORMAT, finish_sz, sz); ret = gst_base_parse_finish_frame (GST_BASE_PARSE (self), frame, finish_sz); } return ret; } static void gst_av1_parse_convert_to_annexb (GstAV1Parse * self, GstBuffer * buffer, GstAV1OBU * obu, gboolean frame_complete) { guint8 size_data[GST_AV1_MAX_LEB_128_SIZE]; guint size_len = 0; GstBitWriter bs; GstBuffer *buf, *buf2; guint8 *data; guint len, len2, offset; /* obu_length */ _write_leb128 (size_data, &size_len, obu->obu_size + 1 + obu->header.obu_extention_flag); gst_bit_writer_init_with_size (&bs, 128, FALSE); /* obu_forbidden_bit */ gst_bit_writer_put_bits_uint8 (&bs, 0, 1); /* obu_type */ gst_bit_writer_put_bits_uint8 (&bs, obu->obu_type, 4); /* obu_extension_flag */ gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_extention_flag, 1); /* obu_has_size_field */ gst_bit_writer_put_bits_uint8 (&bs, 0, 1); /* obu_reserved_1bit */ 
gst_bit_writer_put_bits_uint8 (&bs, 0, 1);
  if (obu->header.obu_extention_flag) {
    /* temporal_id */
    gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_temporal_id, 3);
    /* spatial_id */
    gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_spatial_id, 2);
    /* extension_header_reserved_3bits */
    gst_bit_writer_put_bits_uint8 (&bs, 0, 3);
  }
  g_assert (GST_BIT_WRITER_BIT_SIZE (&bs) % 8 == 0);

  /* Total size: leb128 length field + OBU header bytes + payload. */
  len = size_len;
  len += GST_BIT_WRITER_BIT_SIZE (&bs) / 8;
  len += obu->obu_size;

  data = g_malloc (len);
  offset = 0;

  memcpy (data + offset, size_data, size_len);
  offset += size_len;

  memcpy (data + offset, GST_BIT_WRITER_DATA (&bs),
      GST_BIT_WRITER_BIT_SIZE (&bs) / 8);
  offset += GST_BIT_WRITER_BIT_SIZE (&bs) / 8;

  memcpy (data + offset, obu->data, obu->obu_size);

  /* The buf of this OBU */
  buf = gst_buffer_new_wrapped (data, len);
  GST_BUFFER_PTS (buf) = GST_BUFFER_PTS (buffer);
  GST_BUFFER_DTS (buf) = GST_BUFFER_DTS (buffer);
  GST_BUFFER_DURATION (buf) = GST_BUFFER_DURATION (buffer);

  gst_adapter_push (self->frame_cache, buf);

  if (frame_complete) {
    len2 = gst_adapter_available (self->frame_cache);
    buf2 = gst_adapter_take_buffer (self->frame_cache, len2);

    /* frame_unit_size */
    _write_leb128 (size_data, &size_len, len2);
    buf = gst_buffer_new_memdup (size_data, size_len);
    GST_BUFFER_PTS (buf) = GST_BUFFER_PTS (buf2);
    GST_BUFFER_DTS (buf) = GST_BUFFER_DTS (buf2);
    GST_BUFFER_DURATION (buf) = GST_BUFFER_DURATION (buf2);

    /* Size header first, then the frame it describes. */
    gst_adapter_push (self->cache_out, buf);
    gst_adapter_push (self->cache_out, buf2);
  }

  gst_bit_writer_reset (&bs);
}

/* Re-write @obu in obu-stream layout (OBU header with
 * obu_has_size_field = 1, then the leb128 obu_size, then the payload) and
 * append it to cache_out. Timestamps are copied from @buffer. */
static void
gst_av1_parse_convert_from_annexb (GstAV1Parse * self, GstBuffer * buffer,
    GstAV1OBU * obu)
{
  guint8 size_data[GST_AV1_MAX_LEB_128_SIZE];
  guint size_len = 0;
  GstBuffer *buf;
  guint len, offset;
  guint8 *data;
  GstBitWriter bs;

  _write_leb128 (size_data, &size_len, obu->obu_size);

  /* obu_header */
  len = 1;
  if (obu->header.obu_extention_flag)
    len += 1;
  len += size_len;
  len += obu->obu_size;

  gst_bit_writer_init_with_size (&bs, 128, FALSE);
  /* obu_forbidden_bit */
gst_bit_writer_put_bits_uint8 (&bs, 0, 1);
  /* obu_type */
  gst_bit_writer_put_bits_uint8 (&bs, obu->obu_type, 4);
  /* obu_extension_flag */
  gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_extention_flag, 1);
  /* obu_has_size_field */
  gst_bit_writer_put_bits_uint8 (&bs, 1, 1);
  /* obu_reserved_1bit */
  gst_bit_writer_put_bits_uint8 (&bs, 0, 1);
  if (obu->header.obu_extention_flag) {
    /* temporal_id */
    gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_temporal_id, 3);
    /* spatial_id */
    gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_spatial_id, 2);
    /* extension_header_reserved_3bits */
    gst_bit_writer_put_bits_uint8 (&bs, 0, 3);
  }
  g_assert (GST_BIT_WRITER_BIT_SIZE (&bs) % 8 == 0);

  data = g_malloc (len);
  offset = 0;
  /* Layout: OBU header, leb128 size field, payload. */
  memcpy (data + offset, GST_BIT_WRITER_DATA (&bs),
      GST_BIT_WRITER_BIT_SIZE (&bs) / 8);
  offset += GST_BIT_WRITER_BIT_SIZE (&bs) / 8;

  memcpy (data + offset, size_data, size_len);
  offset += size_len;

  memcpy (data + offset, obu->data, obu->obu_size);

  /* The new buffer takes ownership of data. */
  buf = gst_buffer_new_wrapped (data, len);
  GST_BUFFER_PTS (buf) = GST_BUFFER_PTS (buffer);
  GST_BUFFER_DTS (buf) = GST_BUFFER_DTS (buffer);
  GST_BUFFER_DURATION (buf) = GST_BUFFER_DURATION (buffer);

  gst_adapter_push (self->cache_out, buf);

  gst_bit_writer_reset (&bs);
}

/* Queue one OBU for output. The OBU is re-written when exactly one of the
 * input/output sides is annex B, re-packed through the annex B writer when
 * both are, and otherwise the raw @size bytes at @data are copied to
 * cache_out unchanged. */
static void
gst_av1_parse_cache_one_obu (GstAV1Parse * self, GstBuffer * buffer,
    GstAV1OBU * obu, guint8 * data, guint32 size, gboolean frame_complete)
{
  gboolean need_convert = FALSE;
  GstBuffer *buf;

  if (self->in_align != self->align
      && (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B
          || self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B))
    need_convert = TRUE;

  if (need_convert) {
    if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) {
      gst_av1_parse_convert_from_annexb (self, buffer, obu);
    } else {
      gst_av1_parse_convert_to_annexb (self, buffer, obu, frame_complete);
    }
  } else if (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) {
    g_assert (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B);
gst_av1_parse_convert_to_annexb (self, buffer, obu, frame_complete);
  } else {
    buf = gst_buffer_new_memdup (data, size);
    GST_BUFFER_PTS (buf) = GST_BUFFER_PTS (buffer);
    GST_BUFFER_DTS (buf) = GST_BUFFER_DTS (buffer);
    GST_BUFFER_DURATION (buf) = GST_BUFFER_DURATION (buffer);

    gst_adapter_push (self->cache_out, buf);
  }
}

/* Parse a sequence header OBU and copy the caps-relevant values (size,
 * colorimetry, subsampling, mono_chrome, bit depth, profile) into @self,
 * setting update_caps whenever one of them changed. Also records the
 * highest spatial id of the current operating point.
 * Returns the parser result; @self is left untouched on failure. */
static GstAV1ParserResult
gst_av1_parse_handle_sequence_obu (GstAV1Parse * self, GstAV1OBU * obu)
{
  GstAV1SequenceHeaderOBU seq_header;
  GstAV1ParserResult res;
  guint i;
  guint val;

  res = gst_av1_parser_parse_sequence_header_obu (self->parser,
      obu, &seq_header);
  if (res != GST_AV1_PARSER_OK)
    return res;

  if (self->width != seq_header.max_frame_width_minus_1 + 1) {
    self->width = seq_header.max_frame_width_minus_1 + 1;
    self->update_caps = TRUE;
  }
  if (self->height != seq_header.max_frame_height_minus_1 + 1) {
    self->height = seq_header.max_frame_height_minus_1 + 1;
    self->update_caps = TRUE;
  }

  if (seq_header.color_config.color_description_present_flag) {
    GstVideoColorimetry cinfo;
    gchar *colorimetry = NULL;

    if (seq_header.color_config.color_range)
      cinfo.range = GST_VIDEO_COLOR_RANGE_0_255;
    else
      cinfo.range = GST_VIDEO_COLOR_RANGE_16_235;

    cinfo.matrix = gst_video_color_matrix_from_iso
        (seq_header.color_config.matrix_coefficients);
    cinfo.transfer = gst_video_transfer_function_from_iso
        (seq_header.color_config.transfer_characteristics);
    cinfo.primaries = gst_video_color_primaries_from_iso
        (seq_header.color_config.color_primaries);

    colorimetry = gst_video_colorimetry_to_string (&cinfo);

    if (g_strcmp0 (colorimetry, self->colorimetry) != 0) {
      g_free (self->colorimetry);
      /* Ownership of the new string moves to self. */
      self->colorimetry = colorimetry;
      colorimetry = NULL;
      self->update_caps = TRUE;
    }

    /* Frees the string only when it was not adopted above. */
    g_clear_pointer (&colorimetry, g_free);
  }

  if (self->subsampling_x != seq_header.color_config.subsampling_x) {
    self->subsampling_x = seq_header.color_config.subsampling_x;
    self->update_caps = TRUE;
  }

  if (self->subsampling_y != seq_header.color_config.subsampling_y) {
    self->subsampling_y =
seq_header.color_config.subsampling_y; self->update_caps = TRUE; } if (self->mono_chrome != seq_header.color_config.mono_chrome) { self->mono_chrome = seq_header.color_config.mono_chrome; self->update_caps = TRUE; } if (self->bit_depth != seq_header.bit_depth) { self->bit_depth = seq_header.bit_depth; self->update_caps = TRUE; } if (self->profile != seq_header.seq_profile) { self->profile = seq_header.seq_profile; self->update_caps = TRUE; } val = (self->parser->state.operating_point_idc >> 8) & 0x0f; for (i = 0; i < (1 << GST_AV1_MAX_SPATIAL_LAYERS); i++) { if (val & (1 << i)) self->highest_spatial_id = i; } return GST_AV1_PARSER_OK; } /* Check whether the frame start a new TU. The obu here should be a shown frame/frame header. */ static gboolean gst_av1_parse_frame_start_new_temporal_unit (GstAV1Parse * self, GstAV1OBU * obu) { gboolean ret = FALSE; g_assert (obu->obu_type == GST_AV1_OBU_FRAME_HEADER || obu->obu_type == GST_AV1_OBU_FRAME); /* 7.5.Ordering of OBUs: The value of temporal_id must be the same in all OBU extension headers that are contained in the same temporal unit. */ if (self->last_shown_frame_temporal_id >= 0 && obu->header.obu_temporal_id != self->last_shown_frame_temporal_id) { ret = TRUE; goto new_tu; } /* If scalability is not being used, only one shown frame for each temporal unit. So the new frame belongs to a new temporal unit. */ if (!self->within_one_frame && self->last_shown_frame_temporal_id >= 0 && self->parser->state.operating_point_idc == 0) { ret = TRUE; goto new_tu; } /* The new frame has the same layer IDs with the last shown frame, it should belong to a new temporal unit. 
*/ if (!self->within_one_frame && obu->header.obu_temporal_id == self->last_shown_frame_temporal_id && obu->header.obu_spatial_id == self->last_shown_frame_spatial_id) { ret = TRUE; goto new_tu; } new_tu: if (ret) { if (self->within_one_frame) GST_WARNING_OBJECT (self, "Start a new temporal unit with incompleted frame."); gst_av1_parse_reset_obu_data_state (self); } return ret; } /* frame_complete will be set true if it is the frame edge. */ static GstAV1ParserResult gst_av1_parse_handle_one_obu (GstAV1Parse * self, GstAV1OBU * obu, gboolean * frame_complete, gboolean * check_new_tu) { GstAV1ParserResult res = GST_AV1_PARSER_OK; GstAV1MetadataOBU metadata; GstAV1FrameHeaderOBU frame_header; GstAV1TileListOBU tile_list; GstAV1TileGroupOBU tile_group; GstAV1FrameOBU frame; *frame_complete = FALSE; switch (obu->obu_type) { case GST_AV1_OBU_TEMPORAL_DELIMITER: res = gst_av1_parser_parse_temporal_delimiter_obu (self->parser, obu); break; case GST_AV1_OBU_SEQUENCE_HEADER: res = gst_av1_parse_handle_sequence_obu (self, obu); break; case GST_AV1_OBU_REDUNDANT_FRAME_HEADER: res = gst_av1_parser_parse_frame_header_obu (self->parser, obu, &frame_header); break; case GST_AV1_OBU_FRAME_HEADER: res = gst_av1_parser_parse_frame_header_obu (self->parser, obu, &frame_header); break; case GST_AV1_OBU_FRAME: res = gst_av1_parser_parse_frame_obu (self->parser, obu, &frame); break; case GST_AV1_OBU_METADATA: res = gst_av1_parser_parse_metadata_obu (self->parser, obu, &metadata); break; case GST_AV1_OBU_TILE_GROUP: res = gst_av1_parser_parse_tile_group_obu (self->parser, obu, &tile_group); break; case GST_AV1_OBU_TILE_LIST: res = gst_av1_parser_parse_tile_list_obu (self->parser, obu, &tile_list); break; case GST_AV1_OBU_PADDING: break; default: GST_WARNING_OBJECT (self, "an unrecognized obu type %d", obu->obu_type); res = GST_AV1_PARSER_BITSTREAM_ERROR; break; } GST_LOG_OBJECT (self, "parsing the obu %s, result is %d", _obu_name (obu->obu_type), res); if (res != GST_AV1_PARSER_OK) goto 
out; /* 7.5: All OBU extension headers that are contained in the same temporal unit and have the same spatial_id value must have the same temporal_id value. And OBUs with spatial level IDs (spatial_id) greater than 0 must appear within a temporal unit in increasing order of the spatial level ID values. */ if (obu->header.obu_spatial_id > self->highest_spatial_id) { GST_WARNING_OBJECT (self, "spatial_id %d is bigger than highest_spatial_id %d", obu->header.obu_spatial_id, self->highest_spatial_id); res = GST_AV1_PARSER_BITSTREAM_ERROR; goto out; } /* If to check a new temporal starts, return early. In 7.5.Ordering of OBUs: Sequence header OBUs may appear in any order within a coded video sequence. So it is allowed to repeat the sequence header within one temporal unit, and sequence header does not definitely start a TU. We only check TD here. */ if (obu->obu_type == GST_AV1_OBU_TEMPORAL_DELIMITER) { gst_av1_parse_reset_obu_data_state (self); if (check_new_tu) { *check_new_tu = TRUE; res = GST_AV1_PARSER_OK; goto out; } } if (obu->obu_type == GST_AV1_OBU_SEQUENCE_HEADER) self->header = TRUE; if (obu->obu_type == GST_AV1_OBU_FRAME_HEADER || obu->obu_type == GST_AV1_OBU_FRAME || obu->obu_type == GST_AV1_OBU_REDUNDANT_FRAME_HEADER) { GstAV1FrameHeaderOBU *fh = &frame_header; if (obu->obu_type == GST_AV1_OBU_FRAME) fh = &frame.frame_header; self->show_frame = fh->show_frame || fh->show_existing_frame; if (self->show_frame) { /* Check whether a new temporal starts, and return early. */ if (check_new_tu && obu->obu_type != GST_AV1_OBU_REDUNDANT_FRAME_HEADER && gst_av1_parse_frame_start_new_temporal_unit (self, obu)) { *check_new_tu = TRUE; res = GST_AV1_PARSER_OK; goto out; } self->last_shown_frame_temporal_id = obu->header.obu_temporal_id; self->last_shown_frame_spatial_id = obu->header.obu_spatial_id; } self->within_one_frame = TRUE; /* if a show_existing_frame case, only update key frame. otherwise, update all type of frame. 
*/ if (!fh->show_existing_frame || fh->frame_type == GST_AV1_KEY_FRAME) res = gst_av1_parser_reference_frame_update (self->parser, fh); if (res != GST_AV1_PARSER_OK) GST_WARNING_OBJECT (self, "update frame get result %d", res); if (fh->show_existing_frame) { *frame_complete = TRUE; self->within_one_frame = FALSE; } if (fh->frame_type == GST_AV1_KEY_FRAME) self->keyframe = TRUE; } if (obu->obu_type == GST_AV1_OBU_TILE_GROUP || obu->obu_type == GST_AV1_OBU_FRAME) { GstAV1TileGroupOBU *tg = &tile_group; self->within_one_frame = TRUE; if (obu->obu_type == GST_AV1_OBU_FRAME) tg = &frame.tile_group; if (tg->tg_end == tg->num_tiles - 1) { *frame_complete = TRUE; self->within_one_frame = FALSE; } } out: if (res != GST_AV1_PARSER_OK) { /* Some verbose OBU can be skip */ if (obu->obu_type == GST_AV1_OBU_REDUNDANT_FRAME_HEADER) { GST_WARNING_OBJECT (self, "Ignore a verbose %s OBU parsing error", _obu_name (obu->obu_type)); gst_av1_parse_reset_obu_data_state (self); res = GST_AV1_PARSER_OK; } } return res; } static GstFlowReturn gst_av1_parse_handle_obu_to_obu (GstBaseParse * parse, GstBaseParseFrame * frame, gint * skipsize) { GstAV1Parse *self = GST_AV1_PARSE (parse); GstMapInfo map_info; GstAV1OBU obu; GstFlowReturn ret = GST_FLOW_OK; GstAV1ParserResult res; GstBuffer *buffer = gst_buffer_ref (frame->buffer); guint32 consumed; gboolean frame_complete; if (!gst_buffer_map (buffer, &map_info, GST_MAP_READ)) { *skipsize = 0; GST_ERROR_OBJECT (parse, "Couldn't map incoming buffer"); return GST_FLOW_ERROR; } consumed = 0; frame_complete = FALSE; res = gst_av1_parser_identify_one_obu (self->parser, map_info.data, map_info.size, &obu, &consumed); if (res == GST_AV1_PARSER_OK) res = gst_av1_parse_handle_one_obu (self, &obu, &frame_complete, NULL); g_assert (consumed <= map_info.size); if (res == GST_AV1_PARSER_BITSTREAM_ERROR || res == GST_AV1_PARSER_MISSING_OBU_REFERENCE) { if (consumed) { *skipsize = consumed; } else { *skipsize = map_info.size; } GST_WARNING_OBJECT (parse, 
"Parse obu error, discard %d.", *skipsize); gst_av1_parse_reset_obu_data_state (self); ret = GST_FLOW_OK; goto out; } else if (res == GST_AV1_PARSER_NO_MORE_DATA) { *skipsize = 0; if (self->in_align == GST_AV1_PARSE_ALIGN_OBU) { /* The buffer is already aligned to OBU, should not happen. */ if (consumed) { *skipsize = consumed; } else { *skipsize = map_info.size; } GST_WARNING_OBJECT (parse, "Parse obu need more data, discard %d.", *skipsize); gst_av1_parse_reset_obu_data_state (self); } ret = GST_FLOW_OK; goto out; } else if (res == GST_AV1_PARSER_DROP) { GST_DEBUG_OBJECT (parse, "Drop %d data", consumed); *skipsize = consumed; gst_av1_parse_reset_obu_data_state (self); ret = GST_FLOW_OK; goto out; } else if (res != GST_AV1_PARSER_OK) { GST_ERROR_OBJECT (parse, "Parse obu get unexpect error %d", res); *skipsize = 0; ret = GST_FLOW_ERROR; goto out; } g_assert (consumed); gst_av1_parse_update_src_caps (self, NULL); if (self->discont) { GST_BUFFER_FLAG_SET (buffer, GST_BUFFER_FLAG_DISCONT); self->discont = FALSE; } if (self->header) { GST_BUFFER_FLAG_SET (buffer, GST_BUFFER_FLAG_HEADER); self->header = FALSE; } /* happen to be a frame boundary */ if (frame_complete) GST_BUFFER_FLAG_SET (buffer, GST_BUFFER_FLAG_MARKER); GST_LOG_OBJECT (self, "Output one buffer with size %d", consumed); ret = gst_base_parse_finish_frame (parse, frame, consumed); *skipsize = 0; out: gst_buffer_unmap (buffer, &map_info); gst_buffer_unref (buffer); return ret; } static void gst_av1_parse_create_subframe (GstBaseParseFrame * frame, GstBaseParseFrame * subframe, GstBuffer * buffer) { gst_base_parse_frame_init (subframe); subframe->flags |= frame->flags; subframe->offset = frame->offset; subframe->overhead = frame->overhead; /* Just ref the input buffer. The base parse will check that pointer, and it will be replaced by its out_buffer later. 
*/ subframe->buffer = gst_buffer_ref (buffer); } static GstFlowReturn gst_av1_parse_handle_to_small_and_equal_align (GstBaseParse * parse, GstBaseParseFrame * frame, gint * skipsize) { GstAV1Parse *self = GST_AV1_PARSE (parse); GstMapInfo map_info; GstAV1OBU obu; GstFlowReturn ret = GST_FLOW_OK; GstAV1ParserResult res = GST_AV1_PARSER_INVALID_OPERATION; GstBuffer *buffer = gst_buffer_ref (frame->buffer); guint32 offset, consumed_before_push, consumed; gboolean frame_complete; GstBaseParseFrame subframe; if (!gst_buffer_map (buffer, &map_info, GST_MAP_READ)) { GST_ERROR_OBJECT (parse, "Couldn't map incoming buffer"); return GST_FLOW_ERROR; } consumed_before_push = 0; offset = 0; frame_complete = FALSE; again: while (offset < map_info.size) { res = gst_av1_parser_identify_one_obu (self->parser, map_info.data + offset, map_info.size - offset, &obu, &consumed); if (res == GST_AV1_PARSER_OK) res = gst_av1_parse_handle_one_obu (self, &obu, &frame_complete, NULL); if (res != GST_AV1_PARSER_OK) break; if (obu.obu_type == GST_AV1_OBU_TEMPORAL_DELIMITER && consumed_before_push > 0) { GST_DEBUG_OBJECT (self, "Encounter TD inside one %s aligned" " buffer, should not happen normally.", gst_av1_parse_alignment_to_string (self->in_align)); if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) gst_av1_parser_reset_annex_b (self->parser); /* Not include this TD obu, it should belong to the next TU or frame, we push all the data we already got. */ gst_av1_parse_create_subframe (frame, &subframe, buffer); ret = gst_av1_parse_push_data (self, &subframe, consumed_before_push, TRUE); if (ret != GST_FLOW_OK) goto out; /* Begin to find the next. 
*/ frame_complete = FALSE; consumed_before_push = 0; continue; } gst_av1_parse_cache_one_obu (self, buffer, &obu, map_info.data + offset, consumed, frame_complete); offset += consumed; consumed_before_push += consumed; if ((self->align == GST_AV1_PARSE_ALIGN_OBU) || (self->align == GST_AV1_PARSE_ALIGN_FRAME && frame_complete)) { gst_av1_parse_create_subframe (frame, &subframe, buffer); ret = gst_av1_parse_push_data (self, &subframe, consumed_before_push, frame_complete); if (ret != GST_FLOW_OK) goto out; /* Begin to find the next. */ frame_complete = FALSE; consumed_before_push = 0; continue; } } if (res == GST_AV1_PARSER_BITSTREAM_ERROR || res == GST_AV1_PARSER_MISSING_OBU_REFERENCE) { /* Discard the whole frame */ *skipsize = map_info.size; GST_WARNING_OBJECT (parse, "Parse obu error, discard %d", *skipsize); if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) gst_av1_parser_reset_annex_b (self->parser); gst_av1_parse_reset_obu_data_state (self); ret = GST_FLOW_OK; goto out; } else if (res == GST_AV1_PARSER_NO_MORE_DATA) { /* Discard the whole buffer */ *skipsize = map_info.size; GST_WARNING_OBJECT (parse, "Parse obu need more data, discard %d.", *skipsize); if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) gst_av1_parser_reset_annex_b (self->parser); gst_av1_parse_reset_obu_data_state (self); ret = GST_FLOW_OK; goto out; } else if (res == GST_AV1_PARSER_DROP) { GST_DEBUG_OBJECT (parse, "Drop %d data", consumed); offset += consumed; gst_av1_parse_reset_obu_data_state (self); res = GST_AV1_PARSER_OK; goto again; } else if (res != GST_AV1_PARSER_OK) { GST_ERROR_OBJECT (parse, "Parse obu get unexpect error %d", res); *skipsize = 0; ret = GST_FLOW_ERROR; goto out; } /* If the total buffer exhausted but frame is not complete, we just push the left data and consider it as a frame. 
*/ if (consumed_before_push > 0 && !frame_complete && self->align == GST_AV1_PARSE_ALIGN_FRAME) { g_assert (offset >= map_info.size); /* Warning and still consider the frame is complete */ GST_WARNING_OBJECT (self, "Exhaust the buffer but still incomplete frame," " should not happend in %s alignment", gst_av1_parse_alignment_to_string (self->in_align)); } ret = gst_av1_parse_push_data (self, frame, consumed_before_push, TRUE); out: gst_buffer_unmap (buffer, &map_info); gst_buffer_unref (buffer); return ret; } static GstFlowReturn gst_av1_parse_handle_to_big_align (GstBaseParse * parse, GstBaseParseFrame * frame, gint * skipsize) { GstAV1Parse *self = GST_AV1_PARSE (parse); GstMapInfo map_info; GstAV1OBU obu; GstFlowReturn ret = GST_FLOW_OK; GstAV1ParserResult res = GST_AV1_PARSER_OK; GstBuffer *buffer = gst_buffer_ref (frame->buffer); guint32 consumed; gboolean frame_complete; gboolean check_new_tu; gboolean complete; g_assert (self->in_align <= GST_AV1_PARSE_ALIGN_FRAME); if (!gst_buffer_map (buffer, &map_info, GST_MAP_READ)) { *skipsize = 0; GST_ERROR_OBJECT (parse, "Couldn't map incoming buffer"); return GST_FLOW_ERROR; } complete = FALSE; again: while (self->last_parsed_offset < map_info.size) { res = gst_av1_parser_identify_one_obu (self->parser, map_info.data + self->last_parsed_offset, map_info.size - self->last_parsed_offset, &obu, &consumed); if (res != GST_AV1_PARSER_OK) break; check_new_tu = FALSE; res = gst_av1_parse_handle_one_obu (self, &obu, &frame_complete, &check_new_tu); if (res != GST_AV1_PARSER_OK) break; if (check_new_tu && (gst_adapter_available (self->cache_out) || gst_adapter_available (self->frame_cache))) { complete = TRUE; break; } if (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT || self->align == GST_AV1_PARSE_ALIGN_FRAME) { GstBuffer *buf = gst_buffer_copy_region (buffer, GST_BUFFER_COPY_ALL, self->last_parsed_offset, consumed); gst_adapter_push (self->cache_out, buf); } else if (self->align == 
GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) { gst_av1_parse_convert_to_annexb (self, buffer, &obu, frame_complete); } else { g_assert_not_reached (); } self->last_parsed_offset += consumed; if (self->align == GST_AV1_PARSE_ALIGN_FRAME && frame_complete) complete = TRUE; if (complete) break; } /* Finish a complete frame anyway */ if (complete || GST_BASE_PARSE_DRAINING (parse)) { *skipsize = 0; /* push the left anyway if no error */ if (res == GST_AV1_PARSER_OK) ret = gst_av1_parse_push_data (self, frame, self->last_parsed_offset, TRUE); self->last_parsed_offset = 0; goto out; } if (res == GST_AV1_PARSER_BITSTREAM_ERROR || res == GST_AV1_PARSER_MISSING_OBU_REFERENCE) { *skipsize = map_info.size; GST_WARNING_OBJECT (parse, "Parse obu error, discard whole buffer %d.", *skipsize); /* The adapter will be cleared in next loop because of GST_BASE_PARSE_FRAME_FLAG_NEW_FRAME flag */ gst_av1_parse_reset_obu_data_state (self); ret = GST_FLOW_OK; } else if (res == GST_AV1_PARSER_NO_MORE_DATA) { *skipsize = 0; if (self->in_align >= GST_AV1_PARSE_ALIGN_OBU) { /* The buffer is already aligned to OBU, should not happen. 
The adapter will be cleared in next loop because of GST_BASE_PARSE_FRAME_FLAG_NEW_FRAME flag */ *skipsize = map_info.size; gst_av1_parse_reset_obu_data_state (self); GST_WARNING_OBJECT (parse, "Parse obu need more data, discard whole buffer %d.", *skipsize); } ret = GST_FLOW_OK; } else if (res == GST_AV1_PARSER_DROP) { GST_DEBUG_OBJECT (parse, "Drop %d data", consumed); self->last_parsed_offset += consumed; gst_av1_parse_reset_obu_data_state (self); res = GST_AV1_PARSER_OK; goto again; } else if (res == GST_AV1_PARSER_OK) { /* Everything is correct but still not get a frame or tu, need more data */ GST_DEBUG_OBJECT (parse, "Need more data"); *skipsize = 0; ret = GST_FLOW_OK; } else { GST_ERROR_OBJECT (parse, "Parse obu get unexpect error %d", res); *skipsize = 0; ret = GST_FLOW_ERROR; } out: gst_buffer_unmap (buffer, &map_info); gst_buffer_unref (buffer); return ret; } /* Try to recognize whether the input is annex-b format. return TRUE if we decide, FALSE if we can not decide or encounter some error. 
*/ static gboolean gst_av1_parse_detect_stream_format (GstBaseParse * parse, GstBaseParseFrame * frame) { GstAV1Parse *self = GST_AV1_PARSE (parse); GstMapInfo map_info; GstAV1OBU obu; GstAV1ParserResult res = GST_AV1_PARSER_INVALID_OPERATION; GstBuffer *buffer = gst_buffer_ref (frame->buffer); gboolean got_seq, got_frame; gboolean frame_complete; guint32 consumed; guint32 total_consumed; guint32 tu_sz; gboolean ret = FALSE; g_assert (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT); g_assert (self->detect_annex_b == TRUE); if (!gst_buffer_map (buffer, &map_info, GST_MAP_READ)) { GST_ERROR_OBJECT (parse, "Couldn't map incoming buffer"); return FALSE; } gst_av1_parser_reset (self->parser, FALSE); got_seq = FALSE; got_frame = FALSE; total_consumed = 0; again: while (total_consumed < map_info.size) { res = gst_av1_parser_identify_one_obu (self->parser, map_info.data + total_consumed, map_info.size - total_consumed, &obu, &consumed); if (res == GST_AV1_PARSER_OK) { total_consumed += consumed; res = gst_av1_parse_handle_one_obu (self, &obu, &frame_complete, NULL); } if (res != GST_AV1_PARSER_OK) break; if (obu.obu_type == GST_AV1_OBU_SEQUENCE_HEADER) got_seq = TRUE; if (obu.obu_type == GST_AV1_OBU_REDUNDANT_FRAME_HEADER || obu.obu_type == GST_AV1_OBU_FRAME || obu.obu_type == GST_AV1_OBU_FRAME_HEADER) got_frame = TRUE; if (got_seq || got_frame) break; } gst_av1_parser_reset (self->parser, FALSE); /* If succeed recognize seq or frame, it's done. otherwise, just need to get more data. */ if (got_seq || got_frame) { ret = TRUE; self->detect_annex_b = FALSE; goto out; } if (res == GST_AV1_PARSER_DROP) { total_consumed += consumed; res = GST_AV1_PARSER_OK; gst_av1_parse_reset_obu_data_state (self); goto again; } /* Try the annex b format. The buffer should contain the whole TU, and the buffer start with the TU size in leb128() format. */ if (map_info.size < 8) { /* Too small. 
*/ goto out; } tu_sz = _read_leb128 (map_info.data, &res, &consumed); if (tu_sz == 0 || res != GST_AV1_PARSER_OK) { /* error to get the TU size, should not be annex b. */ goto out; } if (tu_sz + consumed != map_info.size) { GST_DEBUG_OBJECT (self, "Buffer size %" G_GSSIZE_FORMAT ", TU size %d," " do not match.", map_info.size, tu_sz); goto out; } GST_INFO_OBJECT (self, "Detect the annex-b format"); self->in_align = GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B; self->detect_annex_b = FALSE; gst_av1_parser_reset (self->parser, TRUE); ret = TRUE; out: gst_av1_parse_reset_obu_data_state (self); gst_buffer_unmap (buffer, &map_info); gst_buffer_unref (buffer); return ret; } static GstFlowReturn gst_av1_parse_handle_frame (GstBaseParse * parse, GstBaseParseFrame * frame, gint * skipsize) { GstAV1Parse *self = GST_AV1_PARSE (parse); GstFlowReturn ret = GST_FLOW_OK; guint in_level, out_level; if (GST_BUFFER_FLAG_IS_SET (frame->buffer, GST_BUFFER_FLAG_DISCONT)) { self->discont = TRUE; if (frame->flags & GST_BASE_PARSE_FRAME_FLAG_NEW_FRAME) gst_av1_parse_reset_obu_data_state (self); } else { self->discont = FALSE; } GST_LOG_OBJECT (self, "Input frame size %" G_GSSIZE_FORMAT, gst_buffer_get_size (frame->buffer)); /* avoid stale cached parsing state */ if (frame->flags & GST_BASE_PARSE_FRAME_FLAG_NEW_FRAME) { GST_LOG_OBJECT (self, "parsing new frame"); gst_adapter_clear (self->cache_out); gst_adapter_clear (self->frame_cache); self->last_parsed_offset = 0; self->header = FALSE; self->keyframe = FALSE; self->show_frame = FALSE; } else { GST_LOG_OBJECT (self, "resuming frame parsing"); } /* When in pull mode, the sink pad has no caps, we may get the caps by query the upstream element */ if (self->in_align == GST_AV1_PARSE_ALIGN_NONE) { GstCaps *upstream_caps; upstream_caps = gst_pad_peer_query_caps (GST_BASE_PARSE_SINK_PAD (self), NULL); if (upstream_caps) { if (!gst_caps_is_empty (upstream_caps) && !gst_caps_is_any (upstream_caps)) { GstAV1ParseAligment align; GST_LOG_OBJECT (self, 
"upstream caps: %" GST_PTR_FORMAT, upstream_caps); /* fixate to avoid ambiguity with lists when parsing */ upstream_caps = gst_caps_fixate (upstream_caps); align = gst_av1_parse_alignment_from_caps (upstream_caps); if (align == GST_AV1_PARSE_ALIGN_ERROR) { GST_ERROR_OBJECT (self, "upstream caps %" GST_PTR_FORMAT " set stream-format and alignment conflict.", upstream_caps); gst_caps_unref (upstream_caps); return GST_FLOW_ERROR; } self->in_align = align; } gst_caps_unref (upstream_caps); gst_av1_parser_reset (self->parser, self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B); } if (self->in_align != GST_AV1_PARSE_ALIGN_NONE) { GST_LOG_OBJECT (self, "Query the upstream get the alignment %s", gst_av1_parse_alignment_to_string (self->in_align)); } else { self->in_align = GST_AV1_PARSE_ALIGN_BYTE; GST_DEBUG_OBJECT (self, "alignment set to default %s", gst_av1_parse_alignment_to_string (GST_AV1_PARSE_ALIGN_BYTE)); } } if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT && self->detect_annex_b) { /* Only happend at the first time of handle_frame, try to recognize the annex b stream format. */ if (gst_av1_parse_detect_stream_format (parse, frame)) { GST_INFO_OBJECT (self, "Input alignment %s", gst_av1_parse_alignment_to_string (self->in_align)); } else { /* Because the input is already TU aligned, we should skip the whole problematic TU and check the next one. 
*/ *skipsize = gst_buffer_get_size (frame->buffer); GST_WARNING_OBJECT (self, "Fail to detect the stream format for TU," " skip the whole TU %d", *skipsize); return GST_FLOW_OK; } } /* We may in pull mode and no caps is set */ if (self->align == GST_AV1_PARSE_ALIGN_NONE) gst_av1_parse_negotiate (self, NULL); in_level = self->in_align; if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) in_level = GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT; out_level = self->align; if (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) out_level = GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT; if (self->in_align <= GST_AV1_PARSE_ALIGN_OBU && self->align == GST_AV1_PARSE_ALIGN_OBU) { ret = gst_av1_parse_handle_obu_to_obu (parse, frame, skipsize); } else if (in_level < out_level) { ret = gst_av1_parse_handle_to_big_align (parse, frame, skipsize); } else { ret = gst_av1_parse_handle_to_small_and_equal_align (parse, frame, skipsize); } return ret; }