/* GStreamer
 * Copyright (C) 2020 He Junyan <junyan.he@intel.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

/*
 * SECTION:element-av1parse
 * @title: av1parse
 * @short_description: An AV1 stream parser.
 *
 * The minimal unit should be the BYTE.
 * There are four types of AV1 alignment in the AV1 stream.
 *
 * alignment: byte, obu, frame, tu
 *
 * 1. Aligned to byte. The basic and default one for input.
 * 2. Aligned to obu(Open Bitstream Units).
 * 3. Aligned to frame. The default one for output. This ensures that
 *    each buffer contains only one frame or frame header with the
 *    show_existing flag for the base or sub layer. It is useful for
 *    the decoder.
 * 4. Aligned to tu(Temporal Unit). A temporal unit consists of all the
 *    OBUs that are associated with a specific, distinct time instant.
 *    When scalability is disabled, it contains exactly one shown
 *    frame (and may contain several frames that are not shown). When
 *    scalability is enabled, it contains frames depending on the layer
 *    number. It should begin with a temporal delimiter obu. It may be
 *    useful for mux/demux to index the data of some timestamp.
 *
 * Annex B defines a special format for the temporal unit. The size of
 * each temporal unit is extracted into a header at the start of the
 * buffer, and there is no size field inside each obu. There are two
 * stream formats:
 *
 * stream-format: obu-stream, annexb
 *
 * 1. obu-stream. The basic and default one.
 * 2. annexb. A special stream of temporal unit. It also implies that the
 *    alignment should be TU.
 *
 * This AV1 parse implements the conversion between the alignments and the
 * stream-formats. If the input and output have the same alignment and the
 * same stream-format, it will check and bypass the data.
 *
 * ## Example launch line to generate annex B format AV1 stream:
 * ```
 * gst-launch-1.0 filesrc location=sample.av1 ! ivfparse ! av1parse !  \
 *   video/x-av1,alignment=\(string\)tu,stream-format=\(string\)annexb ! \
 *   matroskamux ! filesink location=trans.mkv
 * ```
 *
 * Since: 1.20
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <gst/base/gstbitreader.h>
#include <gst/base/gstbitwriter.h>
#include <gst/codecparsers/gstav1parser.h>
#include <gst/video/video.h>
#include "gstvideoparserselements.h"
#include "gstav1parse.h"

#include <string.h>

/* Upper bound, in bytes, of an encoded leb128 value. It sizes the scratch
 * buffers handed to _write_leb128() and is the limit that function checks
 * the encoded length against. */
#define GST_AV1_MAX_LEB_128_SIZE 8

/* Debug category used by all GST_* logging macros in this file. */
GST_DEBUG_CATEGORY (av1_parse_debug);
#define GST_CAT_DEFAULT av1_parse_debug

/* We combine the stream format and the alignment
   together. When stream format is annexb, the
   alignment must be TU. */
typedef enum
{
  GST_AV1_PARSE_ALIGN_ERROR = -1,
  GST_AV1_PARSE_ALIGN_NONE = 0,
  GST_AV1_PARSE_ALIGN_BYTE,
  GST_AV1_PARSE_ALIGN_OBU,
  GST_AV1_PARSE_ALIGN_FRAME,
  GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT,
  GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B,
} GstAV1ParseAligment;

/* Instance state of the av1parse element. */
struct _GstAV1Parse
{
  GstBaseParse parent;

  /* Stream properties that end up on the src caps
   * (see gst_av1_parse_update_src_caps()). width/height may also be
   * taken over from the sink caps in gst_av1_parse_set_sink_caps(). */
  gint width;
  gint height;
  /* -1 after a reset; together with mono_chrome they select the
   * "chroma-format" caps field */
  gint subsampling_x;
  gint subsampling_y;
  gboolean mono_chrome;
  /* 0 means unknown; exported as bit-depth-luma / bit-depth-chroma */
  guint8 bit_depth;
  /* owned string, released with g_free() on reset */
  gchar *colorimetry;
  GstAV1Profile profile;

  /* alignment/stream-format of the input, derived from the sink caps */
  GstAV1ParseAligment in_align;
  /* set when the input claims plain TU alignment
   * NOTE(review): the code consuming this flag is outside this chunk */
  gboolean detect_annex_b;
  /* alignment/stream-format of the output, chosen by
   * gst_av1_parse_negotiate() */
  GstAV1ParseAligment align;

  /* bitstream parser, created in start() and freed in stop()/reset */
  GstAV1Parser *parser;
  /* data waiting to be pushed downstream by gst_av1_parse_push_data() */
  GstAdapter *cache_out;
  guint last_parsed_offset;
  /* annex b conversion: OBUs of the frame currently being assembled */
  GstAdapter *frame_cache;
  guint highest_spatial_id;
  /* -1 when no shown frame has been recorded since the last reset */
  gint last_shown_frame_temporal_id;
  gint last_shown_frame_spatial_id;
  gboolean within_one_frame;
  /* when TRUE the src caps are recomputed on the next
   * gst_av1_parse_update_src_caps() call */
  gboolean update_caps;
  /* The following flags are turned into buffer flags (DISCONT, HEADER,
   * !DELTA_UNIT, !DECODE_ONLY) by gst_av1_parse_push_data(). */
  gboolean discont;
  gboolean header;
  gboolean keyframe;
  gboolean show_frame;
};

/* The sink pad accepts any AV1 caps; alignment and stream-format are
 * optional there. */
static GstStaticPadTemplate sinktemplate = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-av1"));

/* The src pad always produces parsed data in one of the listed
 * stream-format / alignment combinations ("byte" is not offered). */
static GstStaticPadTemplate srctemplate = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("video/x-av1, parsed = (boolean) true, "
        "stream-format=(string) { obu-stream, annexb }, "
        "alignment=(string) { obu, tu, frame }"));

/* GObject type boilerplate and registration of the "av1parse" element. */
#define parent_class gst_av1_parse_parent_class
G_DEFINE_TYPE (GstAV1Parse, gst_av1_parse, GST_TYPE_BASE_PARSE);
GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (av1parse, "av1parse", GST_RANK_SECONDARY,
    GST_TYPE_AV1_PARSE, videoparsers_element_init (plugin));

/* Strip the "parsed" field from every structure of @caps. When @all is
 * TRUE, "alignment" and "stream-format" are stripped as well. @caps is
 * modified in place. */
static void
remove_fields (GstCaps * caps, gboolean all)
{
  const guint count = gst_caps_get_size (caps);
  guint idx;

  for (idx = 0; idx < count; idx++) {
    GstStructure *st = gst_caps_get_structure (caps, idx);

    gst_structure_remove_field (st, "parsed");
    if (!all)
      continue;

    gst_structure_remove_field (st, "alignment");
    gst_structure_remove_field (st, "stream-format");
  }
}

/* Map an OBU type to a human readable name for log output. Types not
 * listed here yield "unknown". */
static const gchar *
_obu_name (GstAV1OBUType type)
{
  const gchar *name = "unknown";

  switch (type) {
    case GST_AV1_OBU_SEQUENCE_HEADER:
      name = "sequence header";
      break;
    case GST_AV1_OBU_TEMPORAL_DELIMITER:
      name = "temporal delimiter";
      break;
    case GST_AV1_OBU_FRAME_HEADER:
      name = "frame header";
      break;
    case GST_AV1_OBU_TILE_GROUP:
      name = "tile group";
      break;
    case GST_AV1_OBU_METADATA:
      name = "metadata";
      break;
    case GST_AV1_OBU_FRAME:
      name = "frame";
      break;
    case GST_AV1_OBU_REDUNDANT_FRAME_HEADER:
      name = "redundant frame header";
      break;
    case GST_AV1_OBU_TILE_LIST:
      name = "tile list";
      break;
    case GST_AV1_OBU_PADDING:
      name = "padding";
      break;
    default:
      break;
  }

  return name;
}

/* Decode one leb128 encoded value starting at @data.
 *
 * @data:     start of the encoded value. The bit reader is told that 8
 *            bytes are available; bytes are only consumed up to and
 *            including the first one without the continuation bit (0x80).
 *            NOTE(review): the caller is responsible for the value ending
 *            inside its buffer - confirm at the call sites.
 * @retval:   (out) GST_AV1_PARSER_OK on success,
 *            GST_AV1_PARSER_BITSTREAM_ERROR otherwise.
 * @comsumed: (out) number of bytes that were read; 0 if reading failed.
 *
 * Returns: the decoded value, or 0 on error.
 */
static guint32
_read_leb128 (guint8 * data, GstAV1ParserResult * retval, guint32 * comsumed)
{
  guint8 leb128_byte = 0;
  guint64 value = 0;
  gint i;
  GstBitReader br;
  guint32 cur_pos;

  /* Never leave the out parameter undefined, even on the error path. */
  *comsumed = 0;

  gst_bit_reader_init (&br, data, 8);

  cur_pos = gst_bit_reader_get_pos (&br);
  for (i = 0; i < 8; i++) {
    leb128_byte = 0;
    if (!gst_bit_reader_get_bits_uint8 (&br, &leb128_byte, 8)) {
      *retval = GST_AV1_PARSER_BITSTREAM_ERROR;
      return 0;
    }

    /* Widen before shifting: the shift reaches 49 bits, which is undefined
     * for a 32-bit signed int. */
    value |= ((guint64) (leb128_byte & 0x7f)) << (i * 7);
    if (!(leb128_byte & 0x80))
      break;
  }

  *comsumed = (gst_bit_reader_get_pos (&br) - cur_pos) / 8;
  /* check for bitstream conformance see chapter4.10.5: the value has to
   * be less than or equal to (1 << 32) - 1 */
  if (value <= G_MAXUINT32) {
    *retval = GST_AV1_PARSER_OK;
    return (guint32) value;
  } else {
    GST_WARNING ("invalid leb128");
    *retval = GST_AV1_PARSER_BITSTREAM_ERROR;
    return 0;
  }
}

/* Number of bytes needed to encode @value as leb128 (7 payload bits per
 * byte). Zero still needs one byte. */
static gsize
_leb_size_in_bytes (guint64 value)
{
  gsize n_bytes = 1;

  for (value >>= 7; value != 0; value >>= 7)
    n_bytes++;

  return n_bytes;
}

/* Encode @value as leb128 into @data and store the number of bytes
 * written in @len.
 *
 * Returns FALSE, leaving @data and @len untouched, when @value does not
 * fit into 32 bits or its encoding would exceed GST_AV1_MAX_LEB_128_SIZE
 * bytes; TRUE otherwise.
 */
static gboolean
_write_leb128 (guint8 * data, guint * len, guint64 value)
{
  const guint n_bytes = _leb_size_in_bytes (value);
  guint pos;

  if (value > G_MAXUINT32 || n_bytes > GST_AV1_MAX_LEB_128_SIZE)
    return FALSE;

  for (pos = 0; pos < n_bytes; pos++) {
    guint8 low7 = value & 0x7f;

    value >>= 7;
    /* The top bit flags that at least one more byte follows. */
    data[pos] = (value != 0) ? (low7 | 0x80) : low7;
  }

  *len = n_bytes;
  return TRUE;
}

/* GstBaseParse virtual method implementations. They are defined further
 * down and installed in gst_av1_parse_class_init(). */
static gboolean gst_av1_parse_start (GstBaseParse * parse);
static gboolean gst_av1_parse_stop (GstBaseParse * parse);
static GstFlowReturn gst_av1_parse_handle_frame (GstBaseParse * parse,
    GstBaseParseFrame * frame, gint * skipsize);
static gboolean gst_av1_parse_set_sink_caps (GstBaseParse * parse,
    GstCaps * caps);
static GstCaps *gst_av1_parse_get_sink_caps (GstBaseParse * parse,
    GstCaps * filter);

/* Forget everything remembered about previously seen data OBUs: we are
 * no longer inside a frame and no shown frame is on record (-1). */
static void
gst_av1_parse_reset_obu_data_state (GstAV1Parse * self)
{
  self->within_one_frame = FALSE;
  self->last_shown_frame_spatial_id = -1;
  self->last_shown_frame_temporal_id = -1;
}

/* Bring the element back to its pristine state: release the parser and
 * the colorimetry string, flush both adapters and reset all stream and
 * negotiation state. The adapters themselves stay alive. */
static void
gst_av1_parse_reset (GstAV1Parse * self)
{
  /* Owned objects and buffered data. */
  gst_adapter_clear (self->frame_cache);
  gst_adapter_clear (self->cache_out);
  g_clear_pointer (&self->parser, gst_av1_parser_free);
  g_clear_pointer (&self->colorimetry, g_free);

  /* Stream description. */
  self->width = self->height = 0;
  self->subsampling_x = self->subsampling_y = -1;
  self->mono_chrome = FALSE;
  self->bit_depth = 0;
  self->profile = GST_AV1_PROFILE_UNDEFINED;

  /* Negotiation results. */
  self->in_align = GST_AV1_PARSE_ALIGN_NONE;
  self->align = GST_AV1_PARSE_ALIGN_NONE;
  self->detect_annex_b = FALSE;

  /* Parsing progress and pending buffer flags; the first buffer after a
   * reset is always a discontinuity. */
  self->last_parsed_offset = 0;
  self->highest_spatial_id = 0;
  self->discont = TRUE;
  self->header = FALSE;
  self->keyframe = FALSE;
  self->show_frame = FALSE;
  gst_av1_parse_reset_obu_data_state (self);
}

/* Instance initializer: disables timestamp interpolation/inference in the
 * base class, sets the ACCEPT_INTERSECT and ACCEPT_TEMPLATE flags on the
 * sink pad and creates the two adapters used for caching. */
static void
gst_av1_parse_init (GstAV1Parse * self)
{
  GstBaseParse *base = GST_BASE_PARSE (self);
  GstPad *sinkpad = GST_BASE_PARSE_SINK_PAD (self);

  gst_base_parse_set_pts_interpolation (base, FALSE);
  gst_base_parse_set_infer_ts (base, FALSE);

  GST_PAD_SET_ACCEPT_INTERSECT (sinkpad);
  GST_PAD_SET_ACCEPT_TEMPLATE (sinkpad);

  self->frame_cache = gst_adapter_new ();
  self->cache_out = gst_adapter_new ();
}

/* GObject finalize: reset the element (which frees the parser and flushes
 * the adapters), drop the adapters and chain up. */
static void
gst_av1_parse_finalize (GObject * object)
{
  GstAV1Parse *av1parse = GST_AV1_PARSE (object);

  gst_av1_parse_reset (av1parse);

  g_object_unref (av1parse->frame_cache);
  g_object_unref (av1parse->cache_out);

  G_OBJECT_CLASS (parent_class)->finalize (object);
}

/* Class initializer: installs the GObject and GstBaseParse virtual
 * methods, registers both pad templates and the element metadata and
 * sets up the debug category. */
static void
gst_av1_parse_class_init (GstAV1ParseClass * klass)
{
  GstElementClass *elem_klass = GST_ELEMENT_CLASS (klass);
  GstBaseParseClass *base_klass = GST_BASE_PARSE_CLASS (klass);
  GObjectClass *obj_klass = G_OBJECT_CLASS (klass);

  obj_klass->finalize = gst_av1_parse_finalize;

  base_klass->start = GST_DEBUG_FUNCPTR (gst_av1_parse_start);
  base_klass->stop = GST_DEBUG_FUNCPTR (gst_av1_parse_stop);
  base_klass->handle_frame = GST_DEBUG_FUNCPTR (gst_av1_parse_handle_frame);
  base_klass->set_sink_caps = GST_DEBUG_FUNCPTR (gst_av1_parse_set_sink_caps);
  base_klass->get_sink_caps = GST_DEBUG_FUNCPTR (gst_av1_parse_get_sink_caps);

  gst_element_class_add_static_pad_template (elem_klass, &srctemplate);
  gst_element_class_add_static_pad_template (elem_klass, &sinktemplate);

  gst_element_class_set_static_metadata (elem_klass, "AV1 parser",
      "Codec/Parser/Converter/Video",
      "Parses AV1 streams", "He Junyan <junyan.he@intel.com>");

  GST_DEBUG_CATEGORY_INIT (av1_parse_debug, "av1parse", 0, "av1 parser");
}

/* GstBaseParse::start - reset all state and create a fresh bitstream
 * parser. Always returns TRUE. */
static gboolean
gst_av1_parse_start (GstBaseParse * parse)
{
  GstAV1Parse *av1parse = GST_AV1_PARSE (parse);

  GST_DEBUG_OBJECT (av1parse, "start");

  gst_av1_parse_reset (av1parse);
  av1parse->parser = gst_av1_parser_new ();

  /* We need at least the OBU header before we can do anything. */
  gst_base_parse_set_min_frame_size (parse, 1);

  return TRUE;
}

/* GstBaseParse::stop - release the bitstream parser. Always returns
 * TRUE. */
static gboolean
gst_av1_parse_stop (GstBaseParse * parse)
{
  GstAV1Parse *av1parse = GST_AV1_PARSE (parse);

  GST_DEBUG_OBJECT (av1parse, "stop");

  g_clear_pointer (&av1parse->parser, gst_av1_parser_free);
  return TRUE;
}

/* Caps string for @profile, or NULL when the profile has no caps
 * representation (e.g. GST_AV1_PROFILE_UNDEFINED). */
static const gchar *
gst_av1_parse_profile_to_string (GstAV1Profile profile)
{
  if (profile == GST_AV1_PROFILE_0)
    return "main";

  if (profile == GST_AV1_PROFILE_1)
    return "high";

  if (profile == GST_AV1_PROFILE_2)
    return "professional";

  return NULL;
}

static GstAV1Profile
gst_av1_parse_profile_from_string (const gchar * profile)
{
  if (!profile)
    return GST_AV1_PROFILE_UNDEFINED;

  if (g_strcmp0 (profile, "main") == 0)
    return GST_AV1_PROFILE_0;
  else if (g_strcmp0 (profile, "high") == 0)
    return GST_AV1_PROFILE_1;
  else if (g_strcmp0 (profile, "professional") == 0)
    return GST_AV1_PROFILE_2;

  return GST_AV1_PROFILE_UNDEFINED;
}

/* "stream-format" caps string that belongs to @align. Only the annex b
 * alignment maps to "annexb"; every other valid alignment is an
 * "obu-stream". Returns NULL (after a warning) for NONE/ERROR. */
static const gchar *
gst_av1_parse_alignment_to_steam_format_string (GstAV1ParseAligment align)
{
  switch (align) {
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B:
      return "annexb";
    case GST_AV1_PARSE_ALIGN_BYTE:
    case GST_AV1_PARSE_ALIGN_OBU:
    case GST_AV1_PARSE_ALIGN_FRAME:
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT:
      return "obu-stream";
    default:
      break;
  }

  GST_WARNING ("Unrecognized steam format");
  return NULL;
}

/* "alignment" caps string that belongs to @align. Both TU flavours map
 * to "tu". Returns NULL (after a warning) for NONE/ERROR. */
static const gchar *
gst_av1_parse_alignment_to_string (GstAV1ParseAligment align)
{
  const gchar *str = NULL;

  switch (align) {
    case GST_AV1_PARSE_ALIGN_BYTE:
      str = "byte";
      break;
    case GST_AV1_PARSE_ALIGN_OBU:
      str = "obu";
      break;
    case GST_AV1_PARSE_ALIGN_FRAME:
      str = "frame";
      break;
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT:
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B:
      str = "tu";
      break;
    default:
      GST_WARNING ("Unrecognized alignment");
      break;
  }

  return str;
}

static GstAV1ParseAligment
gst_av1_parse_alignment_from_string (const gchar * align,
    const gchar * stream_format)
{
  if (!align && !stream_format)
    return GST_AV1_PARSE_ALIGN_NONE;

  if (stream_format) {
    if (g_strcmp0 (stream_format, "annexb") == 0) {
      if (align && g_strcmp0 (align, "tu") != 0) {
        /* annex b stream must align to TU. */
        return GST_AV1_PARSE_ALIGN_ERROR;
      } else {
        return GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B;
      }
    } else if (g_strcmp0 (stream_format, "obu-stream") != 0) {
      /* unrecognized */
      return GST_AV1_PARSE_ALIGN_NONE;
    }

    /* stream-format is obu-stream, depends on align */
  }

  if (align) {
    if (g_strcmp0 (align, "byte") == 0) {
      return GST_AV1_PARSE_ALIGN_BYTE;
    } else if (g_strcmp0 (align, "obu") == 0) {
      return GST_AV1_PARSE_ALIGN_OBU;
    } else if (g_strcmp0 (align, "tu") == 0) {
      return GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT;
    } else if (g_strcmp0 (align, "frame") == 0) {
      return GST_AV1_PARSE_ALIGN_FRAME;
    } else {
      /* unrecognized */
      return GST_AV1_PARSE_ALIGN_NONE;
    }
  }

  return GST_AV1_PARSE_ALIGN_NONE;
}

static gboolean
gst_av1_parse_caps_has_alignment (GstCaps * caps, GstAV1ParseAligment alignment)
{
  guint i, j, caps_size;
  const gchar *cmp_align_str = NULL;
  const gchar *cmp_stream_str = NULL;

  GST_DEBUG ("Try to find alignment %d in caps: %" GST_PTR_FORMAT,
      alignment, caps);

  caps_size = gst_caps_get_size (caps);
  if (caps_size == 0)
    return FALSE;

  switch (alignment) {
    case GST_AV1_PARSE_ALIGN_BYTE:
      cmp_align_str = "byte";
      cmp_stream_str = "obu-stream";
      break;
    case GST_AV1_PARSE_ALIGN_OBU:
      cmp_align_str = "obu";
      cmp_stream_str = "obu-stream";
      break;
    case GST_AV1_PARSE_ALIGN_FRAME:
      cmp_align_str = "frame";
      cmp_stream_str = "obu-stream";
      break;
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT:
      cmp_align_str = "tu";
      cmp_stream_str = "obu-stream";
      break;
    case GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B:
      cmp_align_str = "tu";
      cmp_stream_str = "annexb";
      break;
    default:
      return FALSE;
  }

  for (i = 0; i < caps_size; i++) {
    GstStructure *s = gst_caps_get_structure (caps, i);
    const GValue *alignment_value = gst_structure_get_value (s, "alignment");
    const GValue *stream_value = gst_structure_get_value (s, "stream-format");

    if (!alignment_value || !stream_value)
      continue;

    if (G_VALUE_HOLDS_STRING (alignment_value)) {
      const gchar *align_str = g_value_get_string (alignment_value);

      if (g_strcmp0 (align_str, cmp_align_str) != 0)
        continue;
    } else if (GST_VALUE_HOLDS_LIST (alignment_value)) {
      guint num_values = gst_value_list_get_size (alignment_value);

      for (j = 0; j < num_values; j++) {
        const GValue *v = gst_value_list_get_value (alignment_value, j);
        const gchar *align_str = g_value_get_string (v);

        if (g_strcmp0 (align_str, cmp_align_str) == 0)
          break;
      }

      if (j == num_values)
        continue;
    }

    if (G_VALUE_HOLDS_STRING (stream_value)) {
      const gchar *stream_str = g_value_get_string (stream_value);

      if (g_strcmp0 (stream_str, cmp_stream_str) != 0)
        continue;
    } else if (GST_VALUE_HOLDS_LIST (stream_value)) {
      guint num_values = gst_value_list_get_size (stream_value);

      for (j = 0; j < num_values; j++) {
        const GValue *v = gst_value_list_get_value (stream_value, j);
        const gchar *stream_str = g_value_get_string (v);

        if (g_strcmp0 (stream_str, cmp_stream_str) == 0)
          break;
      }

      if (j == num_values)
        continue;
    }

    return TRUE;
  }

  return FALSE;
}

/* Derive the combined alignment/stream-format from the first structure of
 * @caps. NULL or empty caps give GST_AV1_PARSE_ALIGN_NONE. Fields that
 * are not plain strings (e.g. lists) count as missing. */
static GstAV1ParseAligment
gst_av1_parse_alignment_from_caps (GstCaps * caps)
{
  GstStructure *st;
  const gchar *alignment_str, *format_str;

  GST_DEBUG ("parsing caps: %" GST_PTR_FORMAT, caps);

  if (caps == NULL || gst_caps_get_size (caps) == 0)
    return GST_AV1_PARSE_ALIGN_NONE;

  st = gst_caps_get_structure (caps, 0);
  alignment_str = gst_structure_get_string (st, "alignment");
  format_str = gst_structure_get_string (st, "stream-format");

  return gst_av1_parse_alignment_from_string (alignment_str, format_str);
}

/* (Re)compute the src pad caps and set them if they changed.
 *
 * @caps: sink caps to start from, or NULL to use the current caps of the
 *        sink pad. Has to be passed from the first set_sink_caps call,
 *        when the sink pad has no current caps yet.
 *
 * Nothing happens unless self->update_caps is set or the src pad has no
 * caps yet. The result is a copy of the sink caps, extended/overridden
 * with what we know about the stream (size, framerate, chroma format, bit
 * depth, colorimetry, profile) plus the negotiated output stream-format
 * and alignment. self->align must already be negotiated.
 */
static void
gst_av1_parse_update_src_caps (GstAV1Parse * self, GstCaps * caps)
{
  GstCaps *sink_caps, *src_caps;
  GstCaps *final_caps = NULL;
  GstStructure *s = NULL;
  gint width = 0, height = 0;
  gint par_n = 0, par_d = 0;
  gint fps_n = 0, fps_d = 0;
  const gchar *profile = NULL;

  if (G_UNLIKELY (!gst_pad_has_current_caps (GST_BASE_PARSE_SRC_PAD (self))))
    self->update_caps = TRUE;

  if (!self->update_caps)
    return;

  /* if this is being called from the first _setcaps call, caps on the sinkpad
   * aren't set yet and so they need to be passed as an argument */
  if (caps)
    sink_caps = gst_caps_ref (caps);
  else
    sink_caps = gst_pad_get_current_caps (GST_BASE_PARSE_SINK_PAD (self));

  /* carry over input caps as much as possible; override with our own stuff */
  if (!sink_caps)
    sink_caps = gst_caps_new_empty_simple ("video/x-av1");
  else
    s = gst_caps_get_structure (sink_caps, 0);

  final_caps = gst_caps_copy (sink_caps);

  /* Upstream's size wins, but only when both fields really are plain
   * ints. The mere presence of the fields is not enough: for e.g. a range
   * the getter fails and would leave width/height without a value. */
  if (!(s && gst_structure_get_int (s, "width", &width) &&
          gst_structure_get_int (s, "height", &height))) {
    width = self->width;
    height = self->height;
  }

  if (width > 0 && height > 0)
    gst_caps_set_simple (final_caps, "width", G_TYPE_INT, width,
        "height", G_TYPE_INT, height, NULL);

  if (s && gst_structure_get_fraction (s, "pixel-aspect-ratio", &par_n, &par_d)) {
    if (par_n != 0 && par_d != 0) {
      gst_caps_set_simple (final_caps, "pixel-aspect-ratio",
          GST_TYPE_FRACTION, par_n, par_d, NULL);
    }
  }

  if (s && gst_structure_has_field (s, "framerate")) {
    gst_structure_get_fraction (s, "framerate", &fps_n, &fps_d);
  }

  if (fps_n > 0 && fps_d > 0) {
    gst_caps_set_simple (final_caps, "framerate",
        GST_TYPE_FRACTION, fps_n, fps_d, NULL);
    gst_base_parse_set_frame_rate (GST_BASE_PARSE (self), fps_n, fps_d, 0, 0);
  }

  /* When not RGB, the chroma format is needed. */
  if (self->colorimetry == NULL ||
      (g_strcmp0 (self->colorimetry, GST_VIDEO_COLORIMETRY_SRGB) != 0)) {
    const gchar *chroma_format = NULL;

    if (self->subsampling_x == 1 && self->subsampling_y == 1) {
      if (!self->mono_chrome) {
        chroma_format = "4:2:0";
      } else {
        chroma_format = "4:0:0";
      }
    } else if (self->subsampling_x == 1 && self->subsampling_y == 0) {
      chroma_format = "4:2:2";
    } else if (self->subsampling_x == 0 && self->subsampling_y == 0) {
      chroma_format = "4:4:4";
    }

    /* subsampling still unknown (-1): leave the field out */
    if (chroma_format)
      gst_caps_set_simple (final_caps,
          "chroma-format", G_TYPE_STRING, chroma_format, NULL);
  }

  if (self->bit_depth)
    gst_caps_set_simple (final_caps,
        "bit-depth-luma", G_TYPE_UINT, self->bit_depth,
        "bit-depth-chroma", G_TYPE_UINT, self->bit_depth, NULL);

  /* a colorimetry provided by upstream takes precedence over ours */
  if (self->colorimetry && (!s || !gst_structure_has_field (s, "colorimetry")))
    gst_caps_set_simple (final_caps,
        "colorimetry", G_TYPE_STRING, self->colorimetry, NULL);

  g_assert (self->align > GST_AV1_PARSE_ALIGN_NONE);
  gst_caps_set_simple (final_caps, "parsed", G_TYPE_BOOLEAN, TRUE,
      "stream-format", G_TYPE_STRING,
      gst_av1_parse_alignment_to_steam_format_string (self->align),
      "alignment", G_TYPE_STRING,
      gst_av1_parse_alignment_to_string (self->align), NULL);

  profile = gst_av1_parse_profile_to_string (self->profile);
  if (profile)
    gst_caps_set_simple (final_caps, "profile", G_TYPE_STRING, profile, NULL);

  src_caps = gst_pad_get_current_caps (GST_BASE_PARSE_SRC_PAD (self));

  /* avoid a needless caps event when nothing changed */
  if (!(src_caps && gst_caps_is_strictly_equal (src_caps, final_caps))) {
    GST_DEBUG_OBJECT (self, "Update src caps %" GST_PTR_FORMAT, final_caps);
    gst_pad_set_caps (GST_BASE_PARSE_SRC_PAD (self), final_caps);
  }

  gst_clear_caps (&src_caps);
  gst_caps_unref (final_caps);
  gst_caps_unref (sink_caps);

  self->update_caps = FALSE;
}

/* check downstream caps to configure format and alignment
 *
 * @in_caps: the (possibly completed) sink caps, may be NULL.
 *
 * Stores the chosen output alignment in self->align, trying in order:
 *  1. plain TU alignment, if the leading structure of the caps allowed
 *     on the src pad offers it,
 *  2. the alignment of the intersection of @in_caps with those caps,
 *  3. the alignment of those caps after fixation,
 *  4. plain TU alignment as the fallback.
 */
static void
gst_av1_parse_negotiate (GstAV1Parse * self, GstCaps * in_caps)
{
  GstCaps *caps;
  GstAV1ParseAligment align;

  caps = gst_pad_get_allowed_caps (GST_BASE_PARSE_SRC_PAD (self));
  GST_DEBUG_OBJECT (self, "allowed caps: %" GST_PTR_FORMAT, caps);

  /* concentrate on leading structure, since decodebin parser
   * capsfilter always includes parser template caps */
  if (caps) {
    caps = gst_caps_truncate (caps);
    GST_DEBUG_OBJECT (self, "negotiating with caps: %" GST_PTR_FORMAT, caps);
  }

  /* prefer TU as default. There may be no allowed caps at all, in which
   * case there is nothing to look at. */
  if (caps && gst_av1_parse_caps_has_alignment (caps,
          GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT)) {
    self->align = GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT;
    goto done;
  }

  /* Both upsteam and downstream support, best */
  if (in_caps && caps) {
    if (gst_caps_can_intersect (in_caps, caps)) {
      GstCaps *common_caps = NULL;

      common_caps = gst_caps_intersect (in_caps, caps);
      align = gst_av1_parse_alignment_from_caps (common_caps);
      gst_clear_caps (&common_caps);

      if (align != GST_AV1_PARSE_ALIGN_NONE
          && align != GST_AV1_PARSE_ALIGN_ERROR) {
        self->align = align;
        goto done;
      }
    }
  }

  /* Select first one of downstream support */
  if (caps && !gst_caps_is_empty (caps)) {
    /* fixate to avoid ambiguity with lists when parsing */
    caps = gst_caps_fixate (caps);
    align = gst_av1_parse_alignment_from_caps (caps);

    if (align != GST_AV1_PARSE_ALIGN_NONE && align != GST_AV1_PARSE_ALIGN_ERROR) {
      self->align = align;
      goto done;
    }
  }

  /* default */
  self->align = GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT;

done:
  GST_INFO_OBJECT (self, "selected alignment %s",
      gst_av1_parse_alignment_to_string (self->align));

  gst_clear_caps (&caps);
}

/* GstBaseParse::get_sink_caps - report what the sink pad can accept.
 *
 * Since this element converts between alignments and stream formats, the
 * peer of the src pad is queried without those fields, and they are
 * removed from its answer before intersecting with the sink template (and
 * with @filter, if any). Whatever the peer supports as is, conversion
 * fields included, is put in front of the result.
 *
 * Returns: (transfer full) the caps.
 */
static GstCaps *
gst_av1_parse_get_sink_caps (GstBaseParse * parse, GstCaps * filter)
{
  GstCaps *templ_caps, *peer_caps, *stripped, *result, *preferred;

  templ_caps = gst_pad_get_pad_template_caps (GST_BASE_PARSE_SINK_PAD (parse));

  if (filter == NULL) {
    peer_caps = gst_pad_peer_query_caps (GST_BASE_PARSE_SRC_PAD (parse), NULL);
  } else {
    GstCaps *peer_filter = gst_caps_copy (filter);

    /* Remove the fields we convert */
    remove_fields (peer_filter, TRUE);
    peer_caps = gst_pad_peer_query_caps (GST_BASE_PARSE_SRC_PAD (parse),
        peer_filter);
    gst_caps_unref (peer_filter);
  }

  /* What downstream can take once our conversions are taken into account */
  stripped = gst_caps_copy (peer_caps);
  remove_fields (stripped, TRUE);
  result = gst_caps_intersect_full (stripped, templ_caps,
      GST_CAPS_INTERSECT_FIRST);
  gst_caps_unref (stripped);
  gst_caps_unref (templ_caps);

  if (filter != NULL) {
    GstCaps *filtered = gst_caps_intersect_full (result, filter,
        GST_CAPS_INTERSECT_FIRST);

    gst_caps_unref (result);
    result = filtered;
  }

  /* Try if we can put the downstream caps first */
  stripped = gst_caps_copy (peer_caps);
  remove_fields (stripped, FALSE);
  preferred = gst_caps_intersect_full (stripped, result,
      GST_CAPS_INTERSECT_FIRST);
  gst_caps_unref (stripped);

  if (gst_caps_is_empty (preferred))
    gst_caps_unref (preferred);
  else
    result = gst_caps_merge (preferred, result);

  gst_caps_unref (peer_caps);

  return result;
}

/* GstBaseParse::set_sink_caps - take over what upstream tells us about
 * the stream, negotiate the output alignment with downstream and prepare
 * the bitstream parser for the input format.
 *
 * Returns FALSE if the caps combine annexb with a non-TU alignment.
 */
static gboolean
gst_av1_parse_set_sink_caps (GstBaseParse * parse, GstCaps * caps)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);
  GstStructure *st = gst_caps_get_structure (caps, 0);
  const gchar *profile_str;
  GstAV1ParseAligment input_align;
  GstCaps *work_caps;

  /* accept upstream info if provided */
  gst_structure_get_int (st, "width", &self->width);
  gst_structure_get_int (st, "height", &self->height);
  profile_str = gst_structure_get_string (st, "profile");
  if (profile_str != NULL)
    self->profile = gst_av1_parse_profile_from_string (profile_str);

  /* get upstream align from caps */
  input_align = gst_av1_parse_alignment_from_caps (caps);
  if (input_align == GST_AV1_PARSE_ALIGN_ERROR) {
    GST_ERROR_OBJECT (self, "Sink caps %" GST_PTR_FORMAT " set stream-format"
        " and alignment conflict.", caps);
    return FALSE;
  }

  work_caps = gst_caps_copy (caps);
  if (input_align == GST_AV1_PARSE_ALIGN_NONE) {
    /* upstream said nothing usable: assume a byte aligned obu-stream and
     * spell that out for the negotiation below */
    input_align = GST_AV1_PARSE_ALIGN_BYTE;
    gst_caps_set_simple (work_caps, "alignment", G_TYPE_STRING,
        gst_av1_parse_alignment_to_string (input_align),
        "stream-format", G_TYPE_STRING, "obu-stream", NULL);
  }

  /* negotiate with downstream, set output align */
  gst_av1_parse_negotiate (self, work_caps);

  self->update_caps = TRUE;

  /* if all of decoder's capability related values are provided
   * by upstream, update src caps now */
  if (self->width > 0 && self->height > 0 && profile_str != NULL)
    gst_av1_parse_update_src_caps (self, work_caps);

  gst_caps_unref (work_caps);

  self->in_align = input_align;
  if (input_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT)
    self->detect_annex_b = TRUE;

  /* the second argument tells the parser whether the input is annex b */
  gst_av1_parser_reset (self->parser,
      input_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B);

  return TRUE;
}

/* Hand everything collected in self->cache_out to the base class as one
 * output buffer.
 *
 * @frame:          the base parse frame being handled; its buffer provides
 *                  the metadata for the output buffer.
 * @finish_sz:      number of input bytes to report as consumed to
 *                  gst_base_parse_finish_frame().
 * @frame_finished: whether the data ends a frame; sets the MARKER flag.
 *                  Must be TRUE for annex b output.
 *
 * For annex b output, what is left in the frame cache is first wrapped
 * into a frame unit (frame_unit_size prefix) and the whole output then
 * into a temporal unit (temporal_unit_size prefix).
 *
 * The pending discont/header/keyframe state is translated into buffer
 * flags and cleared. If there is nothing cached, nothing is pushed and
 * GST_FLOW_OK is returned.
 */
static GstFlowReturn
gst_av1_parse_push_data (GstAV1Parse * self, GstBaseParseFrame * frame,
    guint32 finish_sz, gboolean frame_finished)
{
  gsize sz;
  GstBuffer *buf, *header_buf;
  GstBuffer *buffer = frame->buffer;
  GstFlowReturn ret = GST_FLOW_OK;

  /* Need to generate the final TU annex-b format */
  if (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) {
    guint8 size_data[GST_AV1_MAX_LEB_128_SIZE];
    guint size_len = 0;
    guint len;

    /* When push a TU, it must also be a frame end. */
    g_assert (frame_finished);

    /* Still some left in the frame cache */
    len = gst_adapter_available (self->frame_cache);
    if (len) {
      buf = gst_adapter_take_buffer (self->frame_cache, len);

      /* frame_unit_size */
      _write_leb128 (size_data, &size_len, len);
      header_buf = gst_buffer_new_memdup (size_data, size_len);
      GST_BUFFER_PTS (header_buf) = GST_BUFFER_PTS (buf);
      GST_BUFFER_DTS (header_buf) = GST_BUFFER_DTS (buf);
      GST_BUFFER_DURATION (header_buf) = GST_BUFFER_DURATION (buf);

      gst_adapter_push (self->cache_out, header_buf);
      gst_adapter_push (self->cache_out, buf);
    }

    len = gst_adapter_available (self->cache_out);
    if (len) {
      /* take everything out and put it back behind its size prefix */
      buf = gst_adapter_take_buffer (self->cache_out, len);

      /* temporal_unit_size */
      _write_leb128 (size_data, &size_len, len);
      header_buf = gst_buffer_new_memdup (size_data, size_len);
      GST_BUFFER_PTS (header_buf) = GST_BUFFER_PTS (buf);
      GST_BUFFER_DTS (header_buf) = GST_BUFFER_DTS (buf);
      GST_BUFFER_DURATION (header_buf) = GST_BUFFER_DURATION (buf);

      gst_adapter_push (self->cache_out, header_buf);
      gst_adapter_push (self->cache_out, buf);
    }
  }

  sz = gst_adapter_available (self->cache_out);
  if (sz) {
    buf = gst_adapter_take_buffer (self->cache_out, sz);
    gst_buffer_copy_into (buf, buffer, GST_BUFFER_COPY_METADATA, 0, -1);
    if (self->discont) {
      GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DISCONT);
      self->discont = FALSE;
    } else {
      GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_DISCONT);
    }

    if (self->header) {
      GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_HEADER);
      self->header = FALSE;
    } else {
      GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_HEADER);
    }

    if (self->keyframe) {
      GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_DELTA_UNIT);
      self->keyframe = FALSE;
    } else {
      GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DELTA_UNIT);
    }

    if (frame_finished) {
      GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_MARKER);
    } else {
      GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_MARKER);
    }

    /* Only with frame alignment does a buffer hold a single frame, so
     * only then can it be marked as not to be displayed. */
    if (self->align == GST_AV1_PARSE_ALIGN_FRAME) {
      if (!self->show_frame) {
        GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DECODE_ONLY);
      } else {
        GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_DECODE_ONLY);
      }
    } else {
      GST_BUFFER_FLAG_UNSET (buf, GST_BUFFER_FLAG_DECODE_ONLY);
    }

    /* out_buffer takes its own reference, ours can go */
    gst_buffer_replace (&frame->out_buffer, buf);
    gst_buffer_unref (buf);

    gst_av1_parse_update_src_caps (self, NULL);
    /* finish_sz is a guint32 and sz a gsize: use matching conversions */
    GST_LOG_OBJECT (self, "comsumed %u, output one buffer with size %"
        G_GSIZE_FORMAT, finish_sz, sz);
    ret = gst_base_parse_finish_frame (GST_BASE_PARSE (self), frame, finish_sz);
  }

  return ret;
}

/* Re-pack @obu in annex b layout and append it to the frame cache.
 *
 * The result is: obu_length (leb128), a rewritten OBU header with
 * obu_has_size_field cleared, then the OBU payload. Timestamps and
 * duration are copied from @buffer.
 *
 * When @frame_complete is TRUE, everything in the frame cache is moved to
 * self->cache_out, prefixed by its total size (frame_unit_size).
 */
static void
gst_av1_parse_convert_to_annexb (GstAV1Parse * self, GstBuffer * buffer,
    GstAV1OBU * obu, gboolean frame_complete)
{
  guint8 size_data[GST_AV1_MAX_LEB_128_SIZE];
  guint size_len = 0;
  GstBitWriter bs;
  GstBuffer *buf, *buf2;
  guint8 *data;
  guint len, len2, offset;

  /* obu_length */
  /* payload plus the header: one byte, or two with the extension */
  _write_leb128 (size_data, &size_len,
      obu->obu_size + 1 + obu->header.obu_extention_flag);

  gst_bit_writer_init_with_size (&bs, 128, FALSE);
  /* obu_forbidden_bit */
  gst_bit_writer_put_bits_uint8 (&bs, 0, 1);
  /* obu_type */
  gst_bit_writer_put_bits_uint8 (&bs, obu->obu_type, 4);
  /* obu_extension_flag */
  gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_extention_flag, 1);
  /* obu_has_size_field */
  gst_bit_writer_put_bits_uint8 (&bs, 0, 1);
  /* obu_reserved_1bit */
  gst_bit_writer_put_bits_uint8 (&bs, 0, 1);
  if (obu->header.obu_extention_flag) {
    /* temporal_id */
    gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_temporal_id, 3);
    /* spatial_id */
    gst_bit_writer_put_bits_uint8 (&bs, obu->header.obu_spatial_id, 2);
    /* extension_header_reserved_3bits */
    gst_bit_writer_put_bits_uint8 (&bs, 0, 3);
  }
  g_assert (GST_BIT_WRITER_BIT_SIZE (&bs) % 8 == 0);

  /* total size: length prefix + header + payload */
  len = size_len;
  len += GST_BIT_WRITER_BIT_SIZE (&bs) / 8;
  len += obu->obu_size;

  data = g_malloc (len);
  offset = 0;

  memcpy (data + offset, size_data, size_len);
  offset += size_len;

  memcpy (data + offset, GST_BIT_WRITER_DATA (&bs),
      GST_BIT_WRITER_BIT_SIZE (&bs) / 8);
  offset += GST_BIT_WRITER_BIT_SIZE (&bs) / 8;

  memcpy (data + offset, obu->data, obu->obu_size);

  /* The buf of this OBU */
  /* the buffer takes ownership of data */
  buf = gst_buffer_new_wrapped (data, len);
  GST_BUFFER_PTS (buf) = GST_BUFFER_PTS (buffer);
  GST_BUFFER_DTS (buf) = GST_BUFFER_DTS (buffer);
  GST_BUFFER_DURATION (buf) = GST_BUFFER_DURATION (buffer);

  gst_adapter_push (self->frame_cache, buf);

  if (frame_complete) {
    len2 = gst_adapter_available (self->frame_cache);
    buf2 = gst_adapter_take_buffer (self->frame_cache, len2);

    /* frame_unit_size */
    _write_leb128 (size_data, &size_len, len2);
    buf = gst_buffer_new_memdup (size_data, size_len);
    GST_BUFFER_PTS (buf) = GST_BUFFER_PTS (buf2);
    GST_BUFFER_DTS (buf) = GST_BUFFER_DTS (buf2);
    GST_BUFFER_DURATION (buf) = GST_BUFFER_DURATION (buf2);

    gst_adapter_push (self->cache_out, buf);
    gst_adapter_push (self->cache_out, buf2);
  }

  gst_bit_writer_reset (&bs);
}

/* Turn one OBU read from an annex-b input into a regular OBU that carries
   its own size: the obu_header (with obu_has_size_field set), then the
   obu_size in leb128, then the payload. The new buffer takes PTS, DTS and
   duration from @buffer and is appended to the output cache. */
static void
gst_av1_parse_convert_from_annexb (GstAV1Parse * self, GstBuffer * buffer,
    GstAV1OBU * obu)
{
  guint8 leb[GST_AV1_MAX_LEB_128_SIZE];
  guint leb_len = 0;
  guint hdr_len, total;
  guint8 *out, *cursor;
  GstBuffer *converted;
  GstBitWriter hdr;

  /* Write the obu_header, one byte plus the optional extension byte. */
  gst_bit_writer_init_with_size (&hdr, 128, FALSE);
  /* obu_forbidden_bit */
  gst_bit_writer_put_bits_uint8 (&hdr, 0, 1);
  /* obu_type */
  gst_bit_writer_put_bits_uint8 (&hdr, obu->obu_type, 4);
  /* obu_extension_flag */
  gst_bit_writer_put_bits_uint8 (&hdr, obu->header.obu_extention_flag, 1);
  /* obu_has_size_field, always present in the converted OBU */
  gst_bit_writer_put_bits_uint8 (&hdr, 1, 1);
  /* obu_reserved_1bit */
  gst_bit_writer_put_bits_uint8 (&hdr, 0, 1);
  if (obu->header.obu_extention_flag) {
    /* temporal_id */
    gst_bit_writer_put_bits_uint8 (&hdr, obu->header.obu_temporal_id, 3);
    /* spatial_id */
    gst_bit_writer_put_bits_uint8 (&hdr, obu->header.obu_spatial_id, 2);
    /* extension_header_reserved_3bits */
    gst_bit_writer_put_bits_uint8 (&hdr, 0, 3);
  }
  g_assert (GST_BIT_WRITER_BIT_SIZE (&hdr) % 8 == 0);
  hdr_len = GST_BIT_WRITER_BIT_SIZE (&hdr) / 8;

  /* The size field that follows the header. */
  _write_leb128 (leb, &leb_len, obu->obu_size);

  /* header byte(s) + size field + payload */
  total = obu->header.obu_extention_flag ? 2 : 1;
  total += leb_len;
  total += obu->obu_size;

  out = g_malloc (total);
  cursor = out;

  memcpy (cursor, GST_BIT_WRITER_DATA (&hdr), hdr_len);
  cursor += hdr_len;

  memcpy (cursor, leb, leb_len);
  cursor += leb_len;

  memcpy (cursor, obu->data, obu->obu_size);

  /* The header bytes are copied, the writer is no longer needed. */
  gst_bit_writer_reset (&hdr);

  /* The buffer takes ownership of the allocated memory. */
  converted = gst_buffer_new_wrapped (out, total);
  GST_BUFFER_DURATION (converted) = GST_BUFFER_DURATION (buffer);
  GST_BUFFER_DTS (converted) = GST_BUFFER_DTS (buffer);
  GST_BUFFER_PTS (converted) = GST_BUFFER_PTS (buffer);

  gst_adapter_push (self->cache_out, converted);
}

/* Queue one parsed OBU for output, rewriting it when the annex-b format is
   involved on the input or the output side.
   @data and @size describe the OBU bytes as found in the input; they are
   only used when the OBU is copied verbatim. */
static void
gst_av1_parse_cache_one_obu (GstAV1Parse * self, GstBuffer * buffer,
    GstAV1OBU * obu, guint8 * data, guint32 size, gboolean frame_complete)
{
  GstBuffer *copy;

  /* Annex-b output: the OBU always goes through the annex-b writer,
     no matter whether the input is annex-b too or not. */
  if (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) {
    gst_av1_parse_convert_to_annexb (self, buffer, obu, frame_complete);
    return;
  }

  /* Annex-b input with a different output format. */
  if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) {
    gst_av1_parse_convert_from_annexb (self, buffer, obu);
    return;
  }

  /* No annex-b on either side, a plain copy with the timestamps of the
     input buffer is enough. */
  copy = gst_buffer_new_memdup (data, size);
  GST_BUFFER_DURATION (copy) = GST_BUFFER_DURATION (buffer);
  GST_BUFFER_DTS (copy) = GST_BUFFER_DTS (buffer);
  GST_BUFFER_PTS (copy) = GST_BUFFER_PTS (buffer);

  gst_adapter_push (self->cache_out, copy);
}

/* Parse a sequence header OBU and mirror the stream properties it carries
   (size, colorimetry, chroma subsampling, mono chrome, bit depth and
   profile) into @self. update_caps is raised whenever one of them differs
   from the stored value. The highest spatial id is then refreshed from the
   operating point of the parser.
   Returns the result of the sequence header parsing. */
static GstAV1ParserResult
gst_av1_parse_handle_sequence_obu (GstAV1Parse * self, GstAV1OBU * obu)
{
  GstAV1SequenceHeaderOBU seq;
  GstAV1ParserResult status;
  guint spatial_mask;
  guint layer;

  status = gst_av1_parser_parse_sequence_header_obu (self->parser, obu, &seq);
  if (status != GST_AV1_PARSER_OK)
    return status;

  /* Frame size */
  if (self->width != seq.max_frame_width_minus_1 + 1) {
    self->update_caps = TRUE;
    self->width = seq.max_frame_width_minus_1 + 1;
  }
  if (self->height != seq.max_frame_height_minus_1 + 1) {
    self->update_caps = TRUE;
    self->height = seq.max_frame_height_minus_1 + 1;
  }

  /* Colorimetry, only when the stream describes it */
  if (seq.color_config.color_description_present_flag) {
    GstVideoColorimetry cinfo;
    gchar *desc;

    cinfo.primaries = gst_video_color_primaries_from_iso
        (seq.color_config.color_primaries);
    cinfo.transfer = gst_video_transfer_function_from_iso
        (seq.color_config.transfer_characteristics);
    cinfo.matrix = gst_video_color_matrix_from_iso
        (seq.color_config.matrix_coefficients);
    cinfo.range = seq.color_config.color_range ?
        GST_VIDEO_COLOR_RANGE_0_255 : GST_VIDEO_COLOR_RANGE_16_235;

    desc = gst_video_colorimetry_to_string (&cinfo);

    if (g_strcmp0 (desc, self->colorimetry) == 0) {
      /* Nothing changed, drop the temporary string. */
      g_free (desc);
    } else {
      /* The new string replaces the stored one. */
      g_free (self->colorimetry);
      self->colorimetry = desc;
      self->update_caps = TRUE;
    }
  }

  /* Chroma layout */
  if (self->subsampling_x != seq.color_config.subsampling_x) {
    self->update_caps = TRUE;
    self->subsampling_x = seq.color_config.subsampling_x;
  }
  if (self->subsampling_y != seq.color_config.subsampling_y) {
    self->update_caps = TRUE;
    self->subsampling_y = seq.color_config.subsampling_y;
  }
  if (self->mono_chrome != seq.color_config.mono_chrome) {
    self->update_caps = TRUE;
    self->mono_chrome = seq.color_config.mono_chrome;
  }

  /* Bit depth and profile */
  if (self->bit_depth != seq.bit_depth) {
    self->update_caps = TRUE;
    self->bit_depth = seq.bit_depth;
  }
  if (self->profile != seq.seq_profile) {
    self->update_caps = TRUE;
    self->profile = seq.seq_profile;
  }

  /* Bits 8..11 of operating_point_idc are read as one bit per spatial
     layer; the highest set bit gives the highest spatial id. The stored
     value is kept when no bit is set. */
  spatial_mask = (self->parser->state.operating_point_idc >> 8) & 0x0f;
  layer = 0;
  while (layer < (1 << GST_AV1_MAX_SPATIAL_LAYERS)) {
    if (spatial_mask & (1 << layer))
      self->highest_spatial_id = layer;
    layer++;
  }

  return GST_AV1_PARSER_OK;
}

/* Decide whether a shown frame or frame header OBU opens a new temporal
   unit. When it does, the OBU data state is reset (with a warning if the
   previous frame was left unfinished) and TRUE is returned. */
static gboolean
gst_av1_parse_frame_start_new_temporal_unit (GstAV1Parse * self,
    GstAV1OBU * obu)
{
  gboolean starts_tu;

  g_assert (obu->obu_type == GST_AV1_OBU_FRAME_HEADER
      || obu->obu_type == GST_AV1_OBU_FRAME);

  if (self->last_shown_frame_temporal_id >= 0 &&
      obu->header.obu_temporal_id != self->last_shown_frame_temporal_id) {
    /* 7.5.Ordering of OBUs: The value of temporal_id must be the same in
       all OBU extension headers that are contained in the same temporal
       unit. A different temporal_id hence means a different TU. */
    starts_tu = TRUE;
  } else if (self->within_one_frame) {
    /* Still in the middle of a frame, the checks below do not apply. */
    starts_tu = FALSE;
  } else if (self->last_shown_frame_temporal_id >= 0 &&
      self->parser->state.operating_point_idc == 0) {
    /* Without scalability each temporal unit holds only one shown frame,
       so another shown frame must belong to the next one. */
    starts_tu = TRUE;
  } else {
    /* A frame repeating the layer IDs of the last shown frame can not be
       part of the same temporal unit. */
    starts_tu =
        (obu->header.obu_temporal_id == self->last_shown_frame_temporal_id &&
        obu->header.obu_spatial_id == self->last_shown_frame_spatial_id);
  }

  if (!starts_tu)
    return FALSE;

  if (self->within_one_frame)
    GST_WARNING_OBJECT (self,
        "Start a new temporal unit with incompleted frame.");

  gst_av1_parse_reset_obu_data_state (self);

  return TRUE;
}

/* Parse one identified OBU and update the frame/TU tracking state of @self.
 *
 * @frame_complete is always written: it starts as FALSE and becomes TRUE
 * when the OBU ends a frame, i.e. a frame header with show_existing_frame
 * or the tile group holding the last tile.
 * @check_new_tu is optional. When it is not NULL, it is set to TRUE and the
 * function returns early (before the frame state is updated) if the OBU is
 * a temporal delimiter or a shown frame that starts a new temporal unit.
 * It is never set to FALSE here, the caller has to initialize it.
 *
 * Returns the parser result. A failure on a redundant frame header is
 * not reported: the OBU data state is reset and GST_AV1_PARSER_OK is
 * returned instead. */
static GstAV1ParserResult
gst_av1_parse_handle_one_obu (GstAV1Parse * self, GstAV1OBU * obu,
    gboolean * frame_complete, gboolean * check_new_tu)
{
  GstAV1ParserResult res = GST_AV1_PARSER_OK;
  GstAV1MetadataOBU metadata;
  GstAV1FrameHeaderOBU frame_header;
  GstAV1TileListOBU tile_list;
  GstAV1TileGroupOBU tile_group;
  GstAV1FrameOBU frame;

  *frame_complete = FALSE;

  /* Let the parser decode the OBU according to its type. Only the locals
     matching the OBU type are filled in. */
  switch (obu->obu_type) {
    case GST_AV1_OBU_TEMPORAL_DELIMITER:
      res = gst_av1_parser_parse_temporal_delimiter_obu (self->parser, obu);
      break;
    case GST_AV1_OBU_SEQUENCE_HEADER:
      res = gst_av1_parse_handle_sequence_obu (self, obu);
      break;
    case GST_AV1_OBU_REDUNDANT_FRAME_HEADER:
      res = gst_av1_parser_parse_frame_header_obu (self->parser, obu,
          &frame_header);
      break;
    case GST_AV1_OBU_FRAME_HEADER:
      res = gst_av1_parser_parse_frame_header_obu (self->parser, obu,
          &frame_header);
      break;
    case GST_AV1_OBU_FRAME:
      res = gst_av1_parser_parse_frame_obu (self->parser, obu, &frame);
      break;
    case GST_AV1_OBU_METADATA:
      res = gst_av1_parser_parse_metadata_obu (self->parser, obu, &metadata);
      break;
    case GST_AV1_OBU_TILE_GROUP:
      res =
          gst_av1_parser_parse_tile_group_obu (self->parser, obu, &tile_group);
      break;
    case GST_AV1_OBU_TILE_LIST:
      res = gst_av1_parser_parse_tile_list_obu (self->parser, obu, &tile_list);
      break;
    case GST_AV1_OBU_PADDING:
      /* Nothing to parse, res stays GST_AV1_PARSER_OK. */
      break;
    default:
      GST_WARNING_OBJECT (self, "an unrecognized obu type %d", obu->obu_type);
      res = GST_AV1_PARSER_BITSTREAM_ERROR;
      break;
  }

  GST_LOG_OBJECT (self, "parsing the obu %s, result is %d",
      _obu_name (obu->obu_type), res);
  if (res != GST_AV1_PARSER_OK)
    goto out;

  /* 7.5:
     All OBU extension headers that are contained in the same temporal
     unit and have the same spatial_id value must have the same temporal_id
     value.
     And
     OBUs with spatial level IDs (spatial_id) greater than 0 must
     appear within a temporal unit in increasing order of the spatial
     level ID values.
     Only the upper bound of spatial_id is verified here. */
  if (obu->header.obu_spatial_id > self->highest_spatial_id) {
    GST_WARNING_OBJECT (self,
        "spatial_id %d is bigger than highest_spatial_id %d",
        obu->header.obu_spatial_id, self->highest_spatial_id);
    res = GST_AV1_PARSER_BITSTREAM_ERROR;
    goto out;
  }

  /* If to check a new temporal starts, return early.
     In 7.5.Ordering of OBUs: Sequence header OBUs may appear in any order
     within a coded video sequence. So it is allowed to repeat the sequence
     header within one temporal unit, and sequence header does not definitely
     start a TU. We only check TD here. */
  if (obu->obu_type == GST_AV1_OBU_TEMPORAL_DELIMITER) {
    /* A TD resets the OBU data state whether or not the caller asked for
       the new TU check. */
    gst_av1_parse_reset_obu_data_state (self);

    if (check_new_tu) {
      *check_new_tu = TRUE;
      res = GST_AV1_PARSER_OK;
      goto out;
    }
  }

  /* Remember that a sequence header was seen. */
  if (obu->obu_type == GST_AV1_OBU_SEQUENCE_HEADER)
    self->header = TRUE;

  if (obu->obu_type == GST_AV1_OBU_FRAME_HEADER
      || obu->obu_type == GST_AV1_OBU_FRAME
      || obu->obu_type == GST_AV1_OBU_REDUNDANT_FRAME_HEADER) {
    /* A frame OBU embeds its frame header, the two header OBU types were
       parsed into the frame_header local. */
    GstAV1FrameHeaderOBU *fh = &frame_header;

    if (obu->obu_type == GST_AV1_OBU_FRAME)
      fh = &frame.frame_header;

    self->show_frame = fh->show_frame || fh->show_existing_frame;
    if (self->show_frame) {
      /* Check whether a new temporal starts, and return early.
         Redundant frame headers are excluded from this check. */
      if (check_new_tu && obu->obu_type != GST_AV1_OBU_REDUNDANT_FRAME_HEADER
          && gst_av1_parse_frame_start_new_temporal_unit (self, obu)) {
        *check_new_tu = TRUE;
        res = GST_AV1_PARSER_OK;
        goto out;
      }

      /* Recorded for the new TU check of the following shown frames. */
      self->last_shown_frame_temporal_id = obu->header.obu_temporal_id;
      self->last_shown_frame_spatial_id = obu->header.obu_spatial_id;
    }

    self->within_one_frame = TRUE;

    /* if a show_existing_frame case, only update key frame.
       otherwise, update all type of frame.  */
    if (!fh->show_existing_frame || fh->frame_type == GST_AV1_KEY_FRAME)
      res = gst_av1_parser_reference_frame_update (self->parser, fh);

    /* A failed update is only logged here, res is still handed to the
       code after the out label. */
    if (res != GST_AV1_PARSER_OK)
      GST_WARNING_OBJECT (self, "update frame get result %d", res);

    /* show_existing_frame has no tile data following, so the frame ends
       with this header. */
    if (fh->show_existing_frame) {
      *frame_complete = TRUE;
      self->within_one_frame = FALSE;
    }

    if (fh->frame_type == GST_AV1_KEY_FRAME)
      self->keyframe = TRUE;
  }

  if (obu->obu_type == GST_AV1_OBU_TILE_GROUP
      || obu->obu_type == GST_AV1_OBU_FRAME) {
    /* Same as for the frame header: a frame OBU embeds its tile group. */
    GstAV1TileGroupOBU *tg = &tile_group;

    self->within_one_frame = TRUE;

    if (obu->obu_type == GST_AV1_OBU_FRAME)
      tg = &frame.tile_group;

    /* The tile group containing the last tile closes the frame. */
    if (tg->tg_end == tg->num_tiles - 1) {
      *frame_complete = TRUE;
      self->within_one_frame = FALSE;
    }
  }

out:
  if (res != GST_AV1_PARSER_OK) {
    /* Some verbose OBU can be skip */
    if (obu->obu_type == GST_AV1_OBU_REDUNDANT_FRAME_HEADER) {
      GST_WARNING_OBJECT (self, "Ignore a verbose %s OBU parsing error",
          _obu_name (obu->obu_type));
      gst_av1_parse_reset_obu_data_state (self);
      res = GST_AV1_PARSER_OK;
    }
  }

  return res;
}

/* Handle the byte or OBU aligned input with OBU aligned output: identify
   exactly one OBU at the head of the input buffer, parse it and finish it
   as one output frame.

   @skipsize receives the number of bytes the base class should discard
   when the data can not be used; it is 0 when a frame was finished or
   when more data is needed.

   Returns GST_FLOW_ERROR when the buffer can not be mapped or the parser
   reports an unexpected error, GST_FLOW_OK for the recoverable parser
   results, and otherwise the result of gst_base_parse_finish_frame(). */
static GstFlowReturn
gst_av1_parse_handle_obu_to_obu (GstBaseParse * parse,
    GstBaseParseFrame * frame, gint * skipsize)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);
  GstMapInfo map_info;
  GstAV1OBU obu;
  GstFlowReturn ret = GST_FLOW_OK;
  GstAV1ParserResult res;
  GstBuffer *buffer = gst_buffer_ref (frame->buffer);
  guint32 consumed;
  gboolean frame_complete;

  if (!gst_buffer_map (buffer, &map_info, GST_MAP_READ)) {
    *skipsize = 0;
    GST_ERROR_OBJECT (parse, "Couldn't map incoming buffer");
    /* Release the reference taken above, this path does not go through
       the common cleanup at the out label. */
    gst_buffer_unref (buffer);
    return GST_FLOW_ERROR;
  }

  consumed = 0;
  frame_complete = FALSE;
  res = gst_av1_parser_identify_one_obu (self->parser, map_info.data,
      map_info.size, &obu, &consumed);
  if (res == GST_AV1_PARSER_OK)
    res = gst_av1_parse_handle_one_obu (self, &obu, &frame_complete, NULL);

  g_assert (consumed <= map_info.size);

  if (res == GST_AV1_PARSER_BITSTREAM_ERROR ||
      res == GST_AV1_PARSER_MISSING_OBU_REFERENCE) {
    /* Skip the broken OBU if its size is known, the whole buffer if not. */
    if (consumed) {
      *skipsize = consumed;
    } else {
      *skipsize = map_info.size;
    }
    GST_WARNING_OBJECT (parse, "Parse obu error, discard %d.", *skipsize);
    gst_av1_parse_reset_obu_data_state (self);
    ret = GST_FLOW_OK;
    goto out;
  } else if (res == GST_AV1_PARSER_NO_MORE_DATA) {
    /* For byte aligned input just wait for more data to arrive. */
    *skipsize = 0;

    if (self->in_align == GST_AV1_PARSE_ALIGN_OBU) {
      /* The buffer is already aligned to OBU, should not happen. */
      if (consumed) {
        *skipsize = consumed;
      } else {
        *skipsize = map_info.size;
      }
      GST_WARNING_OBJECT (parse, "Parse obu need more data, discard %d.",
          *skipsize);
      gst_av1_parse_reset_obu_data_state (self);
    }
    ret = GST_FLOW_OK;
    goto out;
  } else if (res == GST_AV1_PARSER_DROP) {
    GST_DEBUG_OBJECT (parse, "Drop %d data", consumed);
    *skipsize = consumed;
    gst_av1_parse_reset_obu_data_state (self);
    ret = GST_FLOW_OK;
    goto out;
  } else if (res != GST_AV1_PARSER_OK) {
    GST_ERROR_OBJECT (parse, "Parse obu get unexpect error %d", res);
    *skipsize = 0;
    ret = GST_FLOW_ERROR;
    goto out;
  }

  g_assert (consumed);

  gst_av1_parse_update_src_caps (self, NULL);
  /* The pending discont and header marks are applied once and cleared. */
  if (self->discont) {
    GST_BUFFER_FLAG_SET (buffer, GST_BUFFER_FLAG_DISCONT);
    self->discont = FALSE;
  }
  if (self->header) {
    GST_BUFFER_FLAG_SET (buffer, GST_BUFFER_FLAG_HEADER);
    self->header = FALSE;
  }
  /* happen to be a frame boundary */
  if (frame_complete)
    GST_BUFFER_FLAG_SET (buffer, GST_BUFFER_FLAG_MARKER);

  GST_LOG_OBJECT (self, "Output one buffer with size %d", consumed);
  ret = gst_base_parse_finish_frame (parse, frame, consumed);
  *skipsize = 0;

out:
  gst_buffer_unmap (buffer, &map_info);
  gst_buffer_unref (buffer);
  return ret;
}

/* Initialize @subframe as a child of @frame: it inherits the flags, the
   offset and the overhead of @frame and holds a new reference to @buffer. */
static void
gst_av1_parse_create_subframe (GstBaseParseFrame * frame,
    GstBaseParseFrame * subframe, GstBuffer * buffer)
{
  gst_base_parse_frame_init (subframe);

  /* The input buffer is only referenced. The base parse checks that
     pointer and replaces it with its out_buffer later. */
  subframe->buffer = gst_buffer_ref (buffer);

  subframe->overhead = frame->overhead;
  subframe->offset = frame->offset;
  subframe->flags = subframe->flags | frame->flags;
}

/* Handle the case where the output alignment is not bigger than the input
   alignment: the input buffer already holds everything needed, so all its
   OBUs are walked, cached for output and pushed whenever an output unit
   (OBU, frame, or the whole buffer) is complete.

   @skipsize is set to the buffer size when the whole input has to be
   discarded because of a parse error, and to 0 on an unexpected parser
   error; it is left untouched otherwise.

   Returns GST_FLOW_ERROR when the buffer can not be mapped or the parser
   reports an unexpected error, GST_FLOW_OK when the input is discarded,
   and otherwise the result of gst_av1_parse_push_data(). */
static GstFlowReturn
gst_av1_parse_handle_to_small_and_equal_align (GstBaseParse * parse,
    GstBaseParseFrame * frame, gint * skipsize)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);
  GstMapInfo map_info;
  GstAV1OBU obu;
  GstFlowReturn ret = GST_FLOW_OK;
  GstAV1ParserResult res = GST_AV1_PARSER_INVALID_OPERATION;
  GstBuffer *buffer = gst_buffer_ref (frame->buffer);
  guint32 offset, consumed_before_push, consumed;
  gboolean frame_complete;
  GstBaseParseFrame subframe;

  if (!gst_buffer_map (buffer, &map_info, GST_MAP_READ)) {
    GST_ERROR_OBJECT (parse, "Couldn't map incoming buffer");
    /* Release the reference taken above, this path does not go through
       the common cleanup at the out label. */
    gst_buffer_unref (buffer);
    return GST_FLOW_ERROR;
  }

  /* offset walks the input buffer, consumed_before_push counts the bytes
     cached since the last push. */
  consumed_before_push = 0;
  offset = 0;
  frame_complete = FALSE;
again:
  while (offset < map_info.size) {
    res = gst_av1_parser_identify_one_obu (self->parser,
        map_info.data + offset, map_info.size - offset, &obu, &consumed);
    if (res == GST_AV1_PARSER_OK)
      res = gst_av1_parse_handle_one_obu (self, &obu, &frame_complete, NULL);
    if (res != GST_AV1_PARSER_OK)
      break;

    if (obu.obu_type == GST_AV1_OBU_TEMPORAL_DELIMITER
        && consumed_before_push > 0) {
      GST_DEBUG_OBJECT (self, "Encounter TD inside one %s aligned"
          " buffer, should not happen normally.",
          gst_av1_parse_alignment_to_string (self->in_align));

      if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B)
        gst_av1_parser_reset_annex_b (self->parser);

      /* Not include this TD obu, it should belong to the next TU or frame,
         we push all the data we already got. */
      gst_av1_parse_create_subframe (frame, &subframe, buffer);
      ret = gst_av1_parse_push_data (self, &subframe,
          consumed_before_push, TRUE);
      if (ret != GST_FLOW_OK)
        goto out;

      /* Begin to find the next. offset is not advanced, so the TD is
         identified again in the next iteration and cached then. */
      frame_complete = FALSE;
      consumed_before_push = 0;
      continue;
    }

    gst_av1_parse_cache_one_obu (self, buffer, &obu,
        map_info.data + offset, consumed, frame_complete);

    offset += consumed;
    consumed_before_push += consumed;

    /* OBU output pushes every OBU, frame output pushes on frame edges. */
    if ((self->align == GST_AV1_PARSE_ALIGN_OBU) ||
        (self->align == GST_AV1_PARSE_ALIGN_FRAME && frame_complete)) {
      gst_av1_parse_create_subframe (frame, &subframe, buffer);
      ret = gst_av1_parse_push_data (self, &subframe,
          consumed_before_push, frame_complete);
      if (ret != GST_FLOW_OK)
        goto out;

      /* Begin to find the next. */
      frame_complete = FALSE;
      consumed_before_push = 0;
      continue;
    }
  }

  if (res == GST_AV1_PARSER_BITSTREAM_ERROR ||
      res == GST_AV1_PARSER_MISSING_OBU_REFERENCE) {
    /* Discard the whole frame */
    *skipsize = map_info.size;
    GST_WARNING_OBJECT (parse, "Parse obu error, discard %d", *skipsize);
    if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B)
      gst_av1_parser_reset_annex_b (self->parser);
    gst_av1_parse_reset_obu_data_state (self);
    ret = GST_FLOW_OK;
    goto out;
  } else if (res == GST_AV1_PARSER_NO_MORE_DATA) {
    /* Discard the whole buffer */
    *skipsize = map_info.size;
    GST_WARNING_OBJECT (parse, "Parse obu need more data, discard %d.",
        *skipsize);
    if (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B)
      gst_av1_parser_reset_annex_b (self->parser);

    gst_av1_parse_reset_obu_data_state (self);
    ret = GST_FLOW_OK;
    goto out;
  } else if (res == GST_AV1_PARSER_DROP) {
    /* Step over the dropped OBU and resume the walk behind it. */
    GST_DEBUG_OBJECT (parse, "Drop %d data", consumed);
    offset += consumed;
    gst_av1_parse_reset_obu_data_state (self);
    res = GST_AV1_PARSER_OK;
    goto again;
  } else if (res != GST_AV1_PARSER_OK) {
    GST_ERROR_OBJECT (parse, "Parse obu get unexpect error %d", res);
    *skipsize = 0;
    ret = GST_FLOW_ERROR;
    goto out;
  }

  /* If the total buffer exhausted but frame is not complete, we just
     push the left data and consider it as a frame. */
  if (consumed_before_push > 0 && !frame_complete
      && self->align == GST_AV1_PARSE_ALIGN_FRAME) {
    g_assert (offset >= map_info.size);
    /* Warning and still consider the frame is complete */
    GST_WARNING_OBJECT (self, "Exhaust the buffer but still incomplete frame,"
        " should not happend in %s alignment",
        gst_av1_parse_alignment_to_string (self->in_align));
  }

  /* The final push goes through the original frame, not a subframe. */
  ret = gst_av1_parse_push_data (self, frame, consumed_before_push, TRUE);

out:
  gst_buffer_unmap (buffer, &map_info);
  gst_buffer_unref (buffer);
  return ret;
}

/* Handle the case where the output alignment is bigger than the input
   alignment: OBUs are collected into the caches of @self, possibly over
   several calls (last_parsed_offset remembers how far the input was already
   parsed), until a complete frame or temporal unit is available.

   @skipsize is set to the buffer size when the whole input is discarded
   and to 0 in all the other cases.

   Returns GST_FLOW_ERROR when the buffer can not be mapped or the parser
   reports an unexpected error, the result of gst_av1_parse_push_data()
   when data is pushed, and GST_FLOW_OK otherwise. */
static GstFlowReturn
gst_av1_parse_handle_to_big_align (GstBaseParse * parse,
    GstBaseParseFrame * frame, gint * skipsize)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);
  GstMapInfo map_info;
  GstAV1OBU obu;
  GstFlowReturn ret = GST_FLOW_OK;
  GstAV1ParserResult res = GST_AV1_PARSER_OK;
  GstBuffer *buffer = gst_buffer_ref (frame->buffer);
  guint32 consumed;
  gboolean frame_complete;
  gboolean check_new_tu;
  gboolean complete;

  g_assert (self->in_align <= GST_AV1_PARSE_ALIGN_FRAME);

  if (!gst_buffer_map (buffer, &map_info, GST_MAP_READ)) {
    *skipsize = 0;
    GST_ERROR_OBJECT (parse, "Couldn't map incoming buffer");
    /* Release the reference taken above, this path does not go through
       the common cleanup at the out label. */
    gst_buffer_unref (buffer);
    return GST_FLOW_ERROR;
  }

  complete = FALSE;
again:
  while (self->last_parsed_offset < map_info.size) {
    res = gst_av1_parser_identify_one_obu (self->parser,
        map_info.data + self->last_parsed_offset,
        map_info.size - self->last_parsed_offset, &obu, &consumed);
    if (res != GST_AV1_PARSER_OK)
      break;

    check_new_tu = FALSE;
    res = gst_av1_parse_handle_one_obu (self, &obu, &frame_complete,
        &check_new_tu);
    if (res != GST_AV1_PARSER_OK)
      break;

    /* A new TU starts with this OBU. If something is already cached, it
       forms a complete unit; the OBU itself is left for the next round
       because last_parsed_offset is not advanced. */
    if (check_new_tu && (gst_adapter_available (self->cache_out) ||
            gst_adapter_available (self->frame_cache))) {
      complete = TRUE;
      break;
    }

    if (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT ||
        self->align == GST_AV1_PARSE_ALIGN_FRAME) {
      GstBuffer *buf = gst_buffer_copy_region (buffer, GST_BUFFER_COPY_ALL,
          self->last_parsed_offset, consumed);
      gst_adapter_push (self->cache_out, buf);
    } else if (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) {
      gst_av1_parse_convert_to_annexb (self, buffer, &obu, frame_complete);
    } else {
      g_assert_not_reached ();
    }
    self->last_parsed_offset += consumed;

    /* For frame output the frame edge closes the unit. */
    if (self->align == GST_AV1_PARSE_ALIGN_FRAME && frame_complete)
      complete = TRUE;

    if (complete)
      break;
  }

  /* Finish a complete frame anyway */
  if (complete || GST_BASE_PARSE_DRAINING (parse)) {
    *skipsize = 0;

    /* push the left anyway if no error */
    if (res == GST_AV1_PARSER_OK)
      ret = gst_av1_parse_push_data (self, frame,
          self->last_parsed_offset, TRUE);

    self->last_parsed_offset = 0;

    goto out;
  }

  if (res == GST_AV1_PARSER_BITSTREAM_ERROR ||
      res == GST_AV1_PARSER_MISSING_OBU_REFERENCE) {
    *skipsize = map_info.size;
    GST_WARNING_OBJECT (parse, "Parse obu error, discard whole buffer %d.",
        *skipsize);
    /* The adapter will be cleared in next loop because of
       GST_BASE_PARSE_FRAME_FLAG_NEW_FRAME flag */
    gst_av1_parse_reset_obu_data_state (self);
    ret = GST_FLOW_OK;
  } else if (res == GST_AV1_PARSER_NO_MORE_DATA) {
    /* For byte aligned input just wait for more data to arrive. */
    *skipsize = 0;

    if (self->in_align >= GST_AV1_PARSE_ALIGN_OBU) {
      /* The buffer is already aligned to OBU, should not happen.
         The adapter will be cleared in next loop because of
         GST_BASE_PARSE_FRAME_FLAG_NEW_FRAME flag */
      *skipsize = map_info.size;
      gst_av1_parse_reset_obu_data_state (self);
      GST_WARNING_OBJECT (parse,
          "Parse obu need more data, discard whole buffer %d.", *skipsize);
    }
    ret = GST_FLOW_OK;
  } else if (res == GST_AV1_PARSER_DROP) {
    /* Step over the dropped OBU and resume the walk behind it. */
    GST_DEBUG_OBJECT (parse, "Drop %d data", consumed);
    self->last_parsed_offset += consumed;
    gst_av1_parse_reset_obu_data_state (self);
    res = GST_AV1_PARSER_OK;
    goto again;
  } else if (res == GST_AV1_PARSER_OK) {
    /* Everything is correct but still not get a frame or tu,
       need more data */
    GST_DEBUG_OBJECT (parse, "Need more data");
    *skipsize = 0;
    ret = GST_FLOW_OK;
  } else {
    GST_ERROR_OBJECT (parse, "Parse obu get unexpect error %d", res);
    *skipsize = 0;
    ret = GST_FLOW_ERROR;
  }

out:
  gst_buffer_unmap (buffer, &map_info);
  gst_buffer_unref (buffer);
  return ret;
}

/* Try to recognize whether the TU aligned input is in annex-b format.

   The buffer is first parsed as a regular OBU stream; reaching a sequence
   header or a frame (header) OBU settles that the input is not annex-b.
   Otherwise the buffer is checked for the annex-b layout, where a leb128
   coded TU size at the start must match the buffer size. On a match,
   in_align is switched to the annex-b alignment and the parser is reset
   to annex-b mode.

   Returns TRUE if we decide (detect_annex_b is cleared then), FALSE if we
   can not decide or encounter some error. */
static gboolean
gst_av1_parse_detect_stream_format (GstBaseParse * parse,
    GstBaseParseFrame * frame)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);
  GstMapInfo map_info;
  GstAV1OBU obu;
  GstAV1ParserResult res = GST_AV1_PARSER_INVALID_OPERATION;
  GstBuffer *buffer = gst_buffer_ref (frame->buffer);
  gboolean got_seq, got_frame;
  gboolean frame_complete;
  guint32 consumed;
  guint32 total_consumed;
  guint32 tu_sz;
  gboolean ret = FALSE;

  g_assert (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT);
  g_assert (self->detect_annex_b == TRUE);

  if (!gst_buffer_map (buffer, &map_info, GST_MAP_READ)) {
    GST_ERROR_OBJECT (parse, "Couldn't map incoming buffer");
    /* Release the reference taken above, this path does not go through
       the common cleanup at the out label. */
    gst_buffer_unref (buffer);
    return FALSE;
  }

  /* First attempt: read the data with the parser in non annex-b mode. */
  gst_av1_parser_reset (self->parser, FALSE);

  got_seq = FALSE;
  got_frame = FALSE;
  total_consumed = 0;

again:
  while (total_consumed < map_info.size) {
    res = gst_av1_parser_identify_one_obu (self->parser,
        map_info.data + total_consumed, map_info.size - total_consumed,
        &obu, &consumed);
    if (res == GST_AV1_PARSER_OK) {
      total_consumed += consumed;
      res = gst_av1_parse_handle_one_obu (self, &obu, &frame_complete, NULL);
    }

    if (res != GST_AV1_PARSER_OK)
      break;

    if (obu.obu_type == GST_AV1_OBU_SEQUENCE_HEADER)
      got_seq = TRUE;

    if (obu.obu_type == GST_AV1_OBU_REDUNDANT_FRAME_HEADER ||
        obu.obu_type == GST_AV1_OBU_FRAME ||
        obu.obu_type == GST_AV1_OBU_FRAME_HEADER)
      got_frame = TRUE;

    if (got_seq || got_frame)
      break;
  }

  /* The parser state changed by the detection is thrown away. */
  gst_av1_parser_reset (self->parser, FALSE);

  /* If succeed recognize seq or frame, it's done.
     otherwise, just need to get more data. */
  if (got_seq || got_frame) {
    ret = TRUE;
    self->detect_annex_b = FALSE;
    goto out;
  }

  if (res == GST_AV1_PARSER_DROP) {
    /* Step over the dropped OBU and resume the walk behind it. */
    total_consumed += consumed;
    res = GST_AV1_PARSER_OK;
    gst_av1_parse_reset_obu_data_state (self);
    goto again;
  }

  /* Try the annex b format. The buffer should contain the whole TU,
     and the buffer start with the TU size in leb128() format. */
  if (map_info.size < 8) {
    /* Too small. */
    goto out;
  }

  tu_sz = _read_leb128 (map_info.data, &res, &consumed);
  if (tu_sz == 0 || res != GST_AV1_PARSER_OK) {
    /* error to get the TU size, should not be annex b. */
    goto out;
  }

  /* The size field plus the TU it announces must cover the buffer. */
  if (tu_sz + consumed != map_info.size) {
    GST_DEBUG_OBJECT (self, "Buffer size %" G_GSSIZE_FORMAT ", TU size %d,"
        " do not match.", map_info.size, tu_sz);
    goto out;
  }

  GST_INFO_OBJECT (self, "Detect the annex-b format");
  self->in_align = GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B;
  self->detect_annex_b = FALSE;
  gst_av1_parser_reset (self->parser, TRUE);
  ret = TRUE;

out:
  /* The OBU data state touched while probing is reset on every exit. */
  gst_av1_parse_reset_obu_data_state (self);
  gst_buffer_unmap (buffer, &map_info);
  gst_buffer_unref (buffer);
  return ret;
}

/* The handle_frame entry point: refresh the per-frame state, make sure the
   input and output alignments are known and hand the data to the handler
   matching the pair of alignments.
   Returns GST_FLOW_ERROR when the upstream caps are contradictory,
   GST_FLOW_OK (with @skipsize covering the whole buffer) when the stream
   format of a TU can not be detected, and the result of the chosen
   handler otherwise. */
static GstFlowReturn
gst_av1_parse_handle_frame (GstBaseParse * parse,
    GstBaseParseFrame * frame, gint * skipsize)
{
  GstAV1Parse *self = GST_AV1_PARSE (parse);
  guint in_level, out_level;

  /* Track the discont state of the input. A discont at the beginning of a
     new frame invalidates the OBU data state as well. */
  self->discont =
      GST_BUFFER_FLAG_IS_SET (frame->buffer, GST_BUFFER_FLAG_DISCONT);
  if (self->discont && (frame->flags & GST_BASE_PARSE_FRAME_FLAG_NEW_FRAME))
    gst_av1_parse_reset_obu_data_state (self);

  GST_LOG_OBJECT (self, "Input frame size %" G_GSSIZE_FORMAT,
      gst_buffer_get_size (frame->buffer));

  if (!(frame->flags & GST_BASE_PARSE_FRAME_FLAG_NEW_FRAME)) {
    GST_LOG_OBJECT (self, "resuming frame parsing");
  } else {
    /* A new frame must not see anything cached for the previous one. */
    GST_LOG_OBJECT (self, "parsing new frame");
    gst_adapter_clear (self->cache_out);
    gst_adapter_clear (self->frame_cache);
    self->last_parsed_offset = 0;
    self->show_frame = FALSE;
    self->keyframe = FALSE;
    self->header = FALSE;
  }

  /* Without caps on the sink pad (pull mode) the input alignment is still
     unknown, ask the upstream peer for it. */
  if (self->in_align == GST_AV1_PARSE_ALIGN_NONE) {
    GstCaps *peer_caps =
        gst_pad_peer_query_caps (GST_BASE_PARSE_SINK_PAD (self), NULL);

    if (peer_caps) {
      if (!(gst_caps_is_empty (peer_caps) || gst_caps_is_any (peer_caps))) {
        GstAV1ParseAligment peer_align;

        GST_LOG_OBJECT (self, "upstream caps: %" GST_PTR_FORMAT, peer_caps);

        /* Lists would be ambiguous when read, so fixate first. */
        peer_caps = gst_caps_fixate (peer_caps);
        peer_align = gst_av1_parse_alignment_from_caps (peer_caps);
        if (peer_align == GST_AV1_PARSE_ALIGN_ERROR) {
          GST_ERROR_OBJECT (self, "upstream caps %" GST_PTR_FORMAT
              " set stream-format and alignment conflict.", peer_caps);

          gst_caps_unref (peer_caps);
          return GST_FLOW_ERROR;
        }

        self->in_align = peer_align;
      }

      gst_caps_unref (peer_caps);

      gst_av1_parser_reset (self->parser,
          self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B);
    }

    if (self->in_align == GST_AV1_PARSE_ALIGN_NONE) {
      /* The query did not help, fall back to the byte alignment. */
      self->in_align = GST_AV1_PARSE_ALIGN_BYTE;
      GST_DEBUG_OBJECT (self, "alignment set to default %s",
          gst_av1_parse_alignment_to_string (GST_AV1_PARSE_ALIGN_BYTE));
    } else {
      GST_LOG_OBJECT (self, "Query the upstream get the alignment %s",
          gst_av1_parse_alignment_to_string (self->in_align));
    }
  }

  /* A TU aligned input may in fact be annex-b; this is probed for as long
     as detect_annex_b is set. */
  if (self->detect_annex_b
      && self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT) {
    if (!gst_av1_parse_detect_stream_format (parse, frame)) {
      /* The input is TU aligned already, so the whole problematic TU is
         skipped and the next one gets checked. */
      *skipsize = gst_buffer_get_size (frame->buffer);
      GST_WARNING_OBJECT (self, "Fail to detect the stream format for TU,"
          " skip the whole TU %d", *skipsize);
      return GST_FLOW_OK;
    }

    GST_INFO_OBJECT (self, "Input alignment %s",
        gst_av1_parse_alignment_to_string (self->in_align));
  }

  /* The output alignment may be unset too (pull mode, no caps). */
  if (self->align == GST_AV1_PARSE_ALIGN_NONE)
    gst_av1_parse_negotiate (self, NULL);

  /* For the comparison of the two sides annex-b counts as TU aligned. */
  in_level = (self->in_align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) ?
      GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT : self->in_align;
  out_level = (self->align == GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT_ANNEX_B) ?
      GST_AV1_PARSE_ALIGN_TEMPORAL_UNIT : self->align;

  if (self->in_align <= GST_AV1_PARSE_ALIGN_OBU
      && self->align == GST_AV1_PARSE_ALIGN_OBU)
    return gst_av1_parse_handle_obu_to_obu (parse, frame, skipsize);

  if (in_level < out_level)
    return gst_av1_parse_handle_to_big_align (parse, frame, skipsize);

  return gst_av1_parse_handle_to_small_and_equal_align (parse,
      frame, skipsize);
}