vp9parse: Correct the pts for frames inside a super frame.

When the alignment is "FRAME" and the parse is likely connecting to a decoder, the current PTS setting for VP9 frames inside a super frame is not very correct. For example, the super frame may begin with non-displayed frames and end with a displayed frame. The current way will assign the PTS to the first non-displayed frame, which is a decode-only frame and the PTS will be discarded in the video decoder. While the last displayed frame has invalid PTS, and so the video decoder needs to guess its PTS based on the frame rate and previous frame's PTS. This is not a decent and robust way. And more important, when the previous frames provide DTS, the video decoder will also guess the PTS based on the previous frames' DTS and trigger the warning like: gstvideodecoder.c:3147:gst_video_decoder_prepare_finish_frame: \ <vavp9dec0> decreasing timestame It sets the reordered_output and makes the decoder in free run mode. We should correct the PTS for a super frame, let the non-displayed frames have no PTS while set the correct PTS to the displayed one. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/3155>
2025-09-03 02:33:53 +00:00 · 2022-10-11 16:17:26 +08:00 · 2022-10-11 16:17:26 +08:00 · eac9c33cc1
commit eac9c33cc1
parent 1e70525bc9
1 changed files with 54 additions and 0 deletions
--- a/subprojects/gst-plugins-bad/gst/videoparsers/gstvp9parse.c
+++ b/subprojects/gst-plugins-bad/gst/videoparsers/gstvp9parse.c
@ -61,6 +61,10 @@ struct _GstVp9Parse

  /* per frame status */
  gboolean discont;
+
+  GstClockTime super_frame_pts;
+  GstClockTime super_frame_dts;
+  GstClockTime super_frame_duration;
 };

 static GstStaticPadTemplate sinktemplate = GST_STATIC_PAD_TEMPLATE ("sink",
@ -90,6 +94,8 @@ static GstCaps *gst_vp9_parse_get_sink_caps (GstBaseParse * parse,
 static void gst_vp9_parse_update_src_caps (GstVp9Parse * self, GstCaps * caps);
 static GstFlowReturn gst_vp9_parse_parse_frame (GstVp9Parse * self,
    GstBaseParseFrame * frame, GstVp9FrameHdr * frame_hdr);
+static GstFlowReturn gst_vp9_parse_pre_push_frame (GstBaseParse * parse,
+    GstBaseParseFrame * frame);

 static void
 gst_vp9_parse_class_init (GstVp9ParseClass * klass)
@ -100,6 +106,8 @@ gst_vp9_parse_class_init (GstVp9ParseClass * klass)
  parse_class->start = GST_DEBUG_FUNCPTR (gst_vp9_parse_start);
  parse_class->stop = GST_DEBUG_FUNCPTR (gst_vp9_parse_stop);
  parse_class->handle_frame = GST_DEBUG_FUNCPTR (gst_vp9_parse_handle_frame);
+  parse_class->pre_push_frame =
+      GST_DEBUG_FUNCPTR (gst_vp9_parse_pre_push_frame);
  parse_class->set_sink_caps = GST_DEBUG_FUNCPTR (gst_vp9_parse_set_sink_caps);
  parse_class->get_sink_caps = GST_DEBUG_FUNCPTR (gst_vp9_parse_get_sink_caps);

@ -123,6 +131,14 @@ gst_vp9_parse_init (GstVp9Parse * self)
  GST_PAD_SET_ACCEPT_TEMPLATE (GST_BASE_PARSE_SINK_PAD (self));
 }

+static void
+gst_vp9_parse_reset_super_frame (GstVp9Parse * self)
+{
+  self->super_frame_pts = GST_CLOCK_TIME_NONE;
+  self->super_frame_dts = GST_CLOCK_TIME_NONE;
+  self->super_frame_duration = GST_CLOCK_TIME_NONE;
+}
+
 static void
 gst_vp9_parse_reset (GstVp9Parse * self)
 {
@ -135,6 +151,7 @@ gst_vp9_parse_reset (GstVp9Parse * self)
  self->profile = GST_VP9_PROFILE_UNDEFINED;
  self->bit_depth = (GstVp9BitDepth) 0;
  self->codec_alpha = FALSE;
+  gst_vp9_parse_reset_super_frame (self);
 }

 static gboolean
@ -414,6 +431,36 @@ gst_vp9_parse_process_frame (GstVp9Parse * self, GstVp9FrameHdr * frame_hdr)
  return TRUE;
 }

+static GstFlowReturn
+gst_vp9_parse_pre_push_frame (GstBaseParse * parse, GstBaseParseFrame * frame)
+{
+  GstVp9Parse *self = GST_VP9_PARSE (parse);
+
+  frame->flags |= GST_BASE_PARSE_FRAME_FLAG_CLIP;
+
+  if (!frame->buffer)
+    return GST_FLOW_OK;
+
+  /* The super frame may contain more than one frames inside its buffer.
+     When splitting a super frame into frames, the base parse class only
+     assign the PTS to the first frame and leave the others' PTS invalid.
+     But in fact, all decode only frames should have invalid PTS while
+     showable frames should have correct PTS setting. */
+  if (self->align != GST_VP9_PARSE_ALIGN_FRAME)
+    return GST_FLOW_OK;
+
+  if (GST_BUFFER_FLAG_IS_SET (frame->buffer, GST_BUFFER_FLAG_DECODE_ONLY)) {
+    GST_BUFFER_PTS (frame->buffer) = GST_CLOCK_TIME_NONE;
+    GST_BUFFER_DURATION (frame->buffer) = GST_CLOCK_TIME_NONE;
+  } else {
+    GST_BUFFER_PTS (frame->buffer) = self->super_frame_pts;
+    GST_BUFFER_DURATION (frame->buffer) = self->super_frame_duration;
+  }
+  GST_BUFFER_DTS (frame->buffer) = self->super_frame_dts;
+
+  return GST_FLOW_OK;
+}
+
 static GstFlowReturn
 gst_vp9_parse_handle_frame (GstBaseParse * parse, GstBaseParseFrame * frame,
    gint * skipsize)
@ -460,6 +507,10 @@ gst_vp9_parse_handle_frame (GstBaseParse * parse, GstBaseParseFrame * frame,
    goto done;
  }

+  self->super_frame_pts = GST_BUFFER_PTS (buffer);
+  self->super_frame_dts = GST_BUFFER_DTS (buffer);
+  self->super_frame_duration = GST_BUFFER_DURATION (buffer);
+
  for (i = 0; i < superframe_info.frames_in_superframe; i++) {
    guint32 frame_size;

@ -489,6 +540,7 @@ gst_vp9_parse_handle_frame (GstBaseParse * parse, GstBaseParseFrame * frame,
       * Real data is either taken from input by baseclass or
       * a replacement output buffer is provided anyway. */
      gst_vp9_parse_parse_frame (self, &subframe, &frame_hdr);
+
      ret = gst_base_parse_finish_frame (parse, &subframe, frame_size);
    } else {
      /* FIXME: need to parse all frames belong to this superframe? */
@ -498,6 +550,8 @@ gst_vp9_parse_handle_frame (GstBaseParse * parse, GstBaseParseFrame * frame,
    offset += frame_size;
  }

+  gst_vp9_parse_reset_super_frame (self);
+
 done:
  gst_buffer_unmap (buffer, &map);