audiodecoder: add _finish_subframe() method

This allows us to output audio samples without discarding any input frames, which is useful for some formats/codecs (e.g. the MonkeysAudio decoder implementation in ffmpeg, which might return e.g. 16 output buffers for a single input buffer for certain files). In the past decoder implementations just concatenated the returned audio buffers until a full frame had been decoded, but that's no longer possible to do efficiently when the decoder returns audio samples in non-interleaved layout. Allowing subframes to be output before the entire input frame is decoded can also be useful to decrease startup latency/delay. https://gitlab.freedesktop.org/gstreamer/gst-libav/issues/49
2025-09-02 18:23:56 +00:00 · 2019-02-25 23:57:13 +00:00 · 2019-02-25 23:57:13 +00:00 · 8d1122013b
commit 8d1122013b
parent 2e5f5e67ce
3 changed files with 100 additions and 5 deletions
--- a/docs/libs/gst-plugins-base-libs-sections.txt
+++ b/docs/libs/gst-plugins-base-libs-sections.txt
@ -392,6 +392,7 @@ GST_AUDIO_DECODER_OUTPUT_SEGMENT
 GST_AUDIO_DECODER_STREAM_LOCK
 GST_AUDIO_DECODER_STREAM_UNLOCK
 gst_audio_decoder_finish_frame
+gst_audio_decoder_finish_subframe
 gst_audio_decoder_set_output_format
 gst_audio_decoder_set_output_caps
 gst_audio_decoder_negotiate
--- a/gst-libs/gst/audio/gstaudiodecoder.c
+++ b/gst-libs/gst/audio/gstaudiodecoder.c
@ -217,6 +217,8 @@ struct _GstAudioDecoderPrivate
  /* expecting the buffer with DISCONT flag */
  gboolean expecting_discont_buf;

+  /* number of samples pushed out via _finish_subframe(), resets on _finish_frame() */
+  guint subframe_samples;

  /* input bps estimatation */
  /* global in bytes seen */
@ -311,6 +313,10 @@ static gboolean gst_audio_decoder_src_query_default (GstAudioDecoder * dec,
 static gboolean gst_audio_decoder_transform_meta_default (GstAudioDecoder *
    decoder, GstBuffer * outbuf, GstMeta * meta, GstBuffer * inbuf);

+static GstFlowReturn
+gst_audio_decoder_finish_frame_or_subframe (GstAudioDecoder * dec,
+    GstBuffer * buf, gint frames);
+
 static GstElementClass *parent_class = NULL;
 static gint private_offset = 0;

@ -1225,6 +1231,40 @@ foreach_metadata (GstBuffer * inbuf, GstMeta ** meta, gpointer user_data)
  return TRUE;
 }

+/**
+ * gst_audio_decoder_finish_subframe:
+ * @dec: a #GstAudioDecoder
+ * @buf: decoded data
+ *
+ * Collects decoded data and pushes it downstream. This function may be called
+ * multiple times for a given input frame.
+ *
+ * @buf may be NULL in which case it is assumed that the current input frame is
+ * finished. This is equivalent to calling gst_audio_decoder_finish_frame()
+ * with a NULL buffer and frames=1 after having pushed out all decoded audio
+ * subframes using this function.
+ *
+ * When called with valid data in @buf the source pad caps must have been set
+ * already.
+ *
+ * Note that a frame received in #GstAudioDecoderClass.handle_frame() may be
+ * invalidated by a call to this function.
+ *
+ * Returns: a #GstFlowReturn that should be escalated to caller (of caller)
+ *
+ * Since: 1.16
+ */
+GstFlowReturn
+gst_audio_decoder_finish_subframe (GstAudioDecoder * dec, GstBuffer * buf)
+{
+  g_return_val_if_fail (GST_IS_AUDIO_DECODER (dec), GST_FLOW_ERROR);
+
+  if (buf == NULL)
+    return gst_audio_decoder_finish_frame_or_subframe (dec, NULL, 1);
+  else
+    return gst_audio_decoder_finish_frame_or_subframe (dec, buf, 0);
+}
+
 /**
 * gst_audio_decoder_finish_frame:
 * @dec: a #GstAudioDecoder
@ -1247,6 +1287,20 @@ foreach_metadata (GstBuffer * inbuf, GstMeta ** meta, gpointer user_data)
 GstFlowReturn
 gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
    gint frames)
+{
+  g_return_val_if_fail (GST_IS_AUDIO_DECODER (dec), GST_FLOW_ERROR);
+
+  /* no dummy calls please */
+  g_return_val_if_fail (frames != 0, GST_FLOW_ERROR);
+
+  return gst_audio_decoder_finish_frame_or_subframe (dec, buf, frames);
+}
+
+/* frames == 0 indicates that this is a sub-frame and further sub-frames may
+ * follow for the current input frame. */
+static GstFlowReturn
+gst_audio_decoder_finish_frame_or_subframe (GstAudioDecoder * dec,
+    GstBuffer * buf, gint frames)
 {
  GstAudioDecoderPrivate *priv;
  GstAudioDecoderContext *ctx;
@ -1256,12 +1310,15 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
  gsize size, samples = 0;
  GstFlowReturn ret = GST_FLOW_OK;
  GQueue inbufs = G_QUEUE_INIT;
+  gboolean is_subframe = (frames == 0);
+  gboolean do_check_resync;

  /* subclass should not hand us no data */
  g_return_val_if_fail (buf == NULL || gst_buffer_get_size (buf) > 0,
      GST_FLOW_ERROR);
-  /* no dummy calls please */
-  g_return_val_if_fail (frames != 0, GST_FLOW_ERROR);
+
+  /* if it's a subframe (frames == 0) we must have a valid buffer */
+  g_assert (!is_subframe || buf != NULL);

  priv = dec->priv;
  ctx = &dec->priv->ctx;
@ -1279,7 +1336,7 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,

  GST_AUDIO_DECODER_STREAM_LOCK (dec);

-  if (buf) {
+  if (buf != NULL && priv->subframe_samples == 0) {
    ret = check_pending_reconfigure (dec);
    if (ret == GST_FLOW_FLUSHING || ret == GST_FLOW_NOT_NEGOTIATED) {
      gst_buffer_unref (buf);
@ -1333,6 +1390,10 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
  GST_DEBUG_OBJECT (dec, "leading frame ts %" GST_TIME_FORMAT,
      GST_TIME_ARGS (ts));

+  if (is_subframe && priv->frames.length == 0)
+    goto subframe_without_pending_input_frame;
+
+  /* this will be skipped in the is_subframe case because frames will be 0 */
  while (priv->frames.length && frames) {
    g_queue_push_tail (&inbufs, g_queue_pop_head (&priv->frames));
    dec->priv->ctx.delay = dec->priv->frames.length;
@ -1354,8 +1415,11 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
    priv->base_ts = dec->output_segment.start;
  }

-  /* slightly convoluted approach caters for perfect ts if subclass desires */
-  if (GST_CLOCK_TIME_IS_VALID (ts)) {
+  /* only check for resync at the beginning of an input/output frame */
+  do_check_resync = !is_subframe || priv->subframe_samples == 0;
+
+  /* slightly convoluted approach caters for perfect ts if subclass desires. */
+  if (do_check_resync && GST_CLOCK_TIME_IS_VALID (ts)) {
    if (dec->priv->tolerance > 0) {
      GstClockTimeDiff diff;

@ -1420,6 +1484,16 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
        data.outbuf = buf;
        gst_buffer_foreach_meta (l->data, foreach_metadata, &data);
      }
+    } else if (is_subframe) {
+      CopyMetaData data;
+      GstBuffer *in_buf;
+
+      /* For subframes we assume a 1:N relationship for now, so we just take
+       * metas from the first pending input buf */
+      in_buf = g_queue_peek_head (&priv->frames);
+      data.decoder = dec;
+      data.outbuf = buf;
+      gst_buffer_foreach_meta (in_buf, foreach_metadata, &data);
    } else {
      GST_WARNING_OBJECT (dec,
          "Can't copy metadata because input buffers disappeared");
@ -1441,6 +1515,11 @@ exit:
  g_queue_foreach (&inbufs, (GFunc) gst_buffer_unref, NULL);
  g_queue_clear (&inbufs);

+  if (is_subframe)
+    dec->priv->subframe_samples += samples;
+  else
+    dec->priv->subframe_samples = 0;
+
  GST_AUDIO_DECODER_STREAM_UNLOCK (dec);

  return ret;
@ -1448,6 +1527,7 @@ exit:
  /* ERRORS */
 wrong_buffer:
  {
+    /* arguably more of a programming error? */
    GST_ELEMENT_ERROR (dec, STREAM, DECODE, (NULL),
        ("buffer size %" G_GSIZE_FORMAT " not a multiple of %d", size,
            ctx->info.bpf));
@ -1457,6 +1537,7 @@ wrong_buffer:
  }
 wrong_samples:
  {
+    /* arguably more of a programming error? */
    GST_ELEMENT_ERROR (dec, STREAM, DECODE, (NULL),
        ("GstAudioMeta samples (%" G_GSIZE_FORMAT ") are inconsistent with "
            "the buffer size and layout (size/bpf = %" G_GSIZE_FORMAT ")",
@ -1465,6 +1546,15 @@ wrong_samples:
    ret = GST_FLOW_ERROR;
    goto exit;
  }
+subframe_without_pending_input_frame:
+  {
+    /* arguably more of a programming error? */
+    GST_ELEMENT_ERROR (dec, STREAM, DECODE, (NULL),
+        ("Received decoded subframe, but no pending frame"));
+    gst_buffer_unref (buf);
+    ret = GST_FLOW_ERROR;
+    goto exit;
+  }
 }

 static GstFlowReturn
--- a/gst-libs/gst/audio/gstaudiodecoder.h
+++ b/gst-libs/gst/audio/gstaudiodecoder.h
@ -332,6 +332,10 @@ GstCaps *         gst_audio_decoder_proxy_getcaps (GstAudioDecoder * decoder,
 GST_AUDIO_API
 gboolean          gst_audio_decoder_negotiate (GstAudioDecoder * dec);

+GST_AUDIO_API
+GstFlowReturn     gst_audio_decoder_finish_subframe (GstAudioDecoder * dec,
+                                                     GstBuffer       * buf);
+
 GST_AUDIO_API
 GstFlowReturn     gst_audio_decoder_finish_frame (GstAudioDecoder * dec,
                                                  GstBuffer * buf, gint frames);