audiodecoder: add _finish_subframe() method

This allows us to output audio samples without discarding
any input frames, which is useful for some formats/codecs
(e.g. the MonkeysAudio decoder implementation in ffmpeg
which might return e.g. 16 output buffers for an
input buffer for certain files).

In the past decoder implementations just concatenated
the returned audio buffers until a full frame had been
decoded, but that's no longer possible to do efficiently
when the decoder returns audio samples in non-interleaved
layout.

Allowing subframes to be output before the entire input
frame is decoded can also be useful to decrease startup
latency/delay.

https://gitlab.freedesktop.org/gstreamer/gst-libav/issues/49
This commit is contained in:
Tim-Philipp Müller 2019-02-25 23:57:13 +00:00
parent 2e5f5e67ce
commit 8d1122013b
3 changed files with 100 additions and 5 deletions

View file

@ -392,6 +392,7 @@ GST_AUDIO_DECODER_OUTPUT_SEGMENT
GST_AUDIO_DECODER_STREAM_LOCK GST_AUDIO_DECODER_STREAM_LOCK
GST_AUDIO_DECODER_STREAM_UNLOCK GST_AUDIO_DECODER_STREAM_UNLOCK
gst_audio_decoder_finish_frame gst_audio_decoder_finish_frame
gst_audio_decoder_finish_subframe
gst_audio_decoder_set_output_format gst_audio_decoder_set_output_format
gst_audio_decoder_set_output_caps gst_audio_decoder_set_output_caps
gst_audio_decoder_negotiate gst_audio_decoder_negotiate

View file

@ -217,6 +217,8 @@ struct _GstAudioDecoderPrivate
/* expecting the buffer with DISCONT flag */ /* expecting the buffer with DISCONT flag */
gboolean expecting_discont_buf; gboolean expecting_discont_buf;
/* number of samples pushed out via _finish_subframe(), resets on _finish_frame() */
guint subframe_samples;
/* input bps estimation */ /* input bps estimation */
/* global in bytes seen */ /* global in bytes seen */
@ -311,6 +313,10 @@ static gboolean gst_audio_decoder_src_query_default (GstAudioDecoder * dec,
static gboolean gst_audio_decoder_transform_meta_default (GstAudioDecoder * static gboolean gst_audio_decoder_transform_meta_default (GstAudioDecoder *
decoder, GstBuffer * outbuf, GstMeta * meta, GstBuffer * inbuf); decoder, GstBuffer * outbuf, GstMeta * meta, GstBuffer * inbuf);
static GstFlowReturn
gst_audio_decoder_finish_frame_or_subframe (GstAudioDecoder * dec,
GstBuffer * buf, gint frames);
static GstElementClass *parent_class = NULL; static GstElementClass *parent_class = NULL;
static gint private_offset = 0; static gint private_offset = 0;
@ -1225,6 +1231,40 @@ foreach_metadata (GstBuffer * inbuf, GstMeta ** meta, gpointer user_data)
return TRUE; return TRUE;
} }
/**
 * gst_audio_decoder_finish_subframe:
 * @dec: a #GstAudioDecoder
 * @buf: decoded data, or NULL to mark the current input frame as finished
 *
 * Collects decoded data and pushes it downstream. This function may be called
 * multiple times for a given input frame.
 *
 * @buf may be NULL in which case it is assumed that the current input frame is
 * finished. This is equivalent to calling gst_audio_decoder_finish_frame()
 * with a NULL buffer and frames=1 after having pushed out all decoded audio
 * subframes using this function.
 *
 * When called with valid data in @buf the source pad caps must have been set
 * already.
 *
 * Note that a frame received in #GstAudioDecoderClass.handle_frame() may be
 * invalidated by a call to this function.
 *
 * Returns: a #GstFlowReturn that should be escalated to caller (of caller)
 *
 * Since: 1.16
 */
GstFlowReturn
gst_audio_decoder_finish_subframe (GstAudioDecoder * dec, GstBuffer * buf)
{
  g_return_val_if_fail (GST_IS_AUDIO_DECODER (dec), GST_FLOW_ERROR);

  /* A frame count of 0 marks @buf as a sub-frame of the current input frame
   * (more sub-frames may follow); a NULL @buf finishes exactly one input
   * frame instead. */
  return gst_audio_decoder_finish_frame_or_subframe (dec, buf,
      (buf == NULL) ? 1 : 0);
}
/** /**
* gst_audio_decoder_finish_frame: * gst_audio_decoder_finish_frame:
* @dec: a #GstAudioDecoder * @dec: a #GstAudioDecoder
@ -1247,6 +1287,20 @@ foreach_metadata (GstBuffer * inbuf, GstMeta ** meta, gpointer user_data)
GstFlowReturn GstFlowReturn
gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf, gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
gint frames) gint frames)
{
g_return_val_if_fail (GST_IS_AUDIO_DECODER (dec), GST_FLOW_ERROR);
/* no dummy calls please */
g_return_val_if_fail (frames != 0, GST_FLOW_ERROR);
return gst_audio_decoder_finish_frame_or_subframe (dec, buf, frames);
}
/* frames == 0 indicates that this is a sub-frame and further sub-frames may
* follow for the current input frame. */
static GstFlowReturn
gst_audio_decoder_finish_frame_or_subframe (GstAudioDecoder * dec,
GstBuffer * buf, gint frames)
{ {
GstAudioDecoderPrivate *priv; GstAudioDecoderPrivate *priv;
GstAudioDecoderContext *ctx; GstAudioDecoderContext *ctx;
@ -1256,12 +1310,15 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
gsize size, samples = 0; gsize size, samples = 0;
GstFlowReturn ret = GST_FLOW_OK; GstFlowReturn ret = GST_FLOW_OK;
GQueue inbufs = G_QUEUE_INIT; GQueue inbufs = G_QUEUE_INIT;
gboolean is_subframe = (frames == 0);
gboolean do_check_resync;
/* subclass should not hand us no data */ /* subclass should not hand us no data */
g_return_val_if_fail (buf == NULL || gst_buffer_get_size (buf) > 0, g_return_val_if_fail (buf == NULL || gst_buffer_get_size (buf) > 0,
GST_FLOW_ERROR); GST_FLOW_ERROR);
/* no dummy calls please */
g_return_val_if_fail (frames != 0, GST_FLOW_ERROR); /* if it's a subframe (frames == 0) we must have a valid buffer */
g_assert (!is_subframe || buf != NULL);
priv = dec->priv; priv = dec->priv;
ctx = &dec->priv->ctx; ctx = &dec->priv->ctx;
@ -1279,7 +1336,7 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
GST_AUDIO_DECODER_STREAM_LOCK (dec); GST_AUDIO_DECODER_STREAM_LOCK (dec);
if (buf) { if (buf != NULL && priv->subframe_samples == 0) {
ret = check_pending_reconfigure (dec); ret = check_pending_reconfigure (dec);
if (ret == GST_FLOW_FLUSHING || ret == GST_FLOW_NOT_NEGOTIATED) { if (ret == GST_FLOW_FLUSHING || ret == GST_FLOW_NOT_NEGOTIATED) {
gst_buffer_unref (buf); gst_buffer_unref (buf);
@ -1333,6 +1390,10 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
GST_DEBUG_OBJECT (dec, "leading frame ts %" GST_TIME_FORMAT, GST_DEBUG_OBJECT (dec, "leading frame ts %" GST_TIME_FORMAT,
GST_TIME_ARGS (ts)); GST_TIME_ARGS (ts));
if (is_subframe && priv->frames.length == 0)
goto subframe_without_pending_input_frame;
/* this will be skipped in the is_subframe case because frames will be 0 */
while (priv->frames.length && frames) { while (priv->frames.length && frames) {
g_queue_push_tail (&inbufs, g_queue_pop_head (&priv->frames)); g_queue_push_tail (&inbufs, g_queue_pop_head (&priv->frames));
dec->priv->ctx.delay = dec->priv->frames.length; dec->priv->ctx.delay = dec->priv->frames.length;
@ -1354,8 +1415,11 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
priv->base_ts = dec->output_segment.start; priv->base_ts = dec->output_segment.start;
} }
/* slightly convoluted approach caters for perfect ts if subclass desires */ /* only check for resync at the beginning of an input/output frame */
if (GST_CLOCK_TIME_IS_VALID (ts)) { do_check_resync = !is_subframe || priv->subframe_samples == 0;
/* slightly convoluted approach caters for perfect ts if subclass desires. */
if (do_check_resync && GST_CLOCK_TIME_IS_VALID (ts)) {
if (dec->priv->tolerance > 0) { if (dec->priv->tolerance > 0) {
GstClockTimeDiff diff; GstClockTimeDiff diff;
@ -1420,6 +1484,16 @@ gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstBuffer * buf,
data.outbuf = buf; data.outbuf = buf;
gst_buffer_foreach_meta (l->data, foreach_metadata, &data); gst_buffer_foreach_meta (l->data, foreach_metadata, &data);
} }
} else if (is_subframe) {
CopyMetaData data;
GstBuffer *in_buf;
/* For subframes we assume a 1:N relationship for now, so we just take
* metas from the first pending input buf */
in_buf = g_queue_peek_head (&priv->frames);
data.decoder = dec;
data.outbuf = buf;
gst_buffer_foreach_meta (in_buf, foreach_metadata, &data);
} else { } else {
GST_WARNING_OBJECT (dec, GST_WARNING_OBJECT (dec,
"Can't copy metadata because input buffers disappeared"); "Can't copy metadata because input buffers disappeared");
@ -1441,6 +1515,11 @@ exit:
g_queue_foreach (&inbufs, (GFunc) gst_buffer_unref, NULL); g_queue_foreach (&inbufs, (GFunc) gst_buffer_unref, NULL);
g_queue_clear (&inbufs); g_queue_clear (&inbufs);
if (is_subframe)
dec->priv->subframe_samples += samples;
else
dec->priv->subframe_samples = 0;
GST_AUDIO_DECODER_STREAM_UNLOCK (dec); GST_AUDIO_DECODER_STREAM_UNLOCK (dec);
return ret; return ret;
@ -1448,6 +1527,7 @@ exit:
/* ERRORS */ /* ERRORS */
wrong_buffer: wrong_buffer:
{ {
/* arguably more of a programming error? */
GST_ELEMENT_ERROR (dec, STREAM, DECODE, (NULL), GST_ELEMENT_ERROR (dec, STREAM, DECODE, (NULL),
("buffer size %" G_GSIZE_FORMAT " not a multiple of %d", size, ("buffer size %" G_GSIZE_FORMAT " not a multiple of %d", size,
ctx->info.bpf)); ctx->info.bpf));
@ -1457,6 +1537,7 @@ wrong_buffer:
} }
wrong_samples: wrong_samples:
{ {
/* arguably more of a programming error? */
GST_ELEMENT_ERROR (dec, STREAM, DECODE, (NULL), GST_ELEMENT_ERROR (dec, STREAM, DECODE, (NULL),
("GstAudioMeta samples (%" G_GSIZE_FORMAT ") are inconsistent with " ("GstAudioMeta samples (%" G_GSIZE_FORMAT ") are inconsistent with "
"the buffer size and layout (size/bpf = %" G_GSIZE_FORMAT ")", "the buffer size and layout (size/bpf = %" G_GSIZE_FORMAT ")",
@ -1465,6 +1546,15 @@ wrong_samples:
ret = GST_FLOW_ERROR; ret = GST_FLOW_ERROR;
goto exit; goto exit;
} }
subframe_without_pending_input_frame:
{
/* arguably more of a programming error? */
GST_ELEMENT_ERROR (dec, STREAM, DECODE, (NULL),
("Received decoded subframe, but no pending frame"));
gst_buffer_unref (buf);
ret = GST_FLOW_ERROR;
goto exit;
}
} }
static GstFlowReturn static GstFlowReturn

View file

@ -332,6 +332,10 @@ GstCaps * gst_audio_decoder_proxy_getcaps (GstAudioDecoder * decoder,
GST_AUDIO_API GST_AUDIO_API
gboolean gst_audio_decoder_negotiate (GstAudioDecoder * dec); gboolean gst_audio_decoder_negotiate (GstAudioDecoder * dec);
GST_AUDIO_API
GstFlowReturn gst_audio_decoder_finish_subframe (GstAudioDecoder * dec,
GstBuffer * buf);
GST_AUDIO_API GST_AUDIO_API
GstFlowReturn gst_audio_decoder_finish_frame (GstAudioDecoder * dec, GstFlowReturn gst_audio_decoder_finish_frame (GstAudioDecoder * dec,
GstBuffer * buf, gint frames); GstBuffer * buf, gint frames);