avauddec: add support for decoding in non-interleaved layout

This removes the internal interleave loop and always negotiates
the native output layout of the libav decoder. Users can use
audioconvert to interleave if necessary.

Special care has been taken to leave the encoder unaffected by
the changes in avcodecmap, since GstAudioEncoder doesn't support
the non-interleaved layout yet.

https://bugzilla.gnome.org/show_bug.cgi?id=705977
Author: George Kiagiadakis, 2018-07-23 16:16:10 +03:00
parent db82350245
commit 4596249496
3 changed files with 119 additions and 90 deletions
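A minimal sketch (not part of this change) of the "use audioconvert to interleave" note above, assuming a placeholder audio URI; uridecodebin plugs whatever avdec_* decoder is needed, and audioconvert re-interleaves the decoder's native, possibly planar, output:

    #include <gst/gst.h>

    int
    main (int argc, char **argv)
    {
      GstElement *pipeline;
      GstBus *bus;
      GstMessage *msg;

      gst_init (&argc, &argv);

      /* audioconvert after the decoder restores interleaved layout for
       * downstream elements that still require it */
      pipeline = gst_parse_launch ("uridecodebin uri=file:///path/to/audio.ogg "
          "! audioconvert ! audioresample ! autoaudiosink", NULL);

      gst_element_set_state (pipeline, GST_STATE_PLAYING);

      /* wait for EOS or an error, then shut down */
      bus = gst_element_get_bus (pipeline);
      msg = gst_bus_timed_pop_filtered (bus, GST_CLOCK_TIME_NONE,
          GST_MESSAGE_ERROR | GST_MESSAGE_EOS);
      if (msg)
        gst_message_unref (msg);

      gst_object_unref (bus);
      gst_element_set_state (pipeline, GST_STATE_NULL);
      gst_object_unref (pipeline);
      return 0;
    }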

ext/libav/gstavauddec.c

@@ -360,16 +360,17 @@ static gboolean
 settings_changed (GstFFMpegAudDec * ffmpegdec, AVFrame * frame)
 {
   GstAudioFormat format;
+  GstAudioLayout layout;
   gint channels = av_get_channel_layout_nb_channels (frame->channel_layout);
 
-  format = gst_ffmpeg_smpfmt_to_audioformat (frame->format);
+  format = gst_ffmpeg_smpfmt_to_audioformat (frame->format, &layout);
   if (format == GST_AUDIO_FORMAT_UNKNOWN)
     return TRUE;
 
-  return !(ffmpegdec->info.rate ==
-      frame->sample_rate &&
+  return !(ffmpegdec->info.rate == frame->sample_rate &&
       ffmpegdec->info.channels == channels &&
-      ffmpegdec->info.finfo->format == format);
+      ffmpegdec->info.finfo->format == format &&
+      ffmpegdec->info.layout == layout);
 }
 
 static gboolean
@@ -378,12 +379,13 @@ gst_ffmpegauddec_negotiate (GstFFMpegAudDec * ffmpegdec,
 {
   GstFFMpegAudDecClass *oclass;
   GstAudioFormat format;
+  GstAudioLayout layout;
   gint channels;
   GstAudioChannelPosition pos[64] = { 0, };
 
   oclass = (GstFFMpegAudDecClass *) (G_OBJECT_GET_CLASS (ffmpegdec));
 
-  format = gst_ffmpeg_smpfmt_to_audioformat (frame->format);
+  format = gst_ffmpeg_smpfmt_to_audioformat (frame->format, &layout);
   if (format == GST_AUDIO_FORMAT_UNKNOWN)
     goto no_caps;
 
   channels = av_get_channel_layout_nb_channels (frame->channel_layout);
@@ -396,9 +398,13 @@ gst_ffmpegauddec_negotiate (GstFFMpegAudDec * ffmpegdec,
     return TRUE;
 
   GST_DEBUG_OBJECT (ffmpegdec,
-      "Renegotiating audio from %dHz@%dchannels (%d) to %dHz@%dchannels (%d)",
+      "Renegotiating audio from %dHz@%dchannels (%d, interleaved=%d) "
+      "to %dHz@%dchannels (%d, interleaved=%d)",
       ffmpegdec->info.rate, ffmpegdec->info.channels,
-      ffmpegdec->info.finfo->format, frame->sample_rate, channels, format);
+      ffmpegdec->info.finfo->format,
+      ffmpegdec->info.layout == GST_AUDIO_LAYOUT_INTERLEAVED,
+      frame->sample_rate, channels, format,
+      layout == GST_AUDIO_LAYOUT_INTERLEAVED);
 
   gst_ffmpeg_channel_layout_to_gst (frame->channel_layout, channels, pos);
   memcpy (ffmpegdec->ffmpeg_layout, pos,
@@ -410,6 +416,7 @@ gst_ffmpegauddec_negotiate (GstFFMpegAudDec * ffmpegdec,
       memcmp (pos, ffmpegdec->ffmpeg_layout, sizeof (pos[0]) * channels) != 0;
   gst_audio_info_set_format (&ffmpegdec->info, format,
       frame->sample_rate, channels, pos);
+  ffmpegdec->info.layout = layout;
 
   if (!gst_audio_decoder_set_output_format (GST_AUDIO_DECODER (ffmpegdec),
           &ffmpegdec->info))
@@ -472,6 +479,7 @@ gst_ffmpegauddec_audio_frame (GstFFMpegAudDec * ffmpegdec,
   if (res >= 0) {
     gint nsamples, channels, byte_per_sample;
     gsize output_size;
+    gboolean planar;
 
     if (!gst_ffmpegauddec_negotiate (ffmpegdec, ffmpegdec->context,
             ffmpegdec->frame, FALSE)) {
@@ -485,81 +493,33 @@ gst_ffmpegauddec_audio_frame (GstFFMpegAudDec * ffmpegdec,
     channels = ffmpegdec->info.channels;
     nsamples = ffmpegdec->frame->nb_samples;
     byte_per_sample = ffmpegdec->info.finfo->width / 8;
+    planar = av_sample_fmt_is_planar (ffmpegdec->context->sample_fmt);
+
+    g_return_val_if_fail (ffmpegdec->info.layout == (planar ?
+            GST_AUDIO_LAYOUT_NON_INTERLEAVED : GST_AUDIO_LAYOUT_INTERLEAVED),
+        GST_FLOW_NOT_NEGOTIATED);
+
+    GST_DEBUG_OBJECT (ffmpegdec, "Creating output buffer");
 
     /* ffmpegdec->frame->linesize[0] might contain padding, allocate only what's needed */
     output_size = nsamples * byte_per_sample * channels;
+    *outbuf =
+        gst_audio_decoder_allocate_output_buffer (GST_AUDIO_DECODER
+        (ffmpegdec), output_size);
 
-    GST_DEBUG_OBJECT (ffmpegdec, "Creating output buffer");
-    if (av_sample_fmt_is_planar (ffmpegdec->context->sample_fmt)
-        && channels > 1) {
-      gint i, j;
-      GstMapInfo minfo;
-
-      /* note: linesize[0] might contain padding, allocate only what's needed */
-      *outbuf =
-          gst_audio_decoder_allocate_output_buffer (GST_AUDIO_DECODER
-          (ffmpegdec), output_size);
-
-      gst_buffer_map (*outbuf, &minfo, GST_MAP_WRITE);
-
-      switch (ffmpegdec->info.finfo->width) {
-        case 8:{
-          guint8 *odata = minfo.data;
-
-          for (i = 0; i < nsamples; i++) {
-            for (j = 0; j < channels; j++) {
-              odata[j] =
-                  ((const guint8 *) ffmpegdec->frame->extended_data[j])[i];
-            }
-            odata += channels;
-          }
-          break;
-        }
-        case 16:{
-          guint16 *odata = (guint16 *) minfo.data;
-
-          for (i = 0; i < nsamples; i++) {
-            for (j = 0; j < channels; j++) {
-              odata[j] =
-                  ((const guint16 *) ffmpegdec->frame->extended_data[j])[i];
-            }
-            odata += channels;
-          }
-          break;
-        }
-        case 32:{
-          guint32 *odata = (guint32 *) minfo.data;
-
-          for (i = 0; i < nsamples; i++) {
-            for (j = 0; j < channels; j++) {
-              odata[j] =
-                  ((const guint32 *) ffmpegdec->frame->extended_data[j])[i];
-            }
-            odata += channels;
-          }
-          break;
-        }
-        case 64:{
-          guint64 *odata = (guint64 *) minfo.data;
-
-          for (i = 0; i < nsamples; i++) {
-            for (j = 0; j < channels; j++) {
-              odata[j] =
-                  ((const guint64 *) ffmpegdec->frame->extended_data[j])[i];
-            }
-            odata += channels;
-          }
-          break;
-        }
-        default:
-          g_assert_not_reached ();
-          break;
-      }
-      gst_buffer_unmap (*outbuf, &minfo);
+    if (planar) {
+      gint i;
+      GstAudioMeta *meta;
+
+      meta = gst_buffer_add_audio_meta (*outbuf, &ffmpegdec->info, nsamples,
+          NULL);
+
+      for (i = 0; i < channels; i++) {
+        gst_buffer_fill (*outbuf, meta->offsets[i],
+            ffmpegdec->frame->extended_data[i], nsamples * byte_per_sample);
+      }
     } else {
-      *outbuf =
-          gst_audio_decoder_allocate_output_buffer (GST_AUDIO_DECODER
-          (ffmpegdec), output_size);
       gst_buffer_fill (*outbuf, 0, ffmpegdec->frame->data[0], output_size);
     }
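A minimal sketch (not part of this commit) of how a downstream element can consume the non-interleaved buffers produced above, via the GstAudioMeta attached by gst_buffer_add_audio_meta(). It assumes GStreamer >= 1.16, where gst_audio_buffer_map() and GstAudioMeta are available; the helper name is hypothetical:

    #include <gst/audio/audio.h>

    static void
    inspect_planar_buffer (GstBuffer * buf, const GstAudioInfo * info)
    {
      GstAudioBuffer abuf;
      gint c;

      /* gst_audio_buffer_map() uses the GstAudioMeta (if present) to locate
       * the per-channel planes, whatever the negotiated layout is */
      if (!gst_audio_buffer_map (&abuf, info, buf, GST_MAP_READ))
        return;

      /* with GST_AUDIO_LAYOUT_NON_INTERLEAVED there is one plane per channel,
       * each holding n_samples samples of (info->finfo->width / 8) bytes */
      for (c = 0; c < abuf.n_planes; c++) {
        const guint8 *plane = abuf.planes[c];

        GST_LOG ("channel %d: %" G_GSIZE_FORMAT " samples at %p", c,
            abuf.n_samples, plane);
      }

      gst_audio_buffer_unmap (&abuf);
    }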

ext/libav/gstavcodecmap.c

@@ -474,11 +474,14 @@ get_nbits_set (guint64 n)
 static void
 gst_ffmpeg_audio_set_sample_fmts (GstCaps * caps,
-    const enum AVSampleFormat *fmts)
+    const enum AVSampleFormat *fmts, gboolean always_interleaved)
 {
   GValue va = { 0, };
+  GValue vap = { 0, };
   GValue v = { 0, };
   GstAudioFormat format;
+  GstAudioLayout layout;
+  GstCaps *caps_copy = NULL;
 
   if (!fmts || fmts[0] == -1) {
     gint i;
@@ -486,38 +489,73 @@ gst_ffmpeg_audio_set_sample_fmts (GstCaps * caps,
     g_value_init (&va, GST_TYPE_LIST);
     g_value_init (&v, G_TYPE_STRING);
 
     for (i = 0; i <= AV_SAMPLE_FMT_DBL; i++) {
-      format = gst_ffmpeg_smpfmt_to_audioformat (i);
+      format = gst_ffmpeg_smpfmt_to_audioformat (i, NULL);
       if (format == GST_AUDIO_FORMAT_UNKNOWN)
         continue;
       g_value_set_string (&v, gst_audio_format_to_string (format));
       gst_value_list_append_value (&va, &v);
     }
 
     gst_caps_set_value (caps, "format", &va);
+    if (!always_interleaved) {
+      g_value_init (&vap, GST_TYPE_LIST);
+      g_value_set_string (&v, "interleaved");
+      gst_value_list_append_value (&vap, &v);
+      g_value_set_string (&v, "non-interleaved");
+      gst_value_list_append_value (&vap, &v);
+      gst_caps_set_value (caps, "layout", &vap);
+      g_value_unset (&vap);
+    } else {
+      gst_caps_set_simple (caps, "layout", G_TYPE_STRING, "interleaved", NULL);
+    }
     g_value_unset (&v);
     g_value_unset (&va);
     return;
   }
 
   g_value_init (&va, GST_TYPE_LIST);
+  g_value_init (&vap, GST_TYPE_LIST);
   g_value_init (&v, G_TYPE_STRING);
 
   while (*fmts != -1) {
-    format = gst_ffmpeg_smpfmt_to_audioformat (*fmts);
+    format = gst_ffmpeg_smpfmt_to_audioformat (*fmts, &layout);
     if (format != GST_AUDIO_FORMAT_UNKNOWN) {
       g_value_set_string (&v, gst_audio_format_to_string (format));
       /* Only append values we don't have yet */
-      if (!_gst_value_list_contains (&va, &v))
-        gst_value_list_append_value (&va, &v);
+      if (layout == GST_AUDIO_LAYOUT_INTERLEAVED || always_interleaved) {
+        if (!_gst_value_list_contains (&va, &v))
+          gst_value_list_append_value (&va, &v);
+      } else {
+        if (!_gst_value_list_contains (&vap, &v))
+          gst_value_list_append_value (&vap, &v);
+      }
     }
     fmts++;
   }
 
+  if (gst_value_list_get_size (&va) >= 1 && gst_value_list_get_size (&vap) >= 1) {
+    caps_copy = gst_caps_copy (caps);
+  }
+
   if (gst_value_list_get_size (&va) == 1) {
-    /* The single value is still in v */
-    gst_caps_set_value (caps, "format", &v);
+    gst_caps_set_value (caps, "format", gst_value_list_get_value (&va, 0));
+    gst_caps_set_simple (caps, "layout", G_TYPE_STRING, "interleaved", NULL);
   } else if (gst_value_list_get_size (&va) > 1) {
     gst_caps_set_value (caps, "format", &va);
+    gst_caps_set_simple (caps, "layout", G_TYPE_STRING, "interleaved", NULL);
+  }
+
+  if (gst_value_list_get_size (&vap) == 1) {
+    gst_caps_set_value (caps_copy ? caps_copy : caps, "format",
+        gst_value_list_get_value (&vap, 0));
+    gst_caps_set_simple (caps_copy ? caps_copy : caps, "layout", G_TYPE_STRING,
+        "non-interleaved", NULL);
+  } else if (gst_value_list_get_size (&vap) > 1) {
+    gst_caps_set_value (caps_copy ? caps_copy : caps, "format", &vap);
+    gst_caps_set_simple (caps_copy ? caps_copy : caps, "layout", G_TYPE_STRING,
+        "non-interleaved", NULL);
+  }
+
+  if (caps_copy) {
+    gst_caps_append (caps, caps_copy);
   }
 
   g_value_unset (&v);
   g_value_unset (&va);
+  g_value_unset (&vap);
 }
 
 /* same for audio - now with channels/sample rate
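For illustration (not part of the diff): with always_interleaved=FALSE, a hypothetical decoder whose sample_fmts list is { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLTP } would now get two caps structures, roughly (format strings shown for a little-endian machine, rate/channels fields omitted):

    audio/x-raw, format=(string)S16LE, layout=(string)interleaved, ...;
    audio/x-raw, format=(string){ S16LE, F32LE }, layout=(string)non-interleaved, ...

Encoders pass always_interleaved=TRUE (see gst_ffmpeg_codectype_to_audio_caps below), so their caps keep layout=interleaved only, as the commit message notes.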
@@ -2333,29 +2371,47 @@ gst_ffmpeg_pixfmt_to_caps (enum AVPixelFormat pix_fmt, AVCodecContext * context,
 }
 
 GstAudioFormat
-gst_ffmpeg_smpfmt_to_audioformat (enum AVSampleFormat sample_fmt)
+gst_ffmpeg_smpfmt_to_audioformat (enum AVSampleFormat sample_fmt,
+    GstAudioLayout * layout)
 {
+  if (layout)
+    *layout = GST_AUDIO_LAYOUT_NON_INTERLEAVED;
+
   switch (sample_fmt) {
     case AV_SAMPLE_FMT_U8:
+      if (layout)
+        *layout = GST_AUDIO_LAYOUT_INTERLEAVED;
     case AV_SAMPLE_FMT_U8P:
       return GST_AUDIO_FORMAT_U8;
       break;
     case AV_SAMPLE_FMT_S16:
+      if (layout)
+        *layout = GST_AUDIO_LAYOUT_INTERLEAVED;
     case AV_SAMPLE_FMT_S16P:
       return GST_AUDIO_FORMAT_S16;
       break;
     case AV_SAMPLE_FMT_S32:
+      if (layout)
+        *layout = GST_AUDIO_LAYOUT_INTERLEAVED;
     case AV_SAMPLE_FMT_S32P:
       return GST_AUDIO_FORMAT_S32;
       break;
     case AV_SAMPLE_FMT_FLT:
+      if (layout)
+        *layout = GST_AUDIO_LAYOUT_INTERLEAVED;
     case AV_SAMPLE_FMT_FLTP:
       return GST_AUDIO_FORMAT_F32;
       break;
     case AV_SAMPLE_FMT_DBL:
+      if (layout)
+        *layout = GST_AUDIO_LAYOUT_INTERLEAVED;
     case AV_SAMPLE_FMT_DBLP:
       return GST_AUDIO_FORMAT_F64;
       break;
     default:
       /* .. */
       return GST_AUDIO_FORMAT_UNKNOWN;
@@ -2376,13 +2432,16 @@ gst_ffmpeg_smpfmt_to_caps (enum AVSampleFormat sample_fmt,
 {
   GstCaps *caps = NULL;
   GstAudioFormat format;
+  GstAudioLayout layout;
 
-  format = gst_ffmpeg_smpfmt_to_audioformat (sample_fmt);
+  format = gst_ffmpeg_smpfmt_to_audioformat (sample_fmt, &layout);
 
   if (format != GST_AUDIO_FORMAT_UNKNOWN) {
     caps = gst_ff_aud_caps_new (context, codec, codec_id, TRUE, "audio/x-raw",
         "format", G_TYPE_STRING, gst_audio_format_to_string (format),
-        "layout", G_TYPE_STRING, "interleaved", NULL);
+        "layout", G_TYPE_STRING,
+        (layout == GST_AUDIO_LAYOUT_INTERLEAVED) ?
+        "interleaved" : "non-interleaved", NULL);
     GST_LOG ("caps for sample_fmt=%d: %" GST_PTR_FORMAT, sample_fmt, caps);
   } else {
     GST_LOG ("No caps found for sample_fmt=%d", sample_fmt);
@@ -2426,10 +2485,10 @@ gst_ffmpeg_codectype_to_audio_caps (AVCodecContext * context,
         codec_id);
   } else {
     caps = gst_ff_aud_caps_new (context, codec, codec_id, encode, "audio/x-raw",
-        "layout", G_TYPE_STRING, "interleaved", NULL);
+        NULL);
     if (!caps_has_field (caps, "format"))
       gst_ffmpeg_audio_set_sample_fmts (caps,
-          codec ? codec->sample_fmts : NULL);
+          codec ? codec->sample_fmts : NULL, encode);
   }
 
   return caps;
@@ -2470,6 +2529,8 @@ gst_ffmpeg_caps_to_smpfmt (const GstCaps * caps,
   const gchar *fmt;
   GstAudioFormat format = GST_AUDIO_FORMAT_UNKNOWN;
   gint bitrate;
+  const gchar *layout;
+  gboolean interleaved;
 
   g_return_if_fail (gst_caps_get_size (caps) == 1);
@@ -2490,18 +2551,25 @@ gst_ffmpeg_caps_to_smpfmt (const GstCaps * caps,
     }
   }
 
+  layout = gst_structure_get_string (structure, "layout");
+  interleaved = ! !g_strcmp0 (layout, "non-interleaved");
+
   switch (format) {
     case GST_AUDIO_FORMAT_F32:
-      context->sample_fmt = AV_SAMPLE_FMT_FLT;
+      context->sample_fmt =
+          interleaved ? AV_SAMPLE_FMT_FLT : AV_SAMPLE_FMT_FLTP;
       break;
     case GST_AUDIO_FORMAT_F64:
-      context->sample_fmt = AV_SAMPLE_FMT_DBL;
+      context->sample_fmt =
+          interleaved ? AV_SAMPLE_FMT_DBL : AV_SAMPLE_FMT_DBLP;
       break;
     case GST_AUDIO_FORMAT_S32:
-      context->sample_fmt = AV_SAMPLE_FMT_S32;
+      context->sample_fmt =
+          interleaved ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S32P;
       break;
     case GST_AUDIO_FORMAT_S16:
-      context->sample_fmt = AV_SAMPLE_FMT_S16;
+      context->sample_fmt =
+          interleaved ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_S16P;
       break;
     default:
       break;

ext/libav/gstavcodecmap.h

@@ -141,7 +141,8 @@ gst_ffmpeg_audioinfo_to_context (GstAudioInfo *info,
 GstVideoFormat gst_ffmpeg_pixfmt_to_videoformat (enum AVPixelFormat pixfmt);
 enum AVPixelFormat gst_ffmpeg_videoformat_to_pixfmt (GstVideoFormat format);
-GstAudioFormat gst_ffmpeg_smpfmt_to_audioformat (enum AVSampleFormat sample_fmt);
+GstAudioFormat gst_ffmpeg_smpfmt_to_audioformat (enum AVSampleFormat sample_fmt,
+    GstAudioLayout * layout);
 
 /*
  * _formatid_to_caps () is meant for muxers/demuxers, it