audio-converter: optimize endian conversion

Optimize LE<->BE conversion by adding a dedicated fast path instead of
using the generic converter. Implement the transform_ip function in order to do
the endian swap in place.

This saves buffer allocation for the intermediate format, can be done in place
and also performs the conversion in one step instead of unpack-convert-pack.

For all bit widths the naive algorithm is implemented, which provides the best
performance when compiled with -O3. ORC was considered but eventually removed
as it requires a dedicated function for in-place conversion (due to the
"restrict" parameters).

A more complex algorithm for the 24-bit conversion with unrolled loop and
32-bit processing is implemented in the #if 0 section. It performs better if
compiled with -O2. With -O3 however the naive algorithm performs better.

https://bugzilla.gnome.org/show_bug.cgi?id=773073
This commit is contained in:
Petr Kulhavy 2016-10-19 12:21:37 +02:00 committed by Sebastian Dröge
parent 640c54d8f8
commit 010b9547d3
3 changed files with 241 additions and 10 deletions

View file

@ -80,6 +80,8 @@ typedef void (*AudioConvertFunc) (gpointer dst, const gpointer src, gint count);
typedef gboolean (*AudioConvertSamplesFunc) (GstAudioConverter * convert, typedef gboolean (*AudioConvertSamplesFunc) (GstAudioConverter * convert,
GstAudioConverterFlags flags, gpointer in[], gsize in_frames, GstAudioConverterFlags flags, gpointer in[], gsize in_frames,
gpointer out[], gsize out_frames); gpointer out[], gsize out_frames);
typedef void (*AudioConvertEndianFunc) (gpointer dst, const gpointer src,
gint count);
/* int/int int/float float/int float/float /* int/int int/float float/int float/float
* *
@ -113,6 +115,8 @@ struct _GstAudioConverter
gpointer *out_data; gpointer *out_data;
gsize out_frames; gsize out_frames;
gboolean in_place; /* the conversion can be done in place; returned by gst_audio_converter_supports_inplace() */
/* unpack */ /* unpack */
gboolean in_default; gboolean in_default;
gboolean unpack_ip; gboolean unpack_ip;
@ -137,6 +141,9 @@ struct _GstAudioConverter
gboolean out_default; gboolean out_default;
AudioChain *chain_end; /* NULL for empty chain or points to the last element in the chain */ AudioChain *chain_end; /* NULL for empty chain or points to the last element in the chain */
/* endian swap */
AudioConvertEndianFunc swap_endian;
AudioConvertSamplesFunc convert; AudioConvertSamplesFunc convert;
}; };
@ -826,6 +833,12 @@ converter_passthrough (GstAudioConverter * convert,
AudioChain *chain; AudioChain *chain;
gsize samples; gsize samples;
/* in-place passthrough -> do nothing */
if (in == out) {
g_assert (convert->in_place);
return TRUE;
}
chain = convert->chain_end; chain = convert->chain_end;
samples = in_frames * chain->inc; samples = in_frames * chain->inc;
@ -847,6 +860,152 @@ converter_passthrough (GstAudioConverter * convert,
return TRUE; return TRUE;
} }
/* Byte-swap (LE<->BE) a run of @count 16-bit samples, reading from @src and
 * writing to @dst.
 * @dst may be the same buffer as @src, in which case the swap is in place.
 */
static void
converter_swap_endian_16 (gpointer dst, const gpointer src, gint count)
{
  const guint16 *rd = src;
  guint16 *wr = dst;
  gint remaining;

  /* each sample is fully read before its slot is written, so dst == src
   * is safe */
  for (remaining = count; remaining > 0; remaining--) {
    guint16 sample = *rd++;

    *wr++ = GUINT16_SWAP_LE_BE (sample);
  }
}
/* Byte-swap (LE<->BE) a run of @count packed 24-bit samples, reading from
 * @src and writing to @dst.
 * @dst may be the same buffer as @src, in which case the swap is in place.
 *
 * This is the straightforward byte-wise variant; it performs better with -O3
 * and worse with -O2 than the disabled optimized variant further below.
 */
static void
converter_swap_endian_24 (gpointer dst, const gpointer src, gint count)
{
  const guint8 *rd = src;
  guint8 *wr = dst;
  gint n;

  /* step through the buffers one 3-byte sample at a time */
  for (n = 0; n < count; n++, rd += 3, wr += 3) {
    /* keep the first byte aside: for in-place use it is overwritten by the
     * next store before it is needed again */
    guint8 first = rd[0];

    wr[0] = rd[2];
    wr[1] = rd[1];
    wr[2] = first;
  }
}
/* the below code performs better with -O2 but worse with -O3 */
#if 0
/* perform LE<->BE conversion on a block of @count 24-bit samples
 * dst may equal src for in-place conversion
 *
 * assumes that dst and src are 32-bit aligned
 */
static void
converter_swap_endian_24 (gpointer dst, const gpointer src, gint count)
{
  guint32 *out = dst;
  const guint32 *in = src;
  guint8 *out8;
  const guint8 *in8;
  gint i;

  /* first convert 24-bit samples in multiples of 4 reading 3x 32-bits in one cycle
   *
   * input:               A1 B1 C1 A2 , B2 C2 A3 B3 , C3 A4 B4 C4
   * 32-bit endian swap:  A2 C1 B1 A1 , B3 A3 C2 B2 , C4 B4 A4 C3
   *                      <--  x  --> , <--  y  --> , <--  z  -->
   *
   * desired output:      C1 B1 A1 C2 , B2 A2 C3 B3 , A3 C4 B4 A4
   */
  for (i = 0; i < count / 4; i++, in += 3, out += 3) {
    guint32 x, y, z;

    /* all three input words are loaded before the first store, so the
     * loop is safe when dst == src */
    x = GUINT32_SWAP_LE_BE (in[0]);
    y = GUINT32_SWAP_LE_BE (in[1]);
    z = GUINT32_SWAP_LE_BE (in[2]);

#if G_BYTE_ORDER == G_BIG_ENDIAN
    out[0] = (x << 8) + ((y >> 8) & 0xff);
    out[1] = (in[1] & 0xff0000ff) + ((x >> 8) & 0xff0000) + ((z << 8) & 0xff00);
    out[2] = (z >> 8) + ((y << 8) & 0xff000000);
#else
    out[0] = (x >> 8) + ((y << 8) & 0xff000000);
    out[1] = (in[1] & 0xff0000ff) + ((x << 8) & 0xff00) + ((z >> 8) & 0xff0000);
    out[2] = (z << 8) + ((y >> 8) & 0xff);
#endif
  }

  /* convert the remaining (count % 4) samples less efficiently, byte by byte;
   * every sample occupies 3 bytes, so the byte offset has to advance by 3
   * per sample (not by 1) */
  out8 = (guint8 *) out;
  in8 = (const guint8 *) in;
  for (i = 0; i < (count & 3) * 3; i += 3) {
    guint8 x = in8[i + 0];

    out8[i + 0] = in8[i + 2];
    out8[i + 1] = in8[i + 1];
    out8[i + 2] = x;
  }
}
#endif
/* Byte-swap (LE<->BE) a run of @count 32-bit samples, reading from @src and
 * writing to @dst.
 * @dst may be the same buffer as @src, in which case the swap is in place.
 */
static void
converter_swap_endian_32 (gpointer dst, const gpointer src, gint count)
{
  const guint32 *rd = src;
  guint32 *wr = dst;
  gint remaining;

  /* each sample is fully read before its slot is written, so dst == src
   * is safe */
  for (remaining = count; remaining > 0; remaining--) {
    guint32 sample = *rd++;

    *wr++ = GUINT32_SWAP_LE_BE (sample);
  }
}
/* Byte-swap (LE<->BE) a run of @count 64-bit samples, reading from @src and
 * writing to @dst.
 * @dst may be the same buffer as @src, in which case the swap is in place.
 */
static void
converter_swap_endian_64 (gpointer dst, const gpointer src, gint count)
{
  const guint64 *rd = src;
  guint64 *wr = dst;
  gint remaining;

  /* each sample is fully read before its slot is written, so dst == src
   * is safe */
  for (remaining = count; remaining > 0; remaining--) {
    guint64 sample = *rd++;

    *wr++ = GUINT64_SWAP_LE_BE (sample);
  }
}
/* the worker function to perform endian-conversion only
 * assuming finfo and foutinfo have the same depth
 *
 * @in: array of input blocks, or NULL to produce silence instead
 * @out: array of output blocks; out[i] may equal in[i] for in-place conversion
 *
 * For every one of the chain->blocks blocks, in_frames * chain->inc samples
 * are byte-swapped from @in to @out through convert->swap_endian. When @in is
 * NULL the output blocks are filled with silence.
 *
 * Always returns TRUE.
 */
static gboolean
converter_endian (GstAudioConverter * convert,
    GstAudioConverterFlags flags, gpointer in[], gsize in_frames,
    gpointer out[], gsize out_frames)
{
  gint i;
  AudioChain *chain;
  gsize samples;

  chain = convert->chain_end;
  samples = in_frames * chain->inc;

  GST_LOG ("convert endian: %" G_GSIZE_FORMAT " / %" G_GSIZE_FORMAT " samples",
      in_frames, samples);

  if (in) {
    for (i = 0; i < chain->blocks; i++)
      convert->swap_endian (out[i], in[i], samples);
  } else {
    /* gst_audio_format_fill_silence() expects the length in bytes, not in
     * samples; width is the storage size of one sample in bits */
    gsize bytes = samples * (convert->in.finfo->width / 8);

    /* NOTE(review): the silence pattern is taken from the input format while
     * the buffer holds output-format data; for unsigned formats the pattern
     * is presumably byte-order specific, so the output format descriptor
     * would be the better choice here - confirm and switch if so */
    for (i = 0; i < chain->blocks; i++)
      gst_audio_format_fill_silence (convert->in.finfo, out[i], bytes);
  }
  return TRUE;
}
static gboolean static gboolean
converter_generic (GstAudioConverter * convert, converter_generic (GstAudioConverter * convert,
GstAudioConverterFlags flags, gpointer in[], gsize in_frames, GstAudioConverterFlags flags, gpointer in[], gsize in_frames,
@ -889,6 +1048,14 @@ converter_resample (GstAudioConverter * convert,
return TRUE; return TRUE;
} }
/* Evaluates to non-zero when the two format descriptors @info1 and @info2
 * differ in endianness while their width, their depth and all of their flags
 * other than GST_AUDIO_FORMAT_FLAG_UNPACK are equal.
 * Both arguments are evaluated more than once.
 */
#define GST_AUDIO_FORMAT_IS_ENDIAN_CONVERSION(info1, info2) \
  ( \
    (info1)->endianness != (info2)->endianness && \
    (info1)->depth == (info2)->depth && \
    (info1)->width == (info2)->width && \
    ((((info1)->flags ^ (info2)->flags) & (~GST_AUDIO_FORMAT_FLAG_UNPACK)) == 0) \
  )
/** /**
* gst_audio_converter_new: (skip) * gst_audio_converter_new: (skip)
* @flags: extra #GstAudioConverterFlags * @flags: extra #GstAudioConverterFlags
@ -950,18 +1117,50 @@ gst_audio_converter_new (GstAudioConverterFlags flags, GstAudioInfo * in_info,
convert->chain_end = chain_pack (convert, prev); convert->chain_end = chain_pack (convert, prev);
convert->convert = converter_generic; convert->convert = converter_generic;
convert->in_place = FALSE;
/* optimize */ /* optimize */
if (out_info->finfo->format == in_info->finfo->format if (convert->mix_passthrough) {
&& convert->mix_passthrough) { if (out_info->finfo->format == in_info->finfo->format) {
if (convert->resampler == NULL) { if (convert->resampler == NULL) {
GST_INFO GST_INFO
("same formats, no resampler and passthrough mixing -> passthrough"); ("same formats, no resampler and passthrough mixing -> passthrough");
convert->convert = converter_passthrough; convert->convert = converter_passthrough;
} else { convert->in_place = TRUE;
if (is_intermediate_format (in_info->finfo->format)) { } else {
GST_INFO ("same formats, and passthrough mixing -> only resampling"); if (is_intermediate_format (in_info->finfo->format)) {
convert->convert = converter_resample; GST_INFO ("same formats, and passthrough mixing -> only resampling");
convert->convert = converter_resample;
}
}
} else if (GST_AUDIO_FORMAT_IS_ENDIAN_CONVERSION (out_info->finfo,
in_info->finfo)) {
if (convert->resampler == NULL) {
GST_INFO ("no resampler, passthrough mixing -> only endian conversion");
convert->convert = converter_endian;
convert->in_place = TRUE;
switch (GST_AUDIO_INFO_BPS (in_info)) {
case 2:
GST_DEBUG ("initializing 16-bit endian conversion");
convert->swap_endian = converter_swap_endian_16;
break;
case 3:
GST_DEBUG ("initializing 24-bit endian conversion");
convert->swap_endian = converter_swap_endian_24;
break;
case 4:
GST_DEBUG ("initializing 32-bit endian conversion");
convert->swap_endian = converter_swap_endian_32;
break;
case 8:
GST_DEBUG ("initializing 64-bit endian conversion");
convert->swap_endian = converter_swap_endian_64;
break;
default:
GST_ERROR ("unsupported sample width for endian conversion");
g_assert_not_reached ();
}
} }
} }
} }
@ -1129,3 +1328,18 @@ gst_audio_converter_samples (GstAudioConverter * convert,
} }
return convert->convert (convert, flags, in, in_frames, out, out_frames); return convert->convert (convert, flags, in, in_frames, out, out_frames);
} }
/**
 * gst_audio_converter_supports_inplace:
 * @convert: a #GstAudioConverter
 *
 * Returns whether the audio converter can perform the conversion in-place.
 * The return value would be typically input to gst_base_transform_set_in_place()
 *
 * Returns: %TRUE when the conversion can be done in place, %FALSE otherwise
 * or when @convert is %NULL.
 */
gboolean
gst_audio_converter_supports_inplace (GstAudioConverter * convert)
{
  /* callers may query before checking that the converter was created */
  g_return_val_if_fail (convert != NULL, FALSE);

  return convert->in_place;
}

View file

@ -108,4 +108,6 @@ gboolean gst_audio_converter_samples (GstAudioConverter * co
gpointer in[], gsize in_frames, gpointer in[], gsize in_frames,
gpointer out[], gsize out_frames); gpointer out[], gsize out_frames);
gboolean gst_audio_converter_supports_inplace (GstAudioConverter *convert);
#endif /* __GST_AUDIO_CONVERTER_H__ */ #endif /* __GST_AUDIO_CONVERTER_H__ */

View file

@ -85,6 +85,8 @@ static gboolean gst_audio_convert_set_caps (GstBaseTransform * base,
GstCaps * incaps, GstCaps * outcaps); GstCaps * incaps, GstCaps * outcaps);
static GstFlowReturn gst_audio_convert_transform (GstBaseTransform * base, static GstFlowReturn gst_audio_convert_transform (GstBaseTransform * base,
GstBuffer * inbuf, GstBuffer * outbuf); GstBuffer * inbuf, GstBuffer * outbuf);
static GstFlowReturn gst_audio_convert_transform_ip (GstBaseTransform * base,
GstBuffer * buf);
static gboolean gst_audio_convert_transform_meta (GstBaseTransform * trans, static gboolean gst_audio_convert_transform_meta (GstBaseTransform * trans,
GstBuffer * outbuf, GstMeta * meta, GstBuffer * inbuf); GstBuffer * outbuf, GstMeta * meta, GstBuffer * inbuf);
static GstFlowReturn gst_audio_convert_submit_input_buffer (GstBaseTransform * static GstFlowReturn gst_audio_convert_submit_input_buffer (GstBaseTransform *
@ -176,6 +178,8 @@ gst_audio_convert_class_init (GstAudioConvertClass * klass)
GST_DEBUG_FUNCPTR (gst_audio_convert_set_caps); GST_DEBUG_FUNCPTR (gst_audio_convert_set_caps);
basetransform_class->transform = basetransform_class->transform =
GST_DEBUG_FUNCPTR (gst_audio_convert_transform); GST_DEBUG_FUNCPTR (gst_audio_convert_transform);
basetransform_class->transform_ip =
GST_DEBUG_FUNCPTR (gst_audio_convert_transform_ip);
basetransform_class->transform_meta = basetransform_class->transform_meta =
GST_DEBUG_FUNCPTR (gst_audio_convert_transform_meta); GST_DEBUG_FUNCPTR (gst_audio_convert_transform_meta);
basetransform_class->submit_input_buffer = basetransform_class->submit_input_buffer =
@ -646,6 +650,7 @@ gst_audio_convert_set_caps (GstBaseTransform * base, GstCaps * incaps,
GstAudioConvert *this = GST_AUDIO_CONVERT (base); GstAudioConvert *this = GST_AUDIO_CONVERT (base);
GstAudioInfo in_info; GstAudioInfo in_info;
GstAudioInfo out_info; GstAudioInfo out_info;
gboolean in_place;
GST_DEBUG_OBJECT (base, "incaps %" GST_PTR_FORMAT ", outcaps %" GST_DEBUG_OBJECT (base, "incaps %" GST_PTR_FORMAT ", outcaps %"
GST_PTR_FORMAT, incaps, outcaps); GST_PTR_FORMAT, incaps, outcaps);
@ -667,6 +672,9 @@ gst_audio_convert_set_caps (GstBaseTransform * base, GstCaps * incaps,
GST_AUDIO_CONVERTER_OPT_NOISE_SHAPING_METHOD, GST_AUDIO_CONVERTER_OPT_NOISE_SHAPING_METHOD,
GST_TYPE_AUDIO_NOISE_SHAPING_METHOD, this->ns, NULL)); GST_TYPE_AUDIO_NOISE_SHAPING_METHOD, this->ns, NULL));
in_place = gst_audio_converter_supports_inplace (this->convert);
gst_base_transform_set_in_place (base, in_place);
if (this->convert == NULL) if (this->convert == NULL)
goto no_converter; goto no_converter;
@ -693,6 +701,7 @@ no_converter:
} }
} }
/* if called through gst_audio_convert_transform_ip() inbuf == outbuf */
static GstFlowReturn static GstFlowReturn
gst_audio_convert_transform (GstBaseTransform * base, GstBuffer * inbuf, gst_audio_convert_transform (GstBaseTransform * base, GstBuffer * inbuf,
GstBuffer * outbuf) GstBuffer * outbuf)
@ -775,6 +784,12 @@ convert_error:
} }
} }
/* In-place transform: hands @buf to gst_audio_convert_transform() as both
 * the input and the output buffer and returns its result.
 */
static GstFlowReturn
gst_audio_convert_transform_ip (GstBaseTransform * base, GstBuffer * buf)
{
  GstFlowReturn ret;

  ret = gst_audio_convert_transform (base, buf, buf);

  return ret;
}
static gboolean static gboolean
gst_audio_convert_transform_meta (GstBaseTransform * trans, GstBuffer * outbuf, gst_audio_convert_transform_meta (GstBaseTransform * trans, GstBuffer * outbuf,
GstMeta * meta, GstBuffer * inbuf) GstMeta * meta, GstBuffer * inbuf)