From dbb857b93b4a879f9e3ec1ac83435bc0d9b78fbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Thu, 28 Jun 2007 20:37:58 +0000 Subject: [PATCH] gst/audioconvert/: Implement dithering and noise shaping in audioconvert. By default now Original commit message from CVS: * gst/audioconvert/Makefile.am: * gst/audioconvert/audioconvert.c: (audio_convert_get_func_index), (check_default), (audio_convert_prepare_context), (audio_convert_clean_context), (audio_convert_convert): * gst/audioconvert/audioconvert.h: * gst/audioconvert/gstaudioconvert.c: (gst_audio_convert_dithering_get_type), (gst_audio_convert_ns_get_type), (gst_audio_convert_class_init), (gst_audio_convert_init), (gst_audio_convert_set_caps), (gst_audio_convert_set_property), (gst_audio_convert_get_property): * gst/audioconvert/gstaudioconvert.h: * gst/audioconvert/gstaudioquantize.c: (gst_audio_quantize_setup_noise_shaping), (gst_audio_quantize_free_noise_shaping), (gst_audio_quantize_setup_dither), (gst_audio_quantize_free_dither), (gst_audio_quantize_setup_quantize_func), (gst_audio_quantize_setup), (gst_audio_quantize_free): * gst/audioconvert/gstaudioquantize.h: Implement dithering and noise shaping in audioconvert. By default now TPDF dithering (and no noise shaping) will be used when converting from a higher bit depth to 20 bit depth or smaller, otherwise everything will be as it is now. For the last audioconvert in a pipeline it would make sense to use some kind of noise shaping, enabling it by default for all conversions would give undesired results though. Fixes #360246. * tests/check/elements/audioconvert.c: (setup_audioconvert), (GST_START_TEST): Adjust unit test for the new audioconvert. 
--- ChangeLog | 32 ++ gst/audioconvert/Makefile.am | 2 + gst/audioconvert/audioconvert.c | 240 +++++++++---- gst/audioconvert/audioconvert.h | 54 ++- gst/audioconvert/gstaudioconvert.c | 109 +++++- gst/audioconvert/gstaudioconvert.h | 3 + gst/audioconvert/gstaudioquantize.c | 511 ++++++++++++++++++++++++++++ gst/audioconvert/gstaudioquantize.h | 37 ++ tests/check/elements/audioconvert.c | 16 +- 9 files changed, 920 insertions(+), 84 deletions(-) create mode 100644 gst/audioconvert/gstaudioquantize.c create mode 100644 gst/audioconvert/gstaudioquantize.h diff --git a/ChangeLog b/ChangeLog index 9035bbdcd3..cdf3c20372 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,35 @@ +2007-06-28 Sebastian Dröge + + * gst/audioconvert/Makefile.am: + * gst/audioconvert/audioconvert.c: (audio_convert_get_func_index), + (check_default), (audio_convert_prepare_context), + (audio_convert_clean_context), (audio_convert_convert): + * gst/audioconvert/audioconvert.h: + * gst/audioconvert/gstaudioconvert.c: + (gst_audio_convert_dithering_get_type), + (gst_audio_convert_ns_get_type), (gst_audio_convert_class_init), + (gst_audio_convert_init), (gst_audio_convert_set_caps), + (gst_audio_convert_set_property), (gst_audio_convert_get_property): + * gst/audioconvert/gstaudioconvert.h: + * gst/audioconvert/gstaudioquantize.c: + (gst_audio_quantize_setup_noise_shaping), + (gst_audio_quantize_free_noise_shaping), + (gst_audio_quantize_setup_dither), + (gst_audio_quantize_free_dither), + (gst_audio_quantize_setup_quantize_func), + (gst_audio_quantize_setup), (gst_audio_quantize_free): + * gst/audioconvert/gstaudioquantize.h: + Implement dithering and noise shaping in audioconvert. By default now + TPDF dithering (and no noise shaping) will be used when converting + from a higher bit depth to 20 bit depth or smaller, otherwise + everything will be as it is now. 
+ For the last audioconvert in a pipeline it would make sense to + use some kind of noise shaping, enabling it by default for all + conversions would give undesired results though. Fixes #360246. + * tests/check/elements/audioconvert.c: (setup_audioconvert), + (GST_START_TEST): + Adjust unit test for the new audioconvert. + 2007-06-28 Wim Taymans * gst/playback/gstqueue2.c: (apply_segment), (update_buffering): diff --git a/gst/audioconvert/Makefile.am b/gst/audioconvert/Makefile.am index d608b30d4a..7410a808bb 100644 --- a/gst/audioconvert/Makefile.am +++ b/gst/audioconvert/Makefile.am @@ -4,6 +4,7 @@ libgstaudioconvert_la_SOURCES = \ gstaudioconvert.c \ audioconvert.c \ gstchannelmix.c \ + gstaudioquantize.c \ plugin.c libgstaudioconvert_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) @@ -16,6 +17,7 @@ noinst_HEADERS = \ gstaudioconvert.h \ audioconvert.h \ gstchannelmix.h \ + gstaudioquantize.h \ plugin.h #TESTS = channelmixtest diff --git a/gst/audioconvert/audioconvert.c b/gst/audioconvert/audioconvert.c index 17599ecd60..e68da6e521 100644 --- a/gst/audioconvert/audioconvert.c +++ b/gst/audioconvert/audioconvert.c @@ -26,13 +26,10 @@ #include #include "gstchannelmix.h" +#include "gstaudioquantize.h" #include "audioconvert.h" #include "gst/floatcast/floatcast.h" -/* int to float/double conversion: int2xxx(i) = 1 / (2^31-1) * i */ -#define INT2FLOAT(i) (4.6566128752457969e-10 * ((gfloat)i)) -#define INT2DOUBLE(i) (4.6566128752457969e-10 * ((gdouble)i)) - /* sign bit in the intermediate format */ #define SIGNED (1U<<31) @@ -43,7 +40,7 @@ audio_convert_unpack_##name /* unpack from integer to signed integer 32 */ -#define MAKE_UNPACK_FUNC_II(name, stride, sign, READ_FUNC) \ +#define MAKE_UNPACK_FUNC_II(name, stride, sign, READ_FUNC) \ static void \ MAKE_UNPACK_FUNC_NAME (name) (guint8 *src, gint32 *dst, \ gint scale, gint count) \ @@ -63,7 +60,7 @@ MAKE_UNPACK_FUNC_NAME (name) (type * src, gint32 * dst, gint s, gint count) \ \ for (; count; 
count--) { \ /* blow up to 32 bit */ \ - temp = (READ_FUNC (*src++) * 2147483647.0) + 0.5; \ + temp = floor ((READ_FUNC (*src++) * 2147483647.0) + 0.5); \ *dst++ = (gint32) CLAMP (temp, G_MININT32, G_MAXINT32); \ } \ } @@ -78,6 +75,20 @@ MAKE_UNPACK_FUNC_NAME (name) (type * src, gdouble * dst, gint s, \ *dst++ = (gdouble) FUNC (*src++); \ } +/* unpack from int to float 64 (double) */ +#define MAKE_UNPACK_FUNC_IF(name, stride, sign, READ_FUNC) \ +static void \ +MAKE_UNPACK_FUNC_NAME (name) (guint8 * src, gdouble * dst, gint scale, \ + gint count) \ +{ \ + gdouble tmp; \ + for (; count; count--) { \ + tmp = (gdouble) ((((gint32) READ_FUNC (src)) << scale) ^ (sign)); \ + *dst++ = tmp * (1.0 / 2147483647.0); \ + src += stride; \ + } \ +} + #define READ8(p) GST_READ_UINT8(p) #define READ16_FROM_LE(p) GST_READ_UINT16_LE (p) #define READ16_FROM_BE(p) GST_READ_UINT16_BE (p) @@ -108,6 +119,20 @@ MAKE_UNPACK_FUNC_FF (float_hq_le, gfloat, GFLOAT_FROM_LE); MAKE_UNPACK_FUNC_FF (float_hq_be, gfloat, GFLOAT_FROM_BE); MAKE_UNPACK_FUNC_FF (double_hq_le, gdouble, GDOUBLE_FROM_LE); MAKE_UNPACK_FUNC_FF (double_hq_be, gdouble, GDOUBLE_FROM_BE); +MAKE_UNPACK_FUNC_IF (u8_float, 1, SIGNED, READ8); +MAKE_UNPACK_FUNC_IF (s8_float, 1, 0, READ8); +MAKE_UNPACK_FUNC_IF (u16_le_float, 2, SIGNED, READ16_FROM_LE); +MAKE_UNPACK_FUNC_IF (s16_le_float, 2, 0, READ16_FROM_LE); +MAKE_UNPACK_FUNC_IF (u16_be_float, 2, SIGNED, READ16_FROM_BE); +MAKE_UNPACK_FUNC_IF (s16_be_float, 2, 0, READ16_FROM_BE); +MAKE_UNPACK_FUNC_IF (u24_le_float, 3, SIGNED, READ24_FROM_LE); +MAKE_UNPACK_FUNC_IF (s24_le_float, 3, 0, READ24_FROM_LE); +MAKE_UNPACK_FUNC_IF (u24_be_float, 3, SIGNED, READ24_FROM_BE); +MAKE_UNPACK_FUNC_IF (s24_be_float, 3, 0, READ24_FROM_BE); +MAKE_UNPACK_FUNC_IF (u32_le_float, 4, SIGNED, READ32_FROM_LE); +MAKE_UNPACK_FUNC_IF (s32_le_float, 4, 0, READ32_FROM_LE); +MAKE_UNPACK_FUNC_IF (u32_be_float, 4, SIGNED, READ32_FROM_BE); +MAKE_UNPACK_FUNC_IF (s32_be_float, 4, 0, READ32_FROM_BE); /* One of the 
double_hq_* functions generated above is ineffecient, but it's * never used anyway. The same is true for one of the s32_* functions. */ @@ -122,64 +147,36 @@ audio_convert_pack_##name * These functions convert the signed 32 bit integers to the * target format. For this to work the following steps are done: * - * 1) If the output format is smaller than 32 bit we add 0.5LSB of - * the target format (i.e. 1<<(scale-1)) to get proper rounding. - * Shifting will result in rounding towards negative infinity (for - * signed values) or zero (for unsigned values). As we might overflow - * an overflow check is performed. - * Additionally, if our target format is signed and the value is smaller - * than zero we decrease it by one to round -X.5 downwards. - * This leads to the following rounding: - * -1.2 => -1 1.2 => 1 - * -1.5 => -2 1.5 => 2 - * -1.7 => -2 1.7 => 2 - * 2) If the output format is unsigned we will XOR the sign bit. This + * 1) If the output format is unsigned we will XOR the sign bit. This * will do the same as if we add 1<<31. - * 3) Afterwards we shift to the target depth. It's necessary to left-shift + * 2) Afterwards we shift to the target depth. It's necessary to left-shift * on signed values here to get arithmetical shifting. - * 4) This is then written into our target array by the corresponding write + * 3) This is then written into our target array by the corresponding write * function for the target width. 
*/ /* pack from signed integer 32 to integer */ #define MAKE_PACK_FUNC_II(name, stride, sign, WRITE_FUNC) \ static void \ -MAKE_PACK_FUNC_NAME (name) (gint32 *src, gpointer dst, \ +MAKE_PACK_FUNC_NAME (name) (gint32 *src, guint8 * dst, \ gint scale, gint count) \ { \ - guint8 *p = (guint8 *)dst; \ gint32 tmp; \ - if (scale > 0) { \ - guint32 bias = 1 << (scale - 1); \ - for (;count; count--) { \ - tmp = *src++; \ - if (tmp > 0 && G_MAXINT32 - tmp < bias) \ - tmp = G_MAXINT32; \ - else \ - tmp += bias; \ - if (sign == 0 && tmp < 0) \ - tmp--; \ - tmp = ((tmp) ^ (sign)) >> scale; \ - WRITE_FUNC (p, tmp); \ - p+=stride; \ - } \ - } else { \ - for (;count; count--) { \ - tmp = (*src++ ^ (sign)); \ - WRITE_FUNC (p, tmp); \ - p+=stride; \ - } \ + for (;count; count--) { \ + tmp = (*src++ ^ (sign)) >> scale; \ + WRITE_FUNC (dst, tmp); \ + dst += stride; \ } \ } /* pack from signed integer 32 to float */ -#define MAKE_PACK_FUNC_IF(name, type, FUNC, FUNC2) \ +#define MAKE_PACK_FUNC_IF(name, type, FUNC) \ static void \ MAKE_PACK_FUNC_NAME (name) (gint32 * src, type * dst, gint scale, \ gint count) \ { \ for (; count; count--) \ - *dst++ = FUNC (FUNC2 (*src++)); \ + *dst++ = FUNC ((type) ((*src++) * (1.0 / 2147483647.0))); \ } /* pack from float 64 (double) to float */ @@ -192,6 +189,42 @@ MAKE_PACK_FUNC_NAME (name) (gdouble * src, type * dst, gint s, \ *dst++ = FUNC ((type) (*src++)); \ } +/* pack from float 64 (double) to signed int. + * the floats are already in the correct range. Only a cast is needed. + */ +#define MAKE_PACK_FUNC_FI_S(name, stride, WRITE_FUNC) \ +static void \ +MAKE_PACK_FUNC_NAME (name) (gdouble * src, guint8 * dst, gint scale, \ + gint count) \ +{ \ + gint32 tmp; \ + for (; count; count--) { \ + tmp = (gint32) (*src); \ + WRITE_FUNC (dst, tmp); \ + src++; \ + dst += stride; \ + } \ +} + +/* pack from float 64 (double) to unsigned int. + * the floats are already in the correct range. 
Only a cast is needed + * and an addition of 2^(target_depth-1) to get in the correct unsigned + * range. */ +#define MAKE_PACK_FUNC_FI_U(name, stride, WRITE_FUNC) \ +static void \ +MAKE_PACK_FUNC_NAME (name) (gdouble * src, guint8 * dst, gint scale, \ + gint count) \ +{ \ + guint32 tmp; \ + gdouble limit = (1U<<(32-scale-1)); \ + for (; count; count--) { \ + tmp = (guint32) (*src + limit); \ + WRITE_FUNC (dst, tmp); \ + src++; \ + dst += stride; \ + } \ +} + #define WRITE8(p, v) GST_WRITE_UINT8 (p, v) #define WRITE16_TO_LE(p,v) GST_WRITE_UINT16_LE (p, (guint16)(v)) #define WRITE16_TO_BE(p,v) GST_WRITE_UINT16_BE (p, (guint16)(v)) @@ -214,12 +247,27 @@ MAKE_PACK_FUNC_II (u32_le, 4, SIGNED, WRITE32_TO_LE); MAKE_PACK_FUNC_II (s32_le, 4, 0, WRITE32_TO_LE); MAKE_PACK_FUNC_II (u32_be, 4, SIGNED, WRITE32_TO_BE); MAKE_PACK_FUNC_II (s32_be, 4, 0, WRITE32_TO_BE); -MAKE_PACK_FUNC_IF (float_le, gfloat, GFLOAT_TO_LE, INT2FLOAT); -MAKE_PACK_FUNC_IF (float_be, gfloat, GFLOAT_TO_BE, INT2FLOAT); -MAKE_PACK_FUNC_IF (double_le, gdouble, GDOUBLE_TO_LE, INT2DOUBLE); -MAKE_PACK_FUNC_IF (double_be, gdouble, GDOUBLE_TO_BE, INT2DOUBLE); +MAKE_PACK_FUNC_IF (float_le, gfloat, GFLOAT_TO_LE); +MAKE_PACK_FUNC_IF (float_be, gfloat, GFLOAT_TO_BE); +MAKE_PACK_FUNC_IF (double_le, gdouble, GDOUBLE_TO_LE); +MAKE_PACK_FUNC_IF (double_be, gdouble, GDOUBLE_TO_BE); MAKE_PACK_FUNC_FF (float_hq_le, gfloat, GFLOAT_TO_LE); MAKE_PACK_FUNC_FF (float_hq_be, gfloat, GFLOAT_TO_BE); +MAKE_PACK_FUNC_FI_U (u8_float, 1, WRITE8); +MAKE_PACK_FUNC_FI_S (s8_float, 1, WRITE8); +MAKE_PACK_FUNC_FI_U (u16_le_float, 2, WRITE16_TO_LE); +MAKE_PACK_FUNC_FI_S (s16_le_float, 2, WRITE16_TO_LE); +MAKE_PACK_FUNC_FI_U (u16_be_float, 2, WRITE16_TO_BE); +MAKE_PACK_FUNC_FI_S (s16_be_float, 2, WRITE16_TO_BE); +MAKE_PACK_FUNC_FI_U (u24_le_float, 3, WRITE24_TO_LE); +MAKE_PACK_FUNC_FI_S (s24_le_float, 3, WRITE24_TO_LE); +MAKE_PACK_FUNC_FI_U (u24_be_float, 3, WRITE24_TO_BE); +MAKE_PACK_FUNC_FI_S (s24_be_float, 3, WRITE24_TO_BE); 
+MAKE_PACK_FUNC_FI_U (u32_le_float, 4, WRITE32_TO_LE); +MAKE_PACK_FUNC_FI_S (s32_le_float, 4, WRITE32_TO_LE); +MAKE_PACK_FUNC_FI_U (u32_be_float, 4, WRITE32_TO_BE); +MAKE_PACK_FUNC_FI_S (s32_be_float, 4, WRITE32_TO_BE); + /* For double_hq, packing and unpacking is the same, so we reuse the unpacking * functions here. */ #define audio_convert_pack_double_hq_le MAKE_UNPACK_FUNC_NAME (double_hq_le) @@ -250,6 +298,22 @@ static AudioConvertUnpack unpack_funcs[] = { (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (float_hq_be), (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (double_hq_le), (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (double_hq_be), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (u8_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (s8_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (u8_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (s8_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (u16_le_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (s16_le_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (u16_be_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (s16_be_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (u24_le_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (s24_le_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (u24_be_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (s24_be_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (u32_le_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (s32_le_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (u32_be_float), + (AudioConvertUnpack) MAKE_UNPACK_FUNC_NAME (s32_be_float), }; static AudioConvertPack pack_funcs[] = { @@ -277,10 +341,29 @@ static AudioConvertPack pack_funcs[] = { (AudioConvertPack) MAKE_PACK_FUNC_NAME (float_hq_be), (AudioConvertPack) MAKE_PACK_FUNC_NAME (double_hq_le), (AudioConvertPack) MAKE_PACK_FUNC_NAME (double_hq_be), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (u8_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (s8_float), + (AudioConvertPack) 
MAKE_PACK_FUNC_NAME (u8_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (s8_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (u16_le_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (s16_le_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (u16_be_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (s16_be_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (u24_le_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (s24_le_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (u24_be_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (s24_be_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (u32_le_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (s32_le_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (u32_be_float), + (AudioConvertPack) MAKE_PACK_FUNC_NAME (s32_be_float), }; +#define DOUBLE_INTERMEDIATE_FORMAT(ctx) \ + ((!ctx->in.is_int && !ctx->out.is_int) || (ctx->ns != NOISE_SHAPING_NONE)) + static gint -audio_convert_get_func_index (AudioConvertFmt * fmt) +audio_convert_get_func_index (AudioConvertCtx * ctx, AudioConvertFmt * fmt) { gint index = 0; @@ -288,19 +371,22 @@ audio_convert_get_func_index (AudioConvertFmt * fmt) index += (fmt->width / 8 - 1) * 4; index += fmt->endianness == G_LITTLE_ENDIAN ? 0 : 2; index += fmt->sign ? 1 : 0; + index += (ctx->ns == NOISE_SHAPING_NONE) ? 0 : 24; } else { /* this is float/double */ index = 16; index += (fmt->width == 32) ? 0 : 2; index += (fmt->endianness == G_LITTLE_ENDIAN) ? 0 : 1; + index += (DOUBLE_INTERMEDIATE_FORMAT (ctx)) ? 
4 : 0; } + return index; } -static gboolean +static inline gboolean check_default (AudioConvertCtx * ctx, AudioConvertFmt * fmt) { - if (ctx->in.is_int || ctx->out.is_int) { + if (!DOUBLE_INTERMEDIATE_FORMAT (ctx)) { return (fmt->width == 32 && fmt->depth == 32 && fmt->endianness == G_BYTE_ORDER && fmt->sign == TRUE); } else { @@ -322,7 +408,7 @@ audio_convert_clean_fmt (AudioConvertFmt * fmt) gboolean audio_convert_prepare_context (AudioConvertCtx * ctx, AudioConvertFmt * in, - AudioConvertFmt * out) + AudioConvertFmt * out, DitherType dither, NoiseShapingType ns) { gint idx_in, idx_out; @@ -336,26 +422,40 @@ audio_convert_prepare_context (AudioConvertCtx * ctx, AudioConvertFmt * in, ctx->in = *in; ctx->out = *out; + /* Don't dither or apply noise shaping if out depth is bigger than 20 bits + * as DA converters only can do a SNR up to 20 bits in reality. + * Also don't dither or apply noise shaping if target depth is larger than + * source depth. */ + if (ctx->out.depth <= 20 && (!ctx->in.is_int + || ctx->in.depth >= ctx->out.depth)) { + ctx->dither = dither; + ctx->ns = ns; + } else { + ctx->dither = DITHER_NONE; + ctx->ns = NOISE_SHAPING_NONE; + } + + /* Use simple error feedback when output sample rate is smaller than + * 32000 as the other methods might move the noise to audible ranges */ + if (ctx->ns > NOISE_SHAPING_ERROR_FEEDBACK && ctx->out.rate < 32000) + ctx->ns = NOISE_SHAPING_ERROR_FEEDBACK; + gst_channel_mix_setup_matrix (ctx); - idx_in = audio_convert_get_func_index (in); + idx_in = audio_convert_get_func_index (ctx, in); ctx->unpack = unpack_funcs[idx_in]; - idx_out = audio_convert_get_func_index (out); + idx_out = audio_convert_get_func_index (ctx, out); ctx->pack = pack_funcs[idx_out]; - /* if both formats are float/double use double as intermediate format and - * and switch mixing */ - if (in->is_int || out->is_int) { + /* if both formats are float/double or we use noise shaping use double as + * intermediate format and switch mixing */ + if 
(!DOUBLE_INTERMEDIATE_FORMAT (ctx)) { GST_INFO ("use int mixing"); ctx->channel_mix = (AudioConvertMix) gst_channel_mix_mix_int; } else { GST_INFO ("use float mixing"); ctx->channel_mix = (AudioConvertMix) gst_channel_mix_mix_float; - /* Bump the pack/unpack function indices by 4 to use double as intermediary - * format (float_hq_*, double_hq_* functions).*/ - ctx->unpack = unpack_funcs[idx_in + 4]; - ctx->pack = pack_funcs[idx_out + 4]; } GST_INFO ("unitsizes: %d -> %d", in->unit_size, out->unit_size); @@ -372,6 +472,8 @@ audio_convert_prepare_context (AudioConvertCtx * ctx, AudioConvertFmt * in, ctx->in_scale = (in->is_int) ? (32 - in->depth) : 0; ctx->out_scale = (out->is_int) ? (32 - out->depth) : 0; + gst_audio_quantize_setup (ctx); + return TRUE; } @@ -380,6 +482,7 @@ audio_convert_clean_context (AudioConvertCtx * ctx) { g_return_val_if_fail (ctx != NULL, FALSE); + gst_audio_quantize_free (ctx); audio_convert_clean_fmt (&ctx->in); audio_convert_clean_fmt (&ctx->out); gst_channel_mix_unset_matrix (ctx); @@ -425,12 +528,12 @@ audio_convert_convert (AudioConvertCtx * ctx, gpointer src, outsize = ctx->out.unit_size * samples; /* find biggest temp buffer size */ - size = (ctx->in.is_int || ctx->out.is_int) ? - sizeof (gint32) : sizeof (gdouble); + size = (DOUBLE_INTERMEDIATE_FORMAT (ctx)) ? 
sizeof (gdouble) + : sizeof (gint32); if (!ctx->in_default) intemp = insize * size * 8 / ctx->in.width; - if (!ctx->mix_passthrough) + if (!ctx->mix_passthrough || !ctx->out_default) outtemp = outsize * size * 8 / ctx->out.width; biggest = MAX (intemp, outtemp); @@ -474,6 +577,15 @@ audio_convert_convert (AudioConvertCtx * ctx, gpointer src, src = outbuf; } + /* we only need to quantize if output format is int */ + if (ctx->out.is_int) { + if (ctx->out_default) + outbuf = dst; + else + outbuf = tmpbuf; + ctx->quantize (ctx, src, outbuf, samples); + } + if (!ctx->out_default) { /* pack default format into dst */ ctx->pack (src, dst, ctx->out_scale, samples * ctx->out.channels); diff --git a/gst/audioconvert/audioconvert.h b/gst/audioconvert/audioconvert.h index 34e90329d7..7e79f5a421 100644 --- a/gst/audioconvert/audioconvert.h +++ b/gst/audioconvert/audioconvert.h @@ -25,6 +25,23 @@ #include #include +typedef enum +{ + DITHER_NONE = 0, + DITHER_RPDF, + DITHER_TPDF, + DITHER_TPDF_HF +} DitherType; + +typedef enum +{ + NOISE_SHAPING_NONE = 0, + NOISE_SHAPING_ERROR_FEEDBACK, + NOISE_SHAPING_SIMPLE, + NOISE_SHAPING_MEDIUM, + NOISE_SHAPING_HIGH +} NoiseShapingType; + typedef struct _AudioConvertCtx AudioConvertCtx; typedef struct _AudioConvertFmt AudioConvertFmt; @@ -45,10 +62,14 @@ struct _AudioConvertFmt gint unit_size; }; -typedef void (*AudioConvertUnpack) (gpointer src, gpointer dst, gint scale, gint count); -typedef void (*AudioConvertPack) (gpointer src, gpointer dst, gint scale, gint count); +typedef void (*AudioConvertUnpack) (gpointer src, gpointer dst, gint scale, + gint count); +typedef void (*AudioConvertPack) (gpointer src, gpointer dst, gint scale, + gint count); typedef void (*AudioConvertMix) (AudioConvertCtx *, gpointer, gpointer, gint); +typedef void (*AudioConvertQuantize) (AudioConvertCtx * ctx, gpointer src, + gpointer dst, gint count); struct _AudioConvertCtx { @@ -73,20 +94,31 @@ struct _AudioConvertCtx gint in_scale; gint out_scale; - + 
AudioConvertMix channel_mix; + + AudioConvertQuantize quantize; + DitherType dither; + NoiseShapingType ns; + /* random number generator for dither noise */ + GRand *dither_random; + /* last random number generated per channel for hifreq TPDF dither */ + gpointer last_random; + /* contains the past quantization errors, error[out_channels][count] */ + gdouble *error_buf; }; -gboolean audio_convert_clean_fmt (AudioConvertFmt *fmt); +gboolean audio_convert_clean_fmt (AudioConvertFmt * fmt); -gboolean audio_convert_prepare_context (AudioConvertCtx *ctx, AudioConvertFmt *in, - AudioConvertFmt *out); -gboolean audio_convert_get_sizes (AudioConvertCtx *ctx, gint samples, gint *srcsize, - gint *dstsize); +gboolean audio_convert_prepare_context (AudioConvertCtx * ctx, + AudioConvertFmt * in, AudioConvertFmt * out, DitherType dither, + NoiseShapingType ns); +gboolean audio_convert_get_sizes (AudioConvertCtx * ctx, gint samples, + gint * srcsize, gint * dstsize); -gboolean audio_convert_clean_context (AudioConvertCtx *ctx); +gboolean audio_convert_clean_context (AudioConvertCtx * ctx); -gboolean audio_convert_convert (AudioConvertCtx *ctx, gpointer src, - gpointer dst, gint samples, gboolean src_writable); +gboolean audio_convert_convert (AudioConvertCtx * ctx, gpointer src, + gpointer dst, gint samples, gboolean src_writable); #endif /* __AUDIO_CONVERT_H__ */ diff --git a/gst/audioconvert/gstaudioconvert.c b/gst/audioconvert/gstaudioconvert.c index d2a20670d4..278b1b75da 100644 --- a/gst/audioconvert/gstaudioconvert.c +++ b/gst/audioconvert/gstaudioconvert.c @@ -74,6 +74,7 @@ #include "gstaudioconvert.h" #include "gstchannelmix.h" +#include "gstaudioquantize.h" #include "plugin.h" GST_DEBUG_CATEGORY (audio_convert_debug); @@ -102,6 +103,11 @@ static GstFlowReturn gst_audio_convert_transform (GstBaseTransform * base, GstBuffer * inbuf, GstBuffer * outbuf); static GstFlowReturn gst_audio_convert_transform_ip (GstBaseTransform * base, GstBuffer * buf); +static void 
gst_audio_convert_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); +static void gst_audio_convert_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec); + /* AudioConvert signals and args */ enum @@ -113,7 +119,8 @@ enum enum { ARG_0, - ARG_AGGRESSIVE + ARG_DITHERING, + ARG_NOISE_SHAPING, }; #define DEBUG_INIT(bla) \ @@ -179,6 +186,50 @@ GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_ALWAYS, STATIC_CAPS); +#define GST_TYPE_AUDIO_CONVERT_DITHERING (gst_audio_convert_dithering_get_type ()) +static GType +gst_audio_convert_dithering_get_type (void) +{ + static GType gtype = 0; + + if (gtype == 0) { + static const GEnumValue values[] = { + {DITHER_NONE, "No dithering", + "none"}, + {DITHER_RPDF, "Rectangular dithering", "rpdf"}, + {DITHER_TPDF, "Triangular dithering (default)", "tpdf"}, + {DITHER_TPDF_HF, "High frequency triangular dithering", "tpdf-hf"}, + {0, NULL, NULL} + }; + + gtype = g_enum_register_static ("GstAudioConvertDithering", values); + } + return gtype; +} + +#define GST_TYPE_AUDIO_CONVERT_NOISE_SHAPING (gst_audio_convert_ns_get_type ()) +static GType +gst_audio_convert_ns_get_type (void) +{ + static GType gtype = 0; + + if (gtype == 0) { + static const GEnumValue values[] = { + {NOISE_SHAPING_NONE, "No noise shaping (default)", + "none"}, + {NOISE_SHAPING_ERROR_FEEDBACK, "Error feedback", "error-feedback"}, + {NOISE_SHAPING_SIMPLE, "Simple 2-pole noise shaping", "simple"}, + {NOISE_SHAPING_MEDIUM, "Medium 5-pole noise shaping", "medium"}, + {NOISE_SHAPING_HIGH, "High 8-pole noise shaping", "high"}, + {0, NULL, NULL} + }; + + gtype = g_enum_register_static ("GstAudioConvertNoiseShaping", values); + } + return gtype; +} + + /*** TYPE FUNCTIONS ***********************************************************/ static void @@ -201,12 +252,25 @@ gst_audio_convert_class_init (GstAudioConvertClass * klass) gint i; gobject_class->dispose = gst_audio_convert_dispose; + gobject_class->set_property = 
gst_audio_convert_set_property; + gobject_class->get_property = gst_audio_convert_get_property; supported_positions = g_new0 (GstAudioChannelPosition, GST_AUDIO_CHANNEL_POSITION_NUM); for (i = 0; i < GST_AUDIO_CHANNEL_POSITION_NUM; i++) supported_positions[i] = i; + g_object_class_install_property (gobject_class, ARG_DITHERING, + g_param_spec_enum ("dithering", "Dithering", + "Selects between different dithering methods.", + GST_TYPE_AUDIO_CONVERT_DITHERING, DITHER_TPDF, G_PARAM_READWRITE)); + + g_object_class_install_property (gobject_class, ARG_NOISE_SHAPING, + g_param_spec_enum ("noise-shaping", "Noise shaping", + "Selects between different noise shaping methods.", + GST_TYPE_AUDIO_CONVERT_NOISE_SHAPING, NOISE_SHAPING_NONE, + G_PARAM_READWRITE)); + basetransform_class->get_unit_size = GST_DEBUG_FUNCPTR (gst_audio_convert_get_unit_size); basetransform_class->transform_caps = @@ -226,6 +290,8 @@ gst_audio_convert_class_init (GstAudioConvertClass * klass) static void gst_audio_convert_init (GstAudioConvert * this, GstAudioConvertClass * g_class) { + this->dither = DITHER_TPDF; + this->ns = NOISE_SHAPING_NONE; memset (&this->ctx, 0, sizeof (AudioConvertCtx)); } @@ -672,7 +738,8 @@ gst_audio_convert_set_caps (GstBaseTransform * base, GstCaps * incaps, if (!gst_audio_convert_parse_caps (outcaps, &out_ac_caps)) return FALSE; - if (!audio_convert_prepare_context (&this->ctx, &in_ac_caps, &out_ac_caps)) + if (!audio_convert_prepare_context (&this->ctx, &in_ac_caps, &out_ac_caps, + this->dither, this->ns)) goto no_converter; return TRUE; @@ -753,3 +820,41 @@ convert_error: return GST_FLOW_ERROR; } } + +static void +gst_audio_convert_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstAudioConvert *this = GST_AUDIO_CONVERT (object); + + switch (prop_id) { + case ARG_DITHERING: + this->dither = g_value_get_enum (value); + break; + case ARG_NOISE_SHAPING: + this->ns = g_value_get_enum (value); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +gst_audio_convert_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + GstAudioConvert *this = GST_AUDIO_CONVERT (object); + + switch (prop_id) { + case ARG_DITHERING: + g_value_set_enum (value, this->dither); + break; + case ARG_NOISE_SHAPING: + g_value_set_enum (value, this->ns); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} diff --git a/gst/audioconvert/gstaudioconvert.h b/gst/audioconvert/gstaudioconvert.h index 91733cc4df..dcab33231c 100644 --- a/gst/audioconvert/gstaudioconvert.h +++ b/gst/audioconvert/gstaudioconvert.h @@ -47,6 +47,9 @@ struct _GstAudioConvert GstBaseTransform element; AudioConvertCtx ctx; + + DitherType dither; + NoiseShapingType ns; }; struct _GstAudioConvertClass diff --git a/gst/audioconvert/gstaudioquantize.c b/gst/audioconvert/gstaudioquantize.c new file mode 100644 index 0000000000..2198845d91 --- /dev/null +++ b/gst/audioconvert/gstaudioquantize.c @@ -0,0 +1,511 @@ +/* GStreamer + * Copyright (C) 2007 Sebastian Dröge + * + * gstaudioquantize.c: quantizes audio to the target format and optionally + * applies dithering and noise shaping. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * FIXME: When doing dithering with int as intermediate format + * one gets audible harmonics while the noise floor is + * constant for double as intermediate format! + */ + +/* TODO: - Maybe drop 5-pole noise shaping and use coefficients + * generated by dmaker + * http://shibatch.sf.net + */ + +#include +#include +#include +#include "audioconvert.h" +#include "gstaudioquantize.h" + +#define MAKE_QUANTIZE_FUNC_NAME(name) \ +gst_audio_quantize_quantize_##name + +/* Quantize functions for gint32 as intermediate format */ + +#define MAKE_QUANTIZE_FUNC_I(name, DITHER_INIT_FUNC, ADD_DITHER_FUNC, \ + ROUND_FUNC) \ +static void \ +MAKE_QUANTIZE_FUNC_NAME (name) (AudioConvertCtx *ctx, gint32 *src, \ + gint32 *dst, gint count) \ +{ \ + gint scale = ctx->out_scale; \ + gint channels = ctx->out.channels; \ + gint chan_pos; \ + \ + if (scale > 0) { \ + gint32 tmp; \ + guint32 mask = 0xffffffff & (0xffffffff << scale); \ + guint32 bias = 1U << (scale - 1); \ + DITHER_INIT_FUNC(); \ + \ + for (;count;count--) { \ + for (chan_pos = 0; chan_pos < channels; chan_pos++) { \ + tmp = *src++; \ + ADD_DITHER_FUNC(); \ + ROUND_FUNC(); \ + *dst = tmp & mask; \ + dst++; \ + } \ + } \ + } else { \ + for (;count;count--) { \ + for (chan_pos = 0; chan_pos < channels; chan_pos++) { \ + *dst = *src++; \ + dst++; \ + } \ + } \ + } \ +} + + +/* Quantize functions for gdouble as intermediate format with + * int as target */ + +#define MAKE_QUANTIZE_FUNC_F(name, DITHER_INIT_FUNC, NS_INIT_FUNC, \ + ADD_NS_FUNC, ADD_DITHER_FUNC, \ + UPDATE_ERROR_FUNC) \ +static void \ +MAKE_QUANTIZE_FUNC_NAME (name) (AudioConvertCtx *ctx, gdouble *src, \ + gdouble *dst, gint count) \ +{ \ + gint scale = ctx->out_scale; \ + gint channels = ctx->out.channels; \ + gint chan_pos; \ + 
+ gdouble factor = (1U<<(32-scale-1)) - 1; \ + \ + if (scale > 0) { \ + gdouble tmp; \ + DITHER_INIT_FUNC(); \ + NS_INIT_FUNC(); \ + \ + for (;count;count--) { \ + for (chan_pos = 0; chan_pos < channels; chan_pos++) { \ + tmp = *src++; \ + ADD_NS_FUNC(); \ + ADD_DITHER_FUNC(); \ + tmp = floor(tmp * factor + 0.5); \ + *dst = CLAMP (tmp, -factor - 1, factor); \ + UPDATE_ERROR_FUNC(); \ + dst++; \ + } \ + } \ + } else { \ + for (;count;count--) { \ + for (chan_pos = 0; chan_pos < channels; chan_pos++) { \ + *dst = *src++ * 2147483647.0; \ + dst++; \ + } \ + } \ + } \ +} + +/* Rounding functions for int as intermediate format, only used when + * not using dithering. With dithering we include this offset in our + * dither noise instead. */ + +#define ROUND() \ + if (tmp > 0 && G_MAXINT32 - tmp <= bias) \ + tmp = G_MAXINT32; \ + else \ + tmp += bias; + + +#define NONE_FUNC() + +/* Dithering definitions + * See http://en.wikipedia.org/wiki/Dithering or + * http://www.cadenzarecording.com/Dither.html for explanations. + * + * We already add the rounding offset to the dither noise here + * to have only one overflow check instead of two. 
*/ + /* RPDF, gint32 path: one g_rand_int_range draw around bias (width set by dither = 1 << scale) is added to tmp, saturating at G_MAXINT32 / G_MININT32. */ +#define INIT_DITHER_RPDF_I() \ + gint32 rand; \ + gint32 dither = (1<<(scale)); + +#define ADD_DITHER_RPDF_I() \ + rand = g_rand_int_range (ctx->dither_random, bias - dither, \ + bias + dither); \ + if (rand > 0 && tmp > 0 && G_MAXINT32 - tmp <= rand) \ + tmp = G_MAXINT32; \ + else if (rand < 0 && tmp < 0 && G_MININT32 - tmp >= rand) \ + tmp = G_MININT32; \ + else \ + tmp += rand; + /* RPDF, gdouble path: one g_rand_double_range draw between -dither and dither is added to tmp. */ +#define INIT_DITHER_RPDF_F() \ + gdouble dither = 1.0/(1U<<(32 - scale - 1)); + +#define ADD_DITHER_RPDF_F() \ + tmp += g_rand_double_range (ctx->dither_random, - dither, \ + dither); + /* TPDF, gint32 path: the dither is the sum of two draws; bias is halved first and enters through both draws. The sum is added with the same saturation checks as RPDF. */ +#define INIT_DITHER_TPDF_I() \ + gint32 rand; \ + gint32 dither = (1<<(scale - 1)); \ + bias = bias >> 1; + +#define ADD_DITHER_TPDF_I() \ + rand = g_rand_int_range (ctx->dither_random, bias - dither, \ + bias + dither - 1) \ + + g_rand_int_range (ctx->dither_random, bias - dither, \ + bias + dither - 1); \ + if (rand > 0 && tmp > 0 && G_MAXINT32 - tmp <= rand) \ + tmp = G_MAXINT32; \ + else if (rand < 0 && tmp < 0 && G_MININT32 - tmp >= rand) \ + tmp = G_MININT32; \ + else \ + tmp += rand; + /* TPDF, gdouble path: the sum of two g_rand_double_range draws between -dither and dither is added to tmp. */ +#define INIT_DITHER_TPDF_F() \ + gdouble dither = 1.0/(1U<<(32 - scale)); + +#define ADD_DITHER_TPDF_F() \ + tmp += g_rand_double_range (ctx->dither_random, - dither, \ + dither) \ + + g_rand_double_range (ctx->dither_random, - dither, \ + dither); + /* TPDF-HF, gint32 path: the dither is the current draw minus the previous draw of the same channel; the previous draws live in ctx->last_random, read here as a gint32 array indexed by chan_pos. */ +#define INIT_DITHER_TPDF_HF_I() \ + gint32 rand; \ + gint32 dither = (1<<(scale-1)); \ + gint32 *last_random = (gint32 *) ctx->last_random, tmp_rand; \ + bias = bias >> 1; + +#define ADD_DITHER_TPDF_HF_I() \ + tmp_rand = g_rand_int_range (ctx->dither_random, bias - dither, \ + bias + dither); \ + rand = tmp_rand - last_random[chan_pos]; \ + last_random[chan_pos] = tmp_rand; \ + if (rand > 0 && tmp > 0 && G_MAXINT32 - tmp <= rand) \ + tmp = G_MAXINT32; \ + else if (rand < 0 && tmp < 0 && G_MININT32 - tmp >= rand) \ + tmp = G_MININT32; \ + else \ + tmp += rand; + +/* Like TPDF dither but the dither noise is oriented more to the + * higher frequencies */ + 
+#define INIT_DITHER_TPDF_HF_F() \ + gdouble rand; \ + gdouble dither = 1.0/(1U<<(32 - scale)); \ + gdouble *last_random = (gdouble *) ctx->last_random, tmp_rand; + /* gdouble path of the TPDF-HF dither: adds the current draw minus the previous draw of the same channel. ctx->last_random is read as a gdouble array indexed by chan_pos, so it must hold one gdouble per channel. */ +#define ADD_DITHER_TPDF_HF_F() \ + tmp_rand = g_rand_double_range (ctx->dither_random, - dither, \ + dither); \ + rand = tmp_rand - last_random[chan_pos]; \ + last_random[chan_pos] = tmp_rand; \ + tmp += rand; + +/* Noise shaping definitions. + * See http://en.wikipedia.org/wiki/Noise_shaping for explanations. + * + * All variants keep their state in ctx->error_buf (gdouble) and are only + * usable with MAKE_QUANTIZE_FUNC_F, whose locals tmp, dst, factor and + * chan_pos they reference. */ + + +/* Simple error feedback: Just accumulate the dithering and quantization + * error and remove it from each sample. + * One accumulator per channel: errors[chan_pos]. */ + +#define INIT_NS_ERROR_FEEDBACK() \ + gdouble orig; \ + gdouble *errors = ctx->error_buf; + +#define ADD_NS_ERROR_FEEDBACK() \ + orig = tmp; \ + tmp -= errors[chan_pos]; + +#define UPDATE_ERROR_ERROR_FEEDBACK() \ + errors[chan_pos] += (*dst)/factor - orig; + +/* Like error feedback, but the correction is the last error minus 1/2 of + * the error before it. + * This moves the noise a bit more into the higher frequencies. + * Two slots per channel: errors[chan_pos*2] is the most recent error, + * errors[chan_pos*2 + 1] the one before. */ + +#define INIT_NS_SIMPLE() \ + gdouble orig; \ + gdouble *errors = ctx->error_buf, cur_error; + +#define ADD_NS_SIMPLE() \ + cur_error = errors[chan_pos*2] - 0.5 * errors[chan_pos*2 + 1]; \ + tmp -= cur_error; \ + orig = tmp; + +#define UPDATE_ERROR_SIMPLE() \ + errors[chan_pos*2 + 1] = errors[chan_pos*2]; \ + errors[chan_pos*2] = (*dst)/factor - orig; + + +/* Noise shaping coefficients from [1], moves most power of the + * error noise into inaudible frequency ranges. + * + * [1] + * "Minimally Audible Noise Shaping", Stanley P. Lipshitz, + * John Vanderkooy, and Robert A. Wannamaker, + * J. Audio Eng. Soc., Vol. 39, No. 11, November 1991. 
*/ + +static const gdouble ns_medium_coeffs[] = { + 2.033, -2.165, 1.959, -1.590, 0.6149 +}; + +#define INIT_NS_MEDIUM() \ + gdouble orig; \ + gdouble *errors = ctx->error_buf, cur_error; \ + int j; + +#define ADD_NS_MEDIUM() \ + cur_error = 0.0; \ + for (j = 0; j < 5; j++) \ + cur_error += errors[chan_pos*5 + j] * ns_medium_coeffs[j]; \ + tmp -= cur_error; \ + orig = tmp; + +#define UPDATE_ERROR_MEDIUM() \ + for (j = 4; j > 0; j--) \ + errors[chan_pos*5 + j] = errors[chan_pos*5 + j-1]; \ + errors[chan_pos*5] = (*dst)/factor - orig; + +/* Noise shaping coefficients by David Schleef, moves most power of the + * error noise into inaudible frequency ranges */ + +static const gdouble ns_high_coeffs[] = { + 2.08484, -2.92975, 3.27918, -3.31399, 2.61339, -1.72008, 0.876066, -0.340122 +}; + +#define INIT_NS_HIGH() \ + gdouble orig; \ + gdouble *errors = ctx->error_buf, cur_error; \ + int j; + +#define ADD_NS_HIGH() \ + cur_error = 0.0; \ + for (j = 0; j < 8; j++) \ + cur_error += errors[chan_pos + j] * ns_high_coeffs[j]; \ + tmp -= cur_error; \ + orig = tmp; + +#define UPDATE_ERROR_HIGH() \ + for (j = 7; j > 0; j--) \ + errors[chan_pos + j] = errors[chan_pos + j-1]; \ + errors[chan_pos] = (*dst)/factor - orig; + + +MAKE_QUANTIZE_FUNC_I (signed_none_none, NONE_FUNC, NONE_FUNC, ROUND); +MAKE_QUANTIZE_FUNC_I (signed_rpdf_none, INIT_DITHER_RPDF_I, ADD_DITHER_RPDF_I, + NONE_FUNC); +MAKE_QUANTIZE_FUNC_I (signed_tpdf_none, INIT_DITHER_TPDF_I, ADD_DITHER_TPDF_I, + NONE_FUNC); +MAKE_QUANTIZE_FUNC_I (signed_tpdf_hf_none, INIT_DITHER_TPDF_HF_I, + ADD_DITHER_TPDF_HF_I, NONE_FUNC); + +MAKE_QUANTIZE_FUNC_I (unsigned_none_none, NONE_FUNC, NONE_FUNC, ROUND); +MAKE_QUANTIZE_FUNC_I (unsigned_rpdf_none, INIT_DITHER_RPDF_I, ADD_DITHER_RPDF_I, + NONE_FUNC); +MAKE_QUANTIZE_FUNC_I (unsigned_tpdf_none, INIT_DITHER_TPDF_I, ADD_DITHER_TPDF_I, + NONE_FUNC); +MAKE_QUANTIZE_FUNC_I (unsigned_tpdf_hf_none, INIT_DITHER_TPDF_HF_I, + ADD_DITHER_TPDF_HF_I, NONE_FUNC); + +MAKE_QUANTIZE_FUNC_F 
(float_none_error_feedback, NONE_FUNC, + INIT_NS_ERROR_FEEDBACK, ADD_NS_ERROR_FEEDBACK, NONE_FUNC, + UPDATE_ERROR_ERROR_FEEDBACK); +MAKE_QUANTIZE_FUNC_F (float_none_simple, NONE_FUNC, INIT_NS_SIMPLE, + ADD_NS_SIMPLE, NONE_FUNC, UPDATE_ERROR_SIMPLE); +MAKE_QUANTIZE_FUNC_F (float_none_medium, NONE_FUNC, INIT_NS_MEDIUM, + ADD_NS_MEDIUM, NONE_FUNC, UPDATE_ERROR_MEDIUM); +MAKE_QUANTIZE_FUNC_F (float_none_high, NONE_FUNC, INIT_NS_HIGH, ADD_NS_HIGH, + NONE_FUNC, UPDATE_ERROR_HIGH); + /* gdouble quantizers: RPDF dither combined with each noise-shaping variant. */ +MAKE_QUANTIZE_FUNC_F (float_rpdf_error_feedback, INIT_DITHER_RPDF_F, + INIT_NS_ERROR_FEEDBACK, ADD_NS_ERROR_FEEDBACK, ADD_DITHER_RPDF_F, + UPDATE_ERROR_ERROR_FEEDBACK); +MAKE_QUANTIZE_FUNC_F (float_rpdf_simple, INIT_DITHER_RPDF_F, INIT_NS_SIMPLE, + ADD_NS_SIMPLE, ADD_DITHER_RPDF_F, UPDATE_ERROR_SIMPLE); +MAKE_QUANTIZE_FUNC_F (float_rpdf_medium, INIT_DITHER_RPDF_F, INIT_NS_MEDIUM, + ADD_NS_MEDIUM, ADD_DITHER_RPDF_F, UPDATE_ERROR_MEDIUM); +MAKE_QUANTIZE_FUNC_F (float_rpdf_high, INIT_DITHER_RPDF_F, INIT_NS_HIGH, + ADD_NS_HIGH, ADD_DITHER_RPDF_F, UPDATE_ERROR_HIGH); + /* gdouble quantizers: TPDF dither combined with each noise-shaping variant. */ +MAKE_QUANTIZE_FUNC_F (float_tpdf_error_feedback, INIT_DITHER_TPDF_F, + INIT_NS_ERROR_FEEDBACK, ADD_NS_ERROR_FEEDBACK, ADD_DITHER_TPDF_F, + UPDATE_ERROR_ERROR_FEEDBACK); +MAKE_QUANTIZE_FUNC_F (float_tpdf_simple, INIT_DITHER_TPDF_F, INIT_NS_SIMPLE, + ADD_NS_SIMPLE, ADD_DITHER_TPDF_F, UPDATE_ERROR_SIMPLE); +MAKE_QUANTIZE_FUNC_F (float_tpdf_medium, INIT_DITHER_TPDF_F, INIT_NS_MEDIUM, + ADD_NS_MEDIUM, ADD_DITHER_TPDF_F, UPDATE_ERROR_MEDIUM); +MAKE_QUANTIZE_FUNC_F (float_tpdf_high, INIT_DITHER_TPDF_F, INIT_NS_HIGH, + ADD_NS_HIGH, ADD_DITHER_TPDF_F, UPDATE_ERROR_HIGH); + /* gdouble quantizers: TPDF-HF dither combined with each noise-shaping variant. */ +MAKE_QUANTIZE_FUNC_F (float_tpdf_hf_error_feedback, INIT_DITHER_TPDF_HF_F, + INIT_NS_ERROR_FEEDBACK, ADD_NS_ERROR_FEEDBACK, ADD_DITHER_TPDF_HF_F, + UPDATE_ERROR_ERROR_FEEDBACK); +MAKE_QUANTIZE_FUNC_F (float_tpdf_hf_simple, INIT_DITHER_TPDF_HF_F, + INIT_NS_SIMPLE, ADD_NS_SIMPLE, ADD_DITHER_TPDF_HF_F, UPDATE_ERROR_SIMPLE); +MAKE_QUANTIZE_FUNC_F (float_tpdf_hf_medium, 
INIT_DITHER_TPDF_HF_F, + INIT_NS_MEDIUM, ADD_NS_MEDIUM, ADD_DITHER_TPDF_HF_F, UPDATE_ERROR_MEDIUM); +MAKE_QUANTIZE_FUNC_F (float_tpdf_hf_high, INIT_DITHER_TPDF_HF_F, INIT_NS_HIGH, + ADD_NS_HIGH, ADD_DITHER_TPDF_HF_F, UPDATE_ERROR_HIGH); + /* Lookup table used by gst_audio_quantize_setup_quantize_func. Entries 0-3 are the signed and 4-7 the unsigned gint32 variants (one per dither mode); entries 8 and up are the gdouble variants, four noise-shaping modes per dither mode. The order must stay in sync with the index arithmetic in that function. NOTE(review): this assumes the dither and noise-shaping enums are ordered none/rpdf/tpdf/tpdf_hf and none/error_feedback/simple/medium/high; they are declared in audioconvert.h, confirm there. */ +static AudioConvertQuantize quantize_funcs[] = { + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (signed_none_none), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (signed_rpdf_none), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (signed_tpdf_none), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (signed_tpdf_hf_none), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (unsigned_none_none), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (unsigned_rpdf_none), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (unsigned_tpdf_none), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (unsigned_tpdf_hf_none), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_none_error_feedback), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_none_simple), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_none_medium), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_none_high), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_rpdf_error_feedback), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_rpdf_simple), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_rpdf_medium), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_rpdf_high), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_tpdf_error_feedback), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_tpdf_simple), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_tpdf_medium), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_tpdf_high), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_tpdf_hf_error_feedback), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_tpdf_hf_simple), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME (float_tpdf_hf_medium), + (AudioConvertQuantize) MAKE_QUANTIZE_FUNC_NAME 
(float_tpdf_hf_high), +}; + +static void +gst_audio_quantize_setup_noise_shaping (AudioConvertCtx * ctx) +{ + switch (ctx->ns) { + case NOISE_SHAPING_HIGH:{ + ctx->error_buf = g_new0 (gdouble, ctx->out.channels * 8); + break; + } + case NOISE_SHAPING_MEDIUM:{ + ctx->error_buf = g_new0 (gdouble, ctx->out.channels * 5); + break; + } + case NOISE_SHAPING_SIMPLE:{ + ctx->error_buf = g_new0 (gdouble, ctx->out.channels * 2); + break; + } + case NOISE_SHAPING_ERROR_FEEDBACK: + ctx->error_buf = g_new0 (gdouble, ctx->out.channels); + break; + case NOISE_SHAPING_NONE: + default: + ctx->error_buf = NULL; + break; + } + return; +} + +static void +gst_audio_quantize_free_noise_shaping (AudioConvertCtx * ctx) +{ + switch (ctx->ns) { + case NOISE_SHAPING_HIGH: + case NOISE_SHAPING_MEDIUM: + case NOISE_SHAPING_SIMPLE: + case NOISE_SHAPING_ERROR_FEEDBACK: + case NOISE_SHAPING_NONE: + default: + break; + } + return; + + g_free (ctx->error_buf); + ctx->error_buf = NULL; + return; +} + +static void +gst_audio_quantize_setup_dither (AudioConvertCtx * ctx) +{ + switch (ctx->dither) { + case DITHER_TPDF_HF: + if (ctx->out.is_int) + ctx->last_random = g_new0 (gint32, ctx->out.channels); + else + ctx->last_random = g_new0 (gdouble, ctx->out.channels); + ctx->dither_random = g_rand_new (); + break; + case DITHER_RPDF: + case DITHER_TPDF: + ctx->dither_random = g_rand_new (); + ctx->last_random = NULL; + break; + case DITHER_NONE: + default: + ctx->dither_random = NULL; + ctx->last_random = NULL; + break; + } + return; +} + +static void +gst_audio_quantize_free_dither (AudioConvertCtx * ctx) +{ + g_free (ctx->last_random); + if (ctx->dither_random) + g_rand_free (ctx->dither_random); + + return; +} + +static void +gst_audio_quantize_setup_quantize_func (AudioConvertCtx * ctx) +{ + gint index = 0; + + if (!ctx->out.is_int) { + ctx->quantize = NULL; + return; + } + + if (ctx->ns == NOISE_SHAPING_NONE) { + index += ctx->dither; + index += (ctx->out.sign) ? 
0 : 4; + } else { + index += 8 + (4 * ctx->dither); + index += ctx->ns - 1; + } + + ctx->quantize = quantize_funcs[index]; +} + +gboolean +gst_audio_quantize_setup (AudioConvertCtx * ctx) +{ + gst_audio_quantize_setup_dither (ctx); + gst_audio_quantize_setup_noise_shaping (ctx); + gst_audio_quantize_setup_quantize_func (ctx); + + return TRUE; +} + +void +gst_audio_quantize_free (AudioConvertCtx * ctx) +{ + gst_audio_quantize_free_dither (ctx); + gst_audio_quantize_free_noise_shaping (ctx); +} diff --git a/gst/audioconvert/gstaudioquantize.h b/gst/audioconvert/gstaudioquantize.h new file mode 100644 index 0000000000..a61003bd77 --- /dev/null +++ b/gst/audioconvert/gstaudioquantize.h @@ -0,0 +1,37 @@ +/* GStreamer + * Copyright (C) 2007 Sebastian Dröge + * + * gstaudioquantize.h: quantizes audio to the target format and optionally + * applies dithering and noise shaping. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */ + +#include +#include "audioconvert.h" + +GST_DEBUG_CATEGORY_EXTERN (audio_convert_debug); +#define GST_CAT_DEFAULT (audio_convert_debug) + +#ifndef __GST_AUDIO_QUANTIZE_H__ +#define __GST_AUDIO_QUANTIZE_H__ + +gboolean gst_audio_quantize_setup (AudioConvertCtx * ctx); +void gst_audio_quantize_reset (AudioConvertCtx * ctx); +void gst_audio_quantize_free (AudioConvertCtx * ctx); + + +#endif /* __GST_AUDIO_QUANTIZE_H__ */ diff --git a/tests/check/elements/audioconvert.c b/tests/check/elements/audioconvert.c index 41f23c0d6e..7bc750ab1d 100644 --- a/tests/check/elements/audioconvert.c +++ b/tests/check/elements/audioconvert.c @@ -87,6 +87,8 @@ setup_audioconvert (GstCaps * outcaps) GST_DEBUG ("setup_audioconvert with caps %" GST_PTR_FORMAT, outcaps); audioconvert = gst_check_setup_element ("audioconvert"); + g_object_set (G_OBJECT (audioconvert), "dithering", 0, NULL); + g_object_set (G_OBJECT (audioconvert), "noise-shaping", 0, NULL); mysrcpad = gst_check_setup_src_pad (audioconvert, &srctemplate, NULL); mysinkpad = gst_check_setup_sink_pad (audioconvert, &sinktemplate, NULL); /* this installs a getcaps func that will always return the caps we set @@ -532,7 +534,7 @@ GST_START_TEST (test_int_conversion) gint16 out[] = { 0, G_MININT16, G_MAXINT16, 32, 33, 32, 33, 31, - -32, -33, + -31, -32, -31, -33, -32 }; @@ -633,9 +635,9 @@ GST_START_TEST (test_float_conversion) { gint16 in[] = { 0, -32768, 16384, -16384 }; gdouble out[] = { 0.0, - 4.6566128752457969e-10 * (gdouble) (-32768L << 16), /* ~ -1.0 */ - 4.6566128752457969e-10 * (gdouble) (16384L << 16), /* ~ 0.5 */ - 4.6566128752457969e-10 * (gdouble) (-16384L << 16), /* ~ -0.5 */ + (gdouble) (-32768L << 16) / 2147483647.0, /* ~ -1.0 */ + (gdouble) (16384L << 16) / 2147483647.0, /* ~ 0.5 */ + (gdouble) (-16384L << 16) / 2147483647.0, /* ~ -0.5 */ }; RUN_CONVERSION ("16 signed to 64 float", @@ -645,9 +647,9 @@ GST_START_TEST (test_float_conversion) { gint32 in[] = { 0, (-1L << 31), (1L << 30), (-1L << 30) }; gdouble 
out[] = { 0.0, - 4.6566128752457969e-10 * (gdouble) (-1L << 31), /* ~ -1.0 */ - 4.6566128752457969e-10 * (gdouble) (1L << 30), /* ~ 0.5 */ - 4.6566128752457969e-10 * (gdouble) (-1L << 30), /* ~ -0.5 */ + (gdouble) (-1L << 31) / 2147483647.0, /* ~ -1.0 */ + (gdouble) (1L << 30) / 2147483647.0, /* ~ 0.5 */ + (gdouble) (-1L << 30) / 2147483647.0, /* ~ -0.5 */ }; RUN_CONVERSION ("32 signed to 64 float",