From 746415a6e3a40d4d688aa25bcbb7062b54dfd4a8 Mon Sep 17 00:00:00 2001 From: Vincent Penquerc'h Date: Thu, 11 Aug 2011 15:54:15 +0100 Subject: [PATCH] audioresample: use SSE/SSE2 when possible Compile in the code on i386 and x86_64, and use ORC to determine when the runtime platform can run the code. https://bugzilla.gnome.org/show_bug.cgi?id=636562 --- configure.ac | 2 + gst/audioresample/resample.c | 99 ++++++++++++++++++++-- gst/audioresample/resample_sse.h | 4 + gst/audioresample/speex_resampler_double.c | 1 + gst/audioresample/speex_resampler_float.c | 2 + 5 files changed, 99 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index f63700e8da..f14abe19eb 100644 --- a/configure.ac +++ b/configure.ac @@ -213,6 +213,8 @@ LIBS="$save_libs" dnl used in gst-libs/gst/pbutils and associated unit test AC_CHECK_HEADERS([process.h sys/types.h sys/wait.h sys/stat.h]) +AC_CHECK_HEADERS([xmmintrin.h emmintrin.h]) + dnl ffmpegcolorspace includes _stdint.h dnl also, Windows does not have long long AX_CREATE_STDINT_H diff --git a/gst/audioresample/resample.c b/gst/audioresample/resample.c index a10c9f698d..ca4960804e 100644 --- a/gst/audioresample/resample.c +++ b/gst/audioresample/resample.c @@ -64,10 +64,30 @@ #ifdef OUTSIDE_SPEEX #include <stdlib.h> +#ifdef HAVE_STRING_H +#include <string.h> +#endif + #include <glib.h> +#ifdef HAVE_ORC +#include <orc/orc.h> +#endif + #define EXPORT G_GNUC_INTERNAL +#ifdef _USE_SSE +#ifndef HAVE_XMMINTRIN_H +#undef _USE_SSE +#endif +#endif + +#ifdef _USE_SSE2 +#ifndef HAVE_EMMINTRIN_H +#undef _USE_SSE2 +#endif +#endif + static inline void * speex_alloc (int size) { @@ -110,7 +130,7 @@ speex_free (void *ptr) #define NULL 0 #endif -#ifdef _USE_SSE +#if defined _USE_SSE || defined _USE_SSE2 #include "resample_sse.h" #endif @@ -121,6 +141,28 @@ speex_free (void *ptr) #define FIXED_STACK_ALLOC 1024 #endif +/* Allow selecting SSE or not when compiled with SSE support */ +#ifdef _USE_SSE +#define SSE_FALLBACK(macro) \ + if (st->use_sse) goto sse_##macro##_sse; { +#define 
SSE_IMPLEMENTATION(macro) \ + goto sse_##macro##_end; } sse_##macro##_sse: { +#define SSE_END(macro) sse_##macro##_end:; } +#else +#define SSE_FALLBACK(macro) +#endif + +#ifdef _USE_SSE2 +#define SSE2_FALLBACK(macro) \ + if (st->use_sse2) goto sse2_##macro##_sse2; { +#define SSE2_IMPLEMENTATION(macro) \ + goto sse2_##macro##_end; } sse2_##macro##_sse2: { +#define SSE2_END(macro) sse2_##macro##_end:; } +#else +#define SSE2_FALLBACK(macro) +#endif + + typedef int (*resampler_basic_func) (SpeexResamplerState *, spx_uint32_t, const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *); @@ -155,6 +197,9 @@ struct SpeexResamplerState_ int in_stride; int out_stride; + + int use_sse:1; + int use_sse2:1; }; static double kaiser12_table[68] = { @@ -410,7 +455,7 @@ resampler_basic_direct_single (SpeexResamplerState * st, const spx_word16_t *sinc = &sinc_table[samp_frac_num * N]; const spx_word16_t *iptr = &in[last_sample]; -#ifndef OVERRIDE_INNER_PRODUCT_SINGLE + SSE_FALLBACK (INNER_PRODUCT_SINGLE) sum = 0; for (j = 0; j < N; j++) sum += MULT16_16 (sinc[j], iptr[j]); @@ -427,8 +472,10 @@ resampler_basic_direct_single (SpeexResamplerState * st, } sum = accum[0] + accum[1] + accum[2] + accum[3]; */ -#else +#ifdef OVERRIDE_INNER_PRODUCT_SINGLE + SSE_IMPLEMENTATION (INNER_PRODUCT_SINGLE) sum = inner_product_single (sinc, iptr, N); + SSE_END(INNER_PRODUCT_SINGLE) #endif out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 15), 32767); @@ -471,7 +518,7 @@ resampler_basic_direct_double (SpeexResamplerState * st, const spx_word16_t *sinc = &sinc_table[samp_frac_num * N]; const spx_word16_t *iptr = &in[last_sample]; -#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE + SSE2_FALLBACK (INNER_PRODUCT_DOUBLE) double accum[4] = { 0, 0, 0, 0 }; for (j = 0; j < N; j += 4) { @@ -481,8 +528,10 @@ resampler_basic_direct_double (SpeexResamplerState * st, accum[3] += sinc[j + 3] * iptr[j + 3]; } sum = accum[0] + accum[1] + accum[2] + accum[3]; -#else +#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE + 
SSE2_IMPLEMENTATION (INNER_PRODUCT_DOUBLE) sum = inner_product_double (sinc, iptr, N); + SSE2_END (INNER_PRODUCT_DOUBLE) #endif out[out_stride * out_sample++] = PSHR32 (sum, 15); @@ -534,7 +583,7 @@ resampler_basic_interpolate_single (SpeexResamplerState * st, spx_word16_t interp[4]; -#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE + SSE_FALLBACK (INTERPOLATE_PRODUCT_SINGLE) spx_word32_t accum[4] = { 0, 0, 0, 0 }; for (j = 0; j < N; j++) { @@ -559,12 +608,14 @@ resampler_basic_interpolate_single (SpeexResamplerState * st, 1)) + MULT16_32_Q15 (interp[1], SHR32 (accum[1], 1)) + MULT16_32_Q15 (interp[2], SHR32 (accum[2], 1)) + MULT16_32_Q15 (interp[3], SHR32 (accum[3], 1)); -#else +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE + SSE_IMPLEMENTATION (INTERPOLATE_PRODUCT_SINGLE) cubic_coef (frac, interp); sum = interpolate_product_single (iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); + SSE_END (INTERPOLATE_PRODUCT_SINGLE) #endif out[out_stride * out_sample++] = SATURATE32 (PSHR32 (sum, 14), 32767); @@ -624,7 +675,7 @@ resampler_basic_interpolate_double (SpeexResamplerState * st, spx_word16_t interp[4]; -#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE + SSE2_FALLBACK (INTERPOLATE_PRODUCT_DOUBLE) double accum[4] = { 0, 0, 0, 0 }; for (j = 0; j < N; j++) { @@ -648,12 +699,14 @@ resampler_basic_interpolate_double (SpeexResamplerState * st, MULT16_32_Q15 (interp[0], accum[0]) + MULT16_32_Q15 (interp[1], accum[1]) + MULT16_32_Q15 (interp[2], accum[2]) + MULT16_32_Q15 (interp[3], accum[3]); -#else +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE + SSE2_IMPLEMENTATION (INTERPOLATE_PRODUCT_DOUBLE) cubic_coef (frac, interp); sum = interpolate_product_double (iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); + SSE2_END (INTERPOLATE_PRODUCT_DOUBLE) #endif out[out_stride * out_sample++] = PSHR32 (sum, 15); @@ -875,6 +928,17 @@ speex_resampler_init (spx_uint32_t nb_channels, spx_uint32_t in_rate, out_rate, quality, err); } 
+static void +check_insn_set (SpeexResamplerState * st, const char *name) +{ + if (!name) + return; + if (!strcmp (name, "sse")) + st->use_sse = 1; + if (!strcmp (name, "sse2")) + st->use_sse = st->use_sse2 = 1; +} + EXPORT SpeexResamplerState * speex_resampler_init_frac (spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, @@ -912,6 +976,23 @@ speex_resampler_init_frac (spx_uint32_t nb_channels, spx_uint32_t ratio_num, st->buffer_size = 160; #endif + st->use_sse = st->use_sse2 = 0; +#if defined HAVE_ORC && !defined DISABLE_ORC + orc_init (); + { + OrcTarget *target = orc_target_get_default (); + if (target) { + unsigned int flags = orc_target_get_default_flags (target); + check_insn_set (st, orc_target_get_name (target)); + for (i = 0; i < 32; ++i) { + if (flags & (1 << i)) { + check_insn_set (st, orc_target_get_flag_name (target, i)); + } + } + } + } +#endif + /* Per channel data */ st->last_sample = (spx_int32_t *) speex_alloc (nb_channels * sizeof (int)); st->magic_samples = (spx_uint32_t *) speex_alloc (nb_channels * sizeof (int)); diff --git a/gst/audioresample/resample_sse.h b/gst/audioresample/resample_sse.h index c418ca1d5c..36522a3659 100644 --- a/gst/audioresample/resample_sse.h +++ b/gst/audioresample/resample_sse.h @@ -34,7 +34,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#ifdef HAVE_XMMINTRIN_H #include <xmmintrin.h> +#endif #define OVERRIDE_INNER_PRODUCT_SINGLE static inline float inner_product_single(const float *a, const float *b, unsigned int len) @@ -72,7 +74,9 @@ static inline float interpolate_product_single(const float *a, const float *b, u } #ifdef _USE_SSE2 +#ifdef HAVE_EMMINTRIN_H #include <emmintrin.h> +#endif #define OVERRIDE_INNER_PRODUCT_DOUBLE #ifdef DOUBLE_PRECISION diff --git a/gst/audioresample/speex_resampler_double.c b/gst/audioresample/speex_resampler_double.c index e5a25714ca..ef2503d609 100644 --- a/gst/audioresample/speex_resampler_double.c +++ b/gst/audioresample/speex_resampler_double.c @@ -17,6 +17,7 @@ * Boston, MA 02111-1307, USA. */ +#define _USE_SSE2 #define FLOATING_POINT #define DOUBLE_PRECISION #define OUTSIDE_SPEEX diff --git a/gst/audioresample/speex_resampler_float.c b/gst/audioresample/speex_resampler_float.c index f13f60c5b0..ef3df1551b 100644 --- a/gst/audioresample/speex_resampler_float.c +++ b/gst/audioresample/speex_resampler_float.c @@ -17,6 +17,8 @@ * Boston, MA 02111-1307, USA. */ +#define _USE_SSE +#define _USE_SSE2 #define FLOATING_POINT #define OUTSIDE_SPEEX #define RANDOM_PREFIX resample_float