audio-resampler: add float stereo SSE function

This commit is contained in:
Wim Taymans 2016-01-18 12:52:41 +01:00
parent e74c207433
commit 2555317a71
2 changed files with 58 additions and 3 deletions

View file

@ -39,7 +39,35 @@ inner_product_gfloat_1_sse (gfloat * o, const gfloat * a, const gfloat * b, gint
_mm_store_ss (o, sum);
}
/*
 * SSE inner product for interleaved stereo float samples.
 *
 * o:   receives two floats, o[0] and o[1], the per-channel sums.
 * a:   interleaved stereo samples, two floats per frame (read unaligned).
 * b:   one coefficient per frame (read with aligned 16-byte loads).
 * len: number of frames/coefficients to accumulate.
 *
 * Computes o[c] = sum over i of a[2 * i + c] * b[i] for c = 0, 1.
 *
 * The loop consumes 8 frames per iteration and has no remainder handling,
 * so it reads up to the next multiple of 8 coefficients (and 16 samples).
 * NOTE(review): callers presumably pad and align the buffers accordingly;
 * confirm against the tap allocation code.
 */
static inline void
inner_product_gfloat_2_sse (gfloat * o, const gfloat * a, const gfloat * b, gint len)
{
  gint i = 0;
  __m128 sum = _mm_setzero_ps (), t;

  for (; i < len; i += 8) {
    /* unpacklo/unpackhi duplicate each coefficient (b0 b0 b1 b1 ...) so it
     * lines up with both channels of its interleaved frame. */
    t = _mm_load_ps (b + i);
    sum =
        _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 0),
            _mm_unpacklo_ps (t, t)));
    sum =
        _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 4),
            _mm_unpackhi_ps (t, t)));
    t = _mm_load_ps (b + i + 4);
    sum =
        _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 8),
            _mm_unpacklo_ps (t, t)));
    sum =
        _mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 12),
            _mm_unpackhi_ps (t, t)));
  }
  /* Lanes 0/2 hold channel 0 partial sums and lanes 1/3 hold channel 1;
   * fold the upper half onto the lower so lanes 0 and 1 are the totals. */
  sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
  /* Store the two low floats with a plain SSE store.  This avoids the
   * SSE2/x86-64-only _mm_cvtsi128_si64, the compiler-specific vector cast
   * and writing a float buffer through a gint64 pointer. */
  _mm_storel_pi ((__m64 *) o, sum);
}
/* Instantiate the SSE float resample functions for 1 and 2 channels.
 * NOTE(review): MAKE_RESAMPLE_FUNC is defined outside this view; it
 * presumably generates resample_gfloat_<N>_sse around the matching
 * inner_product_gfloat_<N>_sse -- confirm against the macro definition. */
MAKE_RESAMPLE_FUNC (gfloat, 1, sse);
MAKE_RESAMPLE_FUNC (gfloat, 2, sse);
#endif
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
@ -212,12 +240,14 @@ audio_resampler_check_x86 (const gchar *option)
#if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
GST_DEBUG ("enable SSE optimisations");
resample_gfloat_1 = resample_gfloat_1_sse;
resample_gfloat_2 = resample_gfloat_2_sse;
#endif
} else if (!strcmp (option, "sse2")) {
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
GST_DEBUG ("enable SSE2 optimisations");
resample_gint16_1 = resample_gint16_1_sse2;
resample_gfloat_1 = resample_gfloat_1_sse;
resample_gfloat_2 = resample_gfloat_2_sse;
resample_gdouble_1 = resample_gdouble_1_sse2;
resample_gint16_2 = resample_gint16_2_sse2;
resample_gdouble_2 = resample_gdouble_2_sse2;

View file

@ -423,6 +423,21 @@ inner_product_gfloat_1_c (gfloat * o, const gfloat * a, const gfloat * b,
*o = res;
}
/*
 * Plain C inner product for interleaved stereo float samples.
 *
 * a holds two floats per frame, b holds one coefficient per frame.  For
 * each of the len frames both samples are multiplied by that frame's
 * coefficient and accumulated; the two totals are written to o[0] and
 * o[1].  With len <= 0 both outputs are 0.
 */
static inline void
inner_product_gfloat_2_c (gfloat * o, const gfloat * a, const gfloat * b,
    gint len)
{
  gfloat acc0 = 0.0, acc1 = 0.0;
  gint n;

  for (n = 0; n < len; n++) {
    const gfloat *frame = a + 2 * n;

    acc0 += frame[0] * b[n];
    acc1 += frame[1] * b[n];
  }

  o[0] = acc0;
  o[1] = acc1;
}
static inline void
inner_product_gdouble_1_c (gdouble * o, const gdouble * a, const gdouble * b,
gint len)
@ -498,6 +513,7 @@ MAKE_RESAMPLE_FUNC (gint32, 1, c);
/* Instantiate the plain C resample functions per sample type and channel
 * count; the resulting resample_<type>_<N>_c names populate resample_funcs.
 * NOTE(review): MAKE_RESAMPLE_FUNC is defined outside this view; it
 * presumably wraps the matching inner_product_<type>_<N>_c -- confirm. */
MAKE_RESAMPLE_FUNC (gfloat, 1, c);
MAKE_RESAMPLE_FUNC (gdouble, 1, c);
MAKE_RESAMPLE_FUNC (gint16, 2, c);
MAKE_RESAMPLE_FUNC (gfloat, 2, c);
MAKE_RESAMPLE_FUNC (gdouble, 2, c);
static ResampleFunc resample_funcs[] = {
@ -506,6 +522,7 @@ static ResampleFunc resample_funcs[] = {
resample_gfloat_1_c,
resample_gdouble_1_c,
resample_gint16_2_c,
resample_gfloat_2_c,
resample_gdouble_2_c,
};
@ -514,7 +531,8 @@ static ResampleFunc resample_funcs[] = {
#define resample_gfloat_1 resample_funcs[2]
#define resample_gdouble_1 resample_funcs[3]
#define resample_gint16_2 resample_funcs[4]
#define resample_gdouble_2 resample_funcs[5]
#define resample_gfloat_2 resample_funcs[5]
#define resample_gdouble_2 resample_funcs[6]
#if defined HAVE_ORC && !defined DISABLE_ORC
# if defined (__i386__) || defined (__x86_64__)
@ -739,8 +757,15 @@ resampler_calculate_taps (GstAudioResampler * resampler)
}
break;
case GST_AUDIO_FORMAT_F32:
resampler->resample = resample_gfloat_1;
resampler->deinterleave = deinterleave_gfloat;
if (!non_interleaved && resampler->channels == 2 && n_taps >= 4) {
resampler->resample = resample_gfloat_2;
resampler->deinterleave = deinterleave_copy;
resampler->blocks = 1;
resampler->inc = resampler->channels;;
} else {
resampler->resample = resample_gfloat_1;
resampler->deinterleave = deinterleave_gfloat;
}
break;
case GST_AUDIO_FORMAT_S32:
resampler->resample = resample_gint32_1;