mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-25 11:11:08 +00:00
audioresample: fix SSE2 building with double precision
The full double implementation was missing. https://bugzilla.gnome.org/show_bug.cgi?id=636562
This commit is contained in:
parent
72b92af807
commit
58fd202b7d
1 changed files with 48 additions and 0 deletions
|
@ -75,6 +75,22 @@ static inline float interpolate_product_single(const float *a, const float *b, u
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#define OVERRIDE_INNER_PRODUCT_DOUBLE
|
#define OVERRIDE_INNER_PRODUCT_DOUBLE
|
||||||
|
|
||||||
|
#ifdef DOUBLE_PRECISION
|
||||||
|
/*
 * Dot product of two double arrays using SSE2 (double-precision build).
 *
 * a, b  input arrays, each holding at least len doubles; no alignment is
 *       required because unaligned loads are used.
 * len   number of elements to multiply and accumulate.
 *
 * Returns sum over i in [0, len) of a[i] * b[i].
 *
 * The vector loop consumes four elements per iteration. Any remaining
 * 1..3 elements are handled in scalar code, so a len that is not a
 * multiple of four no longer reads past the end of the arrays.
 */
static inline double inner_product_double(const double *a, const double *b, unsigned int len)
{
  unsigned int i;
  /* Largest multiple of 4 that does not exceed len. */
  unsigned int vec_len = len & ~3u;
  double ret;
  __m128d sum = _mm_setzero_pd();

  for (i = 0; i < vec_len; i += 4)
  {
    sum = _mm_add_pd(sum, _mm_mul_pd(_mm_loadu_pd(a+i), _mm_loadu_pd(b+i)));
    sum = _mm_add_pd(sum, _mm_mul_pd(_mm_loadu_pd(a+i+2), _mm_loadu_pd(b+i+2)));
  }

  /* Horizontal add: fold the high lane into the low lane. */
  sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
  _mm_store_sd(&ret, sum);

  /* Scalar tail for the elements the vector loop could not cover. */
  for (; i < len; i++)
  {
    ret += a[i] * b[i];
  }
  return ret;
}
|
||||||
|
#else
|
||||||
static inline double inner_product_double(const float *a, const float *b, unsigned int len)
|
static inline double inner_product_double(const float *a, const float *b, unsigned int len)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -95,8 +111,39 @@ static inline double inner_product_double(const float *a, const float *b, unsign
|
||||||
_mm_store_sd(&ret, sum);
|
_mm_store_sd(&ret, sum);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
|
#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
|
||||||
|
|
||||||
|
#ifdef DOUBLE_PRECISION
|
||||||
|
/*
 * Four-tap interpolated inner product using SSE2 (double-precision build).
 *
 * For each j in 0..3 this accumulates
 *     acc[j] = sum over i in [0, len) of a[i] * b[i*oversample + j]
 * and returns frac[0]*acc[0] + frac[1]*acc[1] + frac[2]*acc[2] + frac[3]*acc[3].
 *
 * a           input samples, at least len doubles.
 * b           filter table; b[i*oversample .. i*oversample + 3] is read for
 *             every i in [0, len).
 * len         number of input samples.
 * oversample  stride, in table entries, between the taps of consecutive samples.
 * frac        four interpolation weights.
 *
 * A __m128d holds only two doubles, so taps 0..1 and taps 2..3 must be
 * loaded separately. The earlier code derived the second accumulator from
 * the high lane of the first product, which weighted tap 1 three times and
 * never read taps 2 and 3.
 */
static inline double interpolate_product_double(const double *a, const double *b, unsigned int len, const spx_uint32_t oversample, double *frac) {
  unsigned int i;
  double ret;
  __m128d sum;
  __m128d sum1 = _mm_setzero_pd();   /* lanes: acc[0], acc[1] */
  __m128d sum2 = _mm_setzero_pd();   /* lanes: acc[2], acc[3] */
  __m128d f1 = _mm_loadu_pd(frac);
  __m128d f2 = _mm_loadu_pd(frac+2);

  for (i = 0; i < len; i++)
  {
    const double *taps = b + i*oversample;
    /* Broadcast the current sample to both lanes. */
    __m128d sample = _mm_load1_pd(a+i);

    sum1 = _mm_add_pd(sum1, _mm_mul_pd(sample, _mm_loadu_pd(taps)));
    sum2 = _mm_add_pd(sum2, _mm_mul_pd(sample, _mm_loadu_pd(taps+2)));
  }

  /* Apply the interpolation weights, then reduce the four lanes to one value. */
  sum1 = _mm_mul_pd(f1, sum1);
  sum2 = _mm_mul_pd(f2, sum2);
  sum = _mm_add_pd(sum1, sum2);
  sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
  _mm_store_sd(&ret, sum);
  return ret;
}
|
||||||
|
#else
|
||||||
static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
|
static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
|
||||||
int i;
|
int i;
|
||||||
double ret;
|
double ret;
|
||||||
|
@ -124,5 +171,6 @@ static inline double interpolate_product_double(const float *a, const float *b,
|
||||||
_mm_store_sd(&ret, sum);
|
_mm_store_sd(&ret, sum);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue