resample: fix double interpolation SSE code

We were only reading 2 filter taps per input sample, but cubic
interpolation needs 4.
This commit is contained in:
Wim Taymans 2016-02-11 18:01:40 +01:00
parent 5cef3f31ad
commit 188c0811de

View file

@@ -168,13 +168,13 @@ static inline double interpolate_product_double(const double *a, const double *b
{ {
for(;i<len-1;i+=2) for(;i<len-1;i+=2)
{ {
t = _mm_mul_pd(_mm_load1_pd(a+i), _mm_loadu_pd(b+i*oversample)); t = _mm_load1_pd(a+i);
sum1 = _mm_add_pd(sum1, t); sum1 = _mm_add_pd(sum1, _mm_mul_pd(t, _mm_loadu_pd(b+i*oversample)));
sum2 = _mm_add_pd(sum2, _mm_unpackhi_pd(t, t)); sum2 = _mm_add_pd(sum2, _mm_mul_pd(t, _mm_loadu_pd(b+i*oversample+2)));
t = _mm_mul_pd(_mm_load1_pd(a+i+1), _mm_loadu_pd(b+(i+1)*oversample)); t = _mm_load1_pd(a+i+1);
sum1 = _mm_add_pd(sum1, t); sum1 = _mm_add_pd(sum1, _mm_mul_pd(t, _mm_loadu_pd(b+(i+1)*oversample)));
sum2 = _mm_add_pd(sum2, _mm_unpackhi_pd(t, t)); sum2 = _mm_add_pd(sum2, _mm_mul_pd(t, _mm_loadu_pd(b+(i+1)*oversample+2)));
} }
sum1 = _mm_mul_pd(f1, sum1); sum1 = _mm_mul_pd(f1, sum1);
sum2 = _mm_mul_pd(f2, sum2); sum2 = _mm_mul_pd(f2, sum2);