mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-23 07:38:16 +00:00
audio-resampler: Improve taps memory layout
Rearrange the oversampled taps in memory to make it easier to use SIMD instructions on them. This simplifies some SSE code. Add some more optimizations.
This commit is contained in:
parent
e9fc039bb1
commit
f6e0481ab5
2 changed files with 245 additions and 80 deletions
|
@ -45,23 +45,15 @@ inner_product_gfloat_linear_1_sse (gfloat * o, const gfloat * a,
|
|||
const gfloat * b, gint len, const gfloat * icoeff, gint oversample)
|
||||
{
|
||||
gint i = 0;
|
||||
__m128 sum = _mm_setzero_ps (), t, b0;
|
||||
__m128 sum = _mm_setzero_ps (), t;
|
||||
__m128 f = _mm_loadu_ps(icoeff);
|
||||
|
||||
for (; i < len; i += 4) {
|
||||
t = _mm_loadu_ps (a + i);
|
||||
|
||||
b0 = _mm_loadh_pi (b0, (__m64 *) (b + (i+0)*oversample));
|
||||
b0 = _mm_loadl_pi (b0, (__m64 *) (b + (i+1)*oversample));
|
||||
|
||||
sum =
|
||||
_mm_add_ps (sum, _mm_mul_ps (_mm_unpacklo_ps (t, t), b0));
|
||||
|
||||
b0 = _mm_loadh_pi (b0, (__m64 *) (b + (i+2)*oversample));
|
||||
b0 = _mm_loadl_pi (b0, (__m64 *) (b + (i+3)*oversample));
|
||||
|
||||
sum =
|
||||
_mm_add_ps (sum, _mm_mul_ps (_mm_unpackhi_ps (t, t), b0));
|
||||
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_unpacklo_ps (t, t),
|
||||
_mm_load_ps (b + 2 * (i + 0))));
|
||||
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_unpackhi_ps (t, t),
|
||||
_mm_load_ps (b + 2 * (i + 2))));
|
||||
}
|
||||
sum = _mm_mul_ps (sum, f);
|
||||
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
|
||||
|
@ -79,9 +71,9 @@ inner_product_gfloat_cubic_1_sse (gfloat * o, const gfloat * a,
|
|||
|
||||
for (; i < len; i += 2) {
|
||||
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 0),
|
||||
_mm_loadu_ps (b + (i + 0) * oversample)));
|
||||
_mm_load_ps (b + 4 * (i + 0))));
|
||||
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 1),
|
||||
_mm_loadu_ps (b + (i + 1) * oversample)));
|
||||
_mm_load_ps (b + 4 * (i + 1))));
|
||||
}
|
||||
sum = _mm_mul_ps (sum, f);
|
||||
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
|
||||
|
@ -118,9 +110,10 @@ inner_product_gfloat_none_2_sse (gfloat * o, const gfloat * a,
|
|||
}
|
||||
|
||||
MAKE_RESAMPLE_FUNC (gfloat, none, 1, sse);
|
||||
MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse);
|
||||
MAKE_RESAMPLE_FUNC (gfloat, linear, 1, sse);
|
||||
MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, sse);
|
||||
|
||||
MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse);
|
||||
#endif
|
||||
|
||||
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
|
||||
|
@ -154,6 +147,94 @@ inner_product_gint16_none_1_sse2 (gint16 * o, const gint16 * a,
|
|||
*o = _mm_extract_epi16 (sum, 0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
inner_product_gint16_linear_1_sse2 (gint16 * o, const gint16 * a,
|
||||
const gint16 * b, gint len, const gint16 * icoeff, gint oversample)
|
||||
{
|
||||
gint i = 0;
|
||||
__m128i sum, t, ta, tb, m1, m2;
|
||||
__m128i f = _mm_cvtsi64_si128 (*((long long*)icoeff));
|
||||
|
||||
sum = _mm_setzero_si128 ();
|
||||
f = _mm_unpacklo_epi16 (f, sum);
|
||||
|
||||
for (; i < len; i += 8) {
|
||||
t = _mm_loadu_si128 ((__m128i *) (a + i));
|
||||
|
||||
ta = _mm_unpacklo_epi16 (t, t);
|
||||
tb = _mm_load_si128 ((__m128i *) (b + 2 * i + 0));
|
||||
|
||||
m1 = _mm_mulhi_epi16 (ta, tb);
|
||||
m2 = _mm_mullo_epi16 (ta, tb);
|
||||
|
||||
sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1));
|
||||
sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1));
|
||||
|
||||
ta = _mm_unpackhi_epi16 (t, t);
|
||||
tb = _mm_load_si128 ((__m128i *) (b + 2 * i + 8));
|
||||
|
||||
m1 = _mm_mulhi_epi16 (ta, tb);
|
||||
m2 = _mm_mullo_epi16 (ta, tb);
|
||||
|
||||
sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1));
|
||||
sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1));
|
||||
}
|
||||
sum = _mm_srai_epi32 (sum, PRECISION_S16);
|
||||
sum = _mm_madd_epi16 (sum, f);
|
||||
|
||||
sum =
|
||||
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
|
||||
3)));
|
||||
sum =
|
||||
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1,
|
||||
1)));
|
||||
|
||||
sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
|
||||
sum = _mm_srai_epi32 (sum, PRECISION_S16);
|
||||
sum = _mm_packs_epi32 (sum, sum);
|
||||
*o = _mm_extract_epi16 (sum, 0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
inner_product_gint16_cubic_1_sse2 (gint16 * o, const gint16 * a,
|
||||
const gint16 * b, gint len, const gint16 * icoeff, gint oversample)
|
||||
{
|
||||
gint i = 0;
|
||||
__m128i sum, ta, tb, m1, m2;
|
||||
__m128i f = _mm_cvtsi64_si128 (*((long long*)icoeff));
|
||||
|
||||
sum = _mm_setzero_si128 ();
|
||||
f = _mm_unpacklo_epi16 (f, sum);
|
||||
|
||||
for (; i < len; i += 2) {
|
||||
ta = _mm_cvtsi32_si128 (*(gint32*)(a + i));
|
||||
ta = _mm_unpacklo_epi16 (ta, ta);
|
||||
ta = _mm_unpacklo_epi16 (ta, ta);
|
||||
|
||||
tb = _mm_load_si128 ((__m128i *) (b + 4 * i + 0));
|
||||
|
||||
m1 = _mm_mulhi_epi16 (ta, tb);
|
||||
m2 = _mm_mullo_epi16 (ta, tb);
|
||||
|
||||
sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1));
|
||||
sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1));
|
||||
}
|
||||
sum = _mm_srai_epi32 (sum, PRECISION_S16);
|
||||
sum = _mm_madd_epi16 (sum, f);
|
||||
|
||||
sum =
|
||||
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
|
||||
3)));
|
||||
sum =
|
||||
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1,
|
||||
1)));
|
||||
|
||||
sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
|
||||
sum = _mm_srai_epi32 (sum, PRECISION_S16);
|
||||
sum = _mm_packs_epi32 (sum, sum);
|
||||
*o = _mm_extract_epi16 (sum, 0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a,
|
||||
const gdouble * b, gint len, const gdouble * icoeff, gint oversample)
|
||||
|
@ -179,6 +260,51 @@ inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a,
|
|||
_mm_store_sd (o, sum);
|
||||
}
|
||||
|
||||
static inline void
|
||||
inner_product_gdouble_linear_1_sse2 (gdouble * o, const gdouble * a,
|
||||
const gdouble * b, gint len, const gdouble * icoeff, gint oversample)
|
||||
{
|
||||
gint i = 0;
|
||||
__m128d sum = _mm_setzero_pd ();
|
||||
__m128d f = _mm_loadu_pd (icoeff);
|
||||
|
||||
for (; i < len; i += 4) {
|
||||
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 0), _mm_load_pd (b + 2 * i + 0)));
|
||||
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 1), _mm_load_pd (b + 2 * i + 2)));
|
||||
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 2), _mm_load_pd (b + 2 * i + 4)));
|
||||
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 3), _mm_load_pd (b + 2 * i + 6)));
|
||||
}
|
||||
sum = _mm_mul_pd (sum, f);
|
||||
sum = _mm_add_sd (sum, _mm_unpackhi_pd (sum, sum));
|
||||
_mm_store_sd (o, sum);
|
||||
}
|
||||
|
||||
static inline void
|
||||
inner_product_gdouble_cubic_1_sse2 (gdouble * o, const gdouble * a,
|
||||
const gdouble * b, gint len, const gdouble * icoeff, gint oversample)
|
||||
{
|
||||
gint i = 0;
|
||||
__m128d sum1 = _mm_setzero_pd (), t;
|
||||
__m128d sum2 = _mm_setzero_pd ();
|
||||
__m128d f1 = _mm_loadu_pd (icoeff);
|
||||
__m128d f2 = _mm_loadu_pd (icoeff+2);
|
||||
|
||||
for (; i < len; i += 2) {
|
||||
t = _mm_load1_pd (a + i + 0);
|
||||
sum1 = _mm_add_pd (sum1, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 0)));
|
||||
sum2 = _mm_add_pd (sum2, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 2)));
|
||||
|
||||
t = _mm_load1_pd (a + i + 1);
|
||||
sum1 = _mm_add_pd (sum1, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 4)));
|
||||
sum2 = _mm_add_pd (sum2, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 6)));
|
||||
}
|
||||
sum1 = _mm_mul_pd (sum1, f1);
|
||||
sum2 = _mm_mul_pd (sum2, f2);
|
||||
sum1 = _mm_add_pd (sum1, sum2);
|
||||
sum1 = _mm_add_sd (sum1, _mm_unpackhi_pd (sum1, sum1));
|
||||
_mm_store_sd (o, sum1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
inner_product_gint16_none_2_sse2 (gint16 * o, const gint16 * a,
|
||||
const gint16 * b, gint len, const gint16 * icoeff, gint oversample)
|
||||
|
@ -239,9 +365,16 @@ inner_product_gdouble_none_2_sse2 (gdouble * o, const gdouble * a,
|
|||
}
|
||||
|
||||
MAKE_RESAMPLE_FUNC (gint16, none, 1, sse2);
|
||||
MAKE_RESAMPLE_FUNC (gint16, linear, 1, sse2);
|
||||
MAKE_RESAMPLE_FUNC (gint16, cubic, 1, sse2);
|
||||
|
||||
MAKE_RESAMPLE_FUNC (gdouble, none, 1, sse2);
|
||||
MAKE_RESAMPLE_FUNC (gdouble, linear, 1, sse2);
|
||||
MAKE_RESAMPLE_FUNC (gdouble, cubic, 1, sse2);
|
||||
|
||||
MAKE_RESAMPLE_FUNC (gint16, none, 2, sse2);
|
||||
MAKE_RESAMPLE_FUNC (gdouble, none, 2, sse2);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
|
||||
|
@ -295,21 +428,29 @@ audio_resampler_check_x86 (const gchar *option)
|
|||
#if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
|
||||
GST_DEBUG ("enable SSE optimisations");
|
||||
resample_gfloat_none_1 = resample_gfloat_none_1_sse;
|
||||
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
|
||||
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
|
||||
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
|
||||
|
||||
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
|
||||
#endif
|
||||
} else if (!strcmp (option, "sse2")) {
|
||||
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
|
||||
GST_DEBUG ("enable SSE2 optimisations");
|
||||
resample_gint16_none_1 = resample_gint16_none_1_sse2;
|
||||
resample_gint16_linear_1 = resample_gint16_linear_1_sse2;
|
||||
resample_gint16_cubic_1 = resample_gint16_cubic_1_sse2;
|
||||
|
||||
resample_gfloat_none_1 = resample_gfloat_none_1_sse;
|
||||
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
|
||||
resample_gdouble_none_1 = resample_gdouble_none_1_sse2;
|
||||
resample_gint16_none_2 = resample_gint16_none_2_sse2;
|
||||
resample_gdouble_none_2 = resample_gdouble_none_2_sse2;
|
||||
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
|
||||
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
|
||||
|
||||
resample_gdouble_none_1 = resample_gdouble_none_1_sse2;
|
||||
resample_gdouble_linear_1 = resample_gdouble_linear_1_sse2;
|
||||
resample_gdouble_cubic_1 = resample_gdouble_cubic_1_sse2;
|
||||
|
||||
resample_gint16_none_2 = resample_gint16_none_2_sse2;
|
||||
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
|
||||
resample_gdouble_none_2 = resample_gdouble_none_2_sse2;
|
||||
#endif
|
||||
} else if (!strcmp (option, "sse41")) {
|
||||
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
|
||||
|
|
|
@ -390,6 +390,27 @@ MAKE_CONVERT_TAPS_INT_FUNC (gint32, PRECISION_S32);
|
|||
MAKE_CONVERT_TAPS_FLOAT_FUNC (gfloat);
|
||||
MAKE_CONVERT_TAPS_FLOAT_FUNC (gdouble);
|
||||
|
||||
/* Generates extract_taps_<type>(), which copies taps from the flat tmpcoeff
 * array into resampler->coeff.  One row is written per oversample index, the
 * rows being resampler->cstride bytes apart.  Within a row, every tap j
 * contributes `mult` consecutive source values starting at
 * tmpcoeff[i + j * oversample]. */
#define MAKE_EXTRACT_TAPS_FUNC(type)                                    \
static inline void                                                      \
extract_taps_##type (GstAudioResampler * resampler, type *tmpcoeff,     \
    gint n_taps, gint oversample, gint mult)                            \
{                                                                       \
  gint phase;                                                           \
  for (phase = 0; phase < oversample; phase++) {                        \
    gint8 *row = (gint8 *) resampler->coeff +                           \
        phase * resampler->cstride;                                     \
    type *dst = (type *) row;                                           \
    const type *src = tmpcoeff + phase;                                 \
    gint tap, k;                                                        \
    for (tap = 0; tap < n_taps; tap++)                                  \
      for (k = 0; k < mult; k++)                                        \
        dst[tap * mult + k] = src[tap * oversample + k];                \
  }                                                                     \
}
|
||||
MAKE_EXTRACT_TAPS_FUNC (gint16);
|
||||
MAKE_EXTRACT_TAPS_FUNC (gint32);
|
||||
MAKE_EXTRACT_TAPS_FUNC (gfloat);
|
||||
MAKE_EXTRACT_TAPS_FUNC (gdouble);
|
||||
|
||||
#define GET_TAPS_NONE_FUNC(type) \
|
||||
static inline gpointer \
|
||||
get_taps_##type##_none (GstAudioResampler * resampler, \
|
||||
|
@ -421,12 +442,19 @@ get_taps_##type##_none (GstAudioResampler * resampler,
|
|||
} \
|
||||
return res; \
|
||||
}
|
||||
|
||||
GET_TAPS_NONE_FUNC (gint16);
|
||||
GET_TAPS_NONE_FUNC (gint32);
|
||||
GET_TAPS_NONE_FUNC (gfloat);
|
||||
GET_TAPS_NONE_FUNC (gdouble);
|
||||
|
||||
/* Generates make_coeff_<type>_linear(), which fills icoeff[0..3] with the
 * fixed-point weights for linear interpolation.
 *
 *   type  - integer type of the weights
 *   type2 - wider integer type used for the intermediate shift
 *   prec  - number of fractional bits
 *
 * x = (frac << prec) / out_rate is stored in icoeff[0] and icoeff[2]; its
 * complement is stored in icoeff[1] and icoeff[3].  The complement is taken
 * against (2^prec - 1) rather than 2^prec: with x == 0 the value 2^prec does
 * not fit in `type` and would wrap to the most negative value, flipping the
 * sign of the weight.  The constant is built in `type2` so the shift cannot
 * overflow on platforms where long is 32 bits.
 * NOTE(review): assumes 0 <= frac < out_rate, so x lies in [0, 2^prec - 1];
 * confirm against the callers. */
#define MAKE_COEFF_LINEAR_INT_FUNC(type,type2,prec)                     \
static inline void                                                      \
make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff)     \
{                                                                       \
  type x = ((type2) frac << (prec)) / out_rate;                         \
  icoeff[0] = icoeff[2] = x;                                            \
  icoeff[1] = icoeff[3] = (type) ((((type2) 1) << (prec)) - 1) - x;     \
}
|
||||
#define MAKE_COEFF_LINEAR_FLOAT_FUNC(type) \
|
||||
static inline void \
|
||||
make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \
|
||||
|
@ -435,30 +463,11 @@ make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \
|
|||
icoeff[0] = icoeff[2] = x; \
|
||||
icoeff[1] = icoeff[3] = 1.0 - x; \
|
||||
}
|
||||
#define MAKE_COEFF_LINEAR_INT_FUNC(type,type2,prec) \
|
||||
static inline void \
|
||||
make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \
|
||||
{ \
|
||||
type x = ((type2)frac << prec) / out_rate; \
|
||||
icoeff[0] = icoeff[2] = x; \
|
||||
icoeff[1] = icoeff[3] = (1 << prec) - x; \
|
||||
}
|
||||
|
||||
MAKE_COEFF_LINEAR_INT_FUNC (gint16, gint32, PRECISION_S16);
|
||||
MAKE_COEFF_LINEAR_INT_FUNC (gint32, gint64, PRECISION_S32);
|
||||
MAKE_COEFF_LINEAR_FLOAT_FUNC (gfloat);
|
||||
MAKE_COEFF_LINEAR_FLOAT_FUNC (gdouble);
|
||||
|
||||
#define MAKE_COEFF_CUBIC_FLOAT_FUNC(type) \
|
||||
static inline void \
|
||||
make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \
|
||||
{ \
|
||||
type x = (type) frac / out_rate, x2 = x * x, x3 = x2 * x; \
|
||||
icoeff[0] = 0.16667f * (x3 - x); \
|
||||
icoeff[1] = x + 0.5f * (x2 - x3); \
|
||||
icoeff[3] = -0.33333f * x + 0.5f * x2 - 0.16667f * x3; \
|
||||
icoeff[2] = 1. - icoeff[0] - icoeff[1] - icoeff[3]; \
|
||||
}
|
||||
#define MAKE_COEFF_CUBIC_INT_FUNC(type,type2,prec) \
|
||||
static inline void \
|
||||
make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \
|
||||
|
@ -473,7 +482,16 @@ make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \
|
|||
(x2 >> 1) - ((((type2) x3 << prec) / 6) >> prec); \
|
||||
icoeff[2] = one - icoeff[0] - icoeff[1] - icoeff[3]; \
|
||||
}
|
||||
|
||||
/* Generates make_coeff_<type>_cubic(), which fills icoeff[0..3] with the
 * four cubic interpolation weights for the fractional position
 * frac / out_rate.  icoeff[2] is computed last as the remainder, so the
 * four weights sum to 1 up to rounding. */
#define MAKE_COEFF_CUBIC_FLOAT_FUNC(type)                               \
static inline void                                                      \
make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff)      \
{                                                                       \
  type t = (type) frac / out_rate;                                      \
  type t_sq = t * t;                                                    \
  type t_cu = t_sq * t;                                                 \
                                                                        \
  icoeff[0] = 0.16667f * (t_cu - t);                                    \
  icoeff[1] = t + 0.5f * (t_sq - t_cu);                                 \
  icoeff[3] = -0.33333f * t + 0.5f * t_sq - 0.16667f * t_cu;            \
  icoeff[2] = 1. - icoeff[0] - icoeff[1] - icoeff[3];                   \
}
|
||||
MAKE_COEFF_CUBIC_INT_FUNC (gint16, gint32, PRECISION_S16);
|
||||
MAKE_COEFF_CUBIC_INT_FUNC (gint32, gint64, PRECISION_S32);
|
||||
MAKE_COEFF_CUBIC_FLOAT_FUNC (gfloat);
|
||||
|
@ -488,12 +506,13 @@ get_taps_##type##_##inter (GstAudioResampler * resampler, \
|
|||
gint out_rate = resampler->out_rate; \
|
||||
gint offset, frac, pos; \
|
||||
gint oversample = resampler->oversample; \
|
||||
gint cstride = resampler->cstride; \
|
||||
\
|
||||
pos = *samp_phase * oversample; \
|
||||
offset = (oversample - 1) - (pos / out_rate); \
|
||||
frac = pos % out_rate; \
|
||||
\
|
||||
res = (type *)resampler->coeff + offset; \
|
||||
res = (gint8 *) resampler->coeff + offset * cstride; \
|
||||
make_coeff_##type##_##inter (frac, out_rate, icoeff); \
|
||||
\
|
||||
*samp_index += resampler->samp_inc; \
|
||||
|
@ -526,7 +545,7 @@ inner_product_##type##_none_1_c (type * o, const type * a, \
|
|||
for (i = 0; i < len; i++) \
|
||||
res += (type2) a[i] * (type2) b[i]; \
|
||||
\
|
||||
res = (res + (1 << ((prec) - 1))) >> (prec); \
|
||||
res = (res + (1L << ((prec) - 1))) >> (prec); \
|
||||
*o = CLAMP (res, -(limit), (limit) - 1); \
|
||||
}
|
||||
|
||||
|
@ -542,12 +561,12 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \
|
|||
type2 res[2] = { 0, 0 }; \
|
||||
\
|
||||
for (i = 0; i < len; i++) { \
|
||||
res[0] += (type2) a[i] * (type2) b[i * oversample + 0]; \
|
||||
res[1] += (type2) a[i] * (type2) b[i * oversample + 1]; \
|
||||
res[0] += (type2) a[i] * (type2) b[2 * i + 0]; \
|
||||
res[1] += (type2) a[i] * (type2) b[2 * i + 1]; \
|
||||
} \
|
||||
res[0] = (res[0] >> (prec)) * ic[0] + \
|
||||
(res[1] >> (prec)) * ic[1]; \
|
||||
res[0] = (res[0] + (1 << ((prec) - 1))) >> (prec); \
|
||||
res[0] = (res[0] >> (prec)) * (type2) ic[0] + \
|
||||
(res[1] >> (prec)) * (type2) ic[1]; \
|
||||
res[0] = (res[0] + (1L << ((prec) - 1))) >> (prec); \
|
||||
*o = CLAMP (res[0], -(limit), (limit) - 1); \
|
||||
}
|
||||
|
||||
|
@ -563,16 +582,16 @@ inner_product_##type##_cubic_1_c (type * o, const type * a, \
|
|||
type2 res[4] = { 0, 0, 0, 0 }; \
|
||||
\
|
||||
for (i = 0; i < len; i++) { \
|
||||
res[0] += (type2) a[i] * (type2) b[i * oversample + 0]; \
|
||||
res[1] += (type2) a[i] * (type2) b[i * oversample + 1]; \
|
||||
res[2] += (type2) a[i] * (type2) b[i * oversample + 2]; \
|
||||
res[3] += (type2) a[i] * (type2) b[i * oversample + 3]; \
|
||||
res[0] += (type2) a[i] * (type2) b[4 * i + 0]; \
|
||||
res[1] += (type2) a[i] * (type2) b[4 * i + 1]; \
|
||||
res[2] += (type2) a[i] * (type2) b[4 * i + 2]; \
|
||||
res[3] += (type2) a[i] * (type2) b[4 * i + 3]; \
|
||||
} \
|
||||
res[0] = (res[0] >> (prec)) * ic[0] + \
|
||||
(res[1] >> (prec)) * ic[1] + \
|
||||
(res[2] >> (prec)) * ic[2] + \
|
||||
(res[3] >> (prec)) * ic[3]; \
|
||||
res[0] = (res[0] + (1 << ((prec) - 1))) >> (prec); \
|
||||
res[0] = (res[0] >> (prec)) * (type2) ic[0] + \
|
||||
(res[1] >> (prec)) * (type2) ic[1] + \
|
||||
(res[2] >> (prec)) * (type2) ic[2] + \
|
||||
(res[3] >> (prec)) * (type2) ic[3]; \
|
||||
res[0] = (res[0] + (1L << ((prec) - 1))) >> (prec); \
|
||||
*o = CLAMP (res[0], -(limit), (limit) - 1); \
|
||||
}
|
||||
|
||||
|
@ -605,8 +624,8 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \
|
|||
type res[2] = { 0.0, 0.0 }; \
|
||||
\
|
||||
for (i = 0; i < len; i++) { \
|
||||
res[0] += a[i] * b[i * oversample + 0]; \
|
||||
res[1] += a[i] * b[i * oversample + 1]; \
|
||||
res[0] += a[i] * b[2 * i + 0]; \
|
||||
res[1] += a[i] * b[2 * i + 1]; \
|
||||
} \
|
||||
*o = res[0] * ic[0] + res[1] * ic[1]; \
|
||||
}
|
||||
|
@ -622,10 +641,10 @@ inner_product_##type##_cubic_1_c (type * o, const type * a, \
|
|||
type res[4] = { 0.0, 0.0, 0.0, 0.0 }; \
|
||||
\
|
||||
for (i = 0; i < len; i++) { \
|
||||
res[0] += a[i] * b[i * oversample + 0]; \
|
||||
res[1] += a[i] * b[i * oversample + 1]; \
|
||||
res[2] += a[i] * b[i * oversample + 2]; \
|
||||
res[3] += a[i] * b[i * oversample + 3]; \
|
||||
res[0] += a[i] * b[4 * i + 0]; \
|
||||
res[1] += a[i] * b[4 * i + 1]; \
|
||||
res[2] += a[i] * b[4 * i + 2]; \
|
||||
res[3] += a[i] * b[4 * i + 3]; \
|
||||
} \
|
||||
*o = res[0] * ic[0] + res[1] * ic[1] + \
|
||||
res[2] * ic[2] + res[3] * ic[3]; \
|
||||
|
@ -659,9 +678,10 @@ resample_ ##type## _ ##inter## _ ##channels## _ ##arch (GstAudioResampler * resa
|
|||
\
|
||||
ipp = &ip[samp_index * channels]; \
|
||||
\
|
||||
taps = get_taps_ ##type##_##inter (resampler, &samp_index, &samp_phase, icoeff); \
|
||||
\
|
||||
inner_product_ ##type##_##inter##_##channels##_##arch (op, ipp, taps, n_taps, icoeff, oversample); \
|
||||
taps = get_taps_ ##type##_##inter \
|
||||
(resampler, &samp_index, &samp_phase, icoeff); \
|
||||
inner_product_ ##type##_##inter##_##channels##_##arch \
|
||||
(op, ipp, taps, n_taps, icoeff, oversample); \
|
||||
op += ostride; \
|
||||
} \
|
||||
memmove (ip, &ip[samp_index * channels], \
|
||||
|
@ -802,10 +822,10 @@ deinterleave_ ##type (GstAudioResampler * resampler, gpointer sbuf[], \
|
|||
} \
|
||||
}
|
||||
|
||||
MAKE_DEINTERLEAVE_FUNC (gdouble);
|
||||
MAKE_DEINTERLEAVE_FUNC (gfloat);
|
||||
MAKE_DEINTERLEAVE_FUNC (gint32);
|
||||
MAKE_DEINTERLEAVE_FUNC (gint16);
|
||||
MAKE_DEINTERLEAVE_FUNC (gint32);
|
||||
MAKE_DEINTERLEAVE_FUNC (gfloat);
|
||||
MAKE_DEINTERLEAVE_FUNC (gdouble);
|
||||
|
||||
static DeinterleaveFunc deinterleave_funcs[] = {
|
||||
deinterleave_gint16,
|
||||
|
@ -875,7 +895,7 @@ calculate_kaiser_params (GstAudioResampler * resampler)
|
|||
|
||||
static void
|
||||
alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps,
|
||||
gint n_phases)
|
||||
gint n_phases, gint n_mult)
|
||||
{
|
||||
if (resampler->alloc_taps >= n_taps && resampler->alloc_phases >= n_phases)
|
||||
return;
|
||||
|
@ -883,7 +903,8 @@ alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps,
|
|||
resampler->tmpcoeff =
|
||||
g_realloc_n (resampler->tmpcoeff, n_taps, sizeof (gdouble));
|
||||
|
||||
resampler->cstride = GST_ROUND_UP_32 (bps * (n_taps + TAPS_OVERREAD));
|
||||
resampler->cstride =
|
||||
GST_ROUND_UP_32 (bps * (n_mult * n_taps + TAPS_OVERREAD));
|
||||
g_free (resampler->coeffmem);
|
||||
resampler->coeffmem = g_malloc0 (n_phases * resampler->cstride + ALIGN - 1);
|
||||
resampler->coeff = MEM_ALIGN (resampler->coeffmem, ALIGN);
|
||||
|
@ -983,7 +1004,7 @@ resampler_calculate_taps (GstAudioResampler * resampler)
|
|||
}
|
||||
|
||||
if (interpolate) {
|
||||
gint otaps;
|
||||
gint otaps, mult;
|
||||
gpointer coeff;
|
||||
gdouble x, weight, *tmpcoeff;
|
||||
GstAudioResamplerFilterInterpolation filter_interpolation =
|
||||
|
@ -995,37 +1016,40 @@ resampler_calculate_taps (GstAudioResampler * resampler)
|
|||
else
|
||||
resampler->filter_interpolation = filter_interpolation;
|
||||
|
||||
otaps = oversample * n_taps;
|
||||
switch (resampler->filter_interpolation) {
|
||||
default:
|
||||
case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_LINEAR:
|
||||
otaps += 1;
|
||||
mult = 2;
|
||||
break;
|
||||
case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_CUBIC:
|
||||
otaps += 3;
|
||||
mult = 4;
|
||||
break;
|
||||
}
|
||||
otaps = oversample * n_taps + mult - 1;
|
||||
|
||||
alloc_coeff_mem (resampler, bps, otaps, 1);
|
||||
alloc_coeff_mem (resampler, bps, otaps, oversample, mult);
|
||||
|
||||
coeff = resampler->coeff;
|
||||
tmpcoeff = resampler->tmpcoeff;
|
||||
coeff = tmpcoeff = resampler->tmpcoeff;
|
||||
x = 1.0 - n_taps / 2;
|
||||
weight = fill_taps (resampler, tmpcoeff, x, otaps, oversample);
|
||||
|
||||
switch (resampler->format) {
|
||||
case GST_AUDIO_FORMAT_S16:
|
||||
convert_taps_gint16 (tmpcoeff, coeff, weight / oversample, otaps);
|
||||
extract_taps_gint16 (resampler, coeff, n_taps, oversample, mult);
|
||||
break;
|
||||
case GST_AUDIO_FORMAT_S32:
|
||||
convert_taps_gint32 (tmpcoeff, coeff, weight / oversample, otaps);
|
||||
extract_taps_gint32 (resampler, coeff, n_taps, oversample, mult);
|
||||
break;
|
||||
case GST_AUDIO_FORMAT_F32:
|
||||
convert_taps_gfloat (tmpcoeff, coeff, weight / oversample, otaps);
|
||||
extract_taps_gfloat (resampler, coeff, n_taps, oversample, mult);
|
||||
break;
|
||||
default:
|
||||
case GST_AUDIO_FORMAT_F64:
|
||||
convert_taps_gdouble (tmpcoeff, coeff, weight / oversample, otaps);
|
||||
extract_taps_gdouble (resampler, coeff, n_taps, oversample, mult);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
@ -1033,7 +1057,7 @@ resampler_calculate_taps (GstAudioResampler * resampler)
|
|||
GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_NONE;
|
||||
resampler->taps = g_realloc_n (resampler->taps, out_rate, sizeof (Tap));
|
||||
memset (resampler->taps, 0, sizeof (Tap) * out_rate);
|
||||
alloc_coeff_mem (resampler, bps, n_taps, out_rate);
|
||||
alloc_coeff_mem (resampler, bps, n_taps, out_rate, 1);
|
||||
}
|
||||
|
||||
resampler->samp_inc = in_rate / out_rate;
|
||||
|
|
Loading…
Reference in a new issue