mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-23 07:38:16 +00:00
audio-resampler: Improve taps memory layout
Rearrange the oversampled taps in memory to make it easier to use SIMD instructions on them. This simplifies some SSE code. Add some more optimizations.
This commit is contained in:
parent
e9fc039bb1
commit
f6e0481ab5
2 changed files with 245 additions and 80 deletions
|
@ -45,23 +45,15 @@ inner_product_gfloat_linear_1_sse (gfloat * o, const gfloat * a,
|
||||||
const gfloat * b, gint len, const gfloat * icoeff, gint oversample)
|
const gfloat * b, gint len, const gfloat * icoeff, gint oversample)
|
||||||
{
|
{
|
||||||
gint i = 0;
|
gint i = 0;
|
||||||
__m128 sum = _mm_setzero_ps (), t, b0;
|
__m128 sum = _mm_setzero_ps (), t;
|
||||||
__m128 f = _mm_loadu_ps(icoeff);
|
__m128 f = _mm_loadu_ps(icoeff);
|
||||||
|
|
||||||
for (; i < len; i += 4) {
|
for (; i < len; i += 4) {
|
||||||
t = _mm_loadu_ps (a + i);
|
t = _mm_loadu_ps (a + i);
|
||||||
|
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_unpacklo_ps (t, t),
|
||||||
b0 = _mm_loadh_pi (b0, (__m64 *) (b + (i+0)*oversample));
|
_mm_load_ps (b + 2 * (i + 0))));
|
||||||
b0 = _mm_loadl_pi (b0, (__m64 *) (b + (i+1)*oversample));
|
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_unpackhi_ps (t, t),
|
||||||
|
_mm_load_ps (b + 2 * (i + 2))));
|
||||||
sum =
|
|
||||||
_mm_add_ps (sum, _mm_mul_ps (_mm_unpacklo_ps (t, t), b0));
|
|
||||||
|
|
||||||
b0 = _mm_loadh_pi (b0, (__m64 *) (b + (i+2)*oversample));
|
|
||||||
b0 = _mm_loadl_pi (b0, (__m64 *) (b + (i+3)*oversample));
|
|
||||||
|
|
||||||
sum =
|
|
||||||
_mm_add_ps (sum, _mm_mul_ps (_mm_unpackhi_ps (t, t), b0));
|
|
||||||
}
|
}
|
||||||
sum = _mm_mul_ps (sum, f);
|
sum = _mm_mul_ps (sum, f);
|
||||||
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
|
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
|
||||||
|
@ -79,9 +71,9 @@ inner_product_gfloat_cubic_1_sse (gfloat * o, const gfloat * a,
|
||||||
|
|
||||||
for (; i < len; i += 2) {
|
for (; i < len; i += 2) {
|
||||||
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 0),
|
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 0),
|
||||||
_mm_loadu_ps (b + (i + 0) * oversample)));
|
_mm_load_ps (b + 4 * (i + 0))));
|
||||||
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 1),
|
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 1),
|
||||||
_mm_loadu_ps (b + (i + 1) * oversample)));
|
_mm_load_ps (b + 4 * (i + 1))));
|
||||||
}
|
}
|
||||||
sum = _mm_mul_ps (sum, f);
|
sum = _mm_mul_ps (sum, f);
|
||||||
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
|
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
|
||||||
|
@ -118,9 +110,10 @@ inner_product_gfloat_none_2_sse (gfloat * o, const gfloat * a,
|
||||||
}
|
}
|
||||||
|
|
||||||
MAKE_RESAMPLE_FUNC (gfloat, none, 1, sse);
|
MAKE_RESAMPLE_FUNC (gfloat, none, 1, sse);
|
||||||
MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse);
|
|
||||||
MAKE_RESAMPLE_FUNC (gfloat, linear, 1, sse);
|
MAKE_RESAMPLE_FUNC (gfloat, linear, 1, sse);
|
||||||
MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, sse);
|
MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, sse);
|
||||||
|
|
||||||
|
MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
|
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
|
||||||
|
@ -154,6 +147,94 @@ inner_product_gint16_none_1_sse2 (gint16 * o, const gint16 * a,
|
||||||
*o = _mm_extract_epi16 (sum, 0);
|
*o = _mm_extract_epi16 (sum, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
inner_product_gint16_linear_1_sse2 (gint16 * o, const gint16 * a,
|
||||||
|
const gint16 * b, gint len, const gint16 * icoeff, gint oversample)
|
||||||
|
{
|
||||||
|
gint i = 0;
|
||||||
|
__m128i sum, t, ta, tb, m1, m2;
|
||||||
|
__m128i f = _mm_cvtsi64_si128 (*((long long*)icoeff));
|
||||||
|
|
||||||
|
sum = _mm_setzero_si128 ();
|
||||||
|
f = _mm_unpacklo_epi16 (f, sum);
|
||||||
|
|
||||||
|
for (; i < len; i += 8) {
|
||||||
|
t = _mm_loadu_si128 ((__m128i *) (a + i));
|
||||||
|
|
||||||
|
ta = _mm_unpacklo_epi16 (t, t);
|
||||||
|
tb = _mm_load_si128 ((__m128i *) (b + 2 * i + 0));
|
||||||
|
|
||||||
|
m1 = _mm_mulhi_epi16 (ta, tb);
|
||||||
|
m2 = _mm_mullo_epi16 (ta, tb);
|
||||||
|
|
||||||
|
sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1));
|
||||||
|
sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1));
|
||||||
|
|
||||||
|
ta = _mm_unpackhi_epi16 (t, t);
|
||||||
|
tb = _mm_load_si128 ((__m128i *) (b + 2 * i + 8));
|
||||||
|
|
||||||
|
m1 = _mm_mulhi_epi16 (ta, tb);
|
||||||
|
m2 = _mm_mullo_epi16 (ta, tb);
|
||||||
|
|
||||||
|
sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1));
|
||||||
|
sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1));
|
||||||
|
}
|
||||||
|
sum = _mm_srai_epi32 (sum, PRECISION_S16);
|
||||||
|
sum = _mm_madd_epi16 (sum, f);
|
||||||
|
|
||||||
|
sum =
|
||||||
|
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
|
||||||
|
3)));
|
||||||
|
sum =
|
||||||
|
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1,
|
||||||
|
1)));
|
||||||
|
|
||||||
|
sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
|
||||||
|
sum = _mm_srai_epi32 (sum, PRECISION_S16);
|
||||||
|
sum = _mm_packs_epi32 (sum, sum);
|
||||||
|
*o = _mm_extract_epi16 (sum, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
inner_product_gint16_cubic_1_sse2 (gint16 * o, const gint16 * a,
|
||||||
|
const gint16 * b, gint len, const gint16 * icoeff, gint oversample)
|
||||||
|
{
|
||||||
|
gint i = 0;
|
||||||
|
__m128i sum, ta, tb, m1, m2;
|
||||||
|
__m128i f = _mm_cvtsi64_si128 (*((long long*)icoeff));
|
||||||
|
|
||||||
|
sum = _mm_setzero_si128 ();
|
||||||
|
f = _mm_unpacklo_epi16 (f, sum);
|
||||||
|
|
||||||
|
for (; i < len; i += 2) {
|
||||||
|
ta = _mm_cvtsi32_si128 (*(gint32*)(a + i));
|
||||||
|
ta = _mm_unpacklo_epi16 (ta, ta);
|
||||||
|
ta = _mm_unpacklo_epi16 (ta, ta);
|
||||||
|
|
||||||
|
tb = _mm_load_si128 ((__m128i *) (b + 4 * i + 0));
|
||||||
|
|
||||||
|
m1 = _mm_mulhi_epi16 (ta, tb);
|
||||||
|
m2 = _mm_mullo_epi16 (ta, tb);
|
||||||
|
|
||||||
|
sum = _mm_add_epi32 (sum, _mm_unpacklo_epi16 (m2, m1));
|
||||||
|
sum = _mm_add_epi32 (sum, _mm_unpackhi_epi16 (m2, m1));
|
||||||
|
}
|
||||||
|
sum = _mm_srai_epi32 (sum, PRECISION_S16);
|
||||||
|
sum = _mm_madd_epi16 (sum, f);
|
||||||
|
|
||||||
|
sum =
|
||||||
|
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
|
||||||
|
3)));
|
||||||
|
sum =
|
||||||
|
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1,
|
||||||
|
1)));
|
||||||
|
|
||||||
|
sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
|
||||||
|
sum = _mm_srai_epi32 (sum, PRECISION_S16);
|
||||||
|
sum = _mm_packs_epi32 (sum, sum);
|
||||||
|
*o = _mm_extract_epi16 (sum, 0);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a,
|
inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a,
|
||||||
const gdouble * b, gint len, const gdouble * icoeff, gint oversample)
|
const gdouble * b, gint len, const gdouble * icoeff, gint oversample)
|
||||||
|
@ -179,6 +260,51 @@ inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a,
|
||||||
_mm_store_sd (o, sum);
|
_mm_store_sd (o, sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
inner_product_gdouble_linear_1_sse2 (gdouble * o, const gdouble * a,
|
||||||
|
const gdouble * b, gint len, const gdouble * icoeff, gint oversample)
|
||||||
|
{
|
||||||
|
gint i = 0;
|
||||||
|
__m128d sum = _mm_setzero_pd ();
|
||||||
|
__m128d f = _mm_loadu_pd (icoeff);
|
||||||
|
|
||||||
|
for (; i < len; i += 4) {
|
||||||
|
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 0), _mm_load_pd (b + 2 * i + 0)));
|
||||||
|
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 1), _mm_load_pd (b + 2 * i + 2)));
|
||||||
|
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 2), _mm_load_pd (b + 2 * i + 4)));
|
||||||
|
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 3), _mm_load_pd (b + 2 * i + 6)));
|
||||||
|
}
|
||||||
|
sum = _mm_mul_pd (sum, f);
|
||||||
|
sum = _mm_add_sd (sum, _mm_unpackhi_pd (sum, sum));
|
||||||
|
_mm_store_sd (o, sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
inner_product_gdouble_cubic_1_sse2 (gdouble * o, const gdouble * a,
|
||||||
|
const gdouble * b, gint len, const gdouble * icoeff, gint oversample)
|
||||||
|
{
|
||||||
|
gint i = 0;
|
||||||
|
__m128d sum1 = _mm_setzero_pd (), t;
|
||||||
|
__m128d sum2 = _mm_setzero_pd ();
|
||||||
|
__m128d f1 = _mm_loadu_pd (icoeff);
|
||||||
|
__m128d f2 = _mm_loadu_pd (icoeff+2);
|
||||||
|
|
||||||
|
for (; i < len; i += 2) {
|
||||||
|
t = _mm_load1_pd (a + i + 0);
|
||||||
|
sum1 = _mm_add_pd (sum1, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 0)));
|
||||||
|
sum2 = _mm_add_pd (sum2, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 2)));
|
||||||
|
|
||||||
|
t = _mm_load1_pd (a + i + 1);
|
||||||
|
sum1 = _mm_add_pd (sum1, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 4)));
|
||||||
|
sum2 = _mm_add_pd (sum2, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 6)));
|
||||||
|
}
|
||||||
|
sum1 = _mm_mul_pd (sum1, f1);
|
||||||
|
sum2 = _mm_mul_pd (sum2, f2);
|
||||||
|
sum1 = _mm_add_pd (sum1, sum2);
|
||||||
|
sum1 = _mm_add_sd (sum1, _mm_unpackhi_pd (sum1, sum1));
|
||||||
|
_mm_store_sd (o, sum1);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
inner_product_gint16_none_2_sse2 (gint16 * o, const gint16 * a,
|
inner_product_gint16_none_2_sse2 (gint16 * o, const gint16 * a,
|
||||||
const gint16 * b, gint len, const gint16 * icoeff, gint oversample)
|
const gint16 * b, gint len, const gint16 * icoeff, gint oversample)
|
||||||
|
@ -239,9 +365,16 @@ inner_product_gdouble_none_2_sse2 (gdouble * o, const gdouble * a,
|
||||||
}
|
}
|
||||||
|
|
||||||
MAKE_RESAMPLE_FUNC (gint16, none, 1, sse2);
|
MAKE_RESAMPLE_FUNC (gint16, none, 1, sse2);
|
||||||
|
MAKE_RESAMPLE_FUNC (gint16, linear, 1, sse2);
|
||||||
|
MAKE_RESAMPLE_FUNC (gint16, cubic, 1, sse2);
|
||||||
|
|
||||||
MAKE_RESAMPLE_FUNC (gdouble, none, 1, sse2);
|
MAKE_RESAMPLE_FUNC (gdouble, none, 1, sse2);
|
||||||
|
MAKE_RESAMPLE_FUNC (gdouble, linear, 1, sse2);
|
||||||
|
MAKE_RESAMPLE_FUNC (gdouble, cubic, 1, sse2);
|
||||||
|
|
||||||
MAKE_RESAMPLE_FUNC (gint16, none, 2, sse2);
|
MAKE_RESAMPLE_FUNC (gint16, none, 2, sse2);
|
||||||
MAKE_RESAMPLE_FUNC (gdouble, none, 2, sse2);
|
MAKE_RESAMPLE_FUNC (gdouble, none, 2, sse2);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
|
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
|
||||||
|
@ -295,21 +428,29 @@ audio_resampler_check_x86 (const gchar *option)
|
||||||
#if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
|
#if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
|
||||||
GST_DEBUG ("enable SSE optimisations");
|
GST_DEBUG ("enable SSE optimisations");
|
||||||
resample_gfloat_none_1 = resample_gfloat_none_1_sse;
|
resample_gfloat_none_1 = resample_gfloat_none_1_sse;
|
||||||
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
|
|
||||||
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
|
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
|
||||||
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
|
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
|
||||||
|
|
||||||
|
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
|
||||||
#endif
|
#endif
|
||||||
} else if (!strcmp (option, "sse2")) {
|
} else if (!strcmp (option, "sse2")) {
|
||||||
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
|
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
|
||||||
GST_DEBUG ("enable SSE2 optimisations");
|
GST_DEBUG ("enable SSE2 optimisations");
|
||||||
resample_gint16_none_1 = resample_gint16_none_1_sse2;
|
resample_gint16_none_1 = resample_gint16_none_1_sse2;
|
||||||
|
resample_gint16_linear_1 = resample_gint16_linear_1_sse2;
|
||||||
|
resample_gint16_cubic_1 = resample_gint16_cubic_1_sse2;
|
||||||
|
|
||||||
resample_gfloat_none_1 = resample_gfloat_none_1_sse;
|
resample_gfloat_none_1 = resample_gfloat_none_1_sse;
|
||||||
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
|
|
||||||
resample_gdouble_none_1 = resample_gdouble_none_1_sse2;
|
|
||||||
resample_gint16_none_2 = resample_gint16_none_2_sse2;
|
|
||||||
resample_gdouble_none_2 = resample_gdouble_none_2_sse2;
|
|
||||||
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
|
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
|
||||||
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
|
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
|
||||||
|
|
||||||
|
resample_gdouble_none_1 = resample_gdouble_none_1_sse2;
|
||||||
|
resample_gdouble_linear_1 = resample_gdouble_linear_1_sse2;
|
||||||
|
resample_gdouble_cubic_1 = resample_gdouble_cubic_1_sse2;
|
||||||
|
|
||||||
|
resample_gint16_none_2 = resample_gint16_none_2_sse2;
|
||||||
|
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
|
||||||
|
resample_gdouble_none_2 = resample_gdouble_none_2_sse2;
|
||||||
#endif
|
#endif
|
||||||
} else if (!strcmp (option, "sse41")) {
|
} else if (!strcmp (option, "sse41")) {
|
||||||
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
|
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
|
||||||
|
|
|
@ -390,6 +390,27 @@ MAKE_CONVERT_TAPS_INT_FUNC (gint32, PRECISION_S32);
|
||||||
MAKE_CONVERT_TAPS_FLOAT_FUNC (gfloat);
|
MAKE_CONVERT_TAPS_FLOAT_FUNC (gfloat);
|
||||||
MAKE_CONVERT_TAPS_FLOAT_FUNC (gdouble);
|
MAKE_CONVERT_TAPS_FLOAT_FUNC (gdouble);
|
||||||
|
|
||||||
|
/*
 * Generates extract_taps_<type>(): copies taps from the flat array
 * tmpcoeff into resampler->coeff, one row per oversampling phase.
 *
 * Row `phase` starts resampler->cstride bytes after the previous one.
 * For every tap it receives `mult` consecutive source values starting at
 * tmpcoeff[phase + tap * oversample], so one row holds n_taps groups of
 * `mult` coefficients stored back to back.
 */
#define MAKE_EXTRACT_TAPS_FUNC(type)                                    \
static inline void                                                      \
extract_taps_##type (GstAudioResampler * resampler, type *tmpcoeff,     \
    gint n_taps, gint oversample, gint mult)                            \
{                                                                       \
  gint phase;                                                           \
                                                                        \
  for (phase = 0; phase < oversample; phase++) {                        \
    type *row = (type *) ((gint8*)resampler->coeff +                    \
        phase * resampler->cstride);                                    \
    gint tap;                                                           \
                                                                        \
    for (tap = 0; tap < n_taps; tap++) {                                \
      gint m;                                                           \
                                                                        \
      for (m = 0; m < mult; m++)                                        \
        row[tap * mult + m] = tmpcoeff[phase + tap * oversample + m];   \
    }                                                                   \
  }                                                                     \
}
|
||||||
|
MAKE_EXTRACT_TAPS_FUNC (gint16);
|
||||||
|
MAKE_EXTRACT_TAPS_FUNC (gint32);
|
||||||
|
MAKE_EXTRACT_TAPS_FUNC (gfloat);
|
||||||
|
MAKE_EXTRACT_TAPS_FUNC (gdouble);
|
||||||
|
|
||||||
#define GET_TAPS_NONE_FUNC(type) \
|
#define GET_TAPS_NONE_FUNC(type) \
|
||||||
static inline gpointer \
|
static inline gpointer \
|
||||||
get_taps_##type##_none (GstAudioResampler * resampler, \
|
get_taps_##type##_none (GstAudioResampler * resampler, \
|
||||||
|
@ -421,12 +442,19 @@ get_taps_##type##_none (GstAudioResampler * resampler,
|
||||||
} \
|
} \
|
||||||
return res; \
|
return res; \
|
||||||
}
|
}
|
||||||
|
|
||||||
GET_TAPS_NONE_FUNC (gint16);
|
GET_TAPS_NONE_FUNC (gint16);
|
||||||
GET_TAPS_NONE_FUNC (gint32);
|
GET_TAPS_NONE_FUNC (gint32);
|
||||||
GET_TAPS_NONE_FUNC (gfloat);
|
GET_TAPS_NONE_FUNC (gfloat);
|
||||||
GET_TAPS_NONE_FUNC (gdouble);
|
GET_TAPS_NONE_FUNC (gdouble);
|
||||||
|
|
||||||
|
/*
 * Generates make_coeff_<type>_linear(): fixed-point linear interpolation
 * weights for the fraction frac / out_rate.
 *
 * type:  integer type of the weights written to icoeff[0..3].
 * type2: wider integer type used for the intermediate arithmetic.
 * prec:  number of fractional bits.
 *
 * icoeff[0] and icoeff[2] receive x = (frac << prec) / out_rate, while
 * icoeff[1] and icoeff[3] receive the complement.  The complement is taken
 * against (1 << prec) - 1 rather than 1 << prec: the latter does not fit in
 * `type` when x is 0 and would wrap to the most negative value.  The shift
 * is done in `type2` so it cannot overflow a 32-bit `long` when prec is 31.
 */
#define MAKE_COEFF_LINEAR_INT_FUNC(type,type2,prec)                     \
static inline void                                                      \
make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff)     \
{                                                                       \
  type x = ((type2)frac << prec) / out_rate;                            \
  icoeff[0] = icoeff[2] = x;                                            \
  icoeff[1] = icoeff[3] = (type) ((((type2) 1 << prec) - 1) - x);       \
}
|
||||||
#define MAKE_COEFF_LINEAR_FLOAT_FUNC(type) \
|
#define MAKE_COEFF_LINEAR_FLOAT_FUNC(type) \
|
||||||
static inline void \
|
static inline void \
|
||||||
make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \
|
make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \
|
||||||
|
@ -435,30 +463,11 @@ make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \
|
||||||
icoeff[0] = icoeff[2] = x; \
|
icoeff[0] = icoeff[2] = x; \
|
||||||
icoeff[1] = icoeff[3] = 1.0 - x; \
|
icoeff[1] = icoeff[3] = 1.0 - x; \
|
||||||
}
|
}
|
||||||
#define MAKE_COEFF_LINEAR_INT_FUNC(type,type2,prec) \
|
|
||||||
static inline void \
|
|
||||||
make_coeff_##type##_linear (gint frac, gint out_rate, type *icoeff) \
|
|
||||||
{ \
|
|
||||||
type x = ((type2)frac << prec) / out_rate; \
|
|
||||||
icoeff[0] = icoeff[2] = x; \
|
|
||||||
icoeff[1] = icoeff[3] = (1 << prec) - x; \
|
|
||||||
}
|
|
||||||
|
|
||||||
MAKE_COEFF_LINEAR_INT_FUNC (gint16, gint32, PRECISION_S16);
|
MAKE_COEFF_LINEAR_INT_FUNC (gint16, gint32, PRECISION_S16);
|
||||||
MAKE_COEFF_LINEAR_INT_FUNC (gint32, gint64, PRECISION_S32);
|
MAKE_COEFF_LINEAR_INT_FUNC (gint32, gint64, PRECISION_S32);
|
||||||
MAKE_COEFF_LINEAR_FLOAT_FUNC (gfloat);
|
MAKE_COEFF_LINEAR_FLOAT_FUNC (gfloat);
|
||||||
MAKE_COEFF_LINEAR_FLOAT_FUNC (gdouble);
|
MAKE_COEFF_LINEAR_FLOAT_FUNC (gdouble);
|
||||||
|
|
||||||
#define MAKE_COEFF_CUBIC_FLOAT_FUNC(type) \
|
|
||||||
static inline void \
|
|
||||||
make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \
|
|
||||||
{ \
|
|
||||||
type x = (type) frac / out_rate, x2 = x * x, x3 = x2 * x; \
|
|
||||||
icoeff[0] = 0.16667f * (x3 - x); \
|
|
||||||
icoeff[1] = x + 0.5f * (x2 - x3); \
|
|
||||||
icoeff[3] = -0.33333f * x + 0.5f * x2 - 0.16667f * x3; \
|
|
||||||
icoeff[2] = 1. - icoeff[0] - icoeff[1] - icoeff[3]; \
|
|
||||||
}
|
|
||||||
#define MAKE_COEFF_CUBIC_INT_FUNC(type,type2,prec) \
|
#define MAKE_COEFF_CUBIC_INT_FUNC(type,type2,prec) \
|
||||||
static inline void \
|
static inline void \
|
||||||
make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \
|
make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \
|
||||||
|
@ -473,7 +482,16 @@ make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff) \
|
||||||
(x2 >> 1) - ((((type2) x3 << prec) / 6) >> prec); \
|
(x2 >> 1) - ((((type2) x3 << prec) / 6) >> prec); \
|
||||||
icoeff[2] = one - icoeff[0] - icoeff[1] - icoeff[3]; \
|
icoeff[2] = one - icoeff[0] - icoeff[1] - icoeff[3]; \
|
||||||
}
|
}
|
||||||
|
/*
 * Generates make_coeff_<type>_cubic(): the four cubic interpolation
 * weights for the fraction t = frac / out_rate, written to icoeff[0..3].
 *
 * icoeff[0], icoeff[1] and icoeff[3] are evaluated from polynomials in t;
 * icoeff[2] is whatever remains so that the four weights sum to 1.
 */
#define MAKE_COEFF_CUBIC_FLOAT_FUNC(type)                               \
static inline void                                                      \
make_coeff_##type##_cubic (gint frac, gint out_rate, type *icoeff)      \
{                                                                       \
  type t = (type) frac / out_rate;                                      \
  type t2 = t * t;                                                      \
  type t3 = t2 * t;                                                     \
                                                                        \
  icoeff[0] = 0.16667f * (t3 - t);                                      \
  icoeff[1] = t + 0.5f * (t2 - t3);                                     \
  icoeff[3] = -0.33333f * t + 0.5f * t2 - 0.16667f * t3;                \
  icoeff[2] = 1. - icoeff[0] - icoeff[1] - icoeff[3];                   \
}
|
||||||
MAKE_COEFF_CUBIC_INT_FUNC (gint16, gint32, PRECISION_S16);
|
MAKE_COEFF_CUBIC_INT_FUNC (gint16, gint32, PRECISION_S16);
|
||||||
MAKE_COEFF_CUBIC_INT_FUNC (gint32, gint64, PRECISION_S32);
|
MAKE_COEFF_CUBIC_INT_FUNC (gint32, gint64, PRECISION_S32);
|
||||||
MAKE_COEFF_CUBIC_FLOAT_FUNC (gfloat);
|
MAKE_COEFF_CUBIC_FLOAT_FUNC (gfloat);
|
||||||
|
@ -488,12 +506,13 @@ get_taps_##type##_##inter (GstAudioResampler * resampler, \
|
||||||
gint out_rate = resampler->out_rate; \
|
gint out_rate = resampler->out_rate; \
|
||||||
gint offset, frac, pos; \
|
gint offset, frac, pos; \
|
||||||
gint oversample = resampler->oversample; \
|
gint oversample = resampler->oversample; \
|
||||||
|
gint cstride = resampler->cstride; \
|
||||||
\
|
\
|
||||||
pos = *samp_phase * oversample; \
|
pos = *samp_phase * oversample; \
|
||||||
offset = (oversample - 1) - (pos / out_rate); \
|
offset = (oversample - 1) - (pos / out_rate); \
|
||||||
frac = pos % out_rate; \
|
frac = pos % out_rate; \
|
||||||
\
|
\
|
||||||
res = (type *)resampler->coeff + offset; \
|
res = (gint8 *) resampler->coeff + offset * cstride; \
|
||||||
make_coeff_##type##_##inter (frac, out_rate, icoeff); \
|
make_coeff_##type##_##inter (frac, out_rate, icoeff); \
|
||||||
\
|
\
|
||||||
*samp_index += resampler->samp_inc; \
|
*samp_index += resampler->samp_inc; \
|
||||||
|
@ -526,7 +545,7 @@ inner_product_##type##_none_1_c (type * o, const type * a, \
|
||||||
for (i = 0; i < len; i++) \
|
for (i = 0; i < len; i++) \
|
||||||
res += (type2) a[i] * (type2) b[i]; \
|
res += (type2) a[i] * (type2) b[i]; \
|
||||||
\
|
\
|
||||||
res = (res + (1 << ((prec) - 1))) >> (prec); \
|
res = (res + (1L << ((prec) - 1))) >> (prec); \
|
||||||
*o = CLAMP (res, -(limit), (limit) - 1); \
|
*o = CLAMP (res, -(limit), (limit) - 1); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -542,12 +561,12 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \
|
||||||
type2 res[2] = { 0, 0 }; \
|
type2 res[2] = { 0, 0 }; \
|
||||||
\
|
\
|
||||||
for (i = 0; i < len; i++) { \
|
for (i = 0; i < len; i++) { \
|
||||||
res[0] += (type2) a[i] * (type2) b[i * oversample + 0]; \
|
res[0] += (type2) a[i] * (type2) b[2 * i + 0]; \
|
||||||
res[1] += (type2) a[i] * (type2) b[i * oversample + 1]; \
|
res[1] += (type2) a[i] * (type2) b[2 * i + 1]; \
|
||||||
} \
|
} \
|
||||||
res[0] = (res[0] >> (prec)) * ic[0] + \
|
res[0] = (res[0] >> (prec)) * (type2) ic[0] + \
|
||||||
(res[1] >> (prec)) * ic[1]; \
|
(res[1] >> (prec)) * (type2) ic[1]; \
|
||||||
res[0] = (res[0] + (1 << ((prec) - 1))) >> (prec); \
|
res[0] = (res[0] + (1L << ((prec) - 1))) >> (prec); \
|
||||||
*o = CLAMP (res[0], -(limit), (limit) - 1); \
|
*o = CLAMP (res[0], -(limit), (limit) - 1); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -563,16 +582,16 @@ inner_product_##type##_cubic_1_c (type * o, const type * a, \
|
||||||
type2 res[4] = { 0, 0, 0, 0 }; \
|
type2 res[4] = { 0, 0, 0, 0 }; \
|
||||||
\
|
\
|
||||||
for (i = 0; i < len; i++) { \
|
for (i = 0; i < len; i++) { \
|
||||||
res[0] += (type2) a[i] * (type2) b[i * oversample + 0]; \
|
res[0] += (type2) a[i] * (type2) b[4 * i + 0]; \
|
||||||
res[1] += (type2) a[i] * (type2) b[i * oversample + 1]; \
|
res[1] += (type2) a[i] * (type2) b[4 * i + 1]; \
|
||||||
res[2] += (type2) a[i] * (type2) b[i * oversample + 2]; \
|
res[2] += (type2) a[i] * (type2) b[4 * i + 2]; \
|
||||||
res[3] += (type2) a[i] * (type2) b[i * oversample + 3]; \
|
res[3] += (type2) a[i] * (type2) b[4 * i + 3]; \
|
||||||
} \
|
} \
|
||||||
res[0] = (res[0] >> (prec)) * ic[0] + \
|
res[0] = (res[0] >> (prec)) * (type2) ic[0] + \
|
||||||
(res[1] >> (prec)) * ic[1] + \
|
(res[1] >> (prec)) * (type2) ic[1] + \
|
||||||
(res[2] >> (prec)) * ic[2] + \
|
(res[2] >> (prec)) * (type2) ic[2] + \
|
||||||
(res[3] >> (prec)) * ic[3]; \
|
(res[3] >> (prec)) * (type2) ic[3]; \
|
||||||
res[0] = (res[0] + (1 << ((prec) - 1))) >> (prec); \
|
res[0] = (res[0] + (1L << ((prec) - 1))) >> (prec); \
|
||||||
*o = CLAMP (res[0], -(limit), (limit) - 1); \
|
*o = CLAMP (res[0], -(limit), (limit) - 1); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -605,8 +624,8 @@ inner_product_##type##_linear_1_c (type * o, const type * a, \
|
||||||
type res[2] = { 0.0, 0.0 }; \
|
type res[2] = { 0.0, 0.0 }; \
|
||||||
\
|
\
|
||||||
for (i = 0; i < len; i++) { \
|
for (i = 0; i < len; i++) { \
|
||||||
res[0] += a[i] * b[i * oversample + 0]; \
|
res[0] += a[i] * b[2 * i + 0]; \
|
||||||
res[1] += a[i] * b[i * oversample + 1]; \
|
res[1] += a[i] * b[2 * i + 1]; \
|
||||||
} \
|
} \
|
||||||
*o = res[0] * ic[0] + res[1] * ic[1]; \
|
*o = res[0] * ic[0] + res[1] * ic[1]; \
|
||||||
}
|
}
|
||||||
|
@ -622,10 +641,10 @@ inner_product_##type##_cubic_1_c (type * o, const type * a, \
|
||||||
type res[4] = { 0.0, 0.0, 0.0, 0.0 }; \
|
type res[4] = { 0.0, 0.0, 0.0, 0.0 }; \
|
||||||
\
|
\
|
||||||
for (i = 0; i < len; i++) { \
|
for (i = 0; i < len; i++) { \
|
||||||
res[0] += a[i] * b[i * oversample + 0]; \
|
res[0] += a[i] * b[4 * i + 0]; \
|
||||||
res[1] += a[i] * b[i * oversample + 1]; \
|
res[1] += a[i] * b[4 * i + 1]; \
|
||||||
res[2] += a[i] * b[i * oversample + 2]; \
|
res[2] += a[i] * b[4 * i + 2]; \
|
||||||
res[3] += a[i] * b[i * oversample + 3]; \
|
res[3] += a[i] * b[4 * i + 3]; \
|
||||||
} \
|
} \
|
||||||
*o = res[0] * ic[0] + res[1] * ic[1] + \
|
*o = res[0] * ic[0] + res[1] * ic[1] + \
|
||||||
res[2] * ic[2] + res[3] * ic[3]; \
|
res[2] * ic[2] + res[3] * ic[3]; \
|
||||||
|
@ -659,9 +678,10 @@ resample_ ##type## _ ##inter## _ ##channels## _ ##arch (GstAudioResampler * resa
|
||||||
\
|
\
|
||||||
ipp = &ip[samp_index * channels]; \
|
ipp = &ip[samp_index * channels]; \
|
||||||
\
|
\
|
||||||
taps = get_taps_ ##type##_##inter (resampler, &samp_index, &samp_phase, icoeff); \
|
taps = get_taps_ ##type##_##inter \
|
||||||
\
|
(resampler, &samp_index, &samp_phase, icoeff); \
|
||||||
inner_product_ ##type##_##inter##_##channels##_##arch (op, ipp, taps, n_taps, icoeff, oversample); \
|
inner_product_ ##type##_##inter##_##channels##_##arch \
|
||||||
|
(op, ipp, taps, n_taps, icoeff, oversample); \
|
||||||
op += ostride; \
|
op += ostride; \
|
||||||
} \
|
} \
|
||||||
memmove (ip, &ip[samp_index * channels], \
|
memmove (ip, &ip[samp_index * channels], \
|
||||||
|
@ -802,10 +822,10 @@ deinterleave_ ##type (GstAudioResampler * resampler, gpointer sbuf[], \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
MAKE_DEINTERLEAVE_FUNC (gdouble);
|
|
||||||
MAKE_DEINTERLEAVE_FUNC (gfloat);
|
|
||||||
MAKE_DEINTERLEAVE_FUNC (gint32);
|
|
||||||
MAKE_DEINTERLEAVE_FUNC (gint16);
|
MAKE_DEINTERLEAVE_FUNC (gint16);
|
||||||
|
MAKE_DEINTERLEAVE_FUNC (gint32);
|
||||||
|
MAKE_DEINTERLEAVE_FUNC (gfloat);
|
||||||
|
MAKE_DEINTERLEAVE_FUNC (gdouble);
|
||||||
|
|
||||||
static DeinterleaveFunc deinterleave_funcs[] = {
|
static DeinterleaveFunc deinterleave_funcs[] = {
|
||||||
deinterleave_gint16,
|
deinterleave_gint16,
|
||||||
|
@ -875,7 +895,7 @@ calculate_kaiser_params (GstAudioResampler * resampler)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps,
|
alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps,
|
||||||
gint n_phases)
|
gint n_phases, gint n_mult)
|
||||||
{
|
{
|
||||||
if (resampler->alloc_taps >= n_taps && resampler->alloc_phases >= n_phases)
|
if (resampler->alloc_taps >= n_taps && resampler->alloc_phases >= n_phases)
|
||||||
return;
|
return;
|
||||||
|
@ -883,7 +903,8 @@ alloc_coeff_mem (GstAudioResampler * resampler, gint bps, gint n_taps,
|
||||||
resampler->tmpcoeff =
|
resampler->tmpcoeff =
|
||||||
g_realloc_n (resampler->tmpcoeff, n_taps, sizeof (gdouble));
|
g_realloc_n (resampler->tmpcoeff, n_taps, sizeof (gdouble));
|
||||||
|
|
||||||
resampler->cstride = GST_ROUND_UP_32 (bps * (n_taps + TAPS_OVERREAD));
|
resampler->cstride =
|
||||||
|
GST_ROUND_UP_32 (bps * (n_mult * n_taps + TAPS_OVERREAD));
|
||||||
g_free (resampler->coeffmem);
|
g_free (resampler->coeffmem);
|
||||||
resampler->coeffmem = g_malloc0 (n_phases * resampler->cstride + ALIGN - 1);
|
resampler->coeffmem = g_malloc0 (n_phases * resampler->cstride + ALIGN - 1);
|
||||||
resampler->coeff = MEM_ALIGN (resampler->coeffmem, ALIGN);
|
resampler->coeff = MEM_ALIGN (resampler->coeffmem, ALIGN);
|
||||||
|
@ -983,7 +1004,7 @@ resampler_calculate_taps (GstAudioResampler * resampler)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (interpolate) {
|
if (interpolate) {
|
||||||
gint otaps;
|
gint otaps, mult;
|
||||||
gpointer coeff;
|
gpointer coeff;
|
||||||
gdouble x, weight, *tmpcoeff;
|
gdouble x, weight, *tmpcoeff;
|
||||||
GstAudioResamplerFilterInterpolation filter_interpolation =
|
GstAudioResamplerFilterInterpolation filter_interpolation =
|
||||||
|
@ -995,37 +1016,40 @@ resampler_calculate_taps (GstAudioResampler * resampler)
|
||||||
else
|
else
|
||||||
resampler->filter_interpolation = filter_interpolation;
|
resampler->filter_interpolation = filter_interpolation;
|
||||||
|
|
||||||
otaps = oversample * n_taps;
|
|
||||||
switch (resampler->filter_interpolation) {
|
switch (resampler->filter_interpolation) {
|
||||||
default:
|
default:
|
||||||
case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_LINEAR:
|
case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_LINEAR:
|
||||||
otaps += 1;
|
mult = 2;
|
||||||
break;
|
break;
|
||||||
case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_CUBIC:
|
case GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_CUBIC:
|
||||||
otaps += 3;
|
mult = 4;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
otaps = oversample * n_taps + mult - 1;
|
||||||
|
|
||||||
alloc_coeff_mem (resampler, bps, otaps, 1);
|
alloc_coeff_mem (resampler, bps, otaps, oversample, mult);
|
||||||
|
|
||||||
coeff = resampler->coeff;
|
coeff = tmpcoeff = resampler->tmpcoeff;
|
||||||
tmpcoeff = resampler->tmpcoeff;
|
|
||||||
x = 1.0 - n_taps / 2;
|
x = 1.0 - n_taps / 2;
|
||||||
weight = fill_taps (resampler, tmpcoeff, x, otaps, oversample);
|
weight = fill_taps (resampler, tmpcoeff, x, otaps, oversample);
|
||||||
|
|
||||||
switch (resampler->format) {
|
switch (resampler->format) {
|
||||||
case GST_AUDIO_FORMAT_S16:
|
case GST_AUDIO_FORMAT_S16:
|
||||||
convert_taps_gint16 (tmpcoeff, coeff, weight / oversample, otaps);
|
convert_taps_gint16 (tmpcoeff, coeff, weight / oversample, otaps);
|
||||||
|
extract_taps_gint16 (resampler, coeff, n_taps, oversample, mult);
|
||||||
break;
|
break;
|
||||||
case GST_AUDIO_FORMAT_S32:
|
case GST_AUDIO_FORMAT_S32:
|
||||||
convert_taps_gint32 (tmpcoeff, coeff, weight / oversample, otaps);
|
convert_taps_gint32 (tmpcoeff, coeff, weight / oversample, otaps);
|
||||||
|
extract_taps_gint32 (resampler, coeff, n_taps, oversample, mult);
|
||||||
break;
|
break;
|
||||||
case GST_AUDIO_FORMAT_F32:
|
case GST_AUDIO_FORMAT_F32:
|
||||||
convert_taps_gfloat (tmpcoeff, coeff, weight / oversample, otaps);
|
convert_taps_gfloat (tmpcoeff, coeff, weight / oversample, otaps);
|
||||||
|
extract_taps_gfloat (resampler, coeff, n_taps, oversample, mult);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
case GST_AUDIO_FORMAT_F64:
|
case GST_AUDIO_FORMAT_F64:
|
||||||
convert_taps_gdouble (tmpcoeff, coeff, weight / oversample, otaps);
|
convert_taps_gdouble (tmpcoeff, coeff, weight / oversample, otaps);
|
||||||
|
extract_taps_gdouble (resampler, coeff, n_taps, oversample, mult);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -1033,7 +1057,7 @@ resampler_calculate_taps (GstAudioResampler * resampler)
|
||||||
GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_NONE;
|
GST_AUDIO_RESAMPLER_FILTER_INTERPOLATION_NONE;
|
||||||
resampler->taps = g_realloc_n (resampler->taps, out_rate, sizeof (Tap));
|
resampler->taps = g_realloc_n (resampler->taps, out_rate, sizeof (Tap));
|
||||||
memset (resampler->taps, 0, sizeof (Tap) * out_rate);
|
memset (resampler->taps, 0, sizeof (Tap) * out_rate);
|
||||||
alloc_coeff_mem (resampler, bps, n_taps, out_rate);
|
alloc_coeff_mem (resampler, bps, n_taps, out_rate, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
resampler->samp_inc = in_rate / out_rate;
|
resampler->samp_inc = in_rate / out_rate;
|
||||||
|
|
Loading…
Reference in a new issue