/* GStreamer
* Copyright (C) <2015> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
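
/* audio-resampler-x86.h: SSE/SSE2/SSE4.1 variants of the resampler
 * inner-product and filter-interpolation kernels.  This header is
 * #included from the resampler implementation (audio-resampler.c), which
 * provides MAKE_RESAMPLE_FUNC, the PRECISION_S16/PRECISION_S32 constants
 * and the function pointers rebound by audio_resampler_check_x86() below. */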
#if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
#include <xmmintrin.h>
static inline void
inner_product_gfloat_none_1_sse (gfloat * o, const gfloat * a,
const gfloat * b, gint len, const gfloat * icoeff)
{
gint i = 0;
__m128 sum = _mm_setzero_ps ();
for (; i < len; i += 8) {
sum =
_mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + i + 0),
_mm_load_ps (b + i + 0)));
sum =
_mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + i + 4),
_mm_load_ps (b + i + 4)));
}
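  /* horizontal sum of the four partial sums (the same idiom recurs below) */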
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
sum = _mm_add_ss (sum, _mm_shuffle_ps (sum, sum, 0x55));
_mm_store_ss (o, sum);
}
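/* Mono float dot product with linear coefficient interpolation: b holds two
 * adjacent filter phases interleaved per tap, icoeff the two interpolation
 * weights laid out to match the alternating phase lanes, applied once after
 * accumulation. */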
static inline void
inner_product_gfloat_linear_1_sse (gfloat * o, const gfloat * a,
const gfloat * b, gint len, const gfloat * icoeff)
{
gint i = 0;
__m128 sum, t;
__m128 f = _mm_loadu_ps (icoeff);
sum = _mm_setzero_ps ();
for (; i < len; i += 4) {
t = _mm_loadu_ps (a + i);
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_unpacklo_ps (t, t),
_mm_load_ps (b + 2 * (i + 0))));
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_unpackhi_ps (t, t),
_mm_load_ps (b + 2 * (i + 2))));
}
sum = _mm_mul_ps (sum, f);
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
sum = _mm_add_ss (sum, _mm_shuffle_ps (sum, sum, 0x55));
_mm_store_ss (o, sum);
}
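/* Mono float dot product with cubic coefficient interpolation: b holds four
 * filter phases per tap, icoeff the four interpolation weights. */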
static inline void
inner_product_gfloat_cubic_1_sse (gfloat * o, const gfloat * a,
const gfloat * b, gint len, const gfloat * icoeff)
{
gint i = 0;
__m128 sum = _mm_setzero_ps ();
__m128 f = _mm_loadu_ps (icoeff);
for (; i < len; i += 2) {
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 0),
_mm_load_ps (b + 4 * (i + 0))));
sum = _mm_add_ps (sum, _mm_mul_ps (_mm_load1_ps (a + i + 1),
_mm_load_ps (b + 4 * (i + 1))));
}
sum = _mm_mul_ps (sum, f);
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
sum = _mm_add_ss (sum, _mm_shuffle_ps (sum, sum, 0x55));
_mm_store_ss (o, sum);
}
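/* Stereo (interleaved) float dot product: each mono coefficient in b is
 * duplicated to both channels, producing the two channel sums in o[0]/o[1]. */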
static inline void
inner_product_gfloat_none_2_sse (gfloat * o, const gfloat * a,
const gfloat * b, gint len, const gfloat * icoeff)
{
gint i = 0;
__m128 sum = _mm_setzero_ps (), t;
for (; i < len; i += 8) {
t = _mm_load_ps (b + i);
sum =
_mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 0),
_mm_unpacklo_ps (t, t)));
sum =
_mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 4),
_mm_unpackhi_ps (t, t)));
t = _mm_load_ps (b + i + 4);
sum =
_mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 8),
_mm_unpacklo_ps (t, t)));
sum =
_mm_add_ps (sum, _mm_mul_ps (_mm_loadu_ps (a + 2 * i + 12),
_mm_unpackhi_ps (t, t)));
}
sum = _mm_add_ps (sum, _mm_movehl_ps (sum, sum));
  _mm_storel_pi ((__m64 *) o, sum);
}
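/* MAKE_RESAMPLE_FUNC is provided by the including file; roughly, it expands
 * inner_product_gfloat_none_1_sse() into a resample_gfloat_none_1_sse()
 * wrapper that steps through the input and calls the inner product once per
 * output sample (the exact expansion lives in audio-resampler.c). */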
MAKE_RESAMPLE_FUNC (gfloat, none, 1, sse);
MAKE_RESAMPLE_FUNC (gfloat, linear, 1, sse);
MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, sse);
MAKE_RESAMPLE_FUNC (gfloat, none, 2, sse);
#endif
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
#include <emmintrin.h>
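/* 16-bit integer kernels: _mm_madd_epi16 multiplies pairs of 16-bit samples
 * and coefficients and accumulates into 32-bit lanes; the result is rounded
 * and shifted back by PRECISION_S16 fractional bits before packing. */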
static inline void
inner_product_gint16_none_1_sse2 (gint16 * o, const gint16 * a,
const gint16 * b, gint len, const gint16 * icoeff)
{
gint i = 0;
__m128i sum, ta, tb;
sum = _mm_setzero_si128 ();
for (; i < len; i += 8) {
ta = _mm_loadu_si128 ((__m128i *) (a + i));
tb = _mm_load_si128 ((__m128i *) (b + i));
sum = _mm_add_epi32 (sum, _mm_madd_epi16 (ta, tb));
}
sum =
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
3)));
sum =
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1,
1)));
sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
sum = _mm_srai_epi32 (sum, PRECISION_S16);
sum = _mm_packs_epi32 (sum, sum);
*o = _mm_extract_epi16 (sum, 0);
}
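/* As above, with linear interpolation between two filter phases: the
 * shuffles regroup the interleaved phases so that each _mm_madd_epi16 pairs
 * a sample with one tap of each phase; icoeff holds the two 16-bit weights. */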
static inline void
inner_product_gint16_linear_1_sse2 (gint16 * o, const gint16 * a,
const gint16 * b, gint len, const gint16 * icoeff)
{
gint i = 0;
__m128i sum, t, ta, tb;
  __m128i f = _mm_loadl_epi64 ((__m128i *) icoeff);
sum = _mm_setzero_si128 ();
f = _mm_unpacklo_epi16 (f, sum);
for (; i < len; i += 8) {
t = _mm_loadu_si128 ((__m128i *) (a + i));
ta = _mm_unpacklo_epi32 (t, t);
tb = _mm_load_si128 ((__m128i *) (b + 2 * i + 0));
tb = _mm_shufflelo_epi16 (tb, _MM_SHUFFLE (3,1,2,0));
tb = _mm_shufflehi_epi16 (tb, _MM_SHUFFLE (3,1,2,0));
sum = _mm_add_epi32 (sum, _mm_madd_epi16 (ta, tb));
ta = _mm_unpackhi_epi32 (t, t);
tb = _mm_load_si128 ((__m128i *) (b + 2 * i + 8));
tb = _mm_shufflelo_epi16 (tb, _MM_SHUFFLE (3,1,2,0));
tb = _mm_shufflehi_epi16 (tb, _MM_SHUFFLE (3,1,2,0));
sum = _mm_add_epi32 (sum, _mm_madd_epi16 (ta, tb));
}
sum = _mm_srai_epi32 (sum, PRECISION_S16);
sum = _mm_madd_epi16 (sum, f);
sum =
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
3)));
sum =
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1,
1)));
sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
sum = _mm_srai_epi32 (sum, PRECISION_S16);
sum = _mm_packs_epi32 (sum, sum);
*o = _mm_extract_epi16 (sum, 0);
}
static inline void
inner_product_gint16_cubic_1_sse2 (gint16 * o, const gint16 * a,
const gint16 * b, gint len, const gint16 * icoeff)
{
gint i = 0;
__m128i sum, ta, tb;
  __m128i f = _mm_loadl_epi64 ((__m128i *) icoeff);
sum = _mm_setzero_si128 ();
f = _mm_unpacklo_epi16 (f, sum);
for (; i < len; i += 2) {
ta = _mm_cvtsi32_si128 (*(gint32*)(a + i));
ta = _mm_unpacklo_epi32 (ta, ta);
ta = _mm_unpacklo_epi32 (ta, ta);
    tb = _mm_unpacklo_epi16 (_mm_loadl_epi64 ((__m128i *) (b + 4 * i + 0)),
        _mm_loadl_epi64 ((__m128i *) (b + 4 * i + 4)));
sum = _mm_add_epi32 (sum, _mm_madd_epi16 (ta, tb));
}
sum = _mm_srai_epi32 (sum, PRECISION_S16);
sum = _mm_madd_epi16 (sum, f);
sum =
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
3)));
sum =
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (1, 1, 1,
1)));
sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
sum = _mm_srai_epi32 (sum, PRECISION_S16);
sum = _mm_packs_epi32 (sum, sum);
*o = _mm_extract_epi16 (sum, 0);
}
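/* The double-precision kernels mirror the float ones above, processing two
 * lanes per vector. */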
static inline void
inner_product_gdouble_none_1_sse2 (gdouble * o, const gdouble * a,
const gdouble * b, gint len, const gdouble * icoeff)
{
gint i = 0;
__m128d sum = _mm_setzero_pd ();
for (; i < len; i += 8) {
sum =
_mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + i + 0),
_mm_load_pd (b + i + 0)));
sum =
_mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + i + 2),
_mm_load_pd (b + i + 2)));
sum =
_mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + i + 4),
_mm_load_pd (b + i + 4)));
sum =
_mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + i + 6),
_mm_load_pd (b + i + 6)));
}
sum = _mm_add_sd (sum, _mm_unpackhi_pd (sum, sum));
_mm_store_sd (o, sum);
}
static inline void
inner_product_gdouble_linear_1_sse2 (gdouble * o, const gdouble * a,
const gdouble * b, gint len, const gdouble * icoeff)
{
gint i = 0;
__m128d sum = _mm_setzero_pd ();
__m128d f = _mm_loadu_pd (icoeff);
for (; i < len; i += 4) {
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 0), _mm_load_pd (b + 2 * i + 0)));
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 1), _mm_load_pd (b + 2 * i + 2)));
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 2), _mm_load_pd (b + 2 * i + 4)));
sum = _mm_add_pd (sum, _mm_mul_pd (_mm_load1_pd (a + i + 3), _mm_load_pd (b + 2 * i + 6)));
}
sum = _mm_mul_pd (sum, f);
sum = _mm_add_sd (sum, _mm_unpackhi_pd (sum, sum));
_mm_store_sd (o, sum);
}
static inline void
inner_product_gdouble_cubic_1_sse2 (gdouble * o, const gdouble * a,
const gdouble * b, gint len, const gdouble * icoeff)
{
gint i = 0;
__m128d sum1 = _mm_setzero_pd (), t;
__m128d sum2 = _mm_setzero_pd ();
__m128d f1 = _mm_loadu_pd (icoeff);
__m128d f2 = _mm_loadu_pd (icoeff+2);
for (; i < len; i += 2) {
t = _mm_load1_pd (a + i + 0);
sum1 = _mm_add_pd (sum1, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 0)));
sum2 = _mm_add_pd (sum2, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 2)));
t = _mm_load1_pd (a + i + 1);
sum1 = _mm_add_pd (sum1, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 4)));
sum2 = _mm_add_pd (sum2, _mm_mul_pd (t, _mm_load_pd (b + 4 * i + 6)));
}
sum1 = _mm_mul_pd (sum1, f1);
sum2 = _mm_mul_pd (sum2, f2);
sum1 = _mm_add_pd (sum1, sum2);
sum1 = _mm_add_sd (sum1, _mm_unpackhi_pd (sum1, sum1));
_mm_store_sd (o, sum1);
}
static inline void
inner_product_gint16_none_2_sse2 (gint16 * o, const gint16 * a,
const gint16 * b, gint len, const gint16 * icoeff)
{
gint i = 0;
__m128i sum, ta, tb, t1;
sum = _mm_setzero_si128 ();
for (; i < len; i += 8) {
tb = _mm_load_si128 ((__m128i *) (b + i));
t1 = _mm_unpacklo_epi16 (tb, tb);
ta = _mm_loadu_si128 ((__m128i *) (a + 2 * i));
sum = _mm_add_epi32 (sum, _mm_madd_epi16 (ta, t1));
t1 = _mm_unpackhi_epi16 (tb, tb);
ta = _mm_loadu_si128 ((__m128i *) (a + 2 * i + 8));
sum = _mm_add_epi32 (sum, _mm_madd_epi16 (ta, t1));
}
sum =
_mm_add_epi32 (sum, _mm_shuffle_epi32 (sum, _MM_SHUFFLE (2, 3, 2,
3)));
sum = _mm_add_epi32 (sum, _mm_set1_epi32 (1 << (PRECISION_S16 - 1)));
sum = _mm_srai_epi32 (sum, PRECISION_S16);
sum = _mm_packs_epi32 (sum, sum);
*(gint32*)o = _mm_cvtsi128_si32 (sum);
}
static inline void
inner_product_gdouble_none_2_sse2 (gdouble * o, const gdouble * a,
const gdouble * b, gint len, const gdouble * icoeff)
{
gint i = 0;
__m128d sum = _mm_setzero_pd (), t;
for (; i < len; i += 4) {
t = _mm_load_pd (b + i);
sum =
_mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i),
_mm_unpacklo_pd (t, t)));
sum =
_mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 2),
_mm_unpackhi_pd (t, t)));
t = _mm_load_pd (b + i + 2);
sum =
_mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 4),
_mm_unpacklo_pd (t, t)));
sum =
_mm_add_pd (sum, _mm_mul_pd (_mm_loadu_pd (a + 2 * i + 6),
_mm_unpackhi_pd (t, t)));
}
_mm_store_pd (o, sum);
}
MAKE_RESAMPLE_FUNC (gint16, none, 1, sse2);
MAKE_RESAMPLE_FUNC (gint16, linear, 1, sse2);
MAKE_RESAMPLE_FUNC (gint16, cubic, 1, sse2);
MAKE_RESAMPLE_FUNC (gdouble, none, 1, sse2);
MAKE_RESAMPLE_FUNC (gdouble, linear, 1, sse2);
MAKE_RESAMPLE_FUNC (gdouble, cubic, 1, sse2);
MAKE_RESAMPLE_FUNC (gint16, none, 2, sse2);
MAKE_RESAMPLE_FUNC (gdouble, none, 2, sse2);
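/* Build the coefficients for an in-between fractional phase by blending
 * precomputed filter phases: a holds the two (linear) or four (cubic)
 * bracketing phases interleaved per tap, icoeff the blending weights, and
 * o receives the resulting filter. */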
static void
interpolate_gdouble_linear_sse2 (gdouble * o, const gdouble * a,
gint len, const gdouble * icoeff)
{
gint i = 0;
__m128d f = _mm_loadu_pd (icoeff), t1, t2;
for (; i < len; i += 2) {
t1 = _mm_mul_pd (_mm_load_pd (a + 2*i + 0), f);
t1 = _mm_add_sd (t1, _mm_unpackhi_pd (t1, t1));
t2 = _mm_mul_pd (_mm_load_pd (a + 2*i + 2), f);
t2 = _mm_add_sd (t2, _mm_unpackhi_pd (t2, t2));
_mm_store_pd (o + i, _mm_unpacklo_pd (t1, t2));
}
}
static void
interpolate_gdouble_cubic_sse2 (gdouble * o, const gdouble * a,
gint len, const gdouble * icoeff)
{
gint i = 0;
__m128d t1, t2;
__m128d f1 = _mm_loadu_pd (icoeff);
__m128d f2 = _mm_loadu_pd (icoeff+2);
for (; i < len; i += 2) {
t1 = _mm_add_pd (_mm_mul_pd (_mm_load_pd (a + 4*i + 0), f1),
_mm_mul_pd (_mm_load_pd (a + 4*i + 2), f2));
t1 = _mm_add_sd (t1, _mm_unpackhi_pd (t1, t1));
t2 = _mm_add_pd (_mm_mul_pd (_mm_load_pd (a + 4*i + 4), f1),
_mm_mul_pd (_mm_load_pd (a + 4*i + 6), f2));
t2 = _mm_add_sd (t2, _mm_unpackhi_pd (t2, t2));
_mm_store_pd (o + i, _mm_unpacklo_pd (t1, t2));
}
}
#endif
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
#include <smmintrin.h>
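/* 32-bit integer kernels need the SSE4.1 _mm_mul_epi32 (signed 32x32->64)
 * multiply; products are accumulated in 64-bit lanes and rounded back by
 * PRECISION_S32 fractional bits at the end. */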
static inline void
inner_product_gint32_none_1_sse41 (gint32 * o, const gint32 * a,
const gint32 * b, gint len, const gint32 * icoeff)
{
gint i = 0;
__m128i sum, ta, tb;
gint64 res;
sum = _mm_setzero_si128 ();
for (; i < len; i += 8) {
ta = _mm_loadu_si128 ((__m128i *) (a + i));
tb = _mm_load_si128 ((__m128i *) (b + i));
sum =
_mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta),
_mm_unpacklo_epi32 (tb, tb)));
sum =
_mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta),
_mm_unpackhi_epi32 (tb, tb)));
ta = _mm_loadu_si128 ((__m128i *) (a + i + 4));
tb = _mm_load_si128 ((__m128i *) (b + i + 4));
sum =
_mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta),
_mm_unpacklo_epi32 (tb, tb)));
sum =
_mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta),
_mm_unpackhi_epi32 (tb, tb)));
}
sum = _mm_add_epi64 (sum, _mm_unpackhi_epi64 (sum, sum));
  _mm_storel_epi64 ((__m128i *) &res, sum);
res = (res + (1 << (PRECISION_S32 - 1))) >> PRECISION_S32;
  *o = CLAMP (res, G_MININT32, G_MAXINT32);
}
static inline void
inner_product_gint32_linear_1_sse41 (gint32 * o, const gint32 * a,
const gint32 * b, gint len, const gint32 * icoeff)
{
gint i = 0;
gint64 res;
__m128i sum, t, ta, tb;
__m128i f = _mm_loadu_si128 ((__m128i *)icoeff);
sum = _mm_setzero_si128 ();
f = _mm_unpacklo_epi32 (f, f);
for (; i < len; i += 4) {
t = _mm_loadu_si128 ((__m128i *)(a + i));
ta = _mm_unpacklo_epi32 (t, t);
tb = _mm_load_si128 ((__m128i *)(b + 2*i + 0));
sum =
_mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpacklo_epi64 (ta, ta),
_mm_unpacklo_epi32 (tb, tb)));
sum =
_mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpackhi_epi64 (ta, ta),
_mm_unpackhi_epi32 (tb, tb)));
ta = _mm_unpackhi_epi32 (t, t);
tb = _mm_load_si128 ((__m128i *)(b + 2*i + 4));
sum =
_mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpacklo_epi64 (ta, ta),
_mm_unpacklo_epi32 (tb, tb)));
sum =
_mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpackhi_epi64 (ta, ta),
_mm_unpackhi_epi32 (tb, tb)));
}
sum = _mm_srli_epi64 (sum, PRECISION_S32);
sum = _mm_mul_epi32 (sum, f);
sum = _mm_add_epi64 (sum, _mm_unpackhi_epi64 (sum, sum));
  _mm_storel_epi64 ((__m128i *) &res, sum);
res = (res + (1 << (PRECISION_S32 - 1))) >> PRECISION_S32;
  *o = CLAMP (res, G_MININT32, G_MAXINT32);
}
static inline void
inner_product_gint32_cubic_1_sse41 (gint32 * o, const gint32 * a,
const gint32 * b, gint len, const gint32 * icoeff)
{
gint i = 0;
gint64 res;
__m128i sum1, sum2, t, ta, tb;
__m128i f = _mm_loadu_si128 ((__m128i *)icoeff), f1, f2;
sum1 = sum2 = _mm_setzero_si128 ();
f1 = _mm_unpacklo_epi32 (f, f);
f2 = _mm_unpackhi_epi32 (f, f);
for (; i < len; i += 2) {
    t = _mm_loadl_epi64 ((__m128i *) (a + i));
t = _mm_unpacklo_epi32 (t, t);
ta = _mm_unpacklo_epi64 (t, t);
tb = _mm_load_si128 ((__m128i *)(b + 4*i + 0));
sum1 =
_mm_add_epi64 (sum1, _mm_mul_epi32 (ta, _mm_unpacklo_epi32 (tb, tb)));
sum2 =
_mm_add_epi64 (sum2, _mm_mul_epi32 (ta, _mm_unpackhi_epi32 (tb, tb)));
ta = _mm_unpackhi_epi64 (t, t);
tb = _mm_load_si128 ((__m128i *)(b + 4*i + 4));
sum1 =
_mm_add_epi64 (sum1, _mm_mul_epi32 (ta, _mm_unpacklo_epi32 (tb, tb)));
sum2 =
_mm_add_epi64 (sum2, _mm_mul_epi32 (ta, _mm_unpackhi_epi32 (tb, tb)));
}
sum1 = _mm_srli_epi64 (sum1, PRECISION_S32);
sum2 = _mm_srli_epi64 (sum2, PRECISION_S32);
sum1 = _mm_mul_epi32 (sum1, f1);
sum2 = _mm_mul_epi32 (sum2, f2);
sum1 = _mm_add_epi64 (sum1, sum2);
sum1 = _mm_add_epi64 (sum1, _mm_unpackhi_epi64 (sum1, sum1));
  _mm_storel_epi64 ((__m128i *) &res, sum1);
res = (res + (1 << (PRECISION_S32 - 1))) >> PRECISION_S32;
  *o = CLAMP (res, G_MININT32, G_MAXINT32);
}
MAKE_RESAMPLE_FUNC (gint32, none, 1, sse41);
MAKE_RESAMPLE_FUNC (gint32, linear, 1, sse41);
MAKE_RESAMPLE_FUNC (gint32, cubic, 1, sse41);
#endif
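/* Runtime dispatch: the caller (audio-resampler.c) invokes this at init time
 * with the name of an instruction set detected on the running CPU; the
 * generic function pointers are rebound to the SIMD variants compiled in
 * above. */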
static void
audio_resampler_check_x86 (const gchar *option)
{
if (!strcmp (option, "sse")) {
#if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
GST_DEBUG ("enable SSE optimisations");
resample_gfloat_none_1 = resample_gfloat_none_1_sse;
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
#else
GST_DEBUG ("SSE optimisations not enabled");
#endif
} else if (!strcmp (option, "sse2")) {
#if defined (HAVE_EMMINTRIN_H) && defined(__SSE2__)
GST_DEBUG ("enable SSE2 optimisations");
resample_gint16_none_1 = resample_gint16_none_1_sse2;
resample_gint16_linear_1 = resample_gint16_linear_1_sse2;
resample_gint16_cubic_1 = resample_gint16_cubic_1_sse2;
resample_gfloat_none_1 = resample_gfloat_none_1_sse;
resample_gfloat_linear_1 = resample_gfloat_linear_1_sse;
resample_gfloat_cubic_1 = resample_gfloat_cubic_1_sse;
resample_gdouble_none_1 = resample_gdouble_none_1_sse2;
resample_gdouble_linear_1 = resample_gdouble_linear_1_sse2;
resample_gdouble_cubic_1 = resample_gdouble_cubic_1_sse2;
resample_gint16_none_2 = resample_gint16_none_2_sse2;
resample_gfloat_none_2 = resample_gfloat_none_2_sse;
resample_gdouble_none_2 = resample_gdouble_none_2_sse2;
interpolate_gdouble_linear = interpolate_gdouble_linear_sse2;
interpolate_gdouble_cubic = interpolate_gdouble_cubic_sse2;
#else
GST_DEBUG ("SSE2 optimisations not enabled");
#endif
} else if (!strcmp (option, "sse41")) {
#if defined (HAVE_SMMINTRIN_H) && defined(__SSE4_1__)
GST_DEBUG ("enable SSE41 optimisations");
resample_gint32_none_1 = resample_gint32_none_1_sse41;
resample_gint32_linear_1 = resample_gint32_linear_1_sse41;
resample_gint32_cubic_1 = resample_gint32_cubic_1_sse41;
#else
GST_DEBUG ("SSE41 optimisations not enabled");
#endif
}
}
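
/* Usage sketch (hypothetical caller, for illustration only):
 *
 *   audio_resampler_check_x86 ("sse2");
 *
 * The including code is expected to pass an option string derived from
 * runtime CPU detection; it must match one of "sse", "sse2" or "sse41". */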