mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-23 10:11:08 +00:00
fft: Merge kissfft 1.2.8
This reduces memory footprint for the FFT and adds OpenMP support (but we don't use it).
This commit is contained in:
parent
e3285fb53d
commit
fdfb70e262
12 changed files with 200 additions and 24 deletions
|
@ -265,6 +265,40 @@ kf_work (kiss_fft_f32_cpx * Fout,
|
||||||
const int m = *factors++; /* stage's fft length/p */
|
const int m = *factors++; /* stage's fft length/p */
|
||||||
const kiss_fft_f32_cpx *Fout_end = Fout + p * m;
|
const kiss_fft_f32_cpx *Fout_end = Fout + p * m;
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
// use openmp extensions at the
|
||||||
|
// top-level (not recursive)
|
||||||
|
if (fstride == 1) {
|
||||||
|
int k;
|
||||||
|
|
||||||
|
// execute the p different work units in different threads
|
||||||
|
# pragma omp parallel for
|
||||||
|
for (k = 0; k < p; ++k)
|
||||||
|
kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
|
||||||
|
in_stride, factors, st);
|
||||||
|
// all threads have joined by this point
|
||||||
|
|
||||||
|
switch (p) {
|
||||||
|
case 2:
|
||||||
|
kf_bfly2 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
kf_bfly3 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
kf_bfly4 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
kf_bfly5 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
kf_bfly_generic (Fout, fstride, st, m, p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (m == 1) {
|
if (m == 1) {
|
||||||
do {
|
do {
|
||||||
*Fout = *f;
|
*Fout = *f;
|
||||||
|
@ -272,6 +306,10 @@ kf_work (kiss_fft_f32_cpx * Fout,
|
||||||
} while (++Fout != Fout_end);
|
} while (++Fout != Fout_end);
|
||||||
} else {
|
} else {
|
||||||
do {
|
do {
|
||||||
|
// recursive call:
|
||||||
|
// DFT of size m*p performed by doing
|
||||||
|
// p instances of smaller DFTs of size m,
|
||||||
|
// each one takes a decimated version of the input
|
||||||
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
||||||
f += fstride * in_stride;
|
f += fstride * in_stride;
|
||||||
} while ((Fout += m) != Fout_end);
|
} while ((Fout += m) != Fout_end);
|
||||||
|
@ -279,6 +317,7 @@ kf_work (kiss_fft_f32_cpx * Fout,
|
||||||
|
|
||||||
Fout = Fout_beg;
|
Fout = Fout_beg;
|
||||||
|
|
||||||
|
// recombine the p smaller DFTs
|
||||||
switch (p) {
|
switch (p) {
|
||||||
case 2:
|
case 2:
|
||||||
kf_bfly2 (Fout, fstride, st, m);
|
kf_bfly2 (Fout, fstride, st, m);
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <memory.h>
|
#include <string.h>
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -92,6 +92,10 @@ void kiss_fft_f32_cleanup(void);
|
||||||
*/
|
*/
|
||||||
int kiss_fft_f32_next_fast_size(int n);
|
int kiss_fft_f32_next_fast_size(int n);
|
||||||
|
|
||||||
|
/* for real ffts, we need an even size */
|
||||||
|
#define kiss_fftr_next_fast_size_real(n) \
|
||||||
|
(kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -265,6 +265,40 @@ kf_work (kiss_fft_f64_cpx * Fout,
|
||||||
const int m = *factors++; /* stage's fft length/p */
|
const int m = *factors++; /* stage's fft length/p */
|
||||||
const kiss_fft_f64_cpx *Fout_end = Fout + p * m;
|
const kiss_fft_f64_cpx *Fout_end = Fout + p * m;
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
// use openmp extensions at the
|
||||||
|
// top-level (not recursive)
|
||||||
|
if (fstride == 1) {
|
||||||
|
int k;
|
||||||
|
|
||||||
|
// execute the p different work units in different threads
|
||||||
|
# pragma omp parallel for
|
||||||
|
for (k = 0; k < p; ++k)
|
||||||
|
kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
|
||||||
|
in_stride, factors, st);
|
||||||
|
// all threads have joined by this point
|
||||||
|
|
||||||
|
switch (p) {
|
||||||
|
case 2:
|
||||||
|
kf_bfly2 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
kf_bfly3 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
kf_bfly4 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
kf_bfly5 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
kf_bfly_generic (Fout, fstride, st, m, p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (m == 1) {
|
if (m == 1) {
|
||||||
do {
|
do {
|
||||||
*Fout = *f;
|
*Fout = *f;
|
||||||
|
@ -272,6 +306,10 @@ kf_work (kiss_fft_f64_cpx * Fout,
|
||||||
} while (++Fout != Fout_end);
|
} while (++Fout != Fout_end);
|
||||||
} else {
|
} else {
|
||||||
do {
|
do {
|
||||||
|
// recursive call:
|
||||||
|
// DFT of size m*p performed by doing
|
||||||
|
// p instances of smaller DFTs of size m,
|
||||||
|
// each one takes a decimated version of the input
|
||||||
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
||||||
f += fstride * in_stride;
|
f += fstride * in_stride;
|
||||||
} while ((Fout += m) != Fout_end);
|
} while ((Fout += m) != Fout_end);
|
||||||
|
@ -279,6 +317,7 @@ kf_work (kiss_fft_f64_cpx * Fout,
|
||||||
|
|
||||||
Fout = Fout_beg;
|
Fout = Fout_beg;
|
||||||
|
|
||||||
|
// recombine the p smaller DFTs
|
||||||
switch (p) {
|
switch (p) {
|
||||||
case 2:
|
case 2:
|
||||||
kf_bfly2 (Fout, fstride, st, m);
|
kf_bfly2 (Fout, fstride, st, m);
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <memory.h>
|
#include <string.h>
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -92,6 +92,10 @@ void kiss_fft_f64_cleanup(void);
|
||||||
*/
|
*/
|
||||||
int kiss_fft_f64_next_fast_size(int n);
|
int kiss_fft_f64_next_fast_size(int n);
|
||||||
|
|
||||||
|
/* for real ffts, we need an even size */
|
||||||
|
#define kiss_fftr_next_fast_size_real(n) \
|
||||||
|
(kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -265,6 +265,40 @@ kf_work (kiss_fft_s16_cpx * Fout,
|
||||||
const int m = *factors++; /* stage's fft length/p */
|
const int m = *factors++; /* stage's fft length/p */
|
||||||
const kiss_fft_s16_cpx *Fout_end = Fout + p * m;
|
const kiss_fft_s16_cpx *Fout_end = Fout + p * m;
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
// use openmp extensions at the
|
||||||
|
// top-level (not recursive)
|
||||||
|
if (fstride == 1) {
|
||||||
|
int k;
|
||||||
|
|
||||||
|
// execute the p different work units in different threads
|
||||||
|
# pragma omp parallel for
|
||||||
|
for (k = 0; k < p; ++k)
|
||||||
|
kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
|
||||||
|
in_stride, factors, st);
|
||||||
|
// all threads have joined by this point
|
||||||
|
|
||||||
|
switch (p) {
|
||||||
|
case 2:
|
||||||
|
kf_bfly2 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
kf_bfly3 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
kf_bfly4 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
kf_bfly5 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
kf_bfly_generic (Fout, fstride, st, m, p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (m == 1) {
|
if (m == 1) {
|
||||||
do {
|
do {
|
||||||
*Fout = *f;
|
*Fout = *f;
|
||||||
|
@ -272,6 +306,10 @@ kf_work (kiss_fft_s16_cpx * Fout,
|
||||||
} while (++Fout != Fout_end);
|
} while (++Fout != Fout_end);
|
||||||
} else {
|
} else {
|
||||||
do {
|
do {
|
||||||
|
// recursive call:
|
||||||
|
// DFT of size m*p performed by doing
|
||||||
|
// p instances of smaller DFTs of size m,
|
||||||
|
// each one takes a decimated version of the input
|
||||||
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
||||||
f += fstride * in_stride;
|
f += fstride * in_stride;
|
||||||
} while ((Fout += m) != Fout_end);
|
} while ((Fout += m) != Fout_end);
|
||||||
|
@ -279,6 +317,7 @@ kf_work (kiss_fft_s16_cpx * Fout,
|
||||||
|
|
||||||
Fout = Fout_beg;
|
Fout = Fout_beg;
|
||||||
|
|
||||||
|
// recombine the p smaller DFTs
|
||||||
switch (p) {
|
switch (p) {
|
||||||
case 2:
|
case 2:
|
||||||
kf_bfly2 (Fout, fstride, st, m);
|
kf_bfly2 (Fout, fstride, st, m);
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <memory.h>
|
#include <string.h>
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -95,6 +95,10 @@ void kiss_fft_s16_cleanup(void);
|
||||||
*/
|
*/
|
||||||
int kiss_fft_s16_next_fast_size(int n);
|
int kiss_fft_s16_next_fast_size(int n);
|
||||||
|
|
||||||
|
/* for real ffts, we need an even size */
|
||||||
|
#define kiss_fftr_next_fast_size_real(n) \
|
||||||
|
(kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -265,6 +265,40 @@ kf_work (kiss_fft_s32_cpx * Fout,
|
||||||
const int m = *factors++; /* stage's fft length/p */
|
const int m = *factors++; /* stage's fft length/p */
|
||||||
const kiss_fft_s32_cpx *Fout_end = Fout + p * m;
|
const kiss_fft_s32_cpx *Fout_end = Fout + p * m;
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
// use openmp extensions at the
|
||||||
|
// top-level (not recursive)
|
||||||
|
if (fstride == 1) {
|
||||||
|
int k;
|
||||||
|
|
||||||
|
// execute the p different work units in different threads
|
||||||
|
# pragma omp parallel for
|
||||||
|
for (k = 0; k < p; ++k)
|
||||||
|
kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
|
||||||
|
in_stride, factors, st);
|
||||||
|
// all threads have joined by this point
|
||||||
|
|
||||||
|
switch (p) {
|
||||||
|
case 2:
|
||||||
|
kf_bfly2 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
kf_bfly3 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
kf_bfly4 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
kf_bfly5 (Fout, fstride, st, m);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
kf_bfly_generic (Fout, fstride, st, m, p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (m == 1) {
|
if (m == 1) {
|
||||||
do {
|
do {
|
||||||
*Fout = *f;
|
*Fout = *f;
|
||||||
|
@ -272,6 +306,10 @@ kf_work (kiss_fft_s32_cpx * Fout,
|
||||||
} while (++Fout != Fout_end);
|
} while (++Fout != Fout_end);
|
||||||
} else {
|
} else {
|
||||||
do {
|
do {
|
||||||
|
// recursive call:
|
||||||
|
// DFT of size m*p performed by doing
|
||||||
|
// p instances of smaller DFTs of size m,
|
||||||
|
// each one takes a decimated version of the input
|
||||||
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
kf_work (Fout, f, fstride * p, in_stride, factors, st);
|
||||||
f += fstride * in_stride;
|
f += fstride * in_stride;
|
||||||
} while ((Fout += m) != Fout_end);
|
} while ((Fout += m) != Fout_end);
|
||||||
|
@ -279,6 +317,7 @@ kf_work (kiss_fft_s32_cpx * Fout,
|
||||||
|
|
||||||
Fout = Fout_beg;
|
Fout = Fout_beg;
|
||||||
|
|
||||||
|
// recombine the p smaller DFTs
|
||||||
switch (p) {
|
switch (p) {
|
||||||
case 2:
|
case 2:
|
||||||
kf_bfly2 (Fout, fstride, st, m);
|
kf_bfly2 (Fout, fstride, st, m);
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <memory.h>
|
#include <string.h>
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -96,6 +96,10 @@ void kiss_fft_s32_cleanup(void);
|
||||||
*/
|
*/
|
||||||
int kiss_fft_s32_next_fast_size(int n);
|
int kiss_fft_s32_next_fast_size(int n);
|
||||||
|
|
||||||
|
/* for real ffts, we need an even size */
|
||||||
|
#define kiss_fftr_next_fast_size_real(n) \
|
||||||
|
(kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -40,7 +40,7 @@ kiss_fftr_f32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||||
|
|
||||||
kiss_fft_f32_alloc (nfft, inverse_fft, NULL, &subsize);
|
kiss_fft_f32_alloc (nfft, inverse_fft, NULL, &subsize);
|
||||||
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state))
|
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_f32_state))
|
||||||
+ ALIGN_STRUCT (subsize) + sizeof (kiss_fft_f32_cpx) * (nfft * 2);
|
+ ALIGN_STRUCT (subsize) + sizeof (kiss_fft_f32_cpx) * (nfft * 3 / 2);
|
||||||
|
|
||||||
if (lenmem == NULL) {
|
if (lenmem == NULL) {
|
||||||
st = (kiss_fftr_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
|
st = (kiss_fftr_f32_cfg) KISS_FFT_F32_MALLOC (memneeded);
|
||||||
|
@ -58,8 +58,9 @@ kiss_fftr_f32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||||
st->super_twiddles = st->tmpbuf + nfft;
|
st->super_twiddles = st->tmpbuf + nfft;
|
||||||
kiss_fft_f32_alloc (nfft, inverse_fft, st->substate, &subsize);
|
kiss_fft_f32_alloc (nfft, inverse_fft, st->substate, &subsize);
|
||||||
|
|
||||||
for (i = 0; i < nfft; ++i) {
|
for (i = 0; i < nfft / 2; ++i) {
|
||||||
double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5);
|
double phase =
|
||||||
|
-3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
|
||||||
|
|
||||||
if (inverse_fft)
|
if (inverse_fft)
|
||||||
phase *= -1;
|
phase *= -1;
|
||||||
|
@ -117,7 +118,7 @@ kiss_fftr_f32 (kiss_fftr_f32_cfg st, const kiss_fft_f32_scalar * timedata,
|
||||||
|
|
||||||
C_ADD (f1k, fpk, fpnk);
|
C_ADD (f1k, fpk, fpnk);
|
||||||
C_SUB (f2k, fpk, fpnk);
|
C_SUB (f2k, fpk, fpnk);
|
||||||
C_MUL (tw, f2k, st->super_twiddles[k]);
|
C_MUL (tw, f2k, st->super_twiddles[k - 1]);
|
||||||
|
|
||||||
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
||||||
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
||||||
|
@ -155,7 +156,7 @@ kiss_fftri_f32 (kiss_fftr_f32_cfg st, const kiss_fft_f32_cpx * freqdata,
|
||||||
|
|
||||||
C_ADD (fek, fk, fnkc);
|
C_ADD (fek, fk, fnkc);
|
||||||
C_SUB (tmp, fk, fnkc);
|
C_SUB (tmp, fk, fnkc);
|
||||||
C_MUL (fok, tmp, st->super_twiddles[k]);
|
C_MUL (fok, tmp, st->super_twiddles[k - 1]);
|
||||||
C_ADD (st->tmpbuf[k], fek, fok);
|
C_ADD (st->tmpbuf[k], fek, fok);
|
||||||
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
||||||
#ifdef USE_SIMD
|
#ifdef USE_SIMD
|
||||||
|
|
|
@ -41,7 +41,7 @@ kiss_fftr_f64_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||||
kiss_fft_f64_alloc (nfft, inverse_fft, NULL, &subsize);
|
kiss_fft_f64_alloc (nfft, inverse_fft, NULL, &subsize);
|
||||||
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_f64_state))
|
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_f64_state))
|
||||||
+ ALIGN_STRUCT (subsize)
|
+ ALIGN_STRUCT (subsize)
|
||||||
+ sizeof (kiss_fft_f64_cpx) * (nfft * 2);
|
+ sizeof (kiss_fft_f64_cpx) * (nfft * 3 / 2);
|
||||||
|
|
||||||
if (lenmem == NULL) {
|
if (lenmem == NULL) {
|
||||||
st = (kiss_fftr_f64_cfg) KISS_FFT_F64_MALLOC (memneeded);
|
st = (kiss_fftr_f64_cfg) KISS_FFT_F64_MALLOC (memneeded);
|
||||||
|
@ -59,8 +59,9 @@ kiss_fftr_f64_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||||
st->super_twiddles = st->tmpbuf + nfft;
|
st->super_twiddles = st->tmpbuf + nfft;
|
||||||
kiss_fft_f64_alloc (nfft, inverse_fft, st->substate, &subsize);
|
kiss_fft_f64_alloc (nfft, inverse_fft, st->substate, &subsize);
|
||||||
|
|
||||||
for (i = 0; i < nfft; ++i) {
|
for (i = 0; i < nfft / 2; ++i) {
|
||||||
double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5);
|
double phase =
|
||||||
|
-3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
|
||||||
|
|
||||||
if (inverse_fft)
|
if (inverse_fft)
|
||||||
phase *= -1;
|
phase *= -1;
|
||||||
|
@ -118,7 +119,7 @@ kiss_fftr_f64 (kiss_fftr_f64_cfg st, const kiss_fft_f64_scalar * timedata,
|
||||||
|
|
||||||
C_ADD (f1k, fpk, fpnk);
|
C_ADD (f1k, fpk, fpnk);
|
||||||
C_SUB (f2k, fpk, fpnk);
|
C_SUB (f2k, fpk, fpnk);
|
||||||
C_MUL (tw, f2k, st->super_twiddles[k]);
|
C_MUL (tw, f2k, st->super_twiddles[k - 1]);
|
||||||
|
|
||||||
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
||||||
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
||||||
|
@ -156,7 +157,7 @@ kiss_fftri_f64 (kiss_fftr_f64_cfg st, const kiss_fft_f64_cpx * freqdata,
|
||||||
|
|
||||||
C_ADD (fek, fk, fnkc);
|
C_ADD (fek, fk, fnkc);
|
||||||
C_SUB (tmp, fk, fnkc);
|
C_SUB (tmp, fk, fnkc);
|
||||||
C_MUL (fok, tmp, st->super_twiddles[k]);
|
C_MUL (fok, tmp, st->super_twiddles[k - 1]);
|
||||||
C_ADD (st->tmpbuf[k], fek, fok);
|
C_ADD (st->tmpbuf[k], fek, fok);
|
||||||
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
||||||
#ifdef USE_SIMD
|
#ifdef USE_SIMD
|
||||||
|
|
|
@ -41,7 +41,7 @@ kiss_fftr_s16_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||||
kiss_fft_s16_alloc (nfft, inverse_fft, NULL, &subsize);
|
kiss_fft_s16_alloc (nfft, inverse_fft, NULL, &subsize);
|
||||||
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_s16_state))
|
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_s16_state))
|
||||||
+ ALIGN_STRUCT (subsize)
|
+ ALIGN_STRUCT (subsize)
|
||||||
+ sizeof (kiss_fft_s16_cpx) * (nfft * 2);
|
+ sizeof (kiss_fft_s16_cpx) * (nfft * 3 / 2);
|
||||||
|
|
||||||
if (lenmem == NULL) {
|
if (lenmem == NULL) {
|
||||||
st = (kiss_fftr_s16_cfg) KISS_FFT_S16_MALLOC (memneeded);
|
st = (kiss_fftr_s16_cfg) KISS_FFT_S16_MALLOC (memneeded);
|
||||||
|
@ -59,8 +59,9 @@ kiss_fftr_s16_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||||
st->super_twiddles = st->tmpbuf + nfft;
|
st->super_twiddles = st->tmpbuf + nfft;
|
||||||
kiss_fft_s16_alloc (nfft, inverse_fft, st->substate, &subsize);
|
kiss_fft_s16_alloc (nfft, inverse_fft, st->substate, &subsize);
|
||||||
|
|
||||||
for (i = 0; i < nfft; ++i) {
|
for (i = 0; i < nfft / 2; ++i) {
|
||||||
double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5);
|
double phase =
|
||||||
|
-3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
|
||||||
|
|
||||||
if (inverse_fft)
|
if (inverse_fft)
|
||||||
phase *= -1;
|
phase *= -1;
|
||||||
|
@ -118,7 +119,7 @@ kiss_fftr_s16 (kiss_fftr_s16_cfg st, const kiss_fft_s16_scalar * timedata,
|
||||||
|
|
||||||
C_ADD (f1k, fpk, fpnk);
|
C_ADD (f1k, fpk, fpnk);
|
||||||
C_SUB (f2k, fpk, fpnk);
|
C_SUB (f2k, fpk, fpnk);
|
||||||
C_MUL (tw, f2k, st->super_twiddles[k]);
|
C_MUL (tw, f2k, st->super_twiddles[k - 1]);
|
||||||
|
|
||||||
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
||||||
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
||||||
|
@ -156,7 +157,7 @@ kiss_fftri_s16 (kiss_fftr_s16_cfg st, const kiss_fft_s16_cpx * freqdata,
|
||||||
|
|
||||||
C_ADD (fek, fk, fnkc);
|
C_ADD (fek, fk, fnkc);
|
||||||
C_SUB (tmp, fk, fnkc);
|
C_SUB (tmp, fk, fnkc);
|
||||||
C_MUL (fok, tmp, st->super_twiddles[k]);
|
C_MUL (fok, tmp, st->super_twiddles[k - 1]);
|
||||||
C_ADD (st->tmpbuf[k], fek, fok);
|
C_ADD (st->tmpbuf[k], fek, fok);
|
||||||
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
||||||
#ifdef USE_SIMD
|
#ifdef USE_SIMD
|
||||||
|
|
|
@ -41,7 +41,7 @@ kiss_fftr_s32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||||
kiss_fft_s32_alloc (nfft, inverse_fft, NULL, &subsize);
|
kiss_fft_s32_alloc (nfft, inverse_fft, NULL, &subsize);
|
||||||
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_s32_state))
|
memneeded = ALIGN_STRUCT (sizeof (struct kiss_fftr_s32_state))
|
||||||
+ ALIGN_STRUCT (subsize)
|
+ ALIGN_STRUCT (subsize)
|
||||||
+ sizeof (kiss_fft_s32_cpx) * (nfft * 2);
|
+ sizeof (kiss_fft_s32_cpx) * (nfft * 3 / 2);
|
||||||
|
|
||||||
if (lenmem == NULL) {
|
if (lenmem == NULL) {
|
||||||
st = (kiss_fftr_s32_cfg) KISS_FFT_S32_MALLOC (memneeded);
|
st = (kiss_fftr_s32_cfg) KISS_FFT_S32_MALLOC (memneeded);
|
||||||
|
@ -59,8 +59,9 @@ kiss_fftr_s32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
|
||||||
st->super_twiddles = st->tmpbuf + nfft;
|
st->super_twiddles = st->tmpbuf + nfft;
|
||||||
kiss_fft_s32_alloc (nfft, inverse_fft, st->substate, &subsize);
|
kiss_fft_s32_alloc (nfft, inverse_fft, st->substate, &subsize);
|
||||||
|
|
||||||
for (i = 0; i < nfft; ++i) {
|
for (i = 0; i < nfft / 2; ++i) {
|
||||||
double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5);
|
double phase =
|
||||||
|
-3.14159265358979323846264338327 * ((double) (i + 1) / nfft + .5);
|
||||||
|
|
||||||
if (inverse_fft)
|
if (inverse_fft)
|
||||||
phase *= -1;
|
phase *= -1;
|
||||||
|
@ -118,7 +119,7 @@ kiss_fftr_s32 (kiss_fftr_s32_cfg st, const kiss_fft_s32_scalar * timedata,
|
||||||
|
|
||||||
C_ADD (f1k, fpk, fpnk);
|
C_ADD (f1k, fpk, fpnk);
|
||||||
C_SUB (f2k, fpk, fpnk);
|
C_SUB (f2k, fpk, fpnk);
|
||||||
C_MUL (tw, f2k, st->super_twiddles[k]);
|
C_MUL (tw, f2k, st->super_twiddles[k - 1]);
|
||||||
|
|
||||||
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
freqdata[k].r = HALF_OF (f1k.r + tw.r);
|
||||||
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
freqdata[k].i = HALF_OF (f1k.i + tw.i);
|
||||||
|
@ -156,7 +157,7 @@ kiss_fftri_s32 (kiss_fftr_s32_cfg st, const kiss_fft_s32_cpx * freqdata,
|
||||||
|
|
||||||
C_ADD (fek, fk, fnkc);
|
C_ADD (fek, fk, fnkc);
|
||||||
C_SUB (tmp, fk, fnkc);
|
C_SUB (tmp, fk, fnkc);
|
||||||
C_MUL (fok, tmp, st->super_twiddles[k]);
|
C_MUL (fok, tmp, st->super_twiddles[k - 1]);
|
||||||
C_ADD (st->tmpbuf[k], fek, fok);
|
C_ADD (st->tmpbuf[k], fek, fok);
|
||||||
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
||||||
#ifdef USE_SIMD
|
#ifdef USE_SIMD
|
||||||
|
|
Loading…
Reference in a new issue