adding resample lib

Original commit message from CVS:
adding resample lib
This commit is contained in:
Thomas Vander Stichele 2001-12-23 10:44:28 +00:00
parent bdc729b656
commit f00917f6d4
7 changed files with 1641 additions and 0 deletions

View file

@ -0,0 +1,26 @@
libdir = $(libdir)/gst
lib_LTLIBRARIES = libresample.la
if HAVE_CPU_I386
ARCHCFLAGS = -march=i486
else
if HAVE_CPU_PPC
ARCHCFLAGS = -Wa,-m7400
else
ARCHCFLAGS =
endif
endif
libresample_la_SOURCES = dtos.c functable.c resample.c resample.h
libresample_la_LIBADD = $(GST_LIBS)
libresample_la_CFLAGS = $(GST_CFLAGS) -ffast-math $(ARCHCFLAGS)
noinst_HEADERS = resample.h
#check_PROGRAMS = test
#test_SOURCES = test.c
#test_LDADD = libresample.la

View file

@ -0,0 +1,62 @@
This is a snapshot of my current work developing an audio
resampling library. While working on this library, I started
writing lots of general purpose functions that should really
be part of a larger library. Rather than have a constantly
changing library, and since the current code is capable, I
decided to freeze this codebase for use with gstreamer, and
move active development of the code elsewhere.
The algorithm used is based on Shannon's theorem, which says
that you can recreate an input signal from equidistant samples
using a sin(x)/x filter; thus, you can create new samples from
the regenerated input signal. Since sin(x)/x is expensive to
evaluate, an interpolated lookup table is used. Also, a
windowing function (1-x^2)^2 is used, which aids the convergence
of sin(x)/x for lower frequencies at the expense of higher.
There is one tunable parameter, which is the filter length.
Longer filter lengths are obviously slower, but more accurate.
There's not much reason to use a filter length longer than 64,
since other approximations start to dominate. Filter lengths
as short as 8 are audially acceptable, but should not be
considered for serious work.
Performance: A PowerPC G4 at 400 Mhz can resample 2 audio
channels at almost 10x speed with a filter length of 64, without
using Altivec extensions. (My goal was 10x speed, which I almost
reached. Maybe later.)
Limitations: Currently only supports streams in the form of
interleaved signed 16-bit samples.
The test.c program is a simple regression test. It creates a
test input pattern (1 sec at 48 khz) that is a frequency ramp
from 0 to 24000 hz, and then converts it to 44100 hz using a
filter length of 64. It then compares the result to the same
pattern generated at 44100 hz, and outputs the result to the
file "out".
A graph of the correct output should have field 2 and field 4
almost equal (plus/minus 1) up to about sample 40000 (which
corresponds to 20 khz), and then field 2 should be close to 0
above that. Running the test program will print to stdout
something like the following:
time 0.112526
average error 10k=0.4105 22k=639.34
The average error is RMS error over the range [0-10khz] and
[0-22khz], and is expressed in sample values, for an input
amplitude of 16000. Note that RMS errors below 1.0 can't
really be compared, but basically this shows that below
10 khz, the resampler is nearly perfect. Most of the error
is concentrated above 20 khz.
If the average error is significantly larger after modifying
the code, it's probably not good.
dave...

View file

@ -0,0 +1,201 @@
/* Resampling library
* Copyright (C) <2001> David A. Schleef <ds@schleef.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <string.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
//#include <ml.h>
#include <resample.h>
#define short_to_double_table
//#define short_to_double_altivec
#define short_to_double_unroll
#ifdef short_to_double_table
static float ints_high[256];
static float ints_low[256];
void conv_double_short_table(double *dest, short *src, int n)
{
static int init = 0;
int i;
unsigned int idx;
if(!init){
for(i=0;i<256;i++){
ints_high[i]=256.0*((i<128)?i:i-256);
ints_low[i]=i;
}
init = 1;
}
if(n&1){
idx = (unsigned short)*src++;
*dest++ = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
n-=1;
}
for(i=0;i<n;i+=2){
idx = (unsigned short)*src++;
*dest++ = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
idx = (unsigned short)*src++;
*dest++ = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
}
}
#endif
#ifdef short_to_double_unroll
void conv_double_short_unroll(double *dest, short *src, int n)
{
if(n&1){
*dest++ = *src++;
n--;
}
if(n&2){
*dest++ = *src++;
*dest++ = *src++;
n-=2;
}
while(n>0){
*dest++ = *src++;
*dest++ = *src++;
*dest++ = *src++;
*dest++ = *src++;
n-=4;
}
}
#endif
void conv_double_short_ref(double *dest, short *src, int n)
{
int i;
for(i=0;i<n;i++){
dest[i]=src[i];
}
}
#ifdef HAVE_CPU_PPC
#if 0
static union { int i[4]; float f[4]; } av_tmp __attribute__ ((__aligned__ (16)));
void conv_double_short_altivec(double *dest, short *src, int n)
{
int i;
for(i=0;i<n;i+=4){
av_tmp.i[0] = src[0];
av_tmp.i[1] = src[1];
av_tmp.i[2] = src[2];
av_tmp.i[3] = src[3];
asm(
" lvx 0,0,%0\n"
" vcfsx 1,0,0\n"
" stvx 1,0,%0\n"
: : "r" (&av_tmp)
);
dest[0]=av_tmp.f[0];
dest[1]=av_tmp.f[1];
dest[2]=av_tmp.f[2];
dest[3]=av_tmp.f[3];
src += 4;
dest += 4;
}
}
#endif
#endif
/* double to short */
void conv_short_double_ref(short *dest, double *src, int n)
{
int i;
double x;
for(i=0;i<n;i++){
x = *src++;
if(x<-32768.0)x=-32768.0;
if(x>32767.0)x=32767.0;
*dest++ = rint(x);
}
}
#ifdef HAVE_CPU_PPC
void conv_short_double_ppcasm(short *dest, double *src, int n)
{
int tmp[2];
double min = -32768.0;
double max = 32767.0;
double ftmp0, ftmp1;
asm __volatile__(
"\taddic. %3,%3,-8\n"
"\taddic. %6,%6,-2\n"
"loop:\n"
"\tlfdu %0,8(%3)\n"
"\tfsub %1,%0,%4\n"
"\tfsel %0,%1,%0,%4\n"
"\tfsub %1,%0,%5\n"
"\tfsel %0,%1,%5,%0\n"
"\tfctiw %1,%0\n"
"\taddic. 5,5,-1\n"
"\tstfd %1,0(%2)\n"
"\tlhz 9,6(%2)\n"
"\tsthu 9,2(%6)\n"
"\tbne loop\n"
: "=&f" (ftmp0), "=&f" (ftmp1)
: "b" (tmp), "r" (src), "f" (min), "f" (max), "r" (dest)
: "r9", "r5" );
}
#endif
void conv_double_short_dstr(double *dest, short *src, int n, int dstr)
{
int i;
void *d = dest;
for(i=0;i<n;i++){
(*(double *)d)=*src++;
d += dstr;
}
}
void conv_short_double_sstr(short *dest, double *src, int n, int sstr)
{
int i;
double x;
void *s = src;
for(i=0;i<n;i++){
x = *(double *)s;
if(x<-32768.0)x=-32768.0;
if(x>32767.0)x=32767.0;
*dest++ = rint(x);
s += sstr;
}
}

View file

@ -0,0 +1,311 @@
/* Resampling library
* Copyright (C) <2001> David A. Schleef <ds@schleef.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <string.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <resample.h>
double functable_sinc(void *p,double x)
{
if(x==0)return 1;
return sin(x)/x;
}
double functable_dsinc(void *p,double x)
{
if(x==0)return 0;
return cos(x)/x - sin(x)/(x*x);
}
double functable_window_boxcar(void *p,double x)
{
if(x<-1 || x>1)return 0;
return 1;
}
double functable_window_dboxcar(void *p,double x)
{
return 0;
}
double functable_window_std(void *p,double x)
{
if(x<-1 || x>1)return 0;
return (1-x*x)*(1-x*x);
}
double functable_window_dstd(void *p,double x)
{
if(x<-1 || x>1)return 0;
return -4*x*(1-x*x);
}
void functable_init(functable_t *t)
{
int i;
double x;
t->fx = malloc(sizeof(double)*(t->len+1));
t->fdx = malloc(sizeof(double)*(t->len+1));
t->invoffset = 1.0 / t->offset;
for(i=0;i<t->len+1;i++){
x = t->start + t->offset * i;
x *= t->scale;
t->fx[i] = t->func_x(t->priv,x);
t->fdx[i] = t->scale * t->func_dx(t->priv,x);
}
if(t->func2_x){
double f1x,f1dx;
double f2x,f2dx;
for(i=0;i<t->len+1;i++){
x = t->start + t->offset * i;
x *= t->scale2;
f2x = t->func2_x(t->priv,x);
f2dx = t->scale2 * t->func2_dx(t->priv,x);
f1x = t->fx[i];
f1dx = t->fdx[i];
t->fx[i] = f1x * f2x;
t->fdx[i] = f1x * f2dx + f1dx * f2x;
}
}
}
double functable_eval(functable_t *t,double x)
{
int i;
double f0, f1, w0, w1;
double x2, x3;
double w;
if(x<t->start || x>(t->start+(t->len+1)*t->offset)){
printf("x out of range %g\n",x);
}
x -= t->start;
x /= t->offset;
i = floor(x);
x -= i;
x2 = x * x;
x3 = x2 * x;
f1 = 3 * x2 - 2 * x3;
f0 = 1 - f1;
w0 = (x - 2 * x2 + x3) * t->offset;
w1 = (-x2 + x3) * t->offset;
//printf("i=%d x=%g f0=%g f1=%g w0=%g w1=%g\n",i,x,f0,f1,w0,w1);
w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
t->fdx[i] * w0 + t->fdx[i + 1] * w1;
//w = t->fx[i] * (1-x) + t->fx[i+1] * x;
return w;
}
double functable_fir(functable_t *t, double x, int n, double *data, int len)
{
int i,j;
double f0, f1, w0, w1;
double x2, x3;
double w;
double sum;
x -= t->start;
x /= t->offset;
i = floor(x);
x -= i;
x2 = x * x;
x3 = x2 * x;
f1 = 3 * x2 - 2 * x3;
f0 = 1 - f1;
w0 = (x - 2 * x2 + x3) * t->offset;
w1 = (-x2 + x3) * t->offset;
sum = 0;
for(j=0;j<len;j++){
w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
t->fdx[i] * w0 + t->fdx[i + 1] * w1;
sum += data[j*2] * w;
i += n;
}
return sum;
}
void functable_fir2(functable_t *t, double *r0, double *r1, double x,
int n, double *data, int len)
{
int i,j;
double f0, f1, w0, w1;
double x2, x3;
double w;
double sum0, sum1;
double floor_x;
x -= t->start;
x *= t->invoffset;
floor_x = floor(x);
i = floor_x;
x -= floor_x;
x2 = x * x;
x3 = x2 * x;
f1 = 3 * x2 - 2 * x3;
f0 = 1 - f1;
w0 = (x - 2 * x2 + x3) * t->offset;
w1 = (-x2 + x3) * t->offset;
sum0 = 0;
sum1 = 0;
for(j=0;j<len;j++){
w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
t->fdx[i] * w0 + t->fdx[i + 1] * w1;
sum0 += data[j*2] * w;
sum1 += data[j*2+1] * w;
i += n;
#define unroll2
#define unroll3
#define unroll4
#ifdef unroll2
j++;
w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
t->fdx[i] * w0 + t->fdx[i + 1] * w1;
sum0 += data[j*2] * w;
sum1 += data[j*2+1] * w;
i += n;
#endif
#ifdef unroll3
j++;
w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
t->fdx[i] * w0 + t->fdx[i + 1] * w1;
sum0 += data[j*2] * w;
sum1 += data[j*2+1] * w;
i += n;
#endif
#ifdef unroll4
j++;
w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
t->fdx[i] * w0 + t->fdx[i + 1] * w1;
sum0 += data[j*2] * w;
sum1 += data[j*2+1] * w;
i += n;
#endif
}
*r0 = sum0;
*r1 = sum1;
}
#ifdef unused
void functable_fir2_altivec(functable_t *t, float *r0, float *r1,
double x, int n, float *data, int len)
{
int i,j;
double f0, f1, w0, w1;
double x2, x3;
double w;
double sum0, sum1;
double floor_x;
x -= t->start;
x *= t->invoffset;
floor_x = floor(x);
i = floor_x;
x -= floor_x;
x2 = x * x;
x3 = x2 * x;
f1 = 3 * x2 - 2 * x3;
f0 = 1 - f1;
w0 = (x - 2 * x2 + x3) * t->offset;
w1 = (-x2 + x3) * t->offset;
sum0 = 0;
sum1 = 0;
for(j=0;j<len;j++){
// t->fx, t->fdx needs to be multiplexed by n
// we need 5 consecutive floats, which fit into 2 vecs
// load v0, t->fx[i]
// load v1, t->fx[i+n]
// v2 = v0 (not correct)
// v3 = (v0>>32) || (v1<<3*32) (not correct)
//
// load v4, t->dfx[i]
// load v5, t->dfx[i+n]
// v6 = v4 (not correct)
// v7 = (v4>>32) || (v5<<3*32) (not correct)
//
// v8 = splat(f0)
// v9 = splat(f1)
// v10 = splat(w0)
// v11 = splat(w1)
//
// v12 = v2 * v8
// v12 += v3 * v9
// v12 += v6 * v10
// v12 += v7 * v11
w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
t->fdx[i] * w0 + t->fdx[i + 1] * w1;
// v13 = data[j*2]
// v14 = data[j*2+4]
// v15 = deinterlace_high(v13,v14)
// v16 = deinterlace_low(v13,v14)
// (sum0) v17 += multsum(v13,v15)
// (sum1) v18 += multsum(v14,v16)
sum0 += data[j*2] * w;
sum1 += data[j*2+1] * w;
i += n;
}
*r0 = sum0;
*r1 = sum1;
}
#endif

View file

@ -0,0 +1,530 @@
/* Resampling library
* Copyright (C) <2001> David A. Schleef <ds@schleef.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <string.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <resample.h>
inline double sinc(double x)
{
if(x==0)return 1;
return sin(x) / x;
}
inline double window_func(double x)
{
x = 1 - x*x;
return x*x;
}
signed short double_to_s16(double x)
{
if(x<-32768){
printf("clipped\n");
return -32768;
}
if(x>32767){
printf("clipped\n");
return -32767;
}
return rint(x);
}
signed short double_to_s16_ppcasm(double x)
{
if(x<-32768){
return -32768;
}
if(x>32767){
return -32767;
}
return rint(x);
}
static void resample_sinc_ft(resample_t * r);
void resample_init(resample_t * r)
{
r->i_start = 0;
if(r->filter_length&1){
r->o_start = 0;
}else{
r->o_start = r->o_inc * 0.5;
}
memset(r->acc, 0, sizeof(r->acc));
resample_reinit(r);
}
void resample_reinit(resample_t * r)
{
/* i_inc is the number of samples that the output increments for
* each input sample. o_inc is the opposite. */
r->i_inc = (double) r->o_rate / r->i_rate;
r->o_inc = (double) r->i_rate / r->o_rate;
r->halftaps = (r->filter_length - 1.0) * 0.5;
switch (r->method) {
default:
case RESAMPLE_NEAREST:
r->scale = resample_nearest;
break;
case RESAMPLE_BILINEAR:
r->scale = resample_bilinear;
break;
case RESAMPLE_SINC_SLOW:
r->scale = resample_sinc;
break;
case RESAMPLE_SINC:
r->scale = resample_sinc_ft;
break;
}
}
/*
* Prepare to be confused.
*
* We keep a "timebase" that is based on output samples. The zero
* of the timebase cooresponds to the next output sample that will
* be written.
*
* i_start is the "time" that corresponds to the first input sample
* in an incoming buffer. Since the output depends on input samples
* ahead in time, i_start will tend to be around halftaps.
*
* i_start_buf is the time of the first sample in the temporary
* buffer.
*/
void resample_scale(resample_t * r, void *i_buf, unsigned int i_size)
{
int o_size;
r->i_buf = i_buf;
r->i_samples = i_size / 2 / r->channels;
r->i_start_buf = r->i_start - r->filter_length * r->i_inc;
/* i_start is the offset (in a given output sample) that is the
* beginning of the current input buffer */
r->i_end = r->i_start + r->i_inc * r->i_samples;
r->o_samples = floor(r->i_end - r->halftaps * r->i_inc);
o_size = r->o_samples * r->channels * 2;
r->o_buf = r->get_buffer(r->priv, o_size);
if(r->verbose){
printf("resample_scale: i_buf=%p i_size=%d\n",
i_buf,i_size);
printf("resample_scale: i_samples=%d o_samples=%d i_inc=%g o_buf=%p\n",
r->i_samples, r->o_samples, r->i_inc, r->o_buf);
printf("resample_scale: i_start=%g i_end=%g o_start=%g\n",
r->i_start, r->i_end, r->o_start);
}
if ((r->filter_length + r->i_samples)*2*2 > r->buffer_len) {
int size = (r->filter_length + r->i_samples) * sizeof(double) * 2;
if(r->verbose){
printf("resample temp buffer size=%d\n",size);
}
if(r->buffer)free(r->buffer);
r->buffer_len = size;
r->buffer = malloc(size);
memset(r->buffer, 0, size);
}
if(r->channels==2){
conv_double_short(
r->buffer + r->filter_length * sizeof(double) * 2,
r->i_buf, r->i_samples * 2);
}else{
conv_double_short_dstr(
r->buffer + r->filter_length * sizeof(double) * 2,
r->i_buf, r->i_samples, sizeof(double) * 2);
}
r->scale(r);
memcpy(r->buffer,
r->buffer + r->i_samples * sizeof(double) * 2,
r->filter_length * sizeof(double) * 2);
/* updating times */
r->i_start += r->i_samples * r->i_inc;
r->o_start += r->o_samples * r->o_inc - r->i_samples;
/* adjusting timebase zero */
r->i_start -= r->o_samples;
}
void resample_nearest(resample_t * r)
{
signed short *i_ptr, *o_ptr;
int i_count = 0;
double a;
int i;
i_ptr = (signed short *) r->i_buf;
o_ptr = (signed short *) r->o_buf;
a = r->o_start;
i_count = 0;
#define SCALE_LOOP(COPY,INC) \
for (i = 0; i < r->o_samples; i++) { \
COPY; \
a += r->o_inc; \
while (a >= 1) { \
a -= 1; \
i_ptr+=INC; \
i_count++; \
} \
o_ptr+=INC; \
}
switch (r->channels) {
case 1:
SCALE_LOOP(o_ptr[0] = i_ptr[0], 1);
break;
case 2:
SCALE_LOOP(o_ptr[0] = i_ptr[0];
o_ptr[1] = i_ptr[1], 2);
break;
default:
{
int n, n_chan = r->channels;
SCALE_LOOP(for (n = 0; n < n_chan; n++) o_ptr[n] =
i_ptr[n], n_chan);
}
}
if (i_count != r->i_samples) {
printf("handled %d in samples (expected %d)\n", i_count,
r->i_samples);
}
}
void resample_bilinear(resample_t * r)
{
signed short *i_ptr, *o_ptr;
int o_count = 0;
double b;
int i;
double acc0, acc1;
i_ptr = (signed short *) r->i_buf;
o_ptr = (signed short *) r->o_buf;
acc0 = r->acc[0];
acc1 = r->acc[1];
b = r->i_start;
for (i = 0; i < r->i_samples; i++) {
b += r->i_inc;
//printf("in %d\n",i_ptr[0]);
if(b>=2){
printf("not expecting b>=2\n");
}
if (b >= 1) {
acc0 += (1.0 - (b-r->i_inc)) * i_ptr[0];
acc1 += (1.0 - (b-r->i_inc)) * i_ptr[1];
o_ptr[0] = rint(acc0);
//printf("out %d\n",o_ptr[0]);
o_ptr[1] = rint(acc1);
o_ptr += 2;
o_count++;
b -= 1.0;
acc0 = b * i_ptr[0];
acc1 = b * i_ptr[1];
} else {
acc0 += i_ptr[0] * r->i_inc;
acc1 += i_ptr[1] * r->i_inc;
}
i_ptr += 2;
}
r->acc[0] = acc0;
r->acc[1] = acc1;
if (o_count != r->o_samples) {
printf("handled %d out samples (expected %d)\n", o_count,
r->o_samples);
}
}
void resample_sinc_slow(resample_t * r)
{
signed short *i_ptr, *o_ptr;
int i, j;
double c0, c1;
double a;
int start;
double center;
double weight;
if (!r->buffer) {
int size = r->filter_length * 2 * r->channels;
printf("resample temp buffer\n");
r->buffer = malloc(size);
memset(r->buffer, 0, size);
}
i_ptr = (signed short *) r->i_buf;
o_ptr = (signed short *) r->o_buf;
a = r->i_start;
#define GETBUF(index,chan) (((index)<0) \
? ((short *)(r->buffer))[((index)+r->filter_length)*2+(chan)] \
: i_ptr[(index)*2+(chan)])
{
double sinx, cosx, sind, cosd;
double x, d;
double t;
for (i = 0; i < r->o_samples; i++) {
start = floor(a) - r->filter_length;
center = a - r->halftaps;
x = M_PI * (start - center) * r->o_inc;
sinx = sin(M_PI * (start - center) * r->o_inc);
cosx = cos(M_PI * (start - center) * r->o_inc);
d = M_PI * r->o_inc;
sind = sin(M_PI * r->o_inc);
cosd = cos(M_PI * r->o_inc);
c0 = 0;
c1 = 0;
for (j = 0; j < r->filter_length; j++) {
weight = (x==0)?1:(sinx/x);
//printf("j %d sin %g cos %g\n",j,sinx,cosx);
//printf("j %d sin %g x %g sinc %g\n",j,sinx,x,weight);
c0 += weight * GETBUF((start + j), 0);
c1 += weight * GETBUF((start + j), 1);
t = cosx * cosd - sinx * sind;
sinx = cosx * sind + sinx * cosd;
cosx = t;
x += d;
}
o_ptr[0] = rint(c0);
o_ptr[1] = rint(c1);
o_ptr += 2;
a += r->o_inc;
}
}
memcpy(r->buffer,
i_ptr + (r->i_samples - r->filter_length) * r->channels,
r->filter_length * 2 * r->channels);
}
void resample_sinc(resample_t * r)
{
double *ptr;
signed short *o_ptr;
int i, j;
double c0, c1;
double a;
int start;
double center;
double weight;
double x0, x, d;
double scale;
ptr = (double *) r->buffer;
o_ptr = (signed short *) r->o_buf;
/* scale provides a cutoff frequency for the low
* pass filter aspects of sinc(). scale=M_PI
* will cut off at the input frequency, which is
* good for up-sampling, but will cause aliasing
* for downsampling. Downsampling needs to be
* cut off at o_rate, thus scale=M_PI*r->i_inc. */
/* actually, it needs to be M_PI*r->i_inc*r->i_inc.
* Need to research why. */
scale = M_PI*r->i_inc;
for (i = 0; i < r->o_samples; i++) {
a = r->o_start + i * r->o_inc;
start = floor(a - r->halftaps);
//printf("%d: a=%g start=%d end=%d\n",i,a,start,start+r->filter_length-1);
center = a;
//x = M_PI * (start - center) * r->o_inc;
//d = M_PI * r->o_inc;
//x = (start - center) * r->o_inc;
x0 = (start - center) * r->o_inc;
d = r->o_inc;
c0 = 0;
c1 = 0;
for (j = 0; j < r->filter_length; j++) {
x = x0 + d * j;
weight = sinc(x*scale*r->i_inc)*scale/M_PI;
weight *= window_func(x/r->halftaps*r->i_inc);
c0 += weight * ptr[(start + j + r->filter_length)*2 + 0];
c1 += weight * ptr[(start + j + r->filter_length)*2 + 1];
}
o_ptr[0] = double_to_s16(c0);
o_ptr[1] = double_to_s16(c1);
o_ptr += 2;
}
}
/*
* Resampling audio is best done using a sinc() filter.
*
*
* out[t] = Sum( in[t'] * sinc((t-t')/delta_t), all t')
*
* The immediate problem with this algorithm is that it involves a
* sum over an infinite number of input samples, both in the past
* and future. Note that even though sinc(x) is bounded by 1/x,
* and thus decays to 0 for large x, since sum(x,{x=0,1..,n}) diverges
* as log(n), we need to be careful about convergence. This is
* typically done by using a windowing function, which also makes
* the sum over a finite number of input samples.
*
* The next problem is computational: sinc(), and especially
* sinc() multiplied by a non-trivial windowing function is expensive
* to calculate, and also difficult to find SIMD optimizations. Since
* the time increment on input and output is different, it is not
* possible to use a FIR filter, because the taps would have to be
* recalculated for every t.
*
* To get around the expense of calculating sinc() for every point,
* we pre-calculate sinc() at a number of points, and then interpolate
* for the values we want in calculations. The interpolation method
* chosen is bi-cubic, which requires both the evalated function and
* its derivative at every pre-sampled point. Also, if the sampled
* points are spaced commensurate with the input delta_t, we notice
* that the interpolating weights are the same for every input point.
* This decreases the number of operations to 4 multiplies and 4 adds
* for each tap, regardless of the complexity of the filtering function.
*
* At this point, it is possible to rearrange the problem as the sum
* of 4 properly weghted FIR filters. Typical SIMD computation units
* are highly optimized for FIR filters, making long filter lengths
* reasonable.
*/
static functable_t *ft;
double out_tmp[10000];
static void resample_sinc_ft(resample_t * r)
{
double *ptr;
signed short *o_ptr;
int i;
//int j;
double c0, c1;
//double a;
double start_f, start_x;
int start;
double center;
//double weight;
double x, d;
double scale;
int n = 4;
scale = r->i_inc; // cutoff at 22050
//scale = 1.0; // cutoff at 24000
//scale = r->i_inc * 0.5; // cutoff at 11025
if(!ft){
ft = malloc(sizeof(*ft));
memset(ft,0,sizeof(*ft));
ft->len = (r->filter_length + 2) * n;
ft->offset = 1.0 / n;
ft->start = - ft->len * 0.5 * ft->offset;
ft->func_x = functable_sinc;
ft->func_dx = functable_dsinc;
ft->scale = M_PI * scale;
ft->func2_x = functable_window_std;
ft->func2_dx = functable_window_dstd;
ft->scale2 = 1.0 / r->halftaps;
functable_init(ft);
//printf("len=%d offset=%g start=%g\n",ft->len,ft->offset,ft->start);
}
ptr = r->buffer;
o_ptr = (signed short *) r->o_buf;
center = r->o_start;
start_x = center - r->halftaps;
start_f = floor(start_x);
start_x -= start_f;
start = start_f;
for (i = 0; i < r->o_samples; i++) {
//start_f = floor(center - r->halftaps);
//printf("%d: a=%g start=%d end=%d\n",i,a,start,start+r->filter_length-1);
x = start_f - center;
d = 1;
c0 = 0;
c1 = 0;
//#define slow
#ifdef slow
for (j = 0; j < r->filter_length; j++) {
weight = functable_eval(ft,x)*scale;
//weight = sinc(M_PI * scale * x)*scale*r->i_inc;
//weight *= window_func(x / r->halftaps);
c0 += weight * ptr[(start + j + r->filter_length)*2 + 0];
c1 += weight * ptr[(start + j + r->filter_length)*2 + 1];
x += d;
}
#else
functable_fir2(ft,
&c0,&c1,
x, n,
ptr+(start + r->filter_length)*2,
r->filter_length);
c0 *= scale;
c1 *= scale;
#endif
out_tmp[2 * i + 0] = c0;
out_tmp[2 * i + 1] = c1;
center += r->o_inc;
start_x += r->o_inc;
while(start_x>=1.0){
start_f++;
start_x -= 1.0;
start++;
}
}
if(r->channels==2){
conv_short_double(r->o_buf,out_tmp,2 * r->o_samples);
}else{
conv_short_double_sstr(r->o_buf,out_tmp,r->o_samples,2 * sizeof(double));
}
}

View file

@ -0,0 +1,159 @@
/* Resampling library
* Copyright (C) <2001> David Schleef <ds@schleef.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifndef __RESAMPLE_H__
#define __RESAMPLE_H__
#include <config.h>
typedef struct resample_s resample_t;
struct resample_s {
/* parameters */
int method;
int channels;
int verbose;
int filter_length;
double i_rate;
double o_rate;
void *priv;
void *(*get_buffer)(void *priv, unsigned int size);
/* internal parameters */
double halftaps;
/* filter state */
void *buffer;
int buffer_len;
double i_start;
double o_start;
double i_start_buf;
double i_end_buf;
double i_inc;
double o_inc;
double i_end;
double o_end;
int i_samples;
int o_samples;
void *i_buf, *o_buf;
double acc[10];
/* methods */
void (*scale)(resample_t *r);
double ack;
};
enum{
RESAMPLE_NEAREST = 0,
RESAMPLE_BILINEAR,
RESAMPLE_SINC_SLOW,
RESAMPLE_SINC,
};
void resample_init(resample_t *r);
void resample_reinit(resample_t *r);
void resample_scale(resample_t *r, void *i_buf, unsigned int size);
void resample_nearest(resample_t *r);
void resample_bilinear(resample_t *r);
void resample_sinc(resample_t *r);
void resample_sinc_slow(resample_t *r);
typedef struct functable_s functable_t;
struct functable_s {
double start;
double offset;
int len;
double invoffset;
double scale;
double scale2;
double (*func_x)(void *,double x);
double (*func_dx)(void *,double x);
double (*func2_x)(void *,double x);
double (*func2_dx)(void *,double x);
double *fx;
double *fdx;
void *priv;
};
void functable_init(functable_t *t);
double functable_eval(functable_t *t,double x);
double functable_fir(functable_t *t,double x0,int n,double *data,int len);
void functable_fir2(functable_t *t,double *r0, double *r1, double x0,
int n,double *data,int len);
double functable_sinc(void *p, double x);
double functable_dsinc(void *p, double x);
double functable_window_std(void *p, double x);
double functable_window_dstd(void *p, double x);
double functable_window_boxcar(void *p, double x);
double functable_window_dboxcar(void *p, double x);
/* math lib stuff */
void conv_double_short_table(double *dest, short *src, int n);
void conv_double_short_unroll(double *dest, short *src, int n);
void conv_double_short_ref(double *dest, short *src, int n);
#ifdef HAVE_CPU_PPC
void conv_double_short_altivec(double *dest, short *src, int n);
#endif
void conv_short_double_ref(short *dest, double *src, int n);
#ifdef HAVE_CPU_PPC
void conv_short_double_ppcasm(short *dest, double *src, int n);
#endif
#ifdef HAVE_CPU_PPC
#define conv_double_short conv_double_short_table
#define conv_short_double conv_short_double_ppcasm
#else
#define conv_double_short conv_double_short_ref
#define conv_short_double conv_short_double_ref
#endif
void conv_double_short_dstr(double *dest, short *src, int n, int dstr);
void conv_short_double_sstr(short *dest, double *src, int n, int dstr);
#endif /* __RESAMPLE_H__ */

View file

@ -0,0 +1,352 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <resample.h>
#define AMP 16000
#define I_RATE 48000
#define O_RATE 44100
//#define O_RATE 24000
//#define test_func(x) 1
//#define test_func(x) sin(2*M_PI*(x)*10)
//#define test_func(x) sin(2*M_PI*(x)*(x)*1000)
#define test_func(x) sin(2*M_PI*(x)*(x)*12000)
short i_buf[I_RATE*2*2];
short o_buf[O_RATE*2*2];
static int i_offset;
static int o_offset;
FILE *out;
void test_res1(void);
void test_res2(void);
void test_res3(void);
void test_res4(void);
void test_res5(void);
void test_res6(void);
void test_res7(void);
int main(int argc,char *argv[])
{
out = fopen("out","w");
test_res7();
return 0;
}
void *get_buffer(void *priv, unsigned int size)
{
void *ret;
ret = ((void *)o_buf) + o_offset;
o_offset += size;
return ret;
}
struct timeval start_time;
void start_timer(void)
{
gettimeofday(&start_time,NULL);
//printf("start %ld.%06ld\n",start_time.tv_sec,start_time.tv_usec);
}
void end_timer(void)
{
struct timeval end_time;
double diff;
gettimeofday(&end_time,NULL);
//printf("end %ld.%06ld\n",end_time.tv_sec,end_time.tv_usec);
diff = (end_time.tv_sec - start_time.tv_sec) +
1e-6*(end_time.tv_usec - start_time.tv_usec);
printf("time %g\n",diff);
}
void test_res1(void)
{
resample_t *r;
int i;
double sum10k,sum22k;
double f;
int n10k,n22k;
double x;
for(i=0;i<I_RATE;i++){
i_buf[i*2+0] = rint(AMP * test_func((double)i/I_RATE));
//i_buf[i*2+1] = rint(AMP * test_func((double)i/I_RATE));
i_buf[i*2+1] = (i<1000)?AMP:0;
}
r = malloc(sizeof(resample_t));
memset(r,0,sizeof(resample_t));
r->i_rate = I_RATE;
r->o_rate = O_RATE;
//r->method = RESAMPLE_SINC_SLOW;
r->method = RESAMPLE_SINC;
r->channels = 2;
//r->verbose = 1;
r->filter_length = 64;
r->get_buffer = get_buffer;
resample_init(r);
start_timer();
#define blocked
#ifdef blocked
for(i=0;i+256<I_RATE;i+=256){
resample_scale(r,i_buf+i*2,256*2*2);
}
if(I_RATE-i){
resample_scale(r,i_buf+i*2,(I_RATE-i)*2*2);
}
#else
resample_scale(r,i_buf,I_RATE*2*2);
#endif
end_timer();
for(i=0;i<O_RATE;i++){
f = AMP*test_func((double)i/O_RATE);
//f = rint(AMP*test_func((double)i/O_RATE));
fprintf(out,"%d %d %d %g %g\n",i,
o_buf[2*i+0],o_buf[2*i+1],
f,o_buf[2*i+0]-f);
}
sum10k=0;
sum22k=0;
n10k=0;
n22k=0;
for(i=0;i<O_RATE;i++){
f = AMP*test_func((double)i/O_RATE);
//f = rint(AMP*test_func((double)i/O_RATE));
x = o_buf[2*i+0]-f;
if(((0.5*i)/O_RATE*I_RATE)<10000){
sum10k += x*x;
n10k++;
}
if(((0.5*i)/O_RATE*I_RATE)<22050){
sum22k += x*x;
n22k++;
}
}
printf("average error 10k=%g 22k=%g\n",
sqrt(sum10k/n10k),
sqrt(sum22k/n22k));
}
void test_res2(void)
{
functable_t *t;
int i;
double x;
double f1,f2;
t = malloc(sizeof(*t));
memset(t,0,sizeof(*t));
t->start = -50.0;
t->offset = 1;
t->len = 100;
t->func_x = functable_sinc;
t->func_dx = functable_dsinc;
functable_init(t);
for(i=0;i<1000;i++){
x = -50.0 + 0.1 * i;
f1 = functable_sinc(NULL,x);
f2 = functable_eval(t,x);
fprintf(out,"%d %g %g %g\n",i,f1,f2,f1-f2);
}
}
void test_res3(void)
{
functable_t *t;
int i;
double x;
double f1,f2;
int n = 1;
t = malloc(sizeof(*t));
memset(t,0,sizeof(*t));
t->start = -50.0;
t->offset = 1.0 / n;
t->len = 100 * n;
t->func_x = functable_sinc;
t->func_dx = functable_dsinc;
t->func2_x = functable_window_std;
t->func2_dx = functable_window_dstd;
t->scale = 1.0;
t->scale2 = 1.0 / (M_PI * 16);
functable_init(t);
for(i=0;i<1000 * n;i++){
x = -50.0 + 0.1/n * i;
f1 = functable_sinc(NULL,t->scale * x) *
functable_window_std(NULL,t->scale2 * x);
f2 = functable_eval(t,x);
fprintf(out,"%d %g %g %g\n",i,f1,f2,f2-f1);
}
}
double sinc_poly(double x)
{
#define INV3FAC 1.66666666666666666e-1
#define INV5FAC 8.33333333333333333e-3
#define INV7FAC 1.984126984e-4
#define INV9FAC 2.755731922e-6
#define INV11FAC 2.505210839e-8
double x2 = x * x;
return 1
- x2 * INV3FAC
+ x2 * x2 * INV5FAC
- x2 * x2 * x2 * INV7FAC;
//+ x2 * x2 * x2 * x2 * INV9FAC
//- x2 * x2 * x2 * x2 * x2 * INV11FAC;
}
void test_res4(void)
{
int i;
double x,f1,f2;
for(i=1;i<100;i++){
x = 0.01 * i;
f1 = 1 - sin(x)/x;
f2 = 1 - sinc_poly(x);
fprintf(out,"%g %.20g %.20g %.20g\n",x,f1,f2,f2-f1);
}
}
void test_res5(void)
{
int i;
double sum;
start_timer();
sum = 0;
for(i=0;i<I_RATE;i++){
sum += i_buf[i*2];
}
end_timer();
i_buf[0] = sum;
}
void short_to_double(double *d,short *x) { *d = *x; }
void short_to_float(float *f,short *x) { *f = *x; }
void float_to_double(double *f,float *x) { *f = *x; }
void double_to_short(short *f,double *x) { *f = *x; }
double res6_tmp[1000];
void test_res6(void)
{
int i;
for(i=0;i<I_RATE;i++){
i_buf[i] = rint(AMP * test_func((double)i/I_RATE));
}
conv_double_short_ref(res6_tmp,i_buf,1000);
for(i=0;i<1000;i++){
res6_tmp[i] *= 3.0;
}
conv_short_double_ppcasm(o_buf,res6_tmp,1000);
for(i=0;i<1000;i++){
fprintf(out,"%d %d %g %d\n",i,i_buf[i],res6_tmp[i],o_buf[i]);
}
}
void test_res7(void)
{
resample_t *r;
int i;
double sum10k,sum22k;
double f;
int n10k,n22k;
double x;
for(i=0;i<I_RATE;i++){
i_buf[i] = rint(AMP * test_func((double)i/I_RATE));
}
r = malloc(sizeof(resample_t));
memset(r,0,sizeof(resample_t));
r->i_rate = I_RATE;
r->o_rate = O_RATE;
//r->method = RESAMPLE_SINC_SLOW;
r->method = RESAMPLE_SINC;
r->channels = 1;
//r->verbose = 1;
r->filter_length = 64;
r->get_buffer = get_buffer;
resample_init(r);
start_timer();
#define blocked
#ifdef blocked
for(i=0;i+256<I_RATE;i+=256){
resample_scale(r,i_buf+i,256*2);
}
if(I_RATE-i){
resample_scale(r,i_buf+i,(I_RATE-i)*2);
}
#else
resample_scale(r,i_buf,I_RATE*2);
#endif
end_timer();
for(i=0;i<O_RATE;i++){
f = AMP*test_func((double)i/O_RATE);
//f = rint(AMP*test_func((double)i/O_RATE));
fprintf(out,"%d %d %d %g %g\n",i,
o_buf[i],0,
f,o_buf[i]-f);
}
sum10k=0;
sum22k=0;
n10k=0;
n22k=0;
for(i=0;i<O_RATE;i++){
f = AMP*test_func((double)i/O_RATE);
//f = rint(AMP*test_func((double)i/O_RATE));
x = o_buf[i]-f;
if(((0.5*i)/O_RATE*I_RATE)<10000){
sum10k += x*x;
n10k++;
}
if(((0.5*i)/O_RATE*I_RATE)<22050){
sum22k += x*x;
n22k++;
}
}
printf("average error 10k=%g 22k=%g\n",
sqrt(sum10k/n10k),
sqrt(sum22k/n22k));
}