/* Resampling library
 * Copyright (C) <2001> David A. Schleef <ds@schleef.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <string.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "private.h"



double
functable_sinc (void *p, double x)
{
  if (x == 0)
    return 1;
  return sin (x) / x;
}

double
functable_dsinc (void *p, double x)
{
  if (x == 0)
    return 0;
  return cos (x) / x - sin (x) / (x * x);
}

/* Rectangular window: 0 strictly outside [-1, 1], 1 otherwise.
 * The private pointer is unused. */
double
functable_window_boxcar (void *p, double x)
{
  const int outside = (x < -1) || (x > 1);

  (void) p;

  return outside ? 0.0 : 1.0;
}

/* Derivative companion of the rectangular window: always 0, for every
 * input.  Neither argument is used. */
double
functable_window_dboxcar (void *p, double x)
{
  (void) p;
  (void) x;

  return 0.0;
}

/* Polynomial window (1 - x^2)^2 on [-1, 1]; 0 strictly outside that
 * interval.  The private pointer is unused. */
double
functable_window_std (void *p, double x)
{
  double q;

  (void) p;

  if (x < -1 || x > 1)
    return 0.0;

  q = 1 - x * x;
  return q * q;
}

/* Derivative of the polynomial window (1 - x^2)^2, i.e. -4x(1 - x^2),
 * on [-1, 1]; 0 strictly outside that interval.  The private pointer is
 * unused. */
double
functable_window_dstd (void *p, double x)
{
  double lead;

  (void) p;

  if (x < -1 || x > 1)
    return 0.0;

  lead = -4 * x;
  return lead * (1 - x * x);
}



/* Build the lookup tables of a functable.
 *
 * Reads t->len, t->start, t->offset, t->scale, t->func_x, t->func_dx and
 * t->priv, and, when t->func2_x is non-NULL, also t->scale2 and
 * t->func2_dx.  Writes t->invoffset (1 / t->offset) and allocates t->fx
 * and t->fdx with t->len + 1 doubles each.
 *
 * Entry i corresponds to the position start + offset * i.  fx[i] holds
 * func_x evaluated at that position times scale; fdx[i] holds func_dx at
 * the same argument multiplied by scale (chain rule).  If a second
 * function is configured, each entry is replaced by the product of both
 * functions and the matching product-rule derivative.
 *
 * If either allocation fails, both tables are released, t->fx and t->fdx
 * are left NULL and nothing else is computed, so a caller can detect the
 * failure by testing t->fx.  Pointers previously stored in t->fx / t->fdx
 * are overwritten, not freed. */
void
functable_init (functable_t * t)
{
  int i;
  double x;

  t->fx = malloc (sizeof (double) * (t->len + 1));
  t->fdx = malloc (sizeof (double) * (t->len + 1));

  if (t->fx == NULL || t->fdx == NULL) {
    /* free (NULL) is a no-op, so releasing both is safe whichever failed. */
    free (t->fx);
    free (t->fdx);
    t->fx = NULL;
    t->fdx = NULL;
    return;
  }

  t->invoffset = 1.0 / t->offset;

  /* First pass: tabulate the primary function and its derivative. */
  for (i = 0; i < t->len + 1; i++) {
    x = t->start + t->offset * i;
    x *= t->scale;

    t->fx[i] = t->func_x (t->priv, x);
    t->fdx[i] = t->scale * t->func_dx (t->priv, x);
  }

  /* Optional second pass: multiply by the secondary function. */
  if (t->func2_x) {
    double f1x, f1dx;
    double f2x, f2dx;

    for (i = 0; i < t->len + 1; i++) {
      x = t->start + t->offset * i;
      x *= t->scale2;

      f2x = t->func2_x (t->priv, x);
      f2dx = t->scale2 * t->func2_dx (t->priv, x);

      f1x = t->fx[i];
      f1dx = t->fdx[i];

      /* (f1 * f2)' = f1 * f2' + f1' * f2 */
      t->fx[i] = f1x * f2x;
      t->fdx[i] = f1x * f2dx + f1dx * f2x;
    }
  }
}

/* Evaluate the tabulated function at x by cubic Hermite interpolation
 * between the two neighbouring table entries, using the stored values
 * (t->fx) and derivatives (t->fdx).
 *
 * The tables hold t->len + 1 entries (indices 0 .. t->len), and the
 * interpolation reads entries i and i + 1, so the usable positions are
 * those whose table coordinate (x - start) / offset lies in [0, len].
 * Anything else (including NaN) prints the "x out of range" diagnostic on
 * stdout and is then clamped to the nearest end of the table instead of
 * indexing past the arrays.
 *
 * Returns the interpolated value; for a table with fewer than one
 * interval (t->len < 1) the first entry is returned. */
double
functable_eval (functable_t * t, double x)
{
  int i;
  double pos, frac;
  double f0, f1, w0, w1;
  double x2, x3;

  /* Position expressed in table-index units. */
  pos = x - t->start;
  pos /= t->offset;

  /* Written as a negated conjunction so that NaN is also rejected. */
  if (!(pos >= 0 && pos <= t->len)) {
    printf ("x out of range %g\n", x);
    pos = (pos > t->len) ? t->len : 0;
  }

  if (t->len < 1)
    return t->fx[0];

  i = floor (pos);
  /* At the very end of the table use the last interval with frac == 1 so
   * that entry i + 1 still exists. */
  if (i >= t->len)
    i = t->len - 1;
  frac = pos - i;

  x2 = frac * frac;
  x3 = x2 * frac;

  /* Hermite basis: f0/f1 weight the values, w0/w1 the derivatives.  The
   * derivative weights are scaled by the table spacing because fdx is a
   * derivative with respect to x, not to the table index. */
  f1 = 3 * x2 - 2 * x3;
  f0 = 1 - f1;
  w0 = (frac - 2 * x2 + x3) * t->offset;
  w1 = (-x2 + x3) * t->offset;

  return t->fx[i] * f0 + t->fx[i + 1] * f1
      + t->fdx[i] * w0 + t->fdx[i + 1] * w1;
}


/* Single-channel FIR accumulation against the interpolated table.
 *
 * x selects the starting table position; its integer part (in table
 * units) gives the first table index and its fractional part fixes the
 * cubic Hermite weights, which are shared by every tap.  For each of the
 * len taps the interpolated coefficient is multiplied with data[2 * j]
 * (every second element of data) and the table index advances by n.
 *
 * No bounds checking is done on either the table or data.
 * Returns the accumulated sum. */
double
functable_fir (functable_t * t, double x, int n, double *data, int len)
{
  double pos, frac, frac2, frac3;
  double val_lo, val_hi, der_lo, der_hi;
  double acc = 0;
  int idx, tap;

  pos = x - t->start;
  pos /= t->offset;
  idx = floor (pos);
  frac = pos - idx;

  frac2 = frac * frac;
  frac3 = frac2 * frac;

  /* Hermite weights for the two values and the two derivatives. */
  val_hi = 3 * frac2 - 2 * frac3;
  val_lo = 1 - val_hi;
  der_lo = (frac - 2 * frac2 + frac3) * t->offset;
  der_hi = (-frac2 + frac3) * t->offset;

  for (tap = 0; tap < len; tap++, idx += n) {
    const double coeff =
        t->fx[idx] * val_lo + t->fx[idx + 1] * val_hi + t->fdx[idx] * der_lo + t->fdx[idx + 1] * der_hi;

    acc += data[tap * 2] * coeff;
  }

  return acc;
}

/* Two-channel FIR accumulation against the interpolated table.
 *
 * x selects the starting table position (converted to table units with
 * t->invoffset); its integer part gives the first table index and its
 * fractional part fixes the cubic Hermite weights shared by all taps.
 * data is read as interleaved pairs: for tap j the interpolated
 * coefficient multiplies data[2 * j] into *r0 and data[2 * j + 1] into
 * *r1, and the table index then advances by n.
 *
 * Exactly len taps are processed.  (An earlier hand-unrolled version
 * always consumed taps in groups of four and therefore read past data and
 * the tables whenever len was not a multiple of four; for multiples of
 * four the order of operations here is the same as before.)
 *
 * No bounds checking is done on the table or on data. */
void
functable_fir2 (functable_t * t, double *r0, double *r1, double x,
    int n, double *data, int len)
{
  int i, j;
  double f0, f1, w0, w1;
  double x2, x3;
  double w;
  double sum0, sum1;
  double floor_x;

  x -= t->start;
  x *= t->invoffset;
  floor_x = floor (x);
  i = floor_x;
  x -= floor_x;

  x2 = x * x;
  x3 = x2 * x;

  /* Hermite weights: f0/f1 for the values, w0/w1 for the derivatives. */
  f1 = 3 * x2 - 2 * x3;
  f0 = 1 - f1;
  w0 = (x - 2 * x2 + x3) * t->offset;
  w1 = (-x2 + x3) * t->offset;

  sum0 = 0;
  sum1 = 0;
  for (j = 0; j < len; j++) {
    w = t->fx[i] * f0 + t->fx[i + 1] * f1 + t->fdx[i] * w0 + t->fdx[i + 1] * w1;
    sum0 += data[j * 2] * w;
    sum1 += data[j * 2 + 1] * w;
    i += n;
  }

  *r0 = sum0;
  *r1 = sum1;
}



#ifdef unused
/* Compiled-out placeholder for an AltiVec variant of the two-channel FIR.
 *
 * The executable statements are a plain scalar loop over single-precision
 * interleaved data; the comments inside the loop are the author's sketch
 * of the intended vector instruction sequence and are explicitly marked
 * as not yet correct in places. */
void
functable_fir2_altivec (functable_t * t, float *r0, float *r1,
    double x, int n, float *data, int len)
{
  double pos, whole, frac, frac2, frac3;
  double val_lo, val_hi, der_lo, der_hi;
  double acc0 = 0;
  double acc1 = 0;
  double coeff;
  int idx, tap;

  pos = x - t->start;
  pos *= t->invoffset;
  whole = floor (pos);
  idx = whole;
  frac = pos - whole;

  frac2 = frac * frac;
  frac3 = frac2 * frac;

  val_hi = 3 * frac2 - 2 * frac3;
  val_lo = 1 - val_hi;
  der_lo = (frac - 2 * frac2 + frac3) * t->offset;
  der_hi = (-frac2 + frac3) * t->offset;

  for (tap = 0; tap < len; tap++, idx += n) {
    /* Vectorisation sketch (coefficient computation):
     *   t->fx, t->fdx need to be multiplexed by n
     *   we need 5 consecutive floats, which fit into 2 vecs
     *   load v0, t->fx[i]
     *   load v1, t->fx[i+n]
     *   v2 = v0 (not correct)
     *   v3 = (v0>>32) || (v1<<3*32) (not correct)
     *
     *   load v4, t->fdx[i]
     *   load v5, t->fdx[i+n]
     *   v6 = v4 (not correct)
     *   v7 = (v4>>32) || (v5<<3*32) (not correct)
     *
     *   v8 = splat(f0)
     *   v9 = splat(f1)
     *   v10 = splat(w0)
     *   v11 = splat(w1)
     *
     *   v12 = v2 * v8
     *   v12 += v3 * v9
     *   v12 += v6 * v10
     *   v12 += v7 * v11
     */
    coeff = t->fx[idx] * val_lo + t->fx[idx + 1] * val_hi + t->fdx[idx] * der_lo + t->fdx[idx + 1] * der_hi;

    /* Vectorisation sketch (accumulation):
     *   v13 = data[j*2]
     *   v14 = data[j*2+4]
     *   v15 = deinterlace_high(v13,v14)
     *   v16 = deinterlace_low(v13,v14)
     *   (sum0) v17 += multsum(v13,v15)
     *   (sum1) v18 += multsum(v14,v16)
     */
    acc0 += data[tap * 2] * coeff;
    acc1 += data[tap * 2 + 1] * coeff;
  }

  *r0 = acc0;
  *r1 = acc1;
}
#endif