gstreamer/gst/audioresample/gstaudioresample.c

/* GStreamer
 * Copyright (C) 1999 Erik Walthinsen <omega@cse.ogi.edu>
 * Copyright (C) 2003,2004 David A. Schleef <ds@schleef.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
/* Element-Checklist-Version: 5 */

/**
 * SECTION:element-audioresample
 *
 * <refsect2>
 * Audioresample resamples raw audio buffers to different sample rates using
 * a configurable windowing function to enhance quality.
 * <title>Example launch line</title>
 * <para>
 * <programlisting>
 * gst-launch -v filesrc location=sine.ogg ! oggdemux ! vorbisdec ! audioconvert ! audioresample ! audio/x-raw-int, rate=8000 ! alsasink
 * </programlisting>
 * Decode an Ogg/Vorbis downsample to 8Khz and play sound through alsa.
 * To create the Ogg/Vorbis file refer to the documentation of vorbisenc.
 * </para>
 * </refsect2>
 *
 * Last reviewed on 2006-03-02 (0.10.4)
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <string.h>
#include <math.h>

/*#define DEBUG_ENABLED */
#include "gstaudioresample.h"
#include <gst/audio/audio.h>
#include <gst/base/gstbasetransform.h>

GST_DEBUG_CATEGORY (audioresample_debug);
#define GST_CAT_DEFAULT audioresample_debug

/* elementfactory information */
static const GstElementDetails gst_audioresample_details =
GST_ELEMENT_DETAILS ("Audio scaler",
    "Filter/Converter/Audio",
    "Resample audio",
    "David Schleef <ds@schleef.org>");

#define DEFAULT_FILTERLEN       16

enum
{
  PROP_0,
  PROP_FILTERLEN
};

#define SUPPORTED_CAPS \
GST_STATIC_CAPS ( \
    "audio/x-raw-int, " \
      "rate = (int) [ 1, MAX ], " \
      "channels = (int) [ 1, MAX ], " \
      "endianness = (int) BYTE_ORDER, " \
      "width = (int) 16, " \
      "depth = (int) 16, " \
      "signed = (boolean) true;" \
    "audio/x-raw-int, " \
      "rate = (int) [ 1, MAX ], " \
      "channels = (int) [ 1, MAX ], " \
      "endianness = (int) BYTE_ORDER, " \
      "width = (int) 32, " \
      "depth = (int) 32, " \
      "signed = (boolean) true;" \
    "audio/x-raw-float, " \
      "rate = (int) [ 1, MAX ], "	\
      "channels = (int) [ 1, MAX ], " \
      "endianness = (int) BYTE_ORDER, " \
      "width = (int) 32; " \
    "audio/x-raw-float, " \
      "rate = (int) [ 1, MAX ], "	\
      "channels = (int) [ 1, MAX ], " \
      "endianness = (int) BYTE_ORDER, " \
      "width = (int) 64" \
)

static GstStaticPadTemplate gst_audioresample_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK, GST_PAD_ALWAYS, SUPPORTED_CAPS);

static GstStaticPadTemplate gst_audioresample_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC, GST_PAD_ALWAYS, SUPPORTED_CAPS);

static void gst_audioresample_set_property (GObject * object,
    guint prop_id, const GValue * value, GParamSpec * pspec);
static void gst_audioresample_get_property (GObject * object,
    guint prop_id, GValue * value, GParamSpec * pspec);

/* vmethods */
gboolean audioresample_get_unit_size (GstBaseTransform * base,
    GstCaps * caps, guint * size);
GstCaps *audioresample_transform_caps (GstBaseTransform * base,
    GstPadDirection direction, GstCaps * caps);
gboolean audioresample_transform_size (GstBaseTransform * trans,
    GstPadDirection direction, GstCaps * incaps, guint insize,
    GstCaps * outcaps, guint * outsize);
gboolean audioresample_set_caps (GstBaseTransform * base, GstCaps * incaps,
    GstCaps * outcaps);
static GstFlowReturn audioresample_pushthrough (GstAudioresample *
    audioresample);
static GstFlowReturn audioresample_transform (GstBaseTransform * base,
    GstBuffer * inbuf, GstBuffer * outbuf);
static gboolean audioresample_event (GstBaseTransform * base, GstEvent * event);
static gboolean audioresample_start (GstBaseTransform * base);
static gboolean audioresample_stop (GstBaseTransform * base);

#define DEBUG_INIT(bla) \
  GST_DEBUG_CATEGORY_INIT (audioresample_debug, "audioresample", 0, "audio resampling element");

GST_BOILERPLATE_FULL (GstAudioresample, gst_audioresample, GstBaseTransform,
    GST_TYPE_BASE_TRANSFORM, DEBUG_INIT);

static void
gst_audioresample_base_init (gpointer g_class)
{
  GstElementClass *gstelement_class = GST_ELEMENT_CLASS (g_class);

  gst_element_class_add_pad_template (gstelement_class,
      gst_static_pad_template_get (&gst_audioresample_src_template));
  gst_element_class_add_pad_template (gstelement_class,
      gst_static_pad_template_get (&gst_audioresample_sink_template));

  gst_element_class_set_details (gstelement_class, &gst_audioresample_details);
}

static void
gst_audioresample_class_init (GstAudioresampleClass * klass)
{
  GObjectClass *gobject_class;

  gobject_class = (GObjectClass *) klass;

  gobject_class->set_property = gst_audioresample_set_property;
  gobject_class->get_property = gst_audioresample_get_property;

  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_FILTERLEN,
      g_param_spec_int ("filter_length", "filter_length", "filter_length",
          0, G_MAXINT, DEFAULT_FILTERLEN,
          G_PARAM_READWRITE | G_PARAM_CONSTRUCT));

  GST_BASE_TRANSFORM_CLASS (klass)->start =
      GST_DEBUG_FUNCPTR (audioresample_start);
  GST_BASE_TRANSFORM_CLASS (klass)->stop =
      GST_DEBUG_FUNCPTR (audioresample_stop);
  GST_BASE_TRANSFORM_CLASS (klass)->transform_size =
      GST_DEBUG_FUNCPTR (audioresample_transform_size);
  GST_BASE_TRANSFORM_CLASS (klass)->get_unit_size =
      GST_DEBUG_FUNCPTR (audioresample_get_unit_size);
  GST_BASE_TRANSFORM_CLASS (klass)->transform_caps =
      GST_DEBUG_FUNCPTR (audioresample_transform_caps);
  GST_BASE_TRANSFORM_CLASS (klass)->set_caps =
      GST_DEBUG_FUNCPTR (audioresample_set_caps);
  GST_BASE_TRANSFORM_CLASS (klass)->transform =
      GST_DEBUG_FUNCPTR (audioresample_transform);
  GST_BASE_TRANSFORM_CLASS (klass)->event =
      GST_DEBUG_FUNCPTR (audioresample_event);

  GST_BASE_TRANSFORM_CLASS (klass)->passthrough_on_same_caps = TRUE;
}

static void
gst_audioresample_init (GstAudioresample * audioresample,
    GstAudioresampleClass * klass)
{
  GstBaseTransform *trans;

  trans = GST_BASE_TRANSFORM (audioresample);

  /* buffer alloc passthrough is too impossible. FIXME, it
   * is trivial in the passtrough case. */
  gst_pad_set_bufferalloc_function (trans->sinkpad, NULL);

  audioresample->filter_length = DEFAULT_FILTERLEN;
}

/* vmethods */
static gboolean
audioresample_start (GstBaseTransform * base)
{
  GstAudioresample *audioresample = GST_AUDIORESAMPLE (base);

  audioresample->resample = resample_new ();
  audioresample->ts_offset = -1;
  audioresample->offset = -1;
  audioresample->next_ts = -1;

  resample_set_filter_length (audioresample->resample,
      audioresample->filter_length);

  return TRUE;
}

static gboolean
audioresample_stop (GstBaseTransform * base)
{
  GstAudioresample *audioresample = GST_AUDIORESAMPLE (base);

  if (audioresample->resample) {
    resample_free (audioresample->resample);
    audioresample->resample = NULL;
  }

  return TRUE;
}

gboolean
audioresample_get_unit_size (GstBaseTransform * base, GstCaps * caps,
    guint * size)
{
  gint width, channels;
  GstStructure *structure;
  gboolean ret;

  g_return_val_if_fail (size, FALSE);

  /* this works for both float and int */
  structure = gst_caps_get_structure (caps, 0);
  ret = gst_structure_get_int (structure, "width", &width);
  ret &= gst_structure_get_int (structure, "channels", &channels);
  g_return_val_if_fail (ret, FALSE);

  *size = width * channels / 8;

  return TRUE;
}

GstCaps *
audioresample_transform_caps (GstBaseTransform * base,
    GstPadDirection direction, GstCaps * caps)
{
  GstCaps *res;
  GstStructure *structure;

  /* transform caps gives one single caps so we can just replace
   * the rate property with our range. */
  res = gst_caps_copy (caps);
  structure = gst_caps_get_structure (res, 0);
  gst_structure_set (structure, "rate", GST_TYPE_INT_RANGE, 1, G_MAXINT, NULL);

  return res;
}

static gboolean
resample_set_state_from_caps (ResampleState * state, GstCaps * incaps,
    GstCaps * outcaps, gint * channels, gint * inrate, gint * outrate)
{
  GstStructure *structure;
  gboolean ret;
  gint myinrate, myoutrate;
  int mychannels;
  gint width, depth;
  ResampleFormat format;

  GST_DEBUG ("incaps %" GST_PTR_FORMAT ", outcaps %"
      GST_PTR_FORMAT, incaps, outcaps);

  structure = gst_caps_get_structure (incaps, 0);

  /* get width */
  ret = gst_structure_get_int (structure, "width", &width);
  if (!ret)
    goto no_width;

  /* figure out the format */
  if (g_str_equal (gst_structure_get_name (structure), "audio/x-raw-float")) {
    if (width == 32)
      format = RESAMPLE_FORMAT_F32;
    else if (width == 64)
      format = RESAMPLE_FORMAT_F64;
    else
      goto wrong_depth;
  } else {
    /* for int, depth and width must be the same */
    ret = gst_structure_get_int (structure, "depth", &depth);
    if (!ret || width != depth)
      goto not_equal;

    if (width == 16)
      format = RESAMPLE_FORMAT_S16;
    else if (width == 32)
      format = RESAMPLE_FORMAT_S32;
    else
      goto wrong_depth;
  }
  ret = gst_structure_get_int (structure, "rate", &myinrate);
  ret &= gst_structure_get_int (structure, "channels", &mychannels);
  if (!ret)
    goto no_in_rate_channels;

  structure = gst_caps_get_structure (outcaps, 0);
  ret = gst_structure_get_int (structure, "rate", &myoutrate);
  if (!ret)
    goto no_out_rate;

  if (channels)
    *channels = mychannels;
  if (inrate)
    *inrate = myinrate;
  if (outrate)
    *outrate = myoutrate;

  resample_set_format (state, format);
  resample_set_n_channels (state, mychannels);
  resample_set_input_rate (state, myinrate);
  resample_set_output_rate (state, myoutrate);

  return TRUE;

  /* ERRORS */
no_width:
  {
    GST_DEBUG ("failed to get width from caps");
    return FALSE;
  }
not_equal:
  {
    GST_DEBUG ("width %d and depth %d must be the same", width, depth);
    return FALSE;
  }
wrong_depth:
  {
    GST_DEBUG ("unknown depth %d found", depth);
    return FALSE;
  }
no_in_rate_channels:
  {
    GST_DEBUG ("could not get input rate and channels");
    return FALSE;
  }
no_out_rate:
  {
    GST_DEBUG ("could not get output rate");
    return FALSE;
  }
}

gboolean
audioresample_transform_size (GstBaseTransform * base,
    GstPadDirection direction, GstCaps * caps, guint size, GstCaps * othercaps,
    guint * othersize)
{
  GstAudioresample *audioresample = GST_AUDIORESAMPLE (base);
  ResampleState *state;
  GstCaps *srccaps, *sinkcaps;
  gboolean use_internal = FALSE;        /* whether we use the internal state */
  gboolean ret = TRUE;

  GST_DEBUG_OBJECT (base, "asked to transform size %d in direction %s",
      size, direction == GST_PAD_SINK ? "SINK" : "SRC");
  if (direction == GST_PAD_SINK) {
    sinkcaps = caps;
    srccaps = othercaps;
  } else {
    sinkcaps = othercaps;
    srccaps = caps;
  }

  /* if the caps are the ones that _set_caps got called with; we can use
   * our own state; otherwise we'll have to create a state */
  if (gst_caps_is_equal (sinkcaps, audioresample->sinkcaps) &&
      gst_caps_is_equal (srccaps, audioresample->srccaps)) {
    use_internal = TRUE;
    state = audioresample->resample;
  } else {
    GST_DEBUG_OBJECT (audioresample,
        "caps are not the set caps, creating state");
    state = resample_new ();
    resample_set_filter_length (state, audioresample->filter_length);
    resample_set_state_from_caps (state, sinkcaps, srccaps, NULL, NULL, NULL);
  }

  if (direction == GST_PAD_SINK) {
    /* asked to convert size of an incoming buffer */
    *othersize = resample_get_output_size_for_input (state, size);
  } else {
    /* asked to convert size of an outgoing buffer */
    *othersize = resample_get_input_size_for_output (state, size);
  }
  g_assert (*othersize % state->sample_size == 0);

  /* we make room for one extra sample, given that the resampling filter
   * can output an extra one for non-integral i_rate/o_rate */
  GST_DEBUG_OBJECT (base, "transformed size %d to %d", size, *othersize);

  if (!use_internal) {
    resample_free (state);
  }

  return ret;
}

gboolean
audioresample_set_caps (GstBaseTransform * base, GstCaps * incaps,
    GstCaps * outcaps)
{
  gboolean ret;
  gint inrate, outrate;
  int channels;
  GstAudioresample *audioresample = GST_AUDIORESAMPLE (base);

  GST_DEBUG_OBJECT (base, "incaps %" GST_PTR_FORMAT ", outcaps %"
      GST_PTR_FORMAT, incaps, outcaps);

  ret = resample_set_state_from_caps (audioresample->resample, incaps, outcaps,
      &channels, &inrate, &outrate);

  g_return_val_if_fail (ret, FALSE);

  audioresample->channels = channels;
  GST_DEBUG_OBJECT (audioresample, "set channels to %d", channels);
  audioresample->i_rate = inrate;
  GST_DEBUG_OBJECT (audioresample, "set i_rate to %d", inrate);
  audioresample->o_rate = outrate;
  GST_DEBUG_OBJECT (audioresample, "set o_rate to %d", outrate);

  /* save caps so we can short-circuit in the size_transform if the caps
   * are the same */
  /* FIXME: clean them up in state change ? */
  gst_caps_ref (incaps);
  gst_caps_replace (&audioresample->sinkcaps, incaps);
  gst_caps_ref (outcaps);
  gst_caps_replace (&audioresample->srccaps, outcaps);

  return TRUE;
}

static gboolean
audioresample_event (GstBaseTransform * base, GstEvent * event)
{
  GstAudioresample *audioresample;

  audioresample = GST_AUDIORESAMPLE (base);

  switch (GST_EVENT_TYPE (event)) {
    case GST_EVENT_FLUSH_START:
      break;
    case GST_EVENT_FLUSH_STOP:
      resample_input_flush (audioresample->resample);
      audioresample->ts_offset = -1;
      audioresample->next_ts = -1;
      audioresample->offset = -1;
      break;
    case GST_EVENT_NEWSEGMENT:
      resample_input_pushthrough (audioresample->resample);
      audioresample_pushthrough (audioresample);
      audioresample->ts_offset = -1;
      audioresample->next_ts = -1;
      audioresample->offset = -1;
      break;
    case GST_EVENT_EOS:
      resample_input_eos (audioresample->resample);
      audioresample_pushthrough (audioresample);
      break;
    default:
      break;
  }
  parent_class->event (base, event);

  return TRUE;
}

static GstFlowReturn
audioresample_do_output (GstAudioresample * audioresample, GstBuffer * outbuf)
{
  int outsize;
  int outsamples;
  ResampleState *r;

  r = audioresample->resample;

  outsize = resample_get_output_size (r);
  GST_DEBUG_OBJECT (audioresample, "audioresample can give me %d bytes",
      outsize);

  /* protect against mem corruption */
  if (outsize > GST_BUFFER_SIZE (outbuf)) {
    GST_WARNING_OBJECT (audioresample,
        "overriding audioresample's outsize %d with outbuffer's size %d",
        outsize, GST_BUFFER_SIZE (outbuf));
    outsize = GST_BUFFER_SIZE (outbuf);
  }
  /* catch possibly wrong size differences */
  if (GST_BUFFER_SIZE (outbuf) - outsize > r->sample_size) {
    GST_WARNING_OBJECT (audioresample,
        "audioresample's outsize %d too far from outbuffer's size %d",
        outsize, GST_BUFFER_SIZE (outbuf));
  }

  outsize = resample_get_output_data (r, GST_BUFFER_DATA (outbuf), outsize);
  outsamples = outsize / r->sample_size;
  GST_LOG_OBJECT (audioresample, "resample gave me %d bytes or %d samples",
      outsize, outsamples);

  GST_BUFFER_OFFSET (outbuf) = audioresample->offset;
  GST_BUFFER_TIMESTAMP (outbuf) = audioresample->next_ts;

  if (audioresample->ts_offset != -1) {
    audioresample->offset += outsamples;
    audioresample->ts_offset += outsamples;
    audioresample->next_ts =
        gst_util_uint64_scale_int (audioresample->ts_offset, GST_SECOND,
        audioresample->o_rate);
    GST_BUFFER_OFFSET_END (outbuf) = audioresample->offset;

    /* we calculate DURATION as the difference between "next" timestamp
     * and current timestamp so we ensure a contiguous stream, instead of
     * having rounding errors. */
    GST_BUFFER_DURATION (outbuf) = audioresample->next_ts -
        GST_BUFFER_TIMESTAMP (outbuf);
  } else {
    /* no valid offset know, we can still sortof calculate the duration though */
    GST_BUFFER_DURATION (outbuf) =
        gst_util_uint64_scale_int (outsamples, GST_SECOND,
        audioresample->o_rate);
  }

  /* check for possible mem corruption */
  if (outsize > GST_BUFFER_SIZE (outbuf)) {
    /* this is an error that when it happens, would need fixing in the
     * resample library; we told
     * it we wanted only GST_BUFFER_SIZE (outbuf), and it gave us more ! */
    GST_WARNING_OBJECT (audioresample,
        "audioresample, you memory corrupting bastard. "
        "you gave me outsize %d while my buffer was size %d",
        outsize, GST_BUFFER_SIZE (outbuf));
    return GST_FLOW_ERROR;
  }
  /* catch possibly wrong size differences */
  if (GST_BUFFER_SIZE (outbuf) - outsize > r->sample_size) {
    GST_WARNING_OBJECT (audioresample,
        "audioresample's written outsize %d too far from outbuffer's size %d",
        outsize, GST_BUFFER_SIZE (outbuf));
  }
  GST_BUFFER_SIZE (outbuf) = outsize;

  return GST_FLOW_OK;
}

static GstFlowReturn
audioresample_transform (GstBaseTransform * base, GstBuffer * inbuf,
    GstBuffer * outbuf)
{
  GstAudioresample *audioresample;
  ResampleState *r;
  guchar *data, *datacopy;
  gulong size;
  GstClockTime timestamp;

  audioresample = GST_AUDIORESAMPLE (base);
  r = audioresample->resample;

  data = GST_BUFFER_DATA (inbuf);
  size = GST_BUFFER_SIZE (inbuf);
  timestamp = GST_BUFFER_TIMESTAMP (inbuf);

  GST_DEBUG_OBJECT (audioresample, "got buffer of %ld bytes", size);

  if (audioresample->ts_offset == -1) {
    /* if we don't know the initial offset yet, calculate it based on the
     * input timestamp. */
    if (GST_CLOCK_TIME_IS_VALID (timestamp)) {
      GstClockTime stime;

      /* offset used to calculate the timestamps. We use the sample offset for this
       * to make it more accurate. We want the first buffer to have the same timestamp
       * as the incomming timestamp. */
      audioresample->next_ts = timestamp;
      audioresample->ts_offset =
          gst_util_uint64_scale_int (timestamp, r->o_rate, GST_SECOND);
      /* offset used to set as the buffer offset, this offset is always relative
       * to the stream time, note that timestamp is not... */
      stime = (timestamp - base->segment.start) + base->segment.time;
      audioresample->offset =
          gst_util_uint64_scale_int (stime, r->o_rate, GST_SECOND);
    }
  }

  /* need to memdup, resample takes ownership. */
  datacopy = g_memdup (data, size);
  resample_add_input_data (r, datacopy, size, g_free, datacopy);

  return audioresample_do_output (audioresample, outbuf);
}

/* push remaining data in the buffers out */
static GstFlowReturn
audioresample_pushthrough (GstAudioresample * audioresample)
{
  int outsize;
  ResampleState *r;
  GstBuffer *outbuf;
  GstFlowReturn res = GST_FLOW_OK;
  GstBaseTransform *trans;

  r = audioresample->resample;

  outsize = resample_get_output_size (r);
  if (outsize == 0)
    goto done;

  outbuf = gst_buffer_new_and_alloc (outsize);

  res = audioresample_do_output (audioresample, outbuf);
  if (res != GST_FLOW_OK)
    goto done;

  trans = GST_BASE_TRANSFORM (audioresample);

  res = gst_pad_push (trans->srcpad, outbuf);

done:
  return res;
}


static void
gst_audioresample_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstAudioresample *audioresample;

  audioresample = GST_AUDIORESAMPLE (object);

  switch (prop_id) {
    case PROP_FILTERLEN:
      audioresample->filter_length = g_value_get_int (value);
      GST_DEBUG_OBJECT (GST_ELEMENT (audioresample), "new filter length %d",
          audioresample->filter_length);
      if (audioresample->resample) {
        resample_set_filter_length (audioresample->resample,
            audioresample->filter_length);
      }
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

static void
gst_audioresample_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstAudioresample *audioresample;

  audioresample = GST_AUDIORESAMPLE (object);

  switch (prop_id) {
    case PROP_FILTERLEN:
      g_value_set_int (value, audioresample->filter_length);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}


static gboolean
plugin_init (GstPlugin * plugin)
{
  resample_init ();

  if (!gst_element_register (plugin, "audioresample", GST_RANK_PRIMARY,
          GST_TYPE_AUDIORESAMPLE)) {
    return FALSE;
  }

  return TRUE;
}

GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
    GST_VERSION_MINOR,
    "audioresample",
    "Resamples audio", plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME,
    GST_PACKAGE_ORIGIN);