/* * GStreamer * Copyright (C) 2008 Rov Juvano * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /** * SECTION:element-scaletempo * * Scale tempo while maintaining pitch * (WSOLA-like technique with cross correlation) * Inspired by SoundTouch library by Olli Parviainen * * Use Sceletempo to apply playback rates without the chipmunk effect. * * * Example pipelines * * |[ * filesrc location=media.ext ! decodebin name=d \ * d. ! queue ! audioconvert ! audioresample ! scaletempo ! audioconvert ! audioresample ! autoaudiosink \ * d. ! queue ! videoconvert ! autovideosink * ]| * OR * |[ * playbin uri=... audio_sink="scaletempo ! audioconvert ! audioresample ! autoaudiosink" * ]| * When an application sends a seek event with rate != 1.0, Scaletempo applies * the rate change by scaling the tempo without scaling the pitch. * * Scaletempo works by producing audio in constant sized chunks * (#GstScaletempo:stride) but consuming chunks proportional to the playback * rate. * * Scaletempo then smooths the output by blending the end of one stride with * the next (#GstScaletempo:overlap). * * Scaletempo smooths the overlap further by searching within the input buffer * for the best overlap position. Scaletempo uses a statistical cross * correlation (roughly a dot-product). Scaletempo consumes most of its CPU * cycles here. One can use the #GstScaletempo:search propery to tune how far * the algoritm looks. * * */ /* * Note: frame = audio key unit (i.e. one sample for each channel) */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include /* for memset */ #include "gstscaletempo.h" GST_DEBUG_CATEGORY_STATIC (gst_scaletempo_debug); #define GST_CAT_DEFAULT gst_scaletempo_debug /* Filter signals and args */ enum { LAST_SIGNAL }; enum { PROP_0, PROP_RATE, PROP_STRIDE, PROP_OVERLAP, PROP_SEARCH, }; #define SUPPORTED_CAPS \ GST_STATIC_CAPS ( \ GST_AUDIO_CAPS_MAKE (GST_AUDIO_NE (F32)) "; " \ GST_AUDIO_CAPS_MAKE (GST_AUDIO_NE (S16)) \ ) static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, SUPPORTED_CAPS); static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, GST_PAD_ALWAYS, SUPPORTED_CAPS); #define DEBUG_INIT(bla) GST_DEBUG_CATEGORY_INIT (gst_scaletempo_debug, "scaletempo", 0, "scaletempo element"); #define gst_scaletempo_parent_class parent_class G_DEFINE_TYPE_WITH_CODE (GstScaletempo, gst_scaletempo, GST_TYPE_BASE_TRANSFORM, DEBUG_INIT (0)); static guint best_overlap_offset_float (GstScaletempo * st) { gfloat *pw, *po, *ppc, *search_start; gfloat best_corr = G_MININT; guint best_off = 0; gint i, off; pw = st->table_window; po = st->buf_overlap; po += st->samples_per_frame; ppc = st->buf_pre_corr; for (i = st->samples_per_frame; i < st->samples_overlap; i++) { *ppc++ = *pw++ * *po++; } search_start = (gfloat *) st->buf_queue + st->samples_per_frame; for (off = 0; off < st->frames_search; off++) { gfloat corr = 0; gfloat *ps = search_start; ppc = st->buf_pre_corr; for (i = st->samples_per_frame; i < st->samples_overlap; i++) { corr += *ppc++ * *ps++; } if (corr > best_corr) { best_corr = corr; best_off = off; } search_start += st->samples_per_frame; } return best_off * st->bytes_per_frame; } /* buffer padding for loop optimization: sizeof(gint32) * (loop_size - 1) */ #define UNROLL_PADDING (4*3) static guint best_overlap_offset_s16 (GstScaletempo * st) { gint32 *pw, *ppc; gint16 *po, *search_start; gint64 best_corr = G_MININT64; guint best_off = 0; guint off; glong i; pw = st->table_window; po = st->buf_overlap; po += st->samples_per_frame; ppc = st->buf_pre_corr; for (i = st->samples_per_frame; i < st->samples_overlap; i++) { *ppc++ = (*pw++ * *po++) >> 15; } search_start = (gint16 *) st->buf_queue + st->samples_per_frame; for (off = 0; off < st->frames_search; off++) { gint64 corr = 0; gint16 *ps = search_start; ppc = st->buf_pre_corr; ppc += st->samples_overlap - st->samples_per_frame; ps += st->samples_overlap - st->samples_per_frame; i = -((glong) st->samples_overlap - (glong) st->samples_per_frame); do { corr += ppc[i + 0] * ps[i + 0]; corr += ppc[i + 1] * ps[i + 1]; corr += ppc[i + 2] * ps[i + 2]; corr += ppc[i + 3] * ps[i + 3]; i += 4; } while (i < 0); if (corr > best_corr) { best_corr = corr; best_off = off; } search_start += st->samples_per_frame; } return best_off * st->bytes_per_frame; } static void output_overlap_float (GstScaletempo * st, gpointer buf_out, guint bytes_off) { gfloat *pout = buf_out; gfloat *pb = st->table_blend; gfloat *po = st->buf_overlap; gfloat *pin = (gfloat *) (st->buf_queue + bytes_off); gint i; for (i = 0; i < st->samples_overlap; i++) { *pout++ = *po - *pb++ * (*po - *pin++); po++; } } static void output_overlap_s16 (GstScaletempo * st, gpointer buf_out, guint bytes_off) { gint16 *pout = buf_out; gint32 *pb = st->table_blend; gint16 *po = st->buf_overlap; gint16 *pin = (gint16 *) (st->buf_queue + bytes_off); gint i; for (i = 0; i < st->samples_overlap; i++) { *pout++ = *po - ((*pb++ * (*po - *pin++)) >> 16); po++; } } static guint fill_queue (GstScaletempo * st, GstBuffer * buf_in, guint offset) { guint bytes_in = gst_buffer_get_size (buf_in) - offset; guint offset_unchanged = offset; GstMapInfo map; gst_buffer_map (buf_in, &map, GST_MAP_READ); if (st->bytes_to_slide > 0) { if (st->bytes_to_slide < st->bytes_queued) { guint bytes_in_move = st->bytes_queued - st->bytes_to_slide; memmove (st->buf_queue, st->buf_queue + st->bytes_to_slide, bytes_in_move); st->bytes_to_slide = 0; st->bytes_queued = bytes_in_move; } else { guint bytes_in_skip; st->bytes_to_slide -= st->bytes_queued; bytes_in_skip = MIN (st->bytes_to_slide, bytes_in); st->bytes_queued = 0; st->bytes_to_slide -= bytes_in_skip; offset += bytes_in_skip; bytes_in -= bytes_in_skip; } } if (bytes_in > 0) { guint bytes_in_copy = MIN (st->bytes_queue_max - st->bytes_queued, bytes_in); memcpy (st->buf_queue + st->bytes_queued, map.data + offset, bytes_in_copy); st->bytes_queued += bytes_in_copy; offset += bytes_in_copy; } gst_buffer_unmap (buf_in, &map); return offset - offset_unchanged; } static void reinit_buffers (GstScaletempo * st) { gint i, j; guint frames_overlap; guint new_size; GstClockTime latency; guint frames_stride = st->ms_stride * st->sample_rate / 1000.0; st->bytes_stride = frames_stride * st->bytes_per_frame; /* overlap */ frames_overlap = frames_stride * st->percent_overlap; if (frames_overlap < 1) { /* if no overlap */ st->bytes_overlap = 0; st->bytes_standing = st->bytes_stride; st->samples_standing = st->bytes_standing / st->bytes_per_sample; st->output_overlap = NULL; } else { guint prev_overlap = st->bytes_overlap; st->bytes_overlap = frames_overlap * st->bytes_per_frame; st->samples_overlap = frames_overlap * st->samples_per_frame; st->bytes_standing = st->bytes_stride - st->bytes_overlap; st->samples_standing = st->bytes_standing / st->bytes_per_sample; st->buf_overlap = g_realloc (st->buf_overlap, st->bytes_overlap); st->table_blend = g_realloc (st->table_blend, st->samples_overlap * 4); /* sizeof (gint32|gfloat) */ if (st->bytes_overlap > prev_overlap) { memset ((guint8 *) st->buf_overlap + prev_overlap, 0, st->bytes_overlap - prev_overlap); } if (st->use_int) { gint32 *pb = st->table_blend; gint64 blend = 0; for (i = 0; i < frames_overlap; i++) { gint32 v = blend / frames_overlap; for (j = 0; j < st->samples_per_frame; j++) { *pb++ = v; } blend += 65535; /* 2^16 */ } st->output_overlap = output_overlap_s16; } else { gfloat *pb = st->table_blend; gfloat t = (gfloat) frames_overlap; for (i = 0; i < frames_overlap; i++) { gfloat v = i / t; for (j = 0; j < st->samples_per_frame; j++) { *pb++ = v; } } st->output_overlap = output_overlap_float; } } /* best overlap */ st->frames_search = (frames_overlap <= 1) ? 0 : st->ms_search * st->sample_rate / 1000.0; if (st->frames_search < 1) { /* if no search */ st->best_overlap_offset = NULL; } else { guint bytes_pre_corr = (st->samples_overlap - st->samples_per_frame) * 4; /* sizeof (gint32|gfloat) */ st->buf_pre_corr = g_realloc (st->buf_pre_corr, bytes_pre_corr + UNROLL_PADDING); st->table_window = g_realloc (st->table_window, bytes_pre_corr); if (st->use_int) { gint64 t = frames_overlap; gint32 n = 8589934588LL / (t * t); /* 4 * (2^31 - 1) / t^2 */ gint32 *pw; memset ((guint8 *) st->buf_pre_corr + bytes_pre_corr, 0, UNROLL_PADDING); pw = st->table_window; for (i = 1; i < frames_overlap; i++) { gint32 v = (i * (t - i) * n) >> 15; for (j = 0; j < st->samples_per_frame; j++) { *pw++ = v; } } st->best_overlap_offset = best_overlap_offset_s16; } else { gfloat *pw = st->table_window; for (i = 1; i < frames_overlap; i++) { gfloat v = i * (frames_overlap - i); for (j = 0; j < st->samples_per_frame; j++) { *pw++ = v; } } st->best_overlap_offset = best_overlap_offset_float; } } new_size = (st->frames_search + frames_stride + frames_overlap) * st->bytes_per_frame; if (st->bytes_queued > new_size) { if (st->bytes_to_slide > st->bytes_queued) { st->bytes_to_slide -= st->bytes_queued; st->bytes_queued = 0; } else { guint new_queued = MIN (st->bytes_queued - st->bytes_to_slide, new_size); memmove (st->buf_queue, st->buf_queue + st->bytes_queued - new_queued, new_queued); st->bytes_to_slide = 0; st->bytes_queued = new_queued; } } st->bytes_queue_max = new_size; st->buf_queue = g_realloc (st->buf_queue, st->bytes_queue_max); latency = gst_util_uint64_scale (st->bytes_queue_max, GST_SECOND, st->bytes_per_frame * st->sample_rate); if (st->latency != latency) { st->latency = latency; gst_element_post_message (GST_ELEMENT (st), gst_message_new_latency (GST_OBJECT (st))); } st->bytes_stride_scaled = st->bytes_stride * st->scale; st->frames_stride_scaled = st->bytes_stride_scaled / st->bytes_per_frame; GST_DEBUG ("%.3f scale, %.3f stride_in, %i stride_out, %i standing, %i overlap, %i search, %i queue, %s mode", st->scale, st->frames_stride_scaled, (gint) (st->bytes_stride / st->bytes_per_frame), (gint) (st->bytes_standing / st->bytes_per_frame), (gint) (st->bytes_overlap / st->bytes_per_frame), st->frames_search, (gint) (st->bytes_queue_max / st->bytes_per_frame), (st->use_int ? "s16" : "float")); st->reinit_buffers = FALSE; } /* GstBaseTransform vmethod implementations */ static GstFlowReturn gst_scaletempo_transform (GstBaseTransform * trans, GstBuffer * inbuf, GstBuffer * outbuf) { GstScaletempo *st = GST_SCALETEMPO (trans); gint8 *pout; guint offset_in, bytes_out; GstMapInfo omap; GstClockTime timestamp; gst_buffer_map (outbuf, &omap, GST_MAP_WRITE); pout = (gint8 *) omap.data; offset_in = fill_queue (st, inbuf, 0); bytes_out = 0; while (st->bytes_queued >= st->bytes_queue_max) { guint bytes_off = 0; gdouble frames_to_slide; guint frames_to_stride_whole; /* output stride */ if (st->output_overlap) { if (st->best_overlap_offset) { bytes_off = st->best_overlap_offset (st); } st->output_overlap (st, pout, bytes_off); } memcpy (pout + st->bytes_overlap, st->buf_queue + bytes_off + st->bytes_overlap, st->bytes_standing); pout += st->bytes_stride; bytes_out += st->bytes_stride; /* input stride */ memcpy (st->buf_overlap, st->buf_queue + bytes_off + st->bytes_stride, st->bytes_overlap); frames_to_slide = st->frames_stride_scaled + st->frames_stride_error; frames_to_stride_whole = (gint) frames_to_slide; st->bytes_to_slide = frames_to_stride_whole * st->bytes_per_frame; st->frames_stride_error = frames_to_slide - frames_to_stride_whole; offset_in += fill_queue (st, inbuf, offset_in); } gst_buffer_unmap (outbuf, &omap); timestamp = GST_BUFFER_TIMESTAMP (inbuf) - st->segment_start; if (timestamp < st->latency) timestamp = 0; else timestamp -= st->latency; GST_BUFFER_TIMESTAMP (outbuf) = timestamp / st->scale + st->segment_start; GST_BUFFER_DURATION (outbuf) = gst_util_uint64_scale (bytes_out, GST_SECOND, st->bytes_per_frame * st->sample_rate); gst_buffer_set_size (outbuf, bytes_out); return GST_FLOW_OK; } static gboolean gst_scaletempo_transform_size (GstBaseTransform * trans, GstPadDirection direction, GstCaps * caps, gsize size, GstCaps * othercaps, gsize * othersize) { if (direction == GST_PAD_SINK) { GstScaletempo *scaletempo = GST_SCALETEMPO (trans); gint bytes_to_out; if (scaletempo->reinit_buffers) reinit_buffers (scaletempo); bytes_to_out = size + scaletempo->bytes_queued - scaletempo->bytes_to_slide; if (bytes_to_out < (gint) scaletempo->bytes_queue_max) { *othersize = 0; } else { /* while (total_buffered - stride_length * n >= queue_max) n++ */ *othersize = scaletempo->bytes_stride * ((guint) ( (bytes_to_out - scaletempo->bytes_queue_max + /* rounding protection */ scaletempo->bytes_per_frame) / scaletempo->bytes_stride_scaled) + 1); } return TRUE; } return FALSE; } static gboolean gst_scaletempo_sink_event (GstBaseTransform * trans, GstEvent * event) { if (GST_EVENT_TYPE (event) == GST_EVENT_SEGMENT) { GstScaletempo *scaletempo = GST_SCALETEMPO (trans); GstSegment segment; gst_event_copy_segment (event, &segment); if (scaletempo->scale != segment.rate) { if (ABS (segment.rate - 1.0) < 1e-10) { scaletempo->scale = 1.0; gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (scaletempo), TRUE); } else { gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (scaletempo), FALSE); scaletempo->scale = segment.rate; scaletempo->bytes_stride_scaled = scaletempo->bytes_stride * scaletempo->scale; scaletempo->frames_stride_scaled = scaletempo->bytes_stride_scaled / scaletempo->bytes_per_frame; GST_DEBUG ("%.3f scale, %.3f stride_in, %i stride_out", scaletempo->scale, scaletempo->frames_stride_scaled, (gint) (scaletempo->bytes_stride / scaletempo->bytes_per_frame)); scaletempo->bytes_to_slide = 0; } } if (scaletempo->scale != 1.0) { scaletempo->segment_start = segment.start; segment.applied_rate = scaletempo->scale; segment.rate = 1.0; gst_event_unref (event); if (segment.stop != -1) { segment.stop = (segment.stop - segment.start) / segment.applied_rate + segment.start; } event = gst_event_new_segment (&segment); gst_pad_push_event (GST_BASE_TRANSFORM_SRC_PAD (trans), event); return TRUE; } } return GST_BASE_TRANSFORM_CLASS (parent_class)->sink_event (trans, event); } static gboolean gst_scaletempo_set_caps (GstBaseTransform * trans, GstCaps * incaps, GstCaps * outcaps) { GstScaletempo *scaletempo = GST_SCALETEMPO (trans); gint width, bps, nch, rate; gboolean use_int; GstAudioInfo info; if (!gst_audio_info_from_caps (&info, incaps)) return FALSE; nch = GST_AUDIO_INFO_CHANNELS (&info); rate = GST_AUDIO_INFO_RATE (&info); width = GST_AUDIO_INFO_WIDTH (&info); use_int = GST_AUDIO_INFO_IS_INTEGER (&info); bps = width / 8; GST_DEBUG ("caps: %" GST_PTR_FORMAT ", %d bps", incaps, bps); if (rate != scaletempo->sample_rate || nch != scaletempo->samples_per_frame || bps != scaletempo->bytes_per_sample || use_int != scaletempo->use_int) { scaletempo->sample_rate = rate; scaletempo->samples_per_frame = nch; scaletempo->bytes_per_sample = bps; scaletempo->bytes_per_frame = nch * bps; scaletempo->use_int = use_int; scaletempo->reinit_buffers = TRUE; } return TRUE; } static gboolean gst_scaletempo_query (GstBaseTransform * trans, GstPadDirection direction, GstQuery * query) { GstScaletempo *scaletempo = GST_SCALETEMPO (trans); if (direction == GST_PAD_SRC) { switch (GST_QUERY_TYPE (query)) { case GST_QUERY_LATENCY:{ GstPad *peer; gboolean res; if ((peer = gst_pad_get_peer (GST_BASE_TRANSFORM_SINK_PAD (trans)))) { if ((res = gst_pad_query (peer, query))) { GstClockTime min, max; gboolean live; gst_query_parse_latency (query, &live, &min, &max); GST_DEBUG_OBJECT (scaletempo, "Peer latency: min %" GST_TIME_FORMAT " max %" GST_TIME_FORMAT, GST_TIME_ARGS (min), GST_TIME_ARGS (max)); /* add our own latency */ GST_DEBUG_OBJECT (scaletempo, "Our latency: %" GST_TIME_FORMAT, GST_TIME_ARGS (scaletempo->latency)); min += scaletempo->latency; if (max != GST_CLOCK_TIME_NONE) max += scaletempo->latency; GST_DEBUG_OBJECT (scaletempo, "Calculated total latency : min %" GST_TIME_FORMAT " max %" GST_TIME_FORMAT, GST_TIME_ARGS (min), GST_TIME_ARGS (max)); gst_query_set_latency (query, live, min, max); } gst_object_unref (peer); } return TRUE; break; } default:{ return GST_BASE_TRANSFORM_CLASS (parent_class)->query (trans, direction, query); break; } } } else { return GST_BASE_TRANSFORM_CLASS (parent_class)->query (trans, direction, query); } } /* GObject vmethod implementations */ static void gst_scaletempo_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { GstScaletempo *scaletempo = GST_SCALETEMPO (object); switch (prop_id) { case PROP_RATE: g_value_set_double (value, scaletempo->scale); break; case PROP_STRIDE: g_value_set_uint (value, scaletempo->ms_stride); break; case PROP_OVERLAP: g_value_set_double (value, scaletempo->percent_overlap); break; case PROP_SEARCH: g_value_set_uint (value, scaletempo->ms_search); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static void gst_scaletempo_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstScaletempo *scaletempo = GST_SCALETEMPO (object); switch (prop_id) { case PROP_STRIDE:{ guint new_value = g_value_get_uint (value); if (scaletempo->ms_stride != new_value) { scaletempo->ms_stride = new_value; scaletempo->reinit_buffers = TRUE; } break; } case PROP_OVERLAP:{ gdouble new_value = g_value_get_double (value); if (scaletempo->percent_overlap != new_value) { scaletempo->percent_overlap = new_value; scaletempo->reinit_buffers = TRUE; } break; } case PROP_SEARCH:{ guint new_value = g_value_get_uint (value); if (scaletempo->ms_search != new_value) { scaletempo->ms_search = new_value; scaletempo->reinit_buffers = TRUE; } break; } default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } static void gst_scaletempo_class_init (GstScaletempoClass * klass) { GObjectClass *gobject_class = G_OBJECT_CLASS (klass); GstElementClass *gstelement_class = GST_ELEMENT_CLASS (klass); GstBaseTransformClass *basetransform_class = GST_BASE_TRANSFORM_CLASS (klass); gobject_class->get_property = GST_DEBUG_FUNCPTR (gst_scaletempo_get_property); gobject_class->set_property = GST_DEBUG_FUNCPTR (gst_scaletempo_set_property); g_object_class_install_property (gobject_class, PROP_RATE, g_param_spec_double ("rate", "Playback Rate", "Current playback rate", G_MININT, G_MAXINT, 1.0, G_PARAM_READABLE | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_STRIDE, g_param_spec_uint ("stride", "Stride Length", "Length in milliseconds to output each stride", 1, 5000, 30, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_OVERLAP, g_param_spec_double ("overlap", "Overlap Length", "Percentage of stride to overlap", 0, 1, .2, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_SEARCH, g_param_spec_uint ("search", "Search Length", "Length in milliseconds to search for best overlap position", 0, 500, 14, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); gst_element_class_add_pad_template (gstelement_class, gst_static_pad_template_get (&src_template)); gst_element_class_add_pad_template (gstelement_class, gst_static_pad_template_get (&sink_template)); gst_element_class_set_static_metadata (gstelement_class, "Scaletempo", "Filter/Effect/Rate", "Sync audio tempo with playback rate", "Rov Juvano "); basetransform_class->sink_event = GST_DEBUG_FUNCPTR (gst_scaletempo_sink_event); basetransform_class->set_caps = GST_DEBUG_FUNCPTR (gst_scaletempo_set_caps); basetransform_class->transform_size = GST_DEBUG_FUNCPTR (gst_scaletempo_transform_size); basetransform_class->transform = GST_DEBUG_FUNCPTR (gst_scaletempo_transform); basetransform_class->query = GST_DEBUG_FUNCPTR (gst_scaletempo_query); } static void gst_scaletempo_init (GstScaletempo * scaletempo) { /* defaults */ scaletempo->ms_stride = 30; scaletempo->percent_overlap = .2; scaletempo->ms_search = 14; /* uninitialized */ scaletempo->scale = 0; scaletempo->sample_rate = 0; scaletempo->frames_stride_error = 0; scaletempo->bytes_stride = 0; scaletempo->bytes_queued = 0; scaletempo->bytes_to_slide = 0; scaletempo->segment_start = 0; }