From 978bcd7181bf0f282cb17d2f5f6f1ac009508f37 Mon Sep 17 00:00:00 2001
From: Vivia Nikolaidou <vivia@toolsonair.com>
Date: Tue, 21 Apr 2015 21:09:19 +0300
Subject: [PATCH] alevel: New audio/video level element

The videoframe-audiolevel element acts like a synchronized audio/video "level"
element. For each video frame, it posts a level-style message containing the
RMS value of the corresponding audio frames. This element needs both video and
audio to pass through it. Furthermore, it needs a queue after its video
source.

https://bugzilla.gnome.org/show_bug.cgi?id=748259
---
 configure.ac                                  |   2 +
 gst/videoframe_audiolevel/Makefile.am         |   9 +
 .../gstvideoframe-audiolevel.c                | 785 ++++++++++++++++++
 .../gstvideoframe-audiolevel.h                |  73 ++
 tests/check/Makefile.am                       |   8 +
 tests/check/elements/videoframe-audiolevel.c  | 636 ++++++++++++++
 6 files changed, 1513 insertions(+)
 create mode 100644 gst/videoframe_audiolevel/Makefile.am
 create mode 100644 gst/videoframe_audiolevel/gstvideoframe-audiolevel.c
 create mode 100644 gst/videoframe_audiolevel/gstvideoframe-audiolevel.h
 create mode 100644 tests/check/elements/videoframe-audiolevel.c
diff --git a/configure.ac b/configure.ac
index 2ecc868e99..ece0034bd8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -435,6 +435,7 @@ AG_GST_CHECK_PLUGIN(accurip)
 AG_GST_CHECK_PLUGIN(adpcmdec)
 AG_GST_CHECK_PLUGIN(adpcmenc)
 AG_GST_CHECK_PLUGIN(aiff)
+AG_GST_CHECK_PLUGIN(videoframe_audiolevel)
 AG_GST_CHECK_PLUGIN(asfmux)
 AG_GST_CHECK_PLUGIN(audiofxbad)
 AG_GST_CHECK_PLUGIN(audiomixer)
@@ -3415,6 +3416,7 @@ gst/accurip/Makefile
 gst/adpcmdec/Makefile
 gst/adpcmenc/Makefile
 gst/aiff/Makefile
+gst/videoframe_audiolevel/Makefile
 gst/asfmux/Makefile
 gst/audiofxbad/Makefile
 gst/audiomixer/Makefile
diff --git a/gst/videoframe_audiolevel/Makefile.am b/gst/videoframe_audiolevel/Makefile.am
new file mode 100644
index 0000000000..26197291b5
--- /dev/null
+++ b/gst/videoframe_audiolevel/Makefile.am
@@ -0,0 +1,9 @@
+plugin_LTLIBRARIES = libgstvideoframe_audiolevel.la
+
+libgstvideoframe_audiolevel_la_SOURCES = gstvideoframe-audiolevel.c
+libgstvideoframe_audiolevel_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS)
+libgstvideoframe_audiolevel_la_LIBADD = $(GST_PLUGINS_BASE_LIBS) -lgstaudio-$(GST_API_VERSION) $(GST_BASE_LIBS) $(LIBM)
+libgstvideoframe_audiolevel_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS)
+libgstvideoframe_audiolevel_la_LIBTOOLFLAGS = $(GST_PLUGIN_LIBTOOLFLAGS)
+
+noinst_HEADERS = gstvideoframe-audiolevel.h
diff --git a/gst/videoframe_audiolevel/gstvideoframe-audiolevel.c b/gst/videoframe_audiolevel/gstvideoframe-audiolevel.c
new file mode 100644
index 0000000000..762bf90c1c
--- /dev/null
+++ b/gst/videoframe_audiolevel/gstvideoframe-audiolevel.c
@@ -0,0 +1,785 @@
+/*
+ * GStreamer
+ * Copyright (C) 2015 Vivia Nikolaidou <vivia@toolsonair.com>
+ *
+ * Based on gstlevel.c:
+ * Copyright (C) 2000,2001,2002,2003,2005
+ *           Thomas Vander Stichele <thomas at apestaart dot org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * SECTION:element-videoframe-audiolevel
+ *
+ * This element acts like a synchronized audio/video "level". It gathers
+ * all audio buffers sent between two video frames, and then sends a message
+ * that contains the RMS value of all samples for these buffers.
+ *
+ * <refsect2>
+ * <title>Example launch line</title>
+ * |[
+ * gst-launch-1.0 -m filesrc location="file.mkv" ! decodebin name=d ! "audio/x-raw" ! videoframe-audiolevel name=l ! autoaudiosink d. ! "video/x-raw" ! l. l. ! queue ! autovideosink ]|
+ * </refsect2>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* FIXME 2.0: suppress warnings for deprecated API such as GValueArray
+ * with newer GLib versions (>= 2.31.0) */
+#define GLIB_DISABLE_DEPRECATION_WARNINGS
+
+#include "gstvideoframe-audiolevel.h"
+#include <math.h>
+
+#define GST_CAT_DEFAULT gst_videoframe_audiolevel_debug
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+# define FORMATS "{ S8, S16LE, S32LE, F32LE, F64LE }"
+#else
+# define FORMATS "{ S8, S16BE, S32BE, F32BE, F64BE }"
+#endif
+GST_DEBUG_CATEGORY_STATIC (GST_CAT_DEFAULT);
+
+static GstStaticPadTemplate audio_sink_template =
+GST_STATIC_PAD_TEMPLATE ("asink",
+    GST_PAD_SINK,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS (GST_AUDIO_CAPS_MAKE (FORMATS))
+    );
+
+static GstStaticPadTemplate audio_src_template =
+GST_STATIC_PAD_TEMPLATE ("asrc",
+    GST_PAD_SRC,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS (GST_AUDIO_CAPS_MAKE (FORMATS))
+    );
+
+static GstStaticPadTemplate video_sink_template =
+GST_STATIC_PAD_TEMPLATE ("vsink",
+    GST_PAD_SINK,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("video/x-raw")
+    );
+
+static GstStaticPadTemplate video_src_template =
+GST_STATIC_PAD_TEMPLATE ("vsrc",
+    GST_PAD_SRC,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("video/x-raw")
+    );
+
+#define parent_class gst_videoframe_audiolevel_parent_class
+G_DEFINE_TYPE (GstVideoFrameAudioLevel, gst_videoframe_audiolevel,
+    GST_TYPE_ELEMENT);
+
+static GstFlowReturn gst_videoframe_audiolevel_asink_chain (GstPad * pad,
+    GstObject * parent, GstBuffer * inbuf);
+static GstFlowReturn gst_videoframe_audiolevel_vsink_chain (GstPad * pad,
+    GstObject * parent, GstBuffer * inbuf);
+static gboolean gst_videoframe_audiolevel_asink_event (GstPad * pad,
+    GstObject * parent, GstEvent * event);
+static gboolean gst_videoframe_audiolevel_vsink_event (GstPad * pad,
+    GstObject * parent, GstEvent * event);
+static GstIterator *gst_videoframe_audiolevel_iterate_internal_links (GstPad *
+    pad, GstObject * parent);
+
+static void gst_videoframe_audiolevel_finalize (GObject * gobject);
+
+static GstStateChangeReturn gst_videoframe_audiolevel_change_state (GstElement *
+    element, GstStateChange transition);
+
+static void
+gst_videoframe_audiolevel_class_init (GstVideoFrameAudioLevelClass * klass)
+{
+  GstElementClass *gstelement_class;
+  GObjectClass *gobject_class = (GObjectClass *) klass;
+
+  GST_DEBUG_CATEGORY_INIT (gst_videoframe_audiolevel_debug,
+      "videoframe-audiolevel", 0, "Synchronized audio/video level");
+
+  gstelement_class = (GstElementClass *) klass;
+
+  gst_element_class_set_static_metadata (gstelement_class,
+      "Video-frame audio level", "Filter/Analyzer/Audio",
+      "Synchronized audio/video RMS Level messenger for audio/raw",
+      "Vivia Nikolaidou <vivia@toolsonair.com>");
+
+  gobject_class->finalize = gst_videoframe_audiolevel_finalize;
+  gstelement_class->change_state = gst_videoframe_audiolevel_change_state;
+
+  gst_element_class_add_pad_template (gstelement_class,
+      gst_static_pad_template_get (&audio_src_template));
+  gst_element_class_add_pad_template (gstelement_class,
+      gst_static_pad_template_get (&audio_sink_template));
+
+  gst_element_class_add_pad_template (gstelement_class,
+      gst_static_pad_template_get (&video_src_template));
+  gst_element_class_add_pad_template (gstelement_class,
+      gst_static_pad_template_get (&video_sink_template));
+}
+
+static void
+gst_videoframe_audiolevel_init (GstVideoFrameAudioLevel * self)
+{
+  self->asinkpad =
+      gst_pad_new_from_static_template (&audio_sink_template, "asink");
+  gst_pad_set_chain_function (self->asinkpad,
+      GST_DEBUG_FUNCPTR (gst_videoframe_audiolevel_asink_chain));
+  gst_pad_set_event_function (self->asinkpad,
+      GST_DEBUG_FUNCPTR (gst_videoframe_audiolevel_asink_event));
+  gst_pad_set_iterate_internal_links_function (self->asinkpad,
+      GST_DEBUG_FUNCPTR (gst_videoframe_audiolevel_iterate_internal_links));
+  gst_element_add_pad (GST_ELEMENT (self), self->asinkpad);
+
+  self->vsinkpad =
+      gst_pad_new_from_static_template (&video_sink_template, "vsink");
+  gst_pad_set_chain_function (self->vsinkpad,
+      GST_DEBUG_FUNCPTR (gst_videoframe_audiolevel_vsink_chain));
+  gst_pad_set_event_function (self->vsinkpad,
+      GST_DEBUG_FUNCPTR (gst_videoframe_audiolevel_vsink_event));
+  gst_pad_set_iterate_internal_links_function (self->vsinkpad,
+      GST_DEBUG_FUNCPTR (gst_videoframe_audiolevel_iterate_internal_links));
+  gst_element_add_pad (GST_ELEMENT (self), self->vsinkpad);
+
+  self->asrcpad =
+      gst_pad_new_from_static_template (&audio_src_template, "asrc");
+  gst_pad_set_iterate_internal_links_function (self->asrcpad,
+      GST_DEBUG_FUNCPTR (gst_videoframe_audiolevel_iterate_internal_links));
+  gst_element_add_pad (GST_ELEMENT (self), self->asrcpad);
+
+  self->vsrcpad =
+      gst_pad_new_from_static_template (&video_src_template, "vsrc");
+  gst_pad_set_iterate_internal_links_function (self->vsrcpad,
+      GST_DEBUG_FUNCPTR (gst_videoframe_audiolevel_iterate_internal_links));
+  gst_element_add_pad (GST_ELEMENT (self), self->vsrcpad);
+
+  GST_PAD_SET_PROXY_CAPS (self->asinkpad);
+  GST_PAD_SET_PROXY_ALLOCATION (self->asinkpad);
+
+  GST_PAD_SET_PROXY_CAPS (self->asrcpad);
+  GST_PAD_SET_PROXY_SCHEDULING (self->asrcpad);
+
+  GST_PAD_SET_PROXY_CAPS (self->vsinkpad);
+  GST_PAD_SET_PROXY_ALLOCATION (self->vsinkpad);
+
+  GST_PAD_SET_PROXY_CAPS (self->vsrcpad);
+  GST_PAD_SET_PROXY_SCHEDULING (self->vsrcpad);
+
+  self->adapter = gst_adapter_new ();
+
+  g_queue_init (&self->vtimeq);
+  self->first_time = GST_CLOCK_TIME_NONE;
+  self->total_frames = 0;
+  /* alignment_threshold and discont_wait should become properties if needed */
+  self->alignment_threshold = 40 * GST_MSECOND;
+  self->discont_time = GST_CLOCK_TIME_NONE;
+  self->next_offset = -1;
+  self->discont_wait = 1 * GST_SECOND;
+
+  self->video_eos_flag = FALSE;
+  self->audio_flush_flag = FALSE;
+  self->shutdown_flag = FALSE;
+
+  g_mutex_init (&self->mutex);
+  g_cond_init (&self->cond);
+}
+
+static GstStateChangeReturn
+gst_videoframe_audiolevel_change_state (GstElement * element,
+    GstStateChange transition)
+{
+  GstStateChangeReturn ret;
+  GstVideoFrameAudioLevel *self = GST_VIDEOFRAME_AUDIOLEVEL (element);
+
+  switch (transition) {
+    case GST_STATE_CHANGE_PAUSED_TO_READY:
+      g_mutex_lock (&self->mutex);
+      self->shutdown_flag = TRUE;
+      g_cond_signal (&self->cond);
+      g_mutex_unlock (&self->mutex);
+      break;
+    case GST_STATE_CHANGE_READY_TO_PAUSED:
+      g_mutex_lock (&self->mutex);
+      self->shutdown_flag = FALSE;
+      self->video_eos_flag = FALSE;
+      self->audio_flush_flag = FALSE;
+      g_mutex_unlock (&self->mutex);
+    default:
+      break;
+  }
+
+  ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
+
+  switch (transition) {
+    case GST_STATE_CHANGE_PAUSED_TO_READY:
+      g_mutex_lock (&self->mutex);
+      self->first_time = GST_CLOCK_TIME_NONE;
+      self->total_frames = 0;
+      gst_segment_init (&self->asegment, GST_FORMAT_UNDEFINED);
+      gst_segment_init (&self->vsegment, GST_FORMAT_UNDEFINED);
+      self->vsegment.position = GST_CLOCK_TIME_NONE;
+      gst_adapter_clear (self->adapter);
+      g_queue_foreach (&self->vtimeq, (GFunc) g_free, NULL);
+      g_queue_clear (&self->vtimeq);
+      if (self->CS) {
+        g_free (self->CS);
+        self->CS = NULL;
+      }
+      g_mutex_unlock (&self->mutex);
+      break;
+    default:
+      break;
+  }
+
+  return ret;
+}
+
+static void
+gst_videoframe_audiolevel_finalize (GObject * object)
+{
+  GstVideoFrameAudioLevel *self = GST_VIDEOFRAME_AUDIOLEVEL (object);
+
+  if (self->adapter) {
+    g_object_unref (self->adapter);
+    self->adapter = NULL;
+  }
+  g_queue_foreach (&self->vtimeq, (GFunc) g_free, NULL);
+  g_queue_clear (&self->vtimeq);
+  self->first_time = GST_CLOCK_TIME_NONE;
+  self->total_frames = 0;
+  if (self->CS) {
+    g_free (self->CS);
+    self->CS = NULL;
+  }
+
+  g_mutex_clear (&self->mutex);
+  g_cond_clear (&self->cond);
+
+  G_OBJECT_CLASS (parent_class)->finalize (object);
+}
+
+#define DEFINE_INT_LEVEL_CALCULATOR(TYPE, RESOLUTION)                         \
+static void inline                                                            \
+gst_videoframe_audiolevel_calculate_##TYPE (gpointer data, guint num, guint channels,        \
+                            gdouble *NCS)                                     \
+{                                                                             \
+  TYPE * in = (TYPE *)data;                                                   \
+  register guint j;                                                           \
+  gdouble squaresum = 0.0;           /* square sum of the input samples */    \
+  register gdouble square = 0.0;     /* Square */                             \
+  gdouble normalizer;                /* divisor to get a [-1.0, 1.0] range */ \
+                                                                              \
+  /* *NCS = 0.0; Normalized Cumulative Square */                              \
+                                                                              \
+  for (j = 0; j < num; j += channels) {                                       \
+    square = ((gdouble) in[j]) * in[j];                                       \
+    squaresum += square;                                                      \
+  }                                                                           \
+                                                                              \
+  normalizer = (gdouble) (G_GINT64_CONSTANT(1) << (RESOLUTION * 2));          \
+  *NCS = squaresum / normalizer;                                              \
+}
+
+DEFINE_INT_LEVEL_CALCULATOR (gint32, 31);
+DEFINE_INT_LEVEL_CALCULATOR (gint16, 15);
+DEFINE_INT_LEVEL_CALCULATOR (gint8, 7);
+
+#define DEFINE_FLOAT_LEVEL_CALCULATOR(TYPE)                                   \
+static void inline                                                            \
+gst_videoframe_audiolevel_calculate_##TYPE (gpointer data, guint num, guint channels,        \
+                            gdouble *NCS)                                     \
+{                                                                             \
+  TYPE * in = (TYPE *)data;                                                   \
+  register guint j;                                                           \
+  gdouble squaresum = 0.0;           /* square sum of the input samples */    \
+  register gdouble square = 0.0;     /* Square */                             \
+                                                                              \
+  /* *NCS = 0.0; Normalized Cumulative Square */                              \
+                                                                              \
+  for (j = 0; j < num; j += channels) {                                       \
+    square = ((gdouble) in[j]) * in[j];                                       \
+    squaresum += square;                                                      \
+  }                                                                           \
+                                                                              \
+  *NCS = squaresum;                                                           \
+}
+
+DEFINE_FLOAT_LEVEL_CALCULATOR (gfloat);
+DEFINE_FLOAT_LEVEL_CALCULATOR (gdouble);
+
+static gboolean
+gst_videoframe_audiolevel_vsink_event (GstPad * pad, GstObject * parent,
+    GstEvent * event)
+{
+  GstVideoFrameAudioLevel *self = GST_VIDEOFRAME_AUDIOLEVEL (parent);
+  GST_LOG_OBJECT (pad, "Got %s event", GST_EVENT_TYPE_NAME (event));
+
+  switch (GST_EVENT_TYPE (event)) {
+    case GST_EVENT_SEGMENT:
+      g_mutex_lock (&self->mutex);
+      g_queue_foreach (&self->vtimeq, (GFunc) g_free, NULL);
+      g_queue_clear (&self->vtimeq);
+      g_mutex_unlock (&self->mutex);
+      gst_event_copy_segment (event, &self->vsegment);
+      if (self->vsegment.format != GST_FORMAT_TIME)
+        return FALSE;
+      self->vsegment.position = GST_CLOCK_TIME_NONE;
+      break;
+    case GST_EVENT_GAP:
+      return TRUE;
+    case GST_EVENT_EOS:
+      g_mutex_lock (&self->mutex);
+      self->video_eos_flag = TRUE;
+      g_cond_signal (&self->cond);
+      g_mutex_unlock (&self->mutex);
+      break;
+    case GST_EVENT_FLUSH_STOP:
+      g_mutex_lock (&self->mutex);
+      g_queue_foreach (&self->vtimeq, (GFunc) g_free, NULL);
+      g_queue_clear (&self->vtimeq);
+      gst_segment_init (&self->vsegment, GST_FORMAT_UNDEFINED);
+      g_cond_signal (&self->cond);
+      g_mutex_unlock (&self->mutex);
+      self->vsegment.position = GST_CLOCK_TIME_NONE;
+      break;
+    default:
+      break;
+  }
+  return gst_pad_event_default (pad, parent, event);
+}
+
+static gboolean
+gst_videoframe_audiolevel_asink_event (GstPad * pad, GstObject * parent,
+    GstEvent * event)
+{
+  GstVideoFrameAudioLevel *self = GST_VIDEOFRAME_AUDIOLEVEL (parent);
+  GST_LOG_OBJECT (pad, "Got %s event", GST_EVENT_TYPE_NAME (event));
+
+  switch (GST_EVENT_TYPE (event)) {
+    case GST_EVENT_SEGMENT:
+      self->first_time = GST_CLOCK_TIME_NONE;
+      self->total_frames = 0;
+      gst_adapter_clear (self->adapter);
+      gst_event_copy_segment (event, &self->asegment);
+      if (self->asegment.format != GST_FORMAT_TIME)
+        return FALSE;
+      break;
+    case GST_EVENT_FLUSH_START:
+      g_mutex_lock (&self->mutex);
+      self->audio_flush_flag = TRUE;
+      g_cond_signal (&self->cond);
+      g_mutex_unlock (&self->mutex);
+      break;
+    case GST_EVENT_FLUSH_STOP:
+      self->audio_flush_flag = FALSE;
+      self->total_frames = 0;
+      self->first_time = GST_CLOCK_TIME_NONE;
+      gst_adapter_clear (self->adapter);
+      gst_segment_init (&self->asegment, GST_FORMAT_UNDEFINED);
+      break;
+    case GST_EVENT_CAPS:{
+      GstCaps *caps;
+      gint channels;
+      gst_event_parse_caps (event, &caps);
+      GST_DEBUG_OBJECT (self, "Got caps %" GST_PTR_FORMAT, caps);
+      if (!gst_audio_info_from_caps (&self->ainfo, caps))
+        return FALSE;
+      switch (GST_AUDIO_INFO_FORMAT (&self->ainfo)) {
+        case GST_AUDIO_FORMAT_S8:
+          self->process = gst_videoframe_audiolevel_calculate_gint8;
+          break;
+        case GST_AUDIO_FORMAT_S16:
+          self->process = gst_videoframe_audiolevel_calculate_gint16;
+          break;
+        case GST_AUDIO_FORMAT_S32:
+          self->process = gst_videoframe_audiolevel_calculate_gint32;
+          break;
+        case GST_AUDIO_FORMAT_F32:
+          self->process = gst_videoframe_audiolevel_calculate_gfloat;
+          break;
+        case GST_AUDIO_FORMAT_F64:
+          self->process = gst_videoframe_audiolevel_calculate_gdouble;
+          break;
+        default:
+          self->process = NULL;
+          break;
+      }
+      gst_adapter_clear (self->adapter);
+      channels = GST_AUDIO_INFO_CHANNELS (&self->ainfo);
+      self->first_time = GST_CLOCK_TIME_NONE;
+      self->total_frames = 0;
+      if (self->CS)
+        g_free (self->CS);
+      self->CS = g_new0 (gdouble, channels);
+      break;
+    }
+    default:
+      break;
+  }
+
+  return gst_pad_event_default (pad, parent, event);
+}
+
+static GstMessage *
+update_rms_from_buffer (GstVideoFrameAudioLevel * self, GstBuffer * inbuf)
+{
+  GstMapInfo map;
+  guint8 *in_data;
+  gsize in_size;
+  gdouble CS;
+  guint i;
+  guint num_frames, frames;
+  guint num_int_samples = 0;    /* number of interleaved samples
+                                 * ie. total count for all channels combined */
+  gint channels, rate, bps;
+  GValue v = G_VALUE_INIT;
+  GValue va = G_VALUE_INIT;
+  GValueArray *a;
+  GstStructure *s;
+  GstMessage *msg;
+  GstClockTime duration, running_time;
+
+  channels = GST_AUDIO_INFO_CHANNELS (&self->ainfo);
+  bps = GST_AUDIO_INFO_BPS (&self->ainfo);
+  rate = GST_AUDIO_INFO_RATE (&self->ainfo);
+
+  gst_buffer_map (inbuf, &map, GST_MAP_READ);
+  in_data = map.data;
+  in_size = map.size;
+
+  num_int_samples = in_size / bps;
+
+  GST_LOG_OBJECT (self, "analyzing %u sample frames at ts %" GST_TIME_FORMAT,
+      num_int_samples, GST_TIME_ARGS (GST_BUFFER_TIMESTAMP (inbuf)));
+
+  g_return_val_if_fail (num_int_samples % channels == 0, NULL);
+
+  num_frames = num_int_samples / channels;
+  frames = num_frames;
+  duration = GST_FRAMES_TO_CLOCK_TIME (frames, rate);
+  if (num_frames > 0) {
+    for (i = 0; i < channels; ++i) {
+      self->process (in_data + (bps * i), num_int_samples, channels, &CS);
+      GST_LOG_OBJECT (self,
+          "[%d]: cumulative squares %lf, over %d samples/%d channels",
+          i, CS, num_int_samples, channels);
+      self->CS[i] += CS;
+    }
+    in_data += num_frames * bps;
+
+    self->total_frames += num_frames;
+  }
+  running_time =
+      self->first_time + gst_util_uint64_scale (self->total_frames, GST_SECOND,
+      rate);
+
+  a = g_value_array_new (channels);
+  s = gst_structure_new ("videoframe-audiolevel", "running-time", G_TYPE_UINT64,
+      running_time, "duration", G_TYPE_UINT64, duration, NULL);
+
+  g_value_init (&v, G_TYPE_DOUBLE);
+  g_value_init (&va, G_TYPE_VALUE_ARRAY);
+  for (i = 0; i < channels; i++) {
+    gdouble rms = sqrt (self->CS[i] / frames);
+    if (frames == 0 && self->CS[i] == 0) {
+      rms = 0;                  /* empty buffer */
+    }
+    self->CS[i] = 0.0;
+    g_value_set_double (&v, rms);
+    g_value_array_append (a, &v);
+  }
+  g_value_take_boxed (&va, a);
+  gst_structure_take_value (s, "rms", &va);
+  msg = gst_message_new_element (GST_OBJECT (self), s);
+
+  gst_buffer_unmap (inbuf, &map);
+
+  return msg;
+}
+
+static GstFlowReturn
+gst_videoframe_audiolevel_vsink_chain (GstPad * pad, GstObject * parent,
+    GstBuffer * inbuf)
+{
+  GstClockTime timestamp;
+  GstVideoFrameAudioLevel *self = GST_VIDEOFRAME_AUDIOLEVEL (parent);
+  GstClockTime duration;
+  GstClockTime *ptrtime = g_new (GstClockTime, 1);
+
+  timestamp = GST_BUFFER_TIMESTAMP (inbuf);
+  *ptrtime =
+      gst_segment_to_running_time (&self->vsegment, GST_FORMAT_TIME, timestamp);
+  g_mutex_lock (&self->mutex);
+  self->vsegment.position = timestamp;
+  duration = GST_BUFFER_DURATION (inbuf);
+  if (duration != GST_CLOCK_TIME_NONE)
+    self->vsegment.position += duration;
+  g_queue_push_tail (&self->vtimeq, ptrtime);
+  g_cond_signal (&self->cond);
+  GST_DEBUG_OBJECT (pad, "Pushed a frame");
+  g_mutex_unlock (&self->mutex);
+  return gst_pad_push (self->vsrcpad, inbuf);
+}
+
+static GstFlowReturn
+gst_videoframe_audiolevel_asink_chain (GstPad * pad, GstObject * parent,
+    GstBuffer * inbuf)
+{
+  GstClockTime timestamp, cur_time;
+  GstVideoFrameAudioLevel *self = GST_VIDEOFRAME_AUDIOLEVEL (parent);
+  GstBuffer *buf;
+  gsize inbuf_size;
+  guint64 start_offset, end_offset;
+  GstClockTime running_time;
+  gint rate, bpf;
+  gboolean discont = FALSE;
+
+  timestamp = GST_BUFFER_TIMESTAMP (inbuf);
+  running_time =
+      gst_segment_to_running_time (&self->asegment, GST_FORMAT_TIME, timestamp);
+
+  rate = GST_AUDIO_INFO_RATE (&self->ainfo);
+  bpf = GST_AUDIO_INFO_BPF (&self->ainfo);
+  start_offset = gst_util_uint64_scale (timestamp, rate, GST_SECOND);
+  inbuf_size = gst_buffer_get_size (inbuf);
+  end_offset = start_offset + inbuf_size / bpf;
+
+  g_mutex_lock (&self->mutex);
+
+  if (GST_BUFFER_IS_DISCONT (inbuf)
+      || GST_BUFFER_FLAG_IS_SET (inbuf, GST_BUFFER_FLAG_RESYNC)
+      || self->first_time == GST_CLOCK_TIME_NONE) {
+    discont = TRUE;
+  } else {
+    guint64 diff, max_sample_diff;
+
+    /* Check discont, based on audiobasesink */
+    if (start_offset <= self->next_offset)
+      diff = self->next_offset - start_offset;
+    else
+      diff = start_offset - self->next_offset;
+
+    max_sample_diff =
+        gst_util_uint64_scale_int (self->alignment_threshold, rate, GST_SECOND);
+
+    /* Discont! */
+    if (G_UNLIKELY (diff >= max_sample_diff)) {
+      if (self->discont_wait > 0) {
+        if (self->discont_time == GST_CLOCK_TIME_NONE) {
+          self->discont_time = timestamp;
+        } else if (timestamp - self->discont_time >= self->discont_wait) {
+          discont = TRUE;
+          self->discont_time = GST_CLOCK_TIME_NONE;
+        }
+      } else {
+        discont = TRUE;
+      }
+    } else if (G_UNLIKELY (self->discont_time != GST_CLOCK_TIME_NONE)) {
+      /* we have had a discont, but are now back on track! */
+      self->discont_time = GST_CLOCK_TIME_NONE;
+    }
+  }
+
+  if (discont) {
+    /* Have discont, need resync */
+    if (self->next_offset != -1)
+      GST_INFO_OBJECT (pad, "Have discont. Expected %"
+          G_GUINT64_FORMAT ", got %" G_GUINT64_FORMAT,
+          self->next_offset, start_offset);
+    self->total_frames = 0;
+    self->first_time = running_time;
+    self->next_offset = end_offset;
+  } else {
+    self->next_offset += inbuf_size / bpf;
+  }
+
+  gst_adapter_push (self->adapter, gst_buffer_ref (inbuf));
+
+  GST_DEBUG_OBJECT (self, "Queue length %i",
+      g_queue_get_length (&self->vtimeq));
+
+  while (TRUE) {
+    GstClockTime *vt0, *vt1;
+    GstClockTime vtemp;
+    GstMessage *msg;
+    gsize bytes, available_bytes;
+
+    vtemp = GST_CLOCK_TIME_NONE;
+
+    while (!(g_queue_get_length (&self->vtimeq) >= 2 || self->video_eos_flag
+            || self->audio_flush_flag || self->shutdown_flag))
+      g_cond_wait (&self->cond, &self->mutex);
+
+    if (self->audio_flush_flag || self->shutdown_flag) {
+      g_mutex_unlock (&self->mutex);
+      gst_buffer_unref (inbuf);
+      return GST_FLOW_FLUSHING;
+    } else if (self->video_eos_flag) {
+      GST_DEBUG_OBJECT (self, "Video EOS flag alert");
+      /* nothing to do here if queue is empty */
+      if (g_queue_get_length (&self->vtimeq) == 0)
+        break;
+
+      if (g_queue_get_length (&self->vtimeq) < 2) {
+        vtemp = self->vsegment.position;
+      } else if (self->vsegment.position == GST_CLOCK_TIME_NONE) {
+        /* g_queue_get_length is surely >= 2 at this point
+         * so the adapter isn't empty */
+        buf =
+            gst_adapter_take_buffer (self->adapter,
+            gst_adapter_available (self->adapter));
+        if (buf != NULL) {
+          GstMessage *msg;
+          msg = update_rms_from_buffer (self, buf);
+          g_mutex_unlock (&self->mutex);
+          gst_element_post_message (GST_ELEMENT (self), msg);
+          gst_buffer_unref (buf);
+          g_mutex_lock (&self->mutex);  /* we unlock again later */
+        }
+        break;
+      }
+    } else if (g_queue_get_length (&self->vtimeq) < 2) {
+      continue;
+    }
+
+    vt0 = g_queue_pop_head (&self->vtimeq);
+    if (vtemp == GST_CLOCK_TIME_NONE)
+      vt1 = g_queue_peek_head (&self->vtimeq);
+    else
+      vt1 = &vtemp;
+
+    cur_time =
+        self->first_time + gst_util_uint64_scale (self->total_frames,
+        GST_SECOND, rate);
+    GST_DEBUG_OBJECT (self,
+        "Processing: current time is %" GST_TIME_FORMAT,
+        GST_TIME_ARGS (cur_time));
+    GST_DEBUG_OBJECT (self, "Total frames is %i with a rate of %d",
+        self->total_frames, rate);
+    GST_DEBUG_OBJECT (self, "Start time is %" GST_TIME_FORMAT,
+        GST_TIME_ARGS (self->first_time));
+    GST_DEBUG_OBJECT (self, "Time on top is %" GST_TIME_FORMAT,
+        GST_TIME_ARGS (*vt0));
+
+    if (cur_time < *vt0) {
+      guint num_frames =
+          gst_util_uint64_scale (*vt0 - cur_time, rate, GST_SECOND);
+      bytes = num_frames * GST_AUDIO_INFO_BPF (&self->ainfo);
+      available_bytes = gst_adapter_available (self->adapter);
+      if (available_bytes == 0) {
+        g_queue_push_head (&self->vtimeq, vt0);
+        break;
+      }
+      if (bytes == 0) {
+        cur_time = *vt0;
+      } else {
+        GST_DEBUG_OBJECT (self, "Flushed %ld out of %ld bytes", bytes,
+            available_bytes);
+        gst_adapter_flush (self->adapter, MIN (bytes, available_bytes));
+        self->total_frames += num_frames;
+        if (available_bytes <= bytes) {
+          g_queue_push_head (&self->vtimeq, vt0);
+          break;
+        }
+        cur_time =
+            self->first_time + gst_util_uint64_scale (self->total_frames,
+            GST_SECOND, rate);
+      }
+    }
+    if (*vt1 > cur_time) {
+      bytes =
+          GST_AUDIO_INFO_BPF (&self->ainfo) * gst_util_uint64_scale (*vt1 -
+          cur_time, rate, GST_SECOND);
+    } else {
+      bytes = 0;                /* We just need to discard vt0 */
+    }
+    available_bytes = gst_adapter_available (self->adapter);
+    GST_DEBUG_OBJECT (self, "Adapter contains %ld out of %ld bytes",
+        available_bytes, bytes);
+
+    if (available_bytes < bytes) {
+      g_queue_push_head (&self->vtimeq, vt0);
+      goto done;
+    }
+
+    if (bytes > 0) {
+      buf = gst_adapter_take_buffer (self->adapter, bytes);
+      g_assert (buf != NULL);
+    } else {
+      /* Just an empty buffer */
+      buf = gst_buffer_new ();
+    }
+    msg = update_rms_from_buffer (self, buf);
+    g_mutex_unlock (&self->mutex);
+    gst_element_post_message (GST_ELEMENT (self), msg);
+    g_mutex_lock (&self->mutex);
+
+    gst_buffer_unref (buf);
+    g_free (vt0);
+    if (available_bytes == bytes)
+      break;
+  }
+done:
+  g_mutex_unlock (&self->mutex);
+  return gst_pad_push (self->asrcpad, inbuf);
+}
+
+static GstIterator *
+gst_videoframe_audiolevel_iterate_internal_links (GstPad * pad,
+    GstObject * parent)
+{
+  GstIterator *it = NULL;
+  GstPad *opad;
+  GValue val = { 0, };
+  GstVideoFrameAudioLevel *self = GST_VIDEOFRAME_AUDIOLEVEL (parent);
+
+  if (self->asinkpad == pad)
+    opad = gst_object_ref (self->asrcpad);
+  else if (self->asrcpad == pad)
+    opad = gst_object_ref (self->asinkpad);
+  else if (self->vsinkpad == pad)
+    opad = gst_object_ref (self->vsrcpad);
+  else if (self->vsrcpad == pad)
+    opad = gst_object_ref (self->vsinkpad);
+  else
+    goto out;
+
+  g_value_init (&val, GST_TYPE_PAD);
+  g_value_set_object (&val, opad);
+  it = gst_iterator_new_single (GST_TYPE_PAD, &val);
+  g_value_unset (&val);
+
+  gst_object_unref (opad);
+
+out:
+  return it;
+}
+
+static gboolean
+gst_videoframe_audiolevel_plugin_init (GstPlugin * plugin)
+{
+  return gst_element_register (plugin, "videoframe-audiolevel",
+      GST_RANK_NONE, GST_TYPE_VIDEOFRAME_AUDIOLEVEL);
+}
+
+GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
+    GST_VERSION_MINOR,
+    videoframe - audiolevel,
+    "Video frame-synchronized audio level",
+    gst_videoframe_audiolevel_plugin_init, VERSION, GST_LICENSE,
+    GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN);
diff --git a/gst/videoframe_audiolevel/gstvideoframe-audiolevel.h b/gst/videoframe_audiolevel/gstvideoframe-audiolevel.h
new file mode 100644
index 0000000000..83332e521a
--- /dev/null
+++ b/gst/videoframe_audiolevel/gstvideoframe-audiolevel.h
@@ -0,0 +1,73 @@
+/* 
+ * GStreamer
+ * Copyright (C) 2015 Vivia Nikolaidou <vivia@toolsonair.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_VIDEOFRAME_AUDIOLEVEL_H__
+#define __GST_VIDEOFRAME_AUDIOLEVEL_H__
+
+#include <gst/gst.h>
+#include <gst/audio/audio.h>
+
+G_BEGIN_DECLS
+#define GST_TYPE_VIDEOFRAME_AUDIOLEVEL                    (gst_videoframe_audiolevel_get_type())
+#define GST_VIDEOFRAME_AUDIOLEVEL(obj)                    (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_VIDEOFRAME_AUDIOLEVEL,GstVideoFrameAudioLevel))
+#define GST_IS_VIDEOFRAME_AUDIOLEVEL(obj)                 (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_VIDEOFRAME_AUDIOLEVEL))
+#define GST_VIDEOFRAME_AUDIOLEVEL_CLASS(klass)            (G_TYPE_CHECK_CLASS_CAST((klass) ,GST_TYPE_VIDEOFRAME_AUDIOLEVEL,GstVideoFrameAudioLevelClass))
+#define GST_IS_VIDEOFRAME_AUDIOLEVEL_CLASS(klass)         (G_TYPE_CHECK_CLASS_TYPE((klass) ,GST_TYPE_VIDEOFRAME_AUDIOLEVEL))
+#define GST_VIDEOFRAME_AUDIOLEVEL_GET_CLASS(obj)          (G_TYPE_INSTANCE_GET_CLASS((obj) ,GST_TYPE_VIDEOFRAME_AUDIOLEVEL,GstVideoFrameAudioLevelClass))
+typedef struct _GstVideoFrameAudioLevel GstVideoFrameAudioLevel;
+typedef struct _GstVideoFrameAudioLevelClass GstVideoFrameAudioLevelClass;
+
+struct _GstVideoFrameAudioLevel
+{
+  GstElement parent;
+
+  GstPad *asrcpad, *asinkpad, *vsrcpad, *vsinkpad;
+
+  GstAudioInfo ainfo;
+
+  gdouble *CS;                  /* normalized Cumulative Square */
+
+  GstSegment asegment, vsegment;
+
+  void (*process) (gpointer, guint, guint, gdouble *);
+
+  GQueue vtimeq;
+  GstAdapter *adapter;
+  GstClockTime first_time;
+  guint total_frames;
+  guint64 next_offset, alignment_threshold, discont_time, discont_wait;
+
+  gboolean video_eos_flag;
+  gboolean audio_flush_flag;
+  gboolean shutdown_flag;
+
+  GCond cond;
+  GMutex mutex;
+};
+
+struct _GstVideoFrameAudioLevelClass
+{
+  GstElementClass parent_class;
+};
+
+GType gst_videoframe_audiolevel_get_type (void);
+
+G_END_DECLS
+#endif /* __GST_VIDEOFRAME_AUDIOLEVEL_H__ */
diff --git a/tests/check/Makefile.am b/tests/check/Makefile.am
index c335f9a81b..6dd8dec5a4 100644
--- a/tests/check/Makefile.am
+++ b/tests/check/Makefile.am
@@ -247,6 +247,7 @@ check_PROGRAMS = \
 	$(check_curl) \
 	$(check_shm) \
 	elements/aiffparse \
+	elements/videoframe-audiolevel \
 	elements/autoconvert \
 	elements/autovideoconvert \
 	elements/audiointerleave \
@@ -373,6 +374,13 @@ libs_vp8parser_LDADD = \
 	$(GST_PLUGINS_BAD_LIBS) -lgstcodecparsers-@GST_API_VERSION@ \
 	$(GST_BASE_LIBS) $(GST_LIBS) $(LDADD)
 
+elements_videoframe_audiolevel_CFLAGS = \
+	$(GST_PLUGINS_BASE_CFLAGS) \
+	$(GST_BASE_CFLAGS) $(GST_CFLAGS) $(AM_CFLAGS)
+elements_videoframe_audiolevel_LDADD = \
+	$(GST_PLUGINS_BASE_LIBS) $(GST_BASE_LIBS) $(GST_LIBS) $(LDADD) \
+	-lgstaudio-@GST_API_VERSION@
+
 elements_faad_CFLAGS = \
 	$(GST_PLUGINS_BASE_CFLAGS) \
 	$(GST_BASE_CFLAGS) $(GST_CFLAGS) $(AM_CFLAGS)
diff --git a/tests/check/elements/videoframe-audiolevel.c b/tests/check/elements/videoframe-audiolevel.c
new file mode 100644
index 0000000000..9d57390e4a
--- /dev/null
+++ b/tests/check/elements/videoframe-audiolevel.c
@@ -0,0 +1,636 @@
+/* GStreamer unit test for videoframe-audiolevel
+ *
+ * Copyright (C) 2015 Vivia Nikolaidou <vivia@toolsonair.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* suppress warnings for deprecated API such as GValueArray
+ * with newer GLib versions (>= 2.31.0) */
+#define GLIB_DISABLE_DEPRECATION_WARNINGS
+
+#include <gst/check/gstcheck.h>
+#include <gst/audio/audio.h>
+
+static gboolean got_eos;
+static guint audio_buffer_count, video_buffer_count;
+static GstSegment current_audio_segment, current_video_segment;
+static guint num_msgs;
+static GQueue v_timestamp_q, msg_timestamp_q;
+
+static guint n_abuffers, n_vbuffers;
+static guint channels, fill_value;
+static gdouble expected_rms;
+static gboolean audiodelay, videodelay, per_channel, long_video;
+static gboolean early_video, late_video;
+static gboolean video_gaps, video_overlaps;
+static gboolean audio_nondiscont, audio_drift;
+
+static guint fill_value_per_channel[] = { 0, 1 };
+static gdouble expected_rms_per_channel[] = { 0, 0.0078125 };
+
+static void
+set_default_params (void)
+{
+  n_abuffers = 40;
+  n_vbuffers = 15;
+  channels = 2;
+  expected_rms = 0.0078125;
+  fill_value = 1;
+  audiodelay = FALSE;
+  videodelay = FALSE;
+  per_channel = FALSE;
+  long_video = FALSE;
+  video_gaps = FALSE;
+  video_overlaps = FALSE;
+  audio_nondiscont = FALSE;
+  audio_drift = FALSE;
+  early_video = FALSE;
+  late_video = FALSE;
+};
+
+static GstFlowReturn
+output_achain (GstPad * pad, GstObject * parent, GstBuffer * buffer)
+{
+  GstClockTime timestamp;
+  guint8 b;
+  gboolean audio_jitter = audio_nondiscont || audio_drift || early_video;
+
+  timestamp = GST_BUFFER_TIMESTAMP (buffer);
+  if (!audio_jitter)
+    fail_unless_equals_int64 (timestamp,
+        (audio_buffer_count % n_abuffers) * 1 * GST_SECOND);
+  timestamp =
+      gst_segment_to_stream_time (&current_audio_segment, GST_FORMAT_TIME,
+      timestamp);
+  if (!audio_jitter)
+    fail_unless_equals_int64 (timestamp,
+        (audio_buffer_count % n_abuffers) * 1 * GST_SECOND);
+
+  timestamp = GST_BUFFER_TIMESTAMP (buffer);
+  timestamp =
+      gst_segment_to_running_time (&current_audio_segment, GST_FORMAT_TIME,
+      timestamp);
+  if (!audio_jitter)
+    fail_unless_equals_int64 (timestamp, audio_buffer_count * 1 * GST_SECOND);
+
+  gst_buffer_extract (buffer, 0, &b, 1);
+
+  if (per_channel) {
+    fail_unless_equals_int (b, fill_value_per_channel[0]);
+  } else {
+    fail_unless_equals_int (b, fill_value);
+  }
+
+  audio_buffer_count++;
+  gst_buffer_unref (buffer);
+  return GST_FLOW_OK;
+}
+
+static gboolean
+output_aevent (GstPad * pad, GstObject * parent, GstEvent * event)
+{
+  switch (GST_EVENT_TYPE (event)) {
+    case GST_EVENT_FLUSH_STOP:
+      gst_segment_init (&current_audio_segment, GST_FORMAT_UNDEFINED);
+      break;
+    case GST_EVENT_SEGMENT:
+      gst_event_copy_segment (event, &current_audio_segment);
+      break;
+    case GST_EVENT_EOS:
+      got_eos = TRUE;
+      break;
+    default:
+      break;
+  }
+
+  gst_event_unref (event);
+  return TRUE;
+}
+
+static GstFlowReturn
+output_vchain (GstPad * pad, GstObject * parent, GstBuffer * buffer)
+{
+  GstClockTime timestamp;
+  guint8 b;
+  gboolean jitter = video_gaps || video_overlaps || late_video;
+
+  timestamp = GST_BUFFER_TIMESTAMP (buffer);
+  if (!jitter)
+    fail_unless_equals_int64 (timestamp,
+        (video_buffer_count % n_vbuffers) * 25 * GST_MSECOND);
+  timestamp =
+      gst_segment_to_stream_time (&current_video_segment, GST_FORMAT_TIME,
+      timestamp);
+  if (!jitter)
+    fail_unless_equals_int64 (timestamp,
+        (video_buffer_count % n_vbuffers) * 25 * GST_MSECOND);
+
+  timestamp = GST_BUFFER_TIMESTAMP (buffer);
+  timestamp =
+      gst_segment_to_running_time (&current_video_segment, GST_FORMAT_TIME,
+      timestamp);
+  if (!jitter)
+    fail_unless_equals_int64 (timestamp, video_buffer_count * 25 * GST_MSECOND);
+
+  gst_buffer_extract (buffer, 0, &b, 1);
+  if (!jitter)
+    fail_unless_equals_int (b, video_buffer_count % n_vbuffers);
+
+  video_buffer_count++;
+  gst_buffer_unref (buffer);
+  return GST_FLOW_OK;
+}
+
+static gboolean
+output_vevent (GstPad * pad, GstObject * parent, GstEvent * event)
+{
+  switch (GST_EVENT_TYPE (event)) {
+    case GST_EVENT_FLUSH_STOP:
+      gst_segment_init (&current_video_segment, GST_FORMAT_UNDEFINED);
+      break;
+    case GST_EVENT_SEGMENT:
+      gst_event_copy_segment (event, &current_video_segment);
+      break;
+    case GST_EVENT_EOS:
+      got_eos = TRUE;
+      break;
+    default:
+      break;
+  }
+
+  gst_event_unref (event);
+  return TRUE;
+}
+
+static gpointer
+push_abuffers (gpointer data)
+{
+  GstSegment segment;
+  GstPad *pad = data;
+  gint i, j, k;
+  GstClockTime timestamp = 0;
+  GstAudioInfo info;
+  GstCaps *caps;
+  guint buf_size = 1000;
+
+  if (audiodelay)
+    g_usleep (2000);
+
+  if (early_video)
+    timestamp = 50 * GST_MSECOND;
+
+  gst_pad_send_event (pad, gst_event_new_stream_start ("test"));
+
+  gst_audio_info_set_format (&info, GST_AUDIO_FORMAT_S8, buf_size, channels,
+      NULL);
+  caps = gst_audio_info_to_caps (&info);
+  gst_pad_send_event (pad, gst_event_new_caps (caps));
+  gst_caps_unref (caps);
+
+  gst_segment_init (&segment, GST_FORMAT_TIME);
+  gst_pad_send_event (pad, gst_event_new_segment (&segment));
+
+  for (i = 0; i < n_abuffers; i++) {
+    GstBuffer *buf = gst_buffer_new_and_alloc (channels * buf_size);
+
+    if (per_channel) {
+      GstMapInfo map;
+      guint8 *in_data;
+
+      gst_buffer_map (buf, &map, GST_MAP_WRITE);
+      in_data = map.data;
+
+      for (j = 0; j < buf_size; j++) {
+        for (k = 0; k < channels; k++) {
+          in_data[j * channels + k] = fill_value_per_channel[k];
+        }
+      }
+
+      gst_buffer_unmap (buf, &map);
+    } else {
+      gst_buffer_memset (buf, 0, fill_value, channels * buf_size);
+    }
+
+    GST_BUFFER_TIMESTAMP (buf) = timestamp;
+    timestamp += 1 * GST_SECOND;
+    if (audio_drift)
+      timestamp += 50 * GST_MSECOND;
+    else if (i == 4 && audio_nondiscont)
+      timestamp += 30 * GST_MSECOND;
+    GST_BUFFER_DURATION (buf) = timestamp - GST_BUFFER_TIMESTAMP (buf);
+
+    fail_unless (gst_pad_chain (pad, buf) == GST_FLOW_OK);
+  }
+  gst_pad_send_event (pad, gst_event_new_eos ());
+
+  return NULL;
+}
+
+static gpointer
+push_vbuffers (gpointer data)
+{
+  GstSegment segment;
+  GstPad *pad = data;
+  gint i;
+  GstClockTime timestamp = 0;
+
+  if (videodelay)
+    g_usleep (2000);
+
+  if (late_video)
+    timestamp = 50 * GST_MSECOND;
+
+  gst_pad_send_event (pad, gst_event_new_stream_start ("test"));
+  gst_segment_init (&segment, GST_FORMAT_TIME);
+  gst_pad_send_event (pad, gst_event_new_segment (&segment));
+
+  for (i = 0; i < n_vbuffers; i++) {
+    GstBuffer *buf = gst_buffer_new_and_alloc (1000);
+    GstClockTime *rtime = g_new (GstClockTime, 1);
+
+    gst_buffer_memset (buf, 0, i, 1);
+
+    GST_BUFFER_TIMESTAMP (buf) = timestamp;
+    timestamp += 25 * GST_MSECOND;
+    GST_BUFFER_DURATION (buf) = timestamp - GST_BUFFER_TIMESTAMP (buf);
+    *rtime = gst_segment_to_running_time (&segment, GST_FORMAT_TIME, timestamp);
+    g_queue_push_tail (&v_timestamp_q, rtime);
+
+    if (i == 4) {
+      if (video_gaps)
+        timestamp += 10 * GST_MSECOND;
+      else if (video_overlaps)
+        timestamp -= 10 * GST_MSECOND;
+    }
+
+    fail_unless (gst_pad_chain (pad, buf) == GST_FLOW_OK);
+  }
+  gst_pad_send_event (pad, gst_event_new_eos ());
+
+  return NULL;
+}
+
+static GstBusSyncReply
+on_message (GstBus * bus, GstMessage * message, gpointer user_data)
+{
+  const GstStructure *s = gst_message_get_structure (message);
+  const gchar *name = gst_structure_get_name (s);
+  GValueArray *rms_arr;
+  const GValue *array_val;
+  const GValue *value;
+  gdouble rms;
+  gint channels2;
+  guint i;
+  GstClockTime *rtime = g_new (GstClockTime, 1);
+
+  if (message->type != GST_MESSAGE_ELEMENT
+      || strcmp (name, "videoframe-audiolevel") != 0)
+    goto done;
+
+  num_msgs++;
+  if (!gst_structure_get_clock_time (s, "running-time", rtime))
+    g_warning ("Could not parse running time");
+  else
+    g_queue_push_tail (&msg_timestamp_q, rtime);
+
+  /* the values are packed into GValueArrays with the value per channel */
+  array_val = gst_structure_get_value (s, "rms");
+  rms_arr = (GValueArray *) g_value_get_boxed (array_val);
+  channels2 = rms_arr->n_values;
+  fail_unless_equals_int (channels2, channels);
+
+  for (i = 0; i < channels; ++i) {
+    value = g_value_array_get_nth (rms_arr, i);
+    rms = g_value_get_double (value);
+    if (per_channel) {
+      fail_unless_equals_float (rms, expected_rms_per_channel[i]);
+    } else if (early_video && *rtime <= 50 * GST_MSECOND) {
+      fail_unless_equals_float (rms, 0);
+    } else {
+      fail_unless_equals_float (rms, expected_rms);
+    }
+  }
+
+done:
+  return GST_BUS_PASS;
+}
+
+static void
+test_videoframe_audiolevel_generic (void)
+{
+  GstElement *alevel;
+  GstPad *asink, *vsink, *asrc, *vsrc, *aoutput_sink, *voutput_sink;
+  GThread *athread, *vthread;
+  GstBus *bus;
+  guint i;
+
+  got_eos = FALSE;
+  audio_buffer_count = 0;
+  video_buffer_count = 0;
+  num_msgs = 0;
+
+  g_queue_init (&v_timestamp_q);
+  g_queue_init (&msg_timestamp_q);
+
+  alevel = gst_element_factory_make ("videoframe-audiolevel", NULL);
+  fail_unless (alevel != NULL);
+
+  bus = gst_bus_new ();
+  gst_element_set_bus (alevel, bus);
+  gst_bus_set_sync_handler (bus, on_message, NULL, NULL);
+
+  asink = gst_element_get_static_pad (alevel, "asink");
+  fail_unless (asink != NULL);
+
+  vsink = gst_element_get_static_pad (alevel, "vsink");
+  fail_unless (vsink != NULL);
+
+  asrc = gst_element_get_static_pad (alevel, "asrc");
+  aoutput_sink = gst_pad_new ("sink", GST_PAD_SINK);
+  fail_unless (aoutput_sink != NULL);
+  fail_unless (gst_pad_link (asrc, aoutput_sink) == GST_PAD_LINK_OK);
+
+  vsrc = gst_element_get_static_pad (alevel, "vsrc");
+  voutput_sink = gst_pad_new ("sink", GST_PAD_SINK);
+  fail_unless (voutput_sink != NULL);
+  fail_unless (gst_pad_link (vsrc, voutput_sink) == GST_PAD_LINK_OK);
+
+  gst_pad_set_chain_function (aoutput_sink, output_achain);
+  gst_pad_set_event_function (aoutput_sink, output_aevent);
+
+  gst_pad_set_chain_function (voutput_sink, output_vchain);
+  gst_pad_set_event_function (voutput_sink, output_vevent);
+
+  gst_pad_set_active (aoutput_sink, TRUE);
+  gst_pad_set_active (voutput_sink, TRUE);
+  fail_unless (gst_element_set_state (alevel,
+          GST_STATE_PLAYING) == GST_STATE_CHANGE_SUCCESS);
+
+  athread = g_thread_new ("athread", (GThreadFunc) push_abuffers, asink);
+  vthread = g_thread_new ("vthread", (GThreadFunc) push_vbuffers, vsink);
+
+  g_thread_join (vthread);
+  g_thread_join (athread);
+
+  fail_unless (got_eos);
+  fail_unless_equals_int (audio_buffer_count, n_abuffers);
+  fail_unless_equals_int (video_buffer_count, n_vbuffers);
+  if (!long_video)
+    fail_unless_equals_int (num_msgs, n_vbuffers);
+
+  fail_unless_equals_int (g_queue_get_length (&v_timestamp_q), n_vbuffers);
+  /* num_msgs is equal to n_vbuffers except in the case of long_video */
+  fail_unless_equals_int (g_queue_get_length (&msg_timestamp_q), num_msgs);
+
+  for (i = 0; i < g_queue_get_length (&msg_timestamp_q); i++) {
+    GstClockTime *vt = g_queue_pop_head (&v_timestamp_q);
+    GstClockTime *mt = g_queue_pop_head (&msg_timestamp_q);
+    fail_unless (vt != NULL);
+    fail_unless (mt != NULL);
+    if (!video_gaps && !video_overlaps && !early_video)
+      fail_unless_equals_uint64 (*vt, *mt);
+    g_free (vt);
+    g_free (mt);
+  }
+
+  /* teardown */
+  gst_element_set_state (alevel, GST_STATE_NULL);
+  gst_bus_set_flushing (bus, TRUE);
+  gst_object_unref (bus);
+  g_queue_foreach (&v_timestamp_q, (GFunc) g_free, NULL);
+  g_queue_foreach (&msg_timestamp_q, (GFunc) g_free, NULL);
+  g_queue_clear (&v_timestamp_q);
+  g_queue_clear (&msg_timestamp_q);
+  gst_pad_unlink (asrc, aoutput_sink);
+  gst_object_unref (asrc);
+  gst_pad_unlink (vsrc, voutput_sink);
+  gst_object_unref (vsrc);
+  gst_object_unref (asink);
+  gst_object_unref (vsink);
+  gst_pad_set_active (aoutput_sink, FALSE);
+  gst_object_unref (aoutput_sink);
+  gst_pad_set_active (voutput_sink, FALSE);
+  gst_object_unref (voutput_sink);
+  gst_object_unref (alevel);
+}
+
+GST_START_TEST (test_videoframe_audiolevel_16chan_1)
+{
+  set_default_params ();
+  channels = 16;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_8chan_1)
+{
+  set_default_params ();
+  channels = 8;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_2chan_1)
+{
+  set_default_params ();
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_1chan_1)
+{
+  set_default_params ();
+  channels = 1;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_16chan_0)
+{
+  set_default_params ();
+  channels = 16;
+  expected_rms = 0;
+  fill_value = 0;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_8chan_0)
+{
+  set_default_params ();
+  channels = 8;
+  expected_rms = 0;
+  fill_value = 0;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_2chan_0)
+{
+  set_default_params ();
+  channels = 2;
+  expected_rms = 0;
+  fill_value = 0;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_1chan_0)
+{
+  set_default_params ();
+  channels = 1;
+  expected_rms = 0;
+  fill_value = 0;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_adelay)
+{
+  set_default_params ();
+  audiodelay = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_vdelay)
+{
+  set_default_params ();
+  videodelay = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_per_channel)
+{
+  set_default_params ();
+  per_channel = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_long_video)
+{
+  set_default_params ();
+  n_abuffers = 6;
+  n_vbuffers = 255;
+  long_video = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_video_gaps)
+{
+  set_default_params ();
+  video_gaps = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_video_overlaps)
+{
+  set_default_params ();
+  video_overlaps = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_audio_nondiscont)
+{
+  set_default_params ();
+  audio_nondiscont = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_audio_drift)
+{
+  set_default_params ();
+  audio_drift = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+GST_START_TEST (test_videoframe_audiolevel_early_video)
+{
+  set_default_params ();
+  early_video = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_videoframe_audiolevel_late_video)
+{
+  set_default_params ();
+  late_video = TRUE;
+  test_videoframe_audiolevel_generic ();
+}
+
+GST_END_TEST;
+
+
+static Suite *
+videoframe_audiolevel_suite (void)
+{
+  Suite *s = suite_create ("videoframe-audiolevel");
+  TCase *tc_chain;
+
+  tc_chain = tcase_create ("videoframe-audiolevel");
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_16chan_1);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_8chan_1);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_2chan_1);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_1chan_1);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_16chan_0);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_8chan_0);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_2chan_0);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_1chan_0);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_adelay);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_vdelay);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_per_channel);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_long_video);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_video_gaps);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_video_overlaps);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_audio_nondiscont);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_audio_drift);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_early_video);
+  tcase_add_test (tc_chain, test_videoframe_audiolevel_late_video);
+  suite_add_tcase (s, tc_chain);
+
+  return s;
+}
+
+GST_CHECK_MAIN (videoframe_audiolevel);