webrtcdsp: Update code for webrtc-audio-processing-1

Updated API usage appropriately, and now we have a versioned package to track breaking vs. non-breaking updates. Deprecates a number of properties (and we have to plug in our own values for related enums which are now gone): * echo-suprression-level * experimental-agc * extended-filter * delay-agnostic * voice-detection-frame-size-ms * voice-detection-likelihood Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/2943>
2025-01-30 19:18:31 +00:00 · 2020-12-02 18:31:44 -05:00 · 2020-12-02 18:31:44 -05:00 · d5755744c3
commit d5755744c3
parent b3d27da397
4 changed files with 164 additions and 207 deletions
--- a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcdsp.cpp
+++ b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcdsp.cpp
@ -71,9 +71,7 @@
 #include "gstwebrtcdsp.h"
 #include "gstwebrtcechoprobe.h"

-#include <webrtc/modules/audio_processing/include/audio_processing.h>
-#include <webrtc/modules/interface/module_common_types.h>
-#include <webrtc/system_wrappers/include/trace.h>
+#include <modules/audio_processing/include/audio_processing.h>

 GST_DEBUG_CATEGORY (webrtc_dsp_debug);
 #define GST_CAT_DEFAULT (webrtc_dsp_debug)
@ -82,10 +80,9 @@ GST_DEBUG_CATEGORY (webrtc_dsp_debug);
 #define DEFAULT_COMPRESSION_GAIN_DB 9
 #define DEFAULT_STARTUP_MIN_VOLUME 12
 #define DEFAULT_LIMITER TRUE
-#define DEFAULT_GAIN_CONTROL_MODE webrtc::GainControl::kAdaptiveDigital
+#define DEFAULT_GAIN_CONTROL_MODE webrtc::AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital
 #define DEFAULT_VOICE_DETECTION FALSE
 #define DEFAULT_VOICE_DETECTION_FRAME_SIZE_MS 10
-#define DEFAULT_VOICE_DETECTION_LIKELIHOOD webrtc::VoiceDetection::kLowLikelihood

 static GstStaticPadTemplate gst_webrtc_dsp_sink_template =
 GST_STATIC_PAD_TEMPLATE ("sink",
@ -119,7 +116,7 @@ GST_STATIC_PAD_TEMPLATE ("src",
        "channels = (int) [1, MAX]")
    );

-typedef webrtc::EchoCancellation::SuppressionLevel GstWebrtcEchoSuppressionLevel;
+typedef int GstWebrtcEchoSuppressionLevel;
 #define GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL \
    (gst_webrtc_echo_suppression_level_get_type ())
 static GType
@ -127,10 +124,9 @@ gst_webrtc_echo_suppression_level_get_type (void)
 {
  static GType suppression_level_type = 0;
  static const GEnumValue level_types[] = {
-    {webrtc::EchoCancellation::kLowSuppression, "Low Suppression", "low"},
-    {webrtc::EchoCancellation::kModerateSuppression,
-      "Moderate Suppression", "moderate"},
-    {webrtc::EchoCancellation::kHighSuppression, "high Suppression", "high"},
+    {1, "Low Suppression", "low"},
+    {2, "Moderate Suppression", "moderate"},
+    {3, "high Suppression", "high"},
    {0, NULL, NULL}
  };

@ -141,7 +137,7 @@ gst_webrtc_echo_suppression_level_get_type (void)
  return suppression_level_type;
 }

-typedef webrtc::NoiseSuppression::Level GstWebrtcNoiseSuppressionLevel;
+typedef webrtc::AudioProcessing::Config::NoiseSuppression::Level GstWebrtcNoiseSuppressionLevel;
 #define GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL \
    (gst_webrtc_noise_suppression_level_get_type ())
 static GType
@ -149,10 +145,10 @@ gst_webrtc_noise_suppression_level_get_type (void)
 {
  static GType suppression_level_type = 0;
  static const GEnumValue level_types[] = {
-    {webrtc::NoiseSuppression::kLow, "Low Suppression", "low"},
-    {webrtc::NoiseSuppression::kModerate, "Moderate Suppression", "moderate"},
-    {webrtc::NoiseSuppression::kHigh, "High Suppression", "high"},
-    {webrtc::NoiseSuppression::kVeryHigh, "Very High Suppression",
+    {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kLow, "Low Suppression", "low"},
+    {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kModerate, "Moderate Suppression", "moderate"},
+    {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kHigh, "High Suppression", "high"},
+    {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh, "Very High Suppression",
      "very-high"},
    {0, NULL, NULL}
  };
@ -164,7 +160,7 @@ gst_webrtc_noise_suppression_level_get_type (void)
  return suppression_level_type;
 }

-typedef webrtc::GainControl::Mode GstWebrtcGainControlMode;
+typedef webrtc::AudioProcessing::Config::GainController1::Mode GstWebrtcGainControlMode;
 #define GST_TYPE_WEBRTC_GAIN_CONTROL_MODE \
    (gst_webrtc_gain_control_mode_get_type ())
 static GType
@ -172,8 +168,9 @@ gst_webrtc_gain_control_mode_get_type (void)
 {
  static GType gain_control_mode_type = 0;
  static const GEnumValue mode_types[] = {
-    {webrtc::GainControl::kAdaptiveDigital, "Adaptive Digital", "adaptive-digital"},
-    {webrtc::GainControl::kFixedDigital, "Fixed Digital", "fixed-digital"},
+    {webrtc::AudioProcessing::Config::GainController1::kAdaptiveDigital, "Adaptive Digital", "adaptive-digital"},
+    {webrtc::AudioProcessing::Config::GainController1::kFixedDigital, "Fixed Digital", "fixed-digital"},
+    {webrtc::AudioProcessing::Config::GainController1::kAdaptiveAnalog, "Adaptive Analog", "adaptive-analog"},
    {0, NULL, NULL}
  };

@ -184,7 +181,7 @@ gst_webrtc_gain_control_mode_get_type (void)
  return gain_control_mode_type;
 }

-typedef webrtc::VoiceDetection::Likelihood GstWebrtcVoiceDetectionLikelihood;
+typedef int GstWebrtcVoiceDetectionLikelihood;
 #define GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD \
    (gst_webrtc_voice_detection_likelihood_get_type ())
 static GType
@ -192,10 +189,10 @@ gst_webrtc_voice_detection_likelihood_get_type (void)
 {
  static GType likelihood_type = 0;
  static const GEnumValue likelihood_types[] = {
-    {webrtc::VoiceDetection::kVeryLowLikelihood, "Very Low Likelihood", "very-low"},
-    {webrtc::VoiceDetection::kLowLikelihood, "Low Likelihood", "low"},
-    {webrtc::VoiceDetection::kModerateLikelihood, "Moderate Likelihood", "moderate"},
-    {webrtc::VoiceDetection::kHighLikelihood, "High Likelihood", "high"},
+    {1, "Very Low Likelihood", "very-low"},
+    {2, "Low Likelihood", "low"},
+    {3, "Moderate Likelihood", "moderate"},
+    {4, "High Likelihood", "high"},
    {0, NULL, NULL}
  };

@ -227,6 +224,7 @@ enum
  PROP_VOICE_DETECTION,
  PROP_VOICE_DETECTION_FRAME_SIZE_MS,
  PROP_VOICE_DETECTION_LIKELIHOOD,
+  PROP_EXTRA_DELAY_MS,
 };

 /**
@ -248,7 +246,7 @@ struct _GstWebrtcDsp
  /* Protected by the stream lock */
  GstAdapter *adapter;
  GstPlanarAudioAdapter *padapter;
-  webrtc::AudioProcessing * apm;
+  webrtc::AudioProcessing *apm;

  /* Protected by the object lock */
  gchar *probe_name;
@ -257,21 +255,15 @@ struct _GstWebrtcDsp
  /* Properties */
  gboolean high_pass_filter;
  gboolean echo_cancel;
-  webrtc::EchoCancellation::SuppressionLevel echo_suppression_level;
  gboolean noise_suppression;
-  webrtc::NoiseSuppression::Level noise_suppression_level;
+  webrtc::AudioProcessing::Config::NoiseSuppression::Level noise_suppression_level;
  gboolean gain_control;
-  gboolean experimental_agc;
-  gboolean extended_filter;
-  gboolean delay_agnostic;
  gint target_level_dbfs;
  gint compression_gain_db;
  gint startup_min_volume;
  gboolean limiter;
-  webrtc::GainControl::Mode gain_control_mode;
+  webrtc::AudioProcessing::Config::GainController1::Mode gain_control_mode;
  gboolean voice_detection;
-  gint voice_detection_frame_size_ms;
-  webrtc::VoiceDetection::Likelihood voice_detection_likelihood;
 };

 G_DEFINE_TYPE_WITH_CODE (GstWebrtcDsp, gst_webrtc_dsp, GST_TYPE_AUDIO_FILTER,
@ -376,9 +368,9 @@ gst_webrtc_dsp_analyze_reverse_stream (GstWebrtcDsp * self,
    GstClockTime rec_time)
 {
  GstWebrtcEchoProbe *probe = NULL;
-  webrtc::AudioProcessing * apm;
-  webrtc::AudioFrame frame;
+  webrtc::AudioProcessing *apm;
  GstBuffer *buf = NULL;
+  GstAudioBuffer abuf;
  GstFlowReturn ret = GST_FLOW_OK;
  gint err, delay;

@ -391,48 +383,44 @@ gst_webrtc_dsp_analyze_reverse_stream (GstWebrtcDsp * self,
  if (!probe)
    return GST_FLOW_OK;

+  webrtc::StreamConfig config (probe->info.rate, probe->info.channels,
+      false);
  apm = self->apm;

-  if (self->delay_agnostic)
-    rec_time = GST_CLOCK_TIME_NONE;
-
-again:
-  delay = gst_webrtc_echo_probe_read (probe, rec_time, (gpointer) &frame, &buf);
+  delay = gst_webrtc_echo_probe_read (probe, rec_time, &buf);
  apm->set_stream_delay_ms (delay);

+  g_return_val_if_fail (buf != NULL, GST_FLOW_ERROR);
+
  if (delay < 0)
    goto done;

-  if (frame.sample_rate_hz_ != self->info.rate) {
+  if (probe->info.rate != self->info.rate) {
    GST_ELEMENT_ERROR (self, STREAM, FORMAT,
        ("Echo Probe has rate %i , while the DSP is running at rate %i,"
         " use a caps filter to ensure those are the same.",
-         frame.sample_rate_hz_, self->info.rate), (NULL));
+         probe->info.rate, self->info.rate), (NULL));
    ret = GST_FLOW_ERROR;
    goto done;
  }

-  if (buf) {
-    webrtc::StreamConfig config (frame.sample_rate_hz_, frame.num_channels_,
-        false);
-    GstAudioBuffer abuf;
-    float * const * data;
+  gst_audio_buffer_map (&abuf, &self->info, buf, GST_MAP_READWRITE);
+
+  if (probe->interleaved) {
+    int16_t * const data = (int16_t * const) abuf.planes[0];

-    gst_audio_buffer_map (&abuf, &self->info, buf, GST_MAP_READWRITE);
-    data = (float * const *) abuf.planes;
    if ((err = apm->ProcessReverseStream (data, config, config, data)) < 0)
      GST_WARNING_OBJECT (self, "Reverse stream analyses failed: %s.",
          webrtc_error_to_string (err));
-    gst_audio_buffer_unmap (&abuf);
-    gst_buffer_replace (&buf, NULL);
  } else {
-    if ((err = apm->AnalyzeReverseStream (&frame)) < 0)
+    float * const * data = (float * const *) abuf.planes;
+
+    if ((err = apm->ProcessReverseStream (data, config, config, data)) < 0)
      GST_WARNING_OBJECT (self, "Reverse stream analyses failed: %s.",
          webrtc_error_to_string (err));
  }

-  if (self->delay_agnostic)
-      goto again;
+  gst_audio_buffer_unmap (&abuf);

 done:
  gst_object_unref (probe);
@ -443,16 +431,14 @@ done:

 static void
 gst_webrtc_vad_post_activity (GstWebrtcDsp *self, GstBuffer *buffer,
-    gboolean stream_has_voice)
+    gboolean stream_has_voice, guint8 level)
 {
  GstClockTime timestamp = GST_BUFFER_PTS (buffer);
  GstBaseTransform *trans = GST_BASE_TRANSFORM_CAST (self);
  GstStructure *s;
  GstClockTime stream_time;
  GstAudioLevelMeta *meta;
-  guint8 level;

-  level = self->apm->level_estimator ()->RMS ();
  meta = gst_buffer_get_audio_level_meta (buffer);
  if (meta) {
    meta->voice_activity = stream_has_voice;
@ -481,6 +467,7 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self,
 {
  GstAudioBuffer abuf;
  webrtc::AudioProcessing * apm = self->apm;
+  webrtc::StreamConfig config (self->info.rate, self->info.channels, false);
  gint err;

  if (!gst_audio_buffer_map (&abuf, &self->info, buffer,
@ -490,19 +477,10 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self,
  }

  if (self->interleaved) {
-    webrtc::AudioFrame frame;
-    frame.num_channels_ = self->info.channels;
-    frame.sample_rate_hz_ = self->info.rate;
-    frame.samples_per_channel_ = self->period_samples;
-
-    memcpy (frame.data_, abuf.planes[0], self->period_size);
-    err = apm->ProcessStream (&frame);
-    if (err >= 0)
-      memcpy (abuf.planes[0], frame.data_, self->period_size);
+    int16_t * const data = (int16_t * const) abuf.planes[0];
+    err = apm->ProcessStream (data, config, config, data);
  } else {
    float * const * data = (float * const *) abuf.planes;
-    webrtc::StreamConfig config (self->info.rate, self->info.channels, false);
-
    err = apm->ProcessStream (data, config, config, data);
  }

@ -511,10 +489,13 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self,
        webrtc_error_to_string (err));
  } else {
    if (self->voice_detection) {
-      gboolean stream_has_voice = apm->voice_detection ()->stream_has_voice ();
+      webrtc::AudioProcessingStats stats = apm->GetStatistics ();
+      gboolean stream_has_voice = stats.voice_detected && *stats.voice_detected;
+      // The meta takes the value as -dbov, so we negate
+      guint8 level = stats.output_rms_dbfs ? (guint8) -(*stats.output_rms_dbfs) : 127;

      if (stream_has_voice != self->stream_has_voice)
-        gst_webrtc_vad_post_activity (self, buffer, stream_has_voice);
+        gst_webrtc_vad_post_activity (self, buffer, stream_has_voice, level);

      self->stream_has_voice = stream_has_voice;
    }
@ -583,21 +564,9 @@ static gboolean
 gst_webrtc_dsp_start (GstBaseTransform * btrans)
 {
  GstWebrtcDsp *self = GST_WEBRTC_DSP (btrans);
-  webrtc::Config config;

  GST_OBJECT_LOCK (self);

-  config.Set < webrtc::ExtendedFilter >
-      (new webrtc::ExtendedFilter (self->extended_filter));
-  config.Set < webrtc::ExperimentalAgc >
-      (new webrtc::ExperimentalAgc (self->experimental_agc, self->startup_min_volume));
-  config.Set < webrtc::DelayAgnostic >
-      (new webrtc::DelayAgnostic (self->delay_agnostic));
-
-  /* TODO Intelligibility enhancer, Beamforming, etc. */
-
-  self->apm = webrtc::AudioProcessing::Create (config);
-
  if (self->echo_cancel) {
    self->probe = gst_webrtc_acquire_echo_probe (self->probe_name);

@ -618,10 +587,8 @@ static gboolean
 gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
 {
  GstWebrtcDsp *self = GST_WEBRTC_DSP (filter);
-  webrtc::AudioProcessing * apm;
-  webrtc::ProcessingConfig pconfig;
+  webrtc::AudioProcessing::Config config;
  GstAudioInfo probe_info = *info;
-  gint err = 0;

  GST_LOG_OBJECT (self, "setting format to %s with %i Hz and %i channels",
      info->finfo->description, info->rate, info->channels);
@ -633,7 +600,7 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)

  self->info = *info;
  self->interleaved = (info->layout == GST_AUDIO_LAYOUT_INTERLEAVED);
-  apm = self->apm;
+  self->apm = webrtc::AudioProcessingBuilder().Create();

  if (!self->interleaved)
    gst_planar_audio_adapter_configure (self->padapter, info);
@ -642,8 +609,7 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
  self->period_samples = info->rate / 100;
  self->period_size = self->period_samples * info->bpf;

-  if (self->interleaved &&
-      (webrtc::AudioFrame::kMaxDataSizeSamples * 2) < self->period_size)
+  if (self->interleaved && (self->period_size > MAX_DATA_SIZE_SAMPLES * 2))
    goto period_too_big;

  if (self->probe) {
@ -658,40 +624,31 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
    GST_WEBRTC_ECHO_PROBE_UNLOCK (self->probe);
  }

-  /* input stream */
-  pconfig.streams[webrtc::ProcessingConfig::kInputStream] =
-      webrtc::StreamConfig (info->rate, info->channels, false);
-  /* output stream */
-  pconfig.streams[webrtc::ProcessingConfig::kOutputStream] =
-      webrtc::StreamConfig (info->rate, info->channels, false);
-  /* reverse input stream */
-  pconfig.streams[webrtc::ProcessingConfig::kReverseInputStream] =
-      webrtc::StreamConfig (probe_info.rate, probe_info.channels, false);
-  /* reverse output stream */
-  pconfig.streams[webrtc::ProcessingConfig::kReverseOutputStream] =
-      webrtc::StreamConfig (probe_info.rate, probe_info.channels, false);
-
-  if ((err = apm->Initialize (pconfig)) < 0)
-    goto initialize_failed;
-
  /* Setup Filters */
+  // TODO: expose pre_amplifier
+
  if (self->high_pass_filter) {
    GST_DEBUG_OBJECT (self, "Enabling High Pass filter");
-    apm->high_pass_filter ()->Enable (true);
+    config.high_pass_filter.enabled = true;
  }

  if (self->echo_cancel) {
    GST_DEBUG_OBJECT (self, "Enabling Echo Cancellation");
-    apm->echo_cancellation ()->enable_drift_compensation (false);
-    apm->echo_cancellation ()
-        ->set_suppression_level (self->echo_suppression_level);
-    apm->echo_cancellation ()->Enable (true);
+    config.echo_canceller.enabled = true;
  }

  if (self->noise_suppression) {
    GST_DEBUG_OBJECT (self, "Enabling Noise Suppression");
-    apm->noise_suppression ()->set_level (self->noise_suppression_level);
-    apm->noise_suppression ()->Enable (true);
+    config.noise_suppression.enabled = true;
+    config.noise_suppression.level = self->noise_suppression_level;
+  }
+
+  // TODO: expose transient suppression
+
+  if (self->voice_detection) {
+    GST_DEBUG_OBJECT (self, "Enabling Voice Activity Detection");
+    config.voice_detection.enabled = true;
+    self->stream_has_voice = FALSE;
  }

  if (self->gain_control) {
@ -706,30 +663,17 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)

    g_type_class_unref (mode_class);

-    apm->gain_control ()->set_mode (self->gain_control_mode);
-    apm->gain_control ()->set_target_level_dbfs (self->target_level_dbfs);
-    apm->gain_control ()->set_compression_gain_db (self->compression_gain_db);
-    apm->gain_control ()->enable_limiter (self->limiter);
-    apm->gain_control ()->Enable (true);
+    config.gain_controller1.enabled = true;
+    config.gain_controller1.target_level_dbfs = self->target_level_dbfs;
+    config.gain_controller1.compression_gain_db = self->compression_gain_db;
+    config.gain_controller1.enable_limiter = self->limiter;
+    config.level_estimation.enabled = true;
  }

-  if (self->voice_detection) {
-    GEnumClass *likelihood_class = (GEnumClass *)
-        g_type_class_ref (GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD);
-    GST_DEBUG_OBJECT (self, "Enabling Voice Activity Detection, frame size "
-      "%d milliseconds, likelihood: %s", self->voice_detection_frame_size_ms,
-      g_enum_get_value (likelihood_class,
-          self->voice_detection_likelihood)->value_name);
-    g_type_class_unref (likelihood_class);
+  // TODO: expose gain controller 2
+  // TODO: expose residual echo detector

-    self->stream_has_voice = FALSE;
-
-    apm->voice_detection ()->Enable (true);
-    apm->voice_detection ()->set_likelihood (self->voice_detection_likelihood);
-    apm->voice_detection ()->set_frame_size_ms (
-        self->voice_detection_frame_size_ms);
-    apm->level_estimator ()->Enable (true);
-  }
+  self->apm->ApplyConfig (config);

  GST_OBJECT_UNLOCK (self);

@ -738,9 +682,9 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
 period_too_big:
  GST_OBJECT_UNLOCK (self);
  GST_WARNING_OBJECT (self, "webrtcdsp format produce too big period "
-      "(maximum is %" G_GSIZE_FORMAT " samples and we have %u samples), "
+      "(maximum is %d samples and we have %u samples), "
      "reduce the number of channels or the rate.",
-      webrtc::AudioFrame::kMaxDataSizeSamples, self->period_size / 2);
+      MAX_DATA_SIZE_SAMPLES, self->period_size / 2);
  return FALSE;

 probe_has_wrong_rate:
@ -751,14 +695,6 @@ probe_has_wrong_rate:
          " use a caps filter to ensure those are the same.",
          probe_info.rate, info->rate), (NULL));
  return FALSE;
-
-initialize_failed:
-  GST_OBJECT_UNLOCK (self);
-  GST_ELEMENT_ERROR (self, LIBRARY, INIT,
-      ("Failed to initialize WebRTC Audio Processing library"),
-      ("webrtc::AudioProcessing::Initialize() failed: %s",
-          webrtc_error_to_string (err)));
-  return FALSE;
 }

 static gboolean
@ -803,8 +739,6 @@ gst_webrtc_dsp_set_property (GObject * object,
      self->echo_cancel = g_value_get_boolean (value);
      break;
    case PROP_ECHO_SUPPRESSION_LEVEL:
-      self->echo_suppression_level =
-          (GstWebrtcEchoSuppressionLevel) g_value_get_enum (value);
      break;
    case PROP_NOISE_SUPPRESSION:
      self->noise_suppression = g_value_get_boolean (value);
@ -817,13 +751,10 @@ gst_webrtc_dsp_set_property (GObject * object,
      self->gain_control = g_value_get_boolean (value);
      break;
    case PROP_EXPERIMENTAL_AGC:
-      self->experimental_agc = g_value_get_boolean (value);
      break;
    case PROP_EXTENDED_FILTER:
-      self->extended_filter = g_value_get_boolean (value);
      break;
    case PROP_DELAY_AGNOSTIC:
-      self->delay_agnostic = g_value_get_boolean (value);
      break;
    case PROP_TARGET_LEVEL_DBFS:
      self->target_level_dbfs = g_value_get_int (value);
@ -845,11 +776,8 @@ gst_webrtc_dsp_set_property (GObject * object,
      self->voice_detection = g_value_get_boolean (value);
      break;
    case PROP_VOICE_DETECTION_FRAME_SIZE_MS:
-      self->voice_detection_frame_size_ms = g_value_get_int (value);
      break;
    case PROP_VOICE_DETECTION_LIKELIHOOD:
-      self->voice_detection_likelihood =
-          (GstWebrtcVoiceDetectionLikelihood) g_value_get_enum (value);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
@ -876,7 +804,7 @@ gst_webrtc_dsp_get_property (GObject * object,
      g_value_set_boolean (value, self->echo_cancel);
      break;
    case PROP_ECHO_SUPPRESSION_LEVEL:
-      g_value_set_enum (value, self->echo_suppression_level);
+      g_value_set_enum (value, (GstWebrtcEchoSuppressionLevel) 2);
      break;
    case PROP_NOISE_SUPPRESSION:
      g_value_set_boolean (value, self->noise_suppression);
@ -888,13 +816,13 @@ gst_webrtc_dsp_get_property (GObject * object,
      g_value_set_boolean (value, self->gain_control);
      break;
    case PROP_EXPERIMENTAL_AGC:
-      g_value_set_boolean (value, self->experimental_agc);
+      g_value_set_boolean (value, false);
      break;
    case PROP_EXTENDED_FILTER:
-      g_value_set_boolean (value, self->extended_filter);
+      g_value_set_boolean (value, false);
      break;
    case PROP_DELAY_AGNOSTIC:
-      g_value_set_boolean (value, self->delay_agnostic);
+      g_value_set_boolean (value, false);
      break;
    case PROP_TARGET_LEVEL_DBFS:
      g_value_set_int (value, self->target_level_dbfs);
@ -915,10 +843,10 @@ gst_webrtc_dsp_get_property (GObject * object,
      g_value_set_boolean (value, self->voice_detection);
      break;
    case PROP_VOICE_DETECTION_FRAME_SIZE_MS:
-      g_value_set_int (value, self->voice_detection_frame_size_ms);
+      g_value_set_int (value, 0);
      break;
    case PROP_VOICE_DETECTION_LIKELIHOOD:
-      g_value_set_enum (value, self->voice_detection_likelihood);
+      g_value_set_enum (value, 2);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
@ -1005,13 +933,13 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass)

  g_object_class_install_property (gobject_class,
      PROP_ECHO_SUPPRESSION_LEVEL,
-      g_param_spec_enum ("echo-suppression-level", "Echo Suppression Level",
+      g_param_spec_enum ("echo-suppression-level",
+          "Echo Suppression Level (does nothing)",
          "Controls the aggressiveness of the suppressor. A higher level "
          "trades off double-talk performance for increased echo suppression.",
-          GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL,
-          webrtc::EchoCancellation::kModerateSuppression,
+          GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL, 2,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
-              G_PARAM_CONSTRUCT)));
+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));

  g_object_class_install_property (gobject_class,
      PROP_NOISE_SUPPRESSION,
@ -1026,7 +954,7 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass)
          "Controls the aggressiveness of the suppression. Increasing the "
          "level will reduce the noise level at the expense of a higher "
          "speech distortion.", GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL,
-          webrtc::EchoCancellation::kModerateSuppression,
+          webrtc::AudioProcessing::Config::NoiseSuppression::Level::kModerate,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
              G_PARAM_CONSTRUCT)));

@ -1039,24 +967,26 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass)

  g_object_class_install_property (gobject_class,
      PROP_EXPERIMENTAL_AGC,
-      g_param_spec_boolean ("experimental-agc", "Experimental AGC",
+      g_param_spec_boolean ("experimental-agc",
+          "Experimental AGC (does nothing)",
          "Enable or disable experimental automatic gain control.",
          FALSE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
-              G_PARAM_CONSTRUCT)));
+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));

  g_object_class_install_property (gobject_class,
      PROP_EXTENDED_FILTER,
      g_param_spec_boolean ("extended-filter", "Extended Filter",
          "Enable or disable the extended filter.",
          TRUE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
-              G_PARAM_CONSTRUCT)));
+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));

  g_object_class_install_property (gobject_class,
      PROP_DELAY_AGNOSTIC,
-      g_param_spec_boolean ("delay-agnostic", "Delay Agnostic",
+      g_param_spec_boolean ("delay-agnostic",
+          "Delay agnostic mode (does nothing)",
          "Enable or disable the delay agnostic mode.",
          FALSE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
-              G_PARAM_CONSTRUCT)));
+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));

  g_object_class_install_property (gobject_class,
      PROP_TARGET_LEVEL_DBFS,
@ -1111,24 +1041,23 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass)
  g_object_class_install_property (gobject_class,
      PROP_VOICE_DETECTION_FRAME_SIZE_MS,
      g_param_spec_int ("voice-detection-frame-size-ms",
-          "Voice Detection Frame Size Milliseconds",
+          "Voice detection frame size in milliseconds (does nothing)",
          "Sets the |size| of the frames in ms on which the VAD will operate. "
          "Larger frames will improve detection accuracy, but reduce the "
          "frequency of updates",
          10, 30, DEFAULT_VOICE_DETECTION_FRAME_SIZE_MS,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
-              G_PARAM_CONSTRUCT)));
+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));

  g_object_class_install_property (gobject_class,
      PROP_VOICE_DETECTION_LIKELIHOOD,
      g_param_spec_enum ("voice-detection-likelihood",
-          "Voice Detection Likelihood",
+          "Voice detection likelihood (does nothing)",
          "Specifies the likelihood that a frame will be declared to contain "
          "voice.",
-          GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD,
-          DEFAULT_VOICE_DETECTION_LIKELIHOOD,
+          GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD, 2,
          (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
-              G_PARAM_CONSTRUCT)));
+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));

  gst_type_mark_as_plugin_api (GST_TYPE_WEBRTC_GAIN_CONTROL_MODE, (GstPluginAPIFlags) 0);
  gst_type_mark_as_plugin_api (GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL, (GstPluginAPIFlags) 0);
--- a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.cpp
+++ b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.cpp
@ -33,7 +33,8 @@

 #include "gstwebrtcechoprobe.h"

-#include <webrtc/modules/interface/module_common_types.h>
+#include <modules/audio_processing/include/audio_processing.h>
+
 #include <gst/audio/audio.h>

 GST_DEBUG_CATEGORY_EXTERN (webrtc_dsp_debug);
@ -102,7 +103,7 @@ gst_webrtc_echo_probe_setup (GstAudioFilter * filter, const GstAudioInfo * info)
  self->period_size = self->period_samples * info->bpf;

  if (self->interleaved &&
-      (webrtc::AudioFrame::kMaxDataSizeSamples * 2) < self->period_size)
+      (MAX_DATA_SIZE_SAMPLES * 2) < self->period_size)
    goto period_too_big;

  GST_WEBRTC_ECHO_PROBE_UNLOCK (self);
@ -112,9 +113,9 @@ gst_webrtc_echo_probe_setup (GstAudioFilter * filter, const GstAudioInfo * info)
 period_too_big:
  GST_WEBRTC_ECHO_PROBE_UNLOCK (self);
  GST_WARNING_OBJECT (self, "webrtcdsp format produce too big period "
-      "(maximum is %" G_GSIZE_FORMAT " samples and we have %u samples), "
+      "(maximum is %d samples and we have %u samples), "
      "reduce the number of channels or the rate.",
-      webrtc::AudioFrame::kMaxDataSizeSamples, self->period_size / 2);
+      MAX_DATA_SIZE_SAMPLES, self->period_size / 2);
  return FALSE;
 }

@ -303,18 +304,20 @@ gst_webrtc_release_echo_probe (GstWebrtcEchoProbe * probe)

 gint
 gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time,
-    gpointer _frame, GstBuffer ** buf)
+    GstBuffer ** buf)
 {
-  webrtc::AudioFrame * frame = (webrtc::AudioFrame *) _frame;
  GstClockTimeDiff diff;
-  gsize avail, skip, offset, size;
+  gsize avail, skip, offset, size = 0;
  gint delay = -1;

  GST_WEBRTC_ECHO_PROBE_LOCK (self);

+  /* We always return a buffer -- if don't have data (size == 0), we generate a
+   * silence buffer */
+
  if (!GST_CLOCK_TIME_IS_VALID (self->latency) ||
      !GST_AUDIO_INFO_IS_VALID (&self->info))
-    goto done;
+    goto copy;

  if (self->interleaved)
    avail = gst_adapter_available (self->adapter) / self->info.bpf;
@ -324,7 +327,7 @@ gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time,
  /* In delay agnostic mode, just return 10ms of data */
  if (!GST_CLOCK_TIME_IS_VALID (rec_time)) {
    if (avail < self->period_samples)
-      goto done;
+      goto copy;

    size = self->period_samples;
    skip = 0;
@ -371,23 +374,51 @@ gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time,
  size = MIN (avail - offset, self->period_samples - skip);

 copy:
-  if (self->interleaved) {
-    skip *= self->info.bpf;
-    offset *= self->info.bpf;
-    size *= self->info.bpf;
-
-    if (size < self->period_size)
-      memset (frame->data_, 0, self->period_size);
-
-    if (size) {
-      gst_adapter_copy (self->adapter, (guint8 *) frame->data_ + skip,
-          offset, size);
-      gst_adapter_flush (self->adapter, offset + size);
-    }
+  if (!size) {
+    /* No data, provide a period's worth of silence */
+    *buf = gst_buffer_new_allocate (NULL, self->period_size, NULL);
+    gst_buffer_memset (*buf, 0, 0, self->period_size);
+    gst_buffer_add_audio_meta (*buf, &self->info, self->period_samples,
+        NULL);
  } else {
+    /* We have some actual data, pop period_samples' worth if have it, else pad
+     * with silence and provide what we do have */
    GstBuffer *ret, *taken, *tmp;

-    if (size) {
+    if (self->interleaved) {
+      skip *= self->info.bpf;
+      offset *= self->info.bpf;
+      size *= self->info.bpf;
+
+      gst_adapter_flush (self->adapter, offset);
+
+      /* we need to fill silence at the beginning and/or the end of the
+       * buffer in order to have period_samples in the buffer */
+      if (size < self->period_size) {
+        gsize padding = self->period_size - (skip + size);
+
+        taken = gst_adapter_take_buffer (self->adapter, size);
+        ret = gst_buffer_new ();
+
+        /* need some silence at the beginning */
+        if (skip) {
+          tmp = gst_buffer_new_allocate (NULL, skip, NULL);
+          gst_buffer_memset (tmp, 0, 0, skip);
+          ret = gst_buffer_append (ret, tmp);
+        }
+
+        ret = gst_buffer_append (ret, taken);
+
+        /* need some silence at the end */
+        if (padding) {
+          tmp = gst_buffer_new_allocate (NULL, padding, NULL);
+          gst_buffer_memset (tmp, 0, 0, padding);
+          ret = gst_buffer_append (ret, tmp);
+        }
+      } else {
+        ret = gst_adapter_take_buffer (self->adapter, size);
+      }
+    } else {
      gst_planar_audio_adapter_flush (self->padapter, offset);

      /* we need to fill silence at the beginning and/or the end of each
@ -430,23 +461,13 @@ copy:
        ret = gst_planar_audio_adapter_take_buffer (self->padapter, size,
          GST_MAP_READWRITE);
      }
-    } else {
-      ret = gst_buffer_new_allocate (NULL, self->period_size, NULL);
-      gst_buffer_memset (ret, 0, 0, self->period_size);
-      gst_buffer_add_audio_meta (ret, &self->info, self->period_samples,
-          NULL);
    }

    *buf = ret;
  }

-  frame->num_channels_ = self->info.channels;
-  frame->sample_rate_hz_ = self->info.rate;
-  frame->samples_per_channel_ = self->period_samples;
-
  delay = self->delay;

-done:
  GST_WEBRTC_ECHO_PROBE_UNLOCK (self);

  return delay;
--- a/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.h
+++ b/subprojects/gst-plugins-bad/ext/webrtcdsp/gstwebrtcechoprobe.h
@ -45,6 +45,12 @@ G_BEGIN_DECLS
 #define GST_WEBRTC_ECHO_PROBE_LOCK(obj) g_mutex_lock (&GST_WEBRTC_ECHO_PROBE (obj)->lock)
 #define GST_WEBRTC_ECHO_PROBE_UNLOCK(obj) g_mutex_unlock (&GST_WEBRTC_ECHO_PROBE (obj)->lock)

+/* From the webrtc audio_frame.h definition of kMaxDataSizeSamples:
+ * Stereo, 32 kHz, 120 ms (2 * 32 * 120)
+ * Stereo, 192 kHz, 20 ms (2 * 192 * 20)
+ */
+#define MAX_DATA_SIZE_SAMPLES 7680
+
 typedef struct _GstWebrtcEchoProbe GstWebrtcEchoProbe;
 typedef struct _GstWebrtcEchoProbeClass GstWebrtcEchoProbeClass;

@ -71,6 +77,7 @@ struct _GstWebrtcEchoProbe
  GstClockTime latency;
  gint delay;
  gboolean interleaved;
+  gint extra_delay;

  GstSegment segment;
  GstAdapter *adapter;
@ -92,7 +99,7 @@ GST_ELEMENT_REGISTER_DECLARE (webrtcechoprobe);
 GstWebrtcEchoProbe *gst_webrtc_acquire_echo_probe (const gchar * name);
 void gst_webrtc_release_echo_probe (GstWebrtcEchoProbe * probe);
 gint gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self,
-    GstClockTime rec_time, gpointer frame, GstBuffer ** buf);
+    GstClockTime rec_time, GstBuffer ** buf);

 G_END_DECLS
 #endif /* __GST_WEBRTC_ECHO_PROBE_H__ */
--- a/subprojects/gst-plugins-bad/ext/webrtcdsp/meson.build
+++ b/subprojects/gst-plugins-bad/ext/webrtcdsp/meson.build
@ -4,7 +4,7 @@ webrtc_sources = [
  'gstwebrtcdspplugin.cpp'
 ]

-webrtc_dep = dependency('webrtc-audio-processing', version : ['>= 0.2', '< 0.4'],
+webrtc_dep = dependency('webrtc-audio-processing-1', version : ['>= 1.0'],
                        required : get_option('webrtcdsp'))

 if not gnustl_dep.found() and get_option('webrtcdsp').enabled()
@ -20,7 +20,7 @@ if webrtc_dep.found() and gnustl_dep.found()
    dependencies : [gstbase_dep, gstaudio_dep, gstbadaudio_dep, webrtc_dep, gnustl_dep],
    install : true,
    install_dir : plugins_install_dir,
-    override_options : ['cpp_std=c++11'],
+    override_options : ['cpp_std=c++17'],
  )
  plugins += [gstwebrtcdsp]
 endif