1From d5755744c3e2b70e9f04704ae9d18b928d9fa456 Mon Sep 17 00:00:00 2001
2From: Arun Raghavan <arun@asymptotic.io>
3Date: Wed, 2 Dec 2020 18:31:44 -0500
4Subject: [PATCH] webrtcdsp: Update code for webrtc-audio-processing-1
5
6Updated API usage appropriately, and now we have a versioned package to
7track breaking vs. non-breaking updates.
8
9Deprecates a number of properties (and we have to plug in our own values
10for related enums which are now gone):
11
12  * echo-suppression-level
13  * experimental-agc
14  * extended-filter
15  * delay-agnostic
16  * voice-detection-frame-size-ms
17  * voice-detection-likelihood
18
19Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/2943>
20Signed-off-by: James Hilliard <james.hilliard1@gmail.com>
21Upstream: https://gitlab.freedesktop.org/gstreamer/gstreamer/-/commit/d5755744c3e2b70e9f04704ae9d18b928d9fa456
22---
23 .../ext/webrtcdsp/gstwebrtcdsp.cpp            | 271 +++++++-----------
24 .../ext/webrtcdsp/gstwebrtcechoprobe.cpp      |  87 +++---
25 .../ext/webrtcdsp/gstwebrtcechoprobe.h        |   9 +-
26 .../gst-plugins-bad/ext/webrtcdsp/meson.build |   4 +-
27 4 files changed, 164 insertions(+), 207 deletions(-)
28
29diff --git a/ext/webrtcdsp/gstwebrtcdsp.cpp b/ext/webrtcdsp/gstwebrtcdsp.cpp
30index 7ee09488fb..c9a7cdae2f 100644
31--- a/ext/webrtcdsp/gstwebrtcdsp.cpp
32+++ b/ext/webrtcdsp/gstwebrtcdsp.cpp
33@@ -71,9 +71,7 @@
34 #include "gstwebrtcdsp.h"
35 #include "gstwebrtcechoprobe.h"
36
37-#include <webrtc/modules/audio_processing/include/audio_processing.h>
38-#include <webrtc/modules/interface/module_common_types.h>
39-#include <webrtc/system_wrappers/include/trace.h>
40+#include <modules/audio_processing/include/audio_processing.h>
41
42 GST_DEBUG_CATEGORY (webrtc_dsp_debug);
43 #define GST_CAT_DEFAULT (webrtc_dsp_debug)
44@@ -82,10 +80,9 @@ GST_DEBUG_CATEGORY (webrtc_dsp_debug);
45 #define DEFAULT_COMPRESSION_GAIN_DB 9
46 #define DEFAULT_STARTUP_MIN_VOLUME 12
47 #define DEFAULT_LIMITER TRUE
48-#define DEFAULT_GAIN_CONTROL_MODE webrtc::GainControl::kAdaptiveDigital
49+#define DEFAULT_GAIN_CONTROL_MODE webrtc::AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital
50 #define DEFAULT_VOICE_DETECTION FALSE
51 #define DEFAULT_VOICE_DETECTION_FRAME_SIZE_MS 10
52-#define DEFAULT_VOICE_DETECTION_LIKELIHOOD webrtc::VoiceDetection::kLowLikelihood
53
54 static GstStaticPadTemplate gst_webrtc_dsp_sink_template =
55 GST_STATIC_PAD_TEMPLATE ("sink",
56@@ -119,7 +116,7 @@ GST_STATIC_PAD_TEMPLATE ("src",
57         "channels = (int) [1, MAX]")
58     );
59
60-typedef webrtc::EchoCancellation::SuppressionLevel GstWebrtcEchoSuppressionLevel;
61+typedef int GstWebrtcEchoSuppressionLevel;
62 #define GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL \
63     (gst_webrtc_echo_suppression_level_get_type ())
64 static GType
65@@ -127,10 +124,9 @@ gst_webrtc_echo_suppression_level_get_type (void)
66 {
67   static GType suppression_level_type = 0;
68   static const GEnumValue level_types[] = {
69-    {webrtc::EchoCancellation::kLowSuppression, "Low Suppression", "low"},
70-    {webrtc::EchoCancellation::kModerateSuppression,
71-      "Moderate Suppression", "moderate"},
72-    {webrtc::EchoCancellation::kHighSuppression, "high Suppression", "high"},
73+    {1, "Low Suppression", "low"},
74+    {2, "Moderate Suppression", "moderate"},
75+    {3, "high Suppression", "high"},
76     {0, NULL, NULL}
77   };
78
79@@ -141,7 +137,7 @@ gst_webrtc_echo_suppression_level_get_type (void)
80   return suppression_level_type;
81 }
82
83-typedef webrtc::NoiseSuppression::Level GstWebrtcNoiseSuppressionLevel;
84+typedef webrtc::AudioProcessing::Config::NoiseSuppression::Level GstWebrtcNoiseSuppressionLevel;
85 #define GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL \
86     (gst_webrtc_noise_suppression_level_get_type ())
87 static GType
88@@ -149,10 +145,10 @@ gst_webrtc_noise_suppression_level_get_type (void)
89 {
90   static GType suppression_level_type = 0;
91   static const GEnumValue level_types[] = {
92-    {webrtc::NoiseSuppression::kLow, "Low Suppression", "low"},
93-    {webrtc::NoiseSuppression::kModerate, "Moderate Suppression", "moderate"},
94-    {webrtc::NoiseSuppression::kHigh, "High Suppression", "high"},
95-    {webrtc::NoiseSuppression::kVeryHigh, "Very High Suppression",
96+    {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kLow, "Low Suppression", "low"},
97+    {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kModerate, "Moderate Suppression", "moderate"},
98+    {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kHigh, "High Suppression", "high"},
99+    {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh, "Very High Suppression",
100       "very-high"},
101     {0, NULL, NULL}
102   };
103@@ -164,7 +160,7 @@ gst_webrtc_noise_suppression_level_get_type (void)
104   return suppression_level_type;
105 }
106
107-typedef webrtc::GainControl::Mode GstWebrtcGainControlMode;
108+typedef webrtc::AudioProcessing::Config::GainController1::Mode GstWebrtcGainControlMode;
109 #define GST_TYPE_WEBRTC_GAIN_CONTROL_MODE \
110     (gst_webrtc_gain_control_mode_get_type ())
111 static GType
112@@ -172,8 +168,9 @@ gst_webrtc_gain_control_mode_get_type (void)
113 {
114   static GType gain_control_mode_type = 0;
115   static const GEnumValue mode_types[] = {
116-    {webrtc::GainControl::kAdaptiveDigital, "Adaptive Digital", "adaptive-digital"},
117-    {webrtc::GainControl::kFixedDigital, "Fixed Digital", "fixed-digital"},
118+    {webrtc::AudioProcessing::Config::GainController1::kAdaptiveDigital, "Adaptive Digital", "adaptive-digital"},
119+    {webrtc::AudioProcessing::Config::GainController1::kFixedDigital, "Fixed Digital", "fixed-digital"},
120+    {webrtc::AudioProcessing::Config::GainController1::kAdaptiveAnalog, "Adaptive Analog", "adaptive-analog"},
121     {0, NULL, NULL}
122   };
123
124@@ -184,7 +181,7 @@ gst_webrtc_gain_control_mode_get_type (void)
125   return gain_control_mode_type;
126 }
127
128-typedef webrtc::VoiceDetection::Likelihood GstWebrtcVoiceDetectionLikelihood;
129+typedef int GstWebrtcVoiceDetectionLikelihood;
130 #define GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD \
131     (gst_webrtc_voice_detection_likelihood_get_type ())
132 static GType
133@@ -192,10 +189,10 @@ gst_webrtc_voice_detection_likelihood_get_type (void)
134 {
135   static GType likelihood_type = 0;
136   static const GEnumValue likelihood_types[] = {
137-    {webrtc::VoiceDetection::kVeryLowLikelihood, "Very Low Likelihood", "very-low"},
138-    {webrtc::VoiceDetection::kLowLikelihood, "Low Likelihood", "low"},
139-    {webrtc::VoiceDetection::kModerateLikelihood, "Moderate Likelihood", "moderate"},
140-    {webrtc::VoiceDetection::kHighLikelihood, "High Likelihood", "high"},
141+    {1, "Very Low Likelihood", "very-low"},
142+    {2, "Low Likelihood", "low"},
143+    {3, "Moderate Likelihood", "moderate"},
144+    {4, "High Likelihood", "high"},
145     {0, NULL, NULL}
146   };
147
148@@ -227,6 +224,7 @@ enum
149   PROP_VOICE_DETECTION,
150   PROP_VOICE_DETECTION_FRAME_SIZE_MS,
151   PROP_VOICE_DETECTION_LIKELIHOOD,
152+  PROP_EXTRA_DELAY_MS,
153 };
154
155 /**
156@@ -248,7 +246,7 @@ struct _GstWebrtcDsp
157   /* Protected by the stream lock */
158   GstAdapter *adapter;
159   GstPlanarAudioAdapter *padapter;
160-  webrtc::AudioProcessing * apm;
161+  webrtc::AudioProcessing *apm;
162
163   /* Protected by the object lock */
164   gchar *probe_name;
165@@ -257,21 +255,15 @@ struct _GstWebrtcDsp
166   /* Properties */
167   gboolean high_pass_filter;
168   gboolean echo_cancel;
169-  webrtc::EchoCancellation::SuppressionLevel echo_suppression_level;
170   gboolean noise_suppression;
171-  webrtc::NoiseSuppression::Level noise_suppression_level;
172+  webrtc::AudioProcessing::Config::NoiseSuppression::Level noise_suppression_level;
173   gboolean gain_control;
174-  gboolean experimental_agc;
175-  gboolean extended_filter;
176-  gboolean delay_agnostic;
177   gint target_level_dbfs;
178   gint compression_gain_db;
179   gint startup_min_volume;
180   gboolean limiter;
181-  webrtc::GainControl::Mode gain_control_mode;
182+  webrtc::AudioProcessing::Config::GainController1::Mode gain_control_mode;
183   gboolean voice_detection;
184-  gint voice_detection_frame_size_ms;
185-  webrtc::VoiceDetection::Likelihood voice_detection_likelihood;
186 };
187
188 G_DEFINE_TYPE_WITH_CODE (GstWebrtcDsp, gst_webrtc_dsp, GST_TYPE_AUDIO_FILTER,
189@@ -376,9 +368,9 @@ gst_webrtc_dsp_analyze_reverse_stream (GstWebrtcDsp * self,
190     GstClockTime rec_time)
191 {
192   GstWebrtcEchoProbe *probe = NULL;
193-  webrtc::AudioProcessing * apm;
194-  webrtc::AudioFrame frame;
195+  webrtc::AudioProcessing *apm;
196   GstBuffer *buf = NULL;
197+  GstAudioBuffer abuf;
198   GstFlowReturn ret = GST_FLOW_OK;
199   gint err, delay;
200
201@@ -391,48 +383,44 @@ gst_webrtc_dsp_analyze_reverse_stream (GstWebrtcDsp * self,
202   if (!probe)
203     return GST_FLOW_OK;
204
205+  webrtc::StreamConfig config (probe->info.rate, probe->info.channels,
206+      false);
207   apm = self->apm;
208
209-  if (self->delay_agnostic)
210-    rec_time = GST_CLOCK_TIME_NONE;
211-
212-again:
213-  delay = gst_webrtc_echo_probe_read (probe, rec_time, (gpointer) &frame, &buf);
214+  delay = gst_webrtc_echo_probe_read (probe, rec_time, &buf);
215   apm->set_stream_delay_ms (delay);
216
217+  g_return_val_if_fail (buf != NULL, GST_FLOW_ERROR);
218+
219   if (delay < 0)
220     goto done;
221
222-  if (frame.sample_rate_hz_ != self->info.rate) {
223+  if (probe->info.rate != self->info.rate) {
224     GST_ELEMENT_ERROR (self, STREAM, FORMAT,
225         ("Echo Probe has rate %i , while the DSP is running at rate %i,"
226          " use a caps filter to ensure those are the same.",
227-         frame.sample_rate_hz_, self->info.rate), (NULL));
228+         probe->info.rate, self->info.rate), (NULL));
229     ret = GST_FLOW_ERROR;
230     goto done;
231   }
232
233-  if (buf) {
234-    webrtc::StreamConfig config (frame.sample_rate_hz_, frame.num_channels_,
235-        false);
236-    GstAudioBuffer abuf;
237-    float * const * data;
238+  gst_audio_buffer_map (&abuf, &self->info, buf, GST_MAP_READWRITE);
239+
240+  if (probe->interleaved) {
241+    int16_t * const data = (int16_t * const) abuf.planes[0];
242
243-    gst_audio_buffer_map (&abuf, &self->info, buf, GST_MAP_READWRITE);
244-    data = (float * const *) abuf.planes;
245     if ((err = apm->ProcessReverseStream (data, config, config, data)) < 0)
246       GST_WARNING_OBJECT (self, "Reverse stream analyses failed: %s.",
247           webrtc_error_to_string (err));
248-    gst_audio_buffer_unmap (&abuf);
249-    gst_buffer_replace (&buf, NULL);
250   } else {
251-    if ((err = apm->AnalyzeReverseStream (&frame)) < 0)
252+    float * const * data = (float * const *) abuf.planes;
253+
254+    if ((err = apm->ProcessReverseStream (data, config, config, data)) < 0)
255       GST_WARNING_OBJECT (self, "Reverse stream analyses failed: %s.",
256           webrtc_error_to_string (err));
257   }
258
259-  if (self->delay_agnostic)
260-      goto again;
261+  gst_audio_buffer_unmap (&abuf);
262
263 done:
264   gst_object_unref (probe);
265@@ -443,16 +431,14 @@ done:
266
267 static void
268 gst_webrtc_vad_post_activity (GstWebrtcDsp *self, GstBuffer *buffer,
269-    gboolean stream_has_voice)
270+    gboolean stream_has_voice, guint8 level)
271 {
272   GstClockTime timestamp = GST_BUFFER_PTS (buffer);
273   GstBaseTransform *trans = GST_BASE_TRANSFORM_CAST (self);
274   GstStructure *s;
275   GstClockTime stream_time;
276   GstAudioLevelMeta *meta;
277-  guint8 level;
278
279-  level = self->apm->level_estimator ()->RMS ();
280   meta = gst_buffer_get_audio_level_meta (buffer);
281   if (meta) {
282     meta->voice_activity = stream_has_voice;
283@@ -481,6 +467,7 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self,
284 {
285   GstAudioBuffer abuf;
286   webrtc::AudioProcessing * apm = self->apm;
287+  webrtc::StreamConfig config (self->info.rate, self->info.channels, false);
288   gint err;
289
290   if (!gst_audio_buffer_map (&abuf, &self->info, buffer,
291@@ -490,19 +477,10 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self,
292   }
293
294   if (self->interleaved) {
295-    webrtc::AudioFrame frame;
296-    frame.num_channels_ = self->info.channels;
297-    frame.sample_rate_hz_ = self->info.rate;
298-    frame.samples_per_channel_ = self->period_samples;
299-
300-    memcpy (frame.data_, abuf.planes[0], self->period_size);
301-    err = apm->ProcessStream (&frame);
302-    if (err >= 0)
303-      memcpy (abuf.planes[0], frame.data_, self->period_size);
304+    int16_t * const data = (int16_t * const) abuf.planes[0];
305+    err = apm->ProcessStream (data, config, config, data);
306   } else {
307     float * const * data = (float * const *) abuf.planes;
308-    webrtc::StreamConfig config (self->info.rate, self->info.channels, false);
309-
310     err = apm->ProcessStream (data, config, config, data);
311   }
312
313@@ -511,10 +489,13 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self,
314         webrtc_error_to_string (err));
315   } else {
316     if (self->voice_detection) {
317-      gboolean stream_has_voice = apm->voice_detection ()->stream_has_voice ();
318+      webrtc::AudioProcessingStats stats = apm->GetStatistics ();
319+      gboolean stream_has_voice = stats.voice_detected && *stats.voice_detected;
320+      // The meta takes the value as -dbov, so we negate
321+      guint8 level = stats.output_rms_dbfs ? (guint8) -(*stats.output_rms_dbfs) : 127;
322
323       if (stream_has_voice != self->stream_has_voice)
324-        gst_webrtc_vad_post_activity (self, buffer, stream_has_voice);
325+        gst_webrtc_vad_post_activity (self, buffer, stream_has_voice, level);
326
327       self->stream_has_voice = stream_has_voice;
328     }
329@@ -583,21 +564,9 @@ static gboolean
330 gst_webrtc_dsp_start (GstBaseTransform * btrans)
331 {
332   GstWebrtcDsp *self = GST_WEBRTC_DSP (btrans);
333-  webrtc::Config config;
334
335   GST_OBJECT_LOCK (self);
336
337-  config.Set < webrtc::ExtendedFilter >
338-      (new webrtc::ExtendedFilter (self->extended_filter));
339-  config.Set < webrtc::ExperimentalAgc >
340-      (new webrtc::ExperimentalAgc (self->experimental_agc, self->startup_min_volume));
341-  config.Set < webrtc::DelayAgnostic >
342-      (new webrtc::DelayAgnostic (self->delay_agnostic));
343-
344-  /* TODO Intelligibility enhancer, Beamforming, etc. */
345-
346-  self->apm = webrtc::AudioProcessing::Create (config);
347-
348   if (self->echo_cancel) {
349     self->probe = gst_webrtc_acquire_echo_probe (self->probe_name);
350
351@@ -618,10 +587,8 @@ static gboolean
352 gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
353 {
354   GstWebrtcDsp *self = GST_WEBRTC_DSP (filter);
355-  webrtc::AudioProcessing * apm;
356-  webrtc::ProcessingConfig pconfig;
357+  webrtc::AudioProcessing::Config config;
358   GstAudioInfo probe_info = *info;
359-  gint err = 0;
360
361   GST_LOG_OBJECT (self, "setting format to %s with %i Hz and %i channels",
362       info->finfo->description, info->rate, info->channels);
363@@ -633,7 +600,7 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
364
365   self->info = *info;
366   self->interleaved = (info->layout == GST_AUDIO_LAYOUT_INTERLEAVED);
367-  apm = self->apm;
368+  self->apm = webrtc::AudioProcessingBuilder().Create();
369
370   if (!self->interleaved)
371     gst_planar_audio_adapter_configure (self->padapter, info);
372@@ -642,8 +609,7 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
373   self->period_samples = info->rate / 100;
374   self->period_size = self->period_samples * info->bpf;
375
376-  if (self->interleaved &&
377-      (webrtc::AudioFrame::kMaxDataSizeSamples * 2) < self->period_size)
378+  if (self->interleaved && (self->period_size > MAX_DATA_SIZE_SAMPLES * 2))
379     goto period_too_big;
380
381   if (self->probe) {
382@@ -658,40 +624,31 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
383     GST_WEBRTC_ECHO_PROBE_UNLOCK (self->probe);
384   }
385
386-  /* input stream */
387-  pconfig.streams[webrtc::ProcessingConfig::kInputStream] =
388-      webrtc::StreamConfig (info->rate, info->channels, false);
389-  /* output stream */
390-  pconfig.streams[webrtc::ProcessingConfig::kOutputStream] =
391-      webrtc::StreamConfig (info->rate, info->channels, false);
392-  /* reverse input stream */
393-  pconfig.streams[webrtc::ProcessingConfig::kReverseInputStream] =
394-      webrtc::StreamConfig (probe_info.rate, probe_info.channels, false);
395-  /* reverse output stream */
396-  pconfig.streams[webrtc::ProcessingConfig::kReverseOutputStream] =
397-      webrtc::StreamConfig (probe_info.rate, probe_info.channels, false);
398-
399-  if ((err = apm->Initialize (pconfig)) < 0)
400-    goto initialize_failed;
401-
402   /* Setup Filters */
403+  // TODO: expose pre_amplifier
404+
405   if (self->high_pass_filter) {
406     GST_DEBUG_OBJECT (self, "Enabling High Pass filter");
407-    apm->high_pass_filter ()->Enable (true);
408+    config.high_pass_filter.enabled = true;
409   }
410
411   if (self->echo_cancel) {
412     GST_DEBUG_OBJECT (self, "Enabling Echo Cancellation");
413-    apm->echo_cancellation ()->enable_drift_compensation (false);
414-    apm->echo_cancellation ()
415-        ->set_suppression_level (self->echo_suppression_level);
416-    apm->echo_cancellation ()->Enable (true);
417+    config.echo_canceller.enabled = true;
418   }
419
420   if (self->noise_suppression) {
421     GST_DEBUG_OBJECT (self, "Enabling Noise Suppression");
422-    apm->noise_suppression ()->set_level (self->noise_suppression_level);
423-    apm->noise_suppression ()->Enable (true);
424+    config.noise_suppression.enabled = true;
425+    config.noise_suppression.level = self->noise_suppression_level;
426+  }
427+
428+  // TODO: expose transient suppression
429+
430+  if (self->voice_detection) {
431+    GST_DEBUG_OBJECT (self, "Enabling Voice Activity Detection");
432+    config.voice_detection.enabled = true;
433+    self->stream_has_voice = FALSE;
434   }
435
436   if (self->gain_control) {
437@@ -706,30 +663,17 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
438
439     g_type_class_unref (mode_class);
440
441-    apm->gain_control ()->set_mode (self->gain_control_mode);
442-    apm->gain_control ()->set_target_level_dbfs (self->target_level_dbfs);
443-    apm->gain_control ()->set_compression_gain_db (self->compression_gain_db);
444-    apm->gain_control ()->enable_limiter (self->limiter);
445-    apm->gain_control ()->Enable (true);
446+    config.gain_controller1.enabled = true;
447+    config.gain_controller1.target_level_dbfs = self->target_level_dbfs;
448+    config.gain_controller1.compression_gain_db = self->compression_gain_db;
449+    config.gain_controller1.enable_limiter = self->limiter;
450+    config.level_estimation.enabled = true;
451   }
452
453-  if (self->voice_detection) {
454-    GEnumClass *likelihood_class = (GEnumClass *)
455-        g_type_class_ref (GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD);
456-    GST_DEBUG_OBJECT (self, "Enabling Voice Activity Detection, frame size "
457-      "%d milliseconds, likelihood: %s", self->voice_detection_frame_size_ms,
458-      g_enum_get_value (likelihood_class,
459-          self->voice_detection_likelihood)->value_name);
460-    g_type_class_unref (likelihood_class);
461+  // TODO: expose gain controller 2
462+  // TODO: expose residual echo detector
463
464-    self->stream_has_voice = FALSE;
465-
466-    apm->voice_detection ()->Enable (true);
467-    apm->voice_detection ()->set_likelihood (self->voice_detection_likelihood);
468-    apm->voice_detection ()->set_frame_size_ms (
469-        self->voice_detection_frame_size_ms);
470-    apm->level_estimator ()->Enable (true);
471-  }
472+  self->apm->ApplyConfig (config);
473
474   GST_OBJECT_UNLOCK (self);
475
476@@ -738,9 +682,9 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info)
477 period_too_big:
478   GST_OBJECT_UNLOCK (self);
479   GST_WARNING_OBJECT (self, "webrtcdsp format produce too big period "
480-      "(maximum is %" G_GSIZE_FORMAT " samples and we have %u samples), "
481+      "(maximum is %d samples and we have %u samples), "
482       "reduce the number of channels or the rate.",
483-      webrtc::AudioFrame::kMaxDataSizeSamples, self->period_size / 2);
484+      MAX_DATA_SIZE_SAMPLES, self->period_size / 2);
485   return FALSE;
486
487 probe_has_wrong_rate:
488@@ -751,14 +695,6 @@ probe_has_wrong_rate:
489           " use a caps filter to ensure those are the same.",
490           probe_info.rate, info->rate), (NULL));
491   return FALSE;
492-
493-initialize_failed:
494-  GST_OBJECT_UNLOCK (self);
495-  GST_ELEMENT_ERROR (self, LIBRARY, INIT,
496-      ("Failed to initialize WebRTC Audio Processing library"),
497-      ("webrtc::AudioProcessing::Initialize() failed: %s",
498-          webrtc_error_to_string (err)));
499-  return FALSE;
500 }
501
502 static gboolean
503@@ -803,8 +739,6 @@ gst_webrtc_dsp_set_property (GObject * object,
504       self->echo_cancel = g_value_get_boolean (value);
505       break;
506     case PROP_ECHO_SUPPRESSION_LEVEL:
507-      self->echo_suppression_level =
508-          (GstWebrtcEchoSuppressionLevel) g_value_get_enum (value);
509       break;
510     case PROP_NOISE_SUPPRESSION:
511       self->noise_suppression = g_value_get_boolean (value);
512@@ -817,13 +751,10 @@ gst_webrtc_dsp_set_property (GObject * object,
513       self->gain_control = g_value_get_boolean (value);
514       break;
515     case PROP_EXPERIMENTAL_AGC:
516-      self->experimental_agc = g_value_get_boolean (value);
517       break;
518     case PROP_EXTENDED_FILTER:
519-      self->extended_filter = g_value_get_boolean (value);
520       break;
521     case PROP_DELAY_AGNOSTIC:
522-      self->delay_agnostic = g_value_get_boolean (value);
523       break;
524     case PROP_TARGET_LEVEL_DBFS:
525       self->target_level_dbfs = g_value_get_int (value);
526@@ -845,11 +776,8 @@ gst_webrtc_dsp_set_property (GObject * object,
527       self->voice_detection = g_value_get_boolean (value);
528       break;
529     case PROP_VOICE_DETECTION_FRAME_SIZE_MS:
530-      self->voice_detection_frame_size_ms = g_value_get_int (value);
531       break;
532     case PROP_VOICE_DETECTION_LIKELIHOOD:
533-      self->voice_detection_likelihood =
534-          (GstWebrtcVoiceDetectionLikelihood) g_value_get_enum (value);
535       break;
536     default:
537       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
538@@ -876,7 +804,7 @@ gst_webrtc_dsp_get_property (GObject * object,
539       g_value_set_boolean (value, self->echo_cancel);
540       break;
541     case PROP_ECHO_SUPPRESSION_LEVEL:
542-      g_value_set_enum (value, self->echo_suppression_level);
543+      g_value_set_enum (value, (GstWebrtcEchoSuppressionLevel) 2);
544       break;
545     case PROP_NOISE_SUPPRESSION:
546       g_value_set_boolean (value, self->noise_suppression);
547@@ -888,13 +816,13 @@ gst_webrtc_dsp_get_property (GObject * object,
548       g_value_set_boolean (value, self->gain_control);
549       break;
550     case PROP_EXPERIMENTAL_AGC:
551-      g_value_set_boolean (value, self->experimental_agc);
552+      g_value_set_boolean (value, false);
553       break;
554     case PROP_EXTENDED_FILTER:
555-      g_value_set_boolean (value, self->extended_filter);
556+      g_value_set_boolean (value, false);
557       break;
558     case PROP_DELAY_AGNOSTIC:
559-      g_value_set_boolean (value, self->delay_agnostic);
560+      g_value_set_boolean (value, false);
561       break;
562     case PROP_TARGET_LEVEL_DBFS:
563       g_value_set_int (value, self->target_level_dbfs);
564@@ -915,10 +843,10 @@ gst_webrtc_dsp_get_property (GObject * object,
565       g_value_set_boolean (value, self->voice_detection);
566       break;
567     case PROP_VOICE_DETECTION_FRAME_SIZE_MS:
568-      g_value_set_int (value, self->voice_detection_frame_size_ms);
569+      g_value_set_int (value, 0);
570       break;
571     case PROP_VOICE_DETECTION_LIKELIHOOD:
572-      g_value_set_enum (value, self->voice_detection_likelihood);
573+      g_value_set_enum (value, 2);
574       break;
575     default:
576       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
577@@ -1005,13 +933,13 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass)
578
579   g_object_class_install_property (gobject_class,
580       PROP_ECHO_SUPPRESSION_LEVEL,
581-      g_param_spec_enum ("echo-suppression-level", "Echo Suppression Level",
582+      g_param_spec_enum ("echo-suppression-level",
583+          "Echo Suppression Level (does nothing)",
584           "Controls the aggressiveness of the suppressor. A higher level "
585           "trades off double-talk performance for increased echo suppression.",
586-          GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL,
587-          webrtc::EchoCancellation::kModerateSuppression,
588+          GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL, 2,
589           (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
590-              G_PARAM_CONSTRUCT)));
591+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));
592
593   g_object_class_install_property (gobject_class,
594       PROP_NOISE_SUPPRESSION,
595@@ -1026,7 +954,7 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass)
596           "Controls the aggressiveness of the suppression. Increasing the "
597           "level will reduce the noise level at the expense of a higher "
598           "speech distortion.", GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL,
599-          webrtc::EchoCancellation::kModerateSuppression,
600+          webrtc::AudioProcessing::Config::NoiseSuppression::Level::kModerate,
601           (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
602               G_PARAM_CONSTRUCT)));
603
604@@ -1039,24 +967,26 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass)
605
606   g_object_class_install_property (gobject_class,
607       PROP_EXPERIMENTAL_AGC,
608-      g_param_spec_boolean ("experimental-agc", "Experimental AGC",
609+      g_param_spec_boolean ("experimental-agc",
610+          "Experimental AGC (does nothing)",
611           "Enable or disable experimental automatic gain control.",
612           FALSE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
613-              G_PARAM_CONSTRUCT)));
614+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));
615
616   g_object_class_install_property (gobject_class,
617       PROP_EXTENDED_FILTER,
618       g_param_spec_boolean ("extended-filter", "Extended Filter",
619           "Enable or disable the extended filter.",
620           TRUE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
621-              G_PARAM_CONSTRUCT)));
622+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));
623
624   g_object_class_install_property (gobject_class,
625       PROP_DELAY_AGNOSTIC,
626-      g_param_spec_boolean ("delay-agnostic", "Delay Agnostic",
627+      g_param_spec_boolean ("delay-agnostic",
628+          "Delay agnostic mode (does nothing)",
629           "Enable or disable the delay agnostic mode.",
630           FALSE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
631-              G_PARAM_CONSTRUCT)));
632+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));
633
634   g_object_class_install_property (gobject_class,
635       PROP_TARGET_LEVEL_DBFS,
636@@ -1111,24 +1041,23 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass)
637   g_object_class_install_property (gobject_class,
638       PROP_VOICE_DETECTION_FRAME_SIZE_MS,
639       g_param_spec_int ("voice-detection-frame-size-ms",
640-          "Voice Detection Frame Size Milliseconds",
641+          "Voice detection frame size in milliseconds (does nothing)",
642           "Sets the |size| of the frames in ms on which the VAD will operate. "
643           "Larger frames will improve detection accuracy, but reduce the "
644           "frequency of updates",
645           10, 30, DEFAULT_VOICE_DETECTION_FRAME_SIZE_MS,
646           (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
647-              G_PARAM_CONSTRUCT)));
648+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));
649
650   g_object_class_install_property (gobject_class,
651       PROP_VOICE_DETECTION_LIKELIHOOD,
652       g_param_spec_enum ("voice-detection-likelihood",
653-          "Voice Detection Likelihood",
654+          "Voice detection likelihood (does nothing)",
655           "Specifies the likelihood that a frame will be declared to contain "
656           "voice.",
657-          GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD,
658-          DEFAULT_VOICE_DETECTION_LIKELIHOOD,
659+          GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD, 2,
660           (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS |
661-              G_PARAM_CONSTRUCT)));
662+              G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED)));
663
664   gst_type_mark_as_plugin_api (GST_TYPE_WEBRTC_GAIN_CONTROL_MODE, (GstPluginAPIFlags) 0);
665   gst_type_mark_as_plugin_api (GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL, (GstPluginAPIFlags) 0);
666diff --git a/ext/webrtcdsp/gstwebrtcechoprobe.cpp b/ext/webrtcdsp/gstwebrtcechoprobe.cpp
667index acdb3d8a7d..8e8ca064c4 100644
668--- a/ext/webrtcdsp/gstwebrtcechoprobe.cpp
669+++ b/ext/webrtcdsp/gstwebrtcechoprobe.cpp
670@@ -33,7 +33,8 @@
671
672 #include "gstwebrtcechoprobe.h"
673
674-#include <webrtc/modules/interface/module_common_types.h>
675+#include <modules/audio_processing/include/audio_processing.h>
676+
677 #include <gst/audio/audio.h>
678
679 GST_DEBUG_CATEGORY_EXTERN (webrtc_dsp_debug);
680@@ -102,7 +103,7 @@ gst_webrtc_echo_probe_setup (GstAudioFilter * filter, const GstAudioInfo * info)
681   self->period_size = self->period_samples * info->bpf;
682
683   if (self->interleaved &&
684-      (webrtc::AudioFrame::kMaxDataSizeSamples * 2) < self->period_size)
685+      (MAX_DATA_SIZE_SAMPLES * 2) < self->period_size)
686     goto period_too_big;
687
688   GST_WEBRTC_ECHO_PROBE_UNLOCK (self);
689@@ -112,9 +113,9 @@ gst_webrtc_echo_probe_setup (GstAudioFilter * filter, const GstAudioInfo * info)
690 period_too_big:
691   GST_WEBRTC_ECHO_PROBE_UNLOCK (self);
692   GST_WARNING_OBJECT (self, "webrtcdsp format produce too big period "
693-      "(maximum is %" G_GSIZE_FORMAT " samples and we have %u samples), "
694+      "(maximum is %d samples and we have %u samples), "
695       "reduce the number of channels or the rate.",
696-      webrtc::AudioFrame::kMaxDataSizeSamples, self->period_size / 2);
697+      MAX_DATA_SIZE_SAMPLES, self->period_size / 2);
698   return FALSE;
699 }
700
701@@ -303,18 +304,20 @@ gst_webrtc_release_echo_probe (GstWebrtcEchoProbe * probe)
702
703 gint
704 gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time,
705-    gpointer _frame, GstBuffer ** buf)
706+    GstBuffer ** buf)
707 {
708-  webrtc::AudioFrame * frame = (webrtc::AudioFrame *) _frame;
709   GstClockTimeDiff diff;
710-  gsize avail, skip, offset, size;
711+  gsize avail, skip, offset, size = 0;
712   gint delay = -1;
713
714   GST_WEBRTC_ECHO_PROBE_LOCK (self);
715
716+  /* We always return a buffer -- if don't have data (size == 0), we generate a
717+   * silence buffer */
718+
719   if (!GST_CLOCK_TIME_IS_VALID (self->latency) ||
720       !GST_AUDIO_INFO_IS_VALID (&self->info))
721-    goto done;
722+    goto copy;
723
724   if (self->interleaved)
725     avail = gst_adapter_available (self->adapter) / self->info.bpf;
726@@ -324,7 +327,7 @@ gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time,
727   /* In delay agnostic mode, just return 10ms of data */
728   if (!GST_CLOCK_TIME_IS_VALID (rec_time)) {
729     if (avail < self->period_samples)
730-      goto done;
731+      goto copy;
732
733     size = self->period_samples;
734     skip = 0;
735@@ -371,23 +374,51 @@ gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time,
736   size = MIN (avail - offset, self->period_samples - skip);
737
738 copy:
739-  if (self->interleaved) {
740-    skip *= self->info.bpf;
741-    offset *= self->info.bpf;
742-    size *= self->info.bpf;
743-
744-    if (size < self->period_size)
745-      memset (frame->data_, 0, self->period_size);
746-
747-    if (size) {
748-      gst_adapter_copy (self->adapter, (guint8 *) frame->data_ + skip,
749-          offset, size);
750-      gst_adapter_flush (self->adapter, offset + size);
751-    }
752+  if (!size) {
753+    /* No data, provide a period's worth of silence */
754+    *buf = gst_buffer_new_allocate (NULL, self->period_size, NULL);
755+    gst_buffer_memset (*buf, 0, 0, self->period_size);
756+    gst_buffer_add_audio_meta (*buf, &self->info, self->period_samples,
757+        NULL);
758   } else {
759+    /* We have some actual data: pop period_samples' worth if we have it, else
760+     * pad with silence and provide what we do have */
761     GstBuffer *ret, *taken, *tmp;
762
763-    if (size) {
764+    if (self->interleaved) {
765+      skip *= self->info.bpf;
766+      offset *= self->info.bpf;
767+      size *= self->info.bpf;
768+
769+      gst_adapter_flush (self->adapter, offset);
770+
771+      /* we need to fill silence at the beginning and/or the end of the
772+       * buffer in order to have period_samples in the buffer */
773+      if (size < self->period_size) {
774+        gsize padding = self->period_size - (skip + size);
775+
776+        taken = gst_adapter_take_buffer (self->adapter, size);
777+        ret = gst_buffer_new ();
778+
779+        /* need some silence at the beginning */
780+        if (skip) {
781+          tmp = gst_buffer_new_allocate (NULL, skip, NULL);
782+          gst_buffer_memset (tmp, 0, 0, skip);
783+          ret = gst_buffer_append (ret, tmp);
784+        }
785+
786+        ret = gst_buffer_append (ret, taken);
787+
788+        /* need some silence at the end */
789+        if (padding) {
790+          tmp = gst_buffer_new_allocate (NULL, padding, NULL);
791+          gst_buffer_memset (tmp, 0, 0, padding);
792+          ret = gst_buffer_append (ret, tmp);
793+        }
794+      } else {
795+        ret = gst_adapter_take_buffer (self->adapter, size);
796+      }
797+    } else {
798       gst_planar_audio_adapter_flush (self->padapter, offset);
799
800       /* we need to fill silence at the beginning and/or the end of each
801@@ -430,23 +461,13 @@ copy:
802         ret = gst_planar_audio_adapter_take_buffer (self->padapter, size,
803           GST_MAP_READWRITE);
804       }
805-    } else {
806-      ret = gst_buffer_new_allocate (NULL, self->period_size, NULL);
807-      gst_buffer_memset (ret, 0, 0, self->period_size);
808-      gst_buffer_add_audio_meta (ret, &self->info, self->period_samples,
809-          NULL);
810     }
811
812     *buf = ret;
813   }
814
815-  frame->num_channels_ = self->info.channels;
816-  frame->sample_rate_hz_ = self->info.rate;
817-  frame->samples_per_channel_ = self->period_samples;
818-
819   delay = self->delay;
820
821-done:
822   GST_WEBRTC_ECHO_PROBE_UNLOCK (self);
823
824   return delay;
825diff --git a/ext/webrtcdsp/gstwebrtcechoprobe.h b/ext/webrtcdsp/gstwebrtcechoprobe.h
826index 36fd34f179..488c0e958f 100644
827--- a/ext/webrtcdsp/gstwebrtcechoprobe.h
828+++ b/ext/webrtcdsp/gstwebrtcechoprobe.h
829@@ -45,6 +45,12 @@ G_BEGIN_DECLS
830 #define GST_WEBRTC_ECHO_PROBE_LOCK(obj) g_mutex_lock (&GST_WEBRTC_ECHO_PROBE (obj)->lock)
831 #define GST_WEBRTC_ECHO_PROBE_UNLOCK(obj) g_mutex_unlock (&GST_WEBRTC_ECHO_PROBE (obj)->lock)
832
833+/* From the webrtc audio_frame.h definition of kMaxDataSizeSamples:
834+ * Stereo, 32 kHz, 120 ms (2 * 32 * 120)
835+ * Stereo, 192 kHz, 20 ms (2 * 192 * 20)
836+ */
837+#define MAX_DATA_SIZE_SAMPLES 7680
838+
839 typedef struct _GstWebrtcEchoProbe GstWebrtcEchoProbe;
840 typedef struct _GstWebrtcEchoProbeClass GstWebrtcEchoProbeClass;
841
842@@ -71,6 +77,7 @@ struct _GstWebrtcEchoProbe
843   GstClockTime latency;
844   gint delay;
845   gboolean interleaved;
846+  gint extra_delay;
847
848   GstSegment segment;
849   GstAdapter *adapter;
850@@ -92,7 +99,7 @@ GST_ELEMENT_REGISTER_DECLARE (webrtcechoprobe);
851 GstWebrtcEchoProbe *gst_webrtc_acquire_echo_probe (const gchar * name);
852 void gst_webrtc_release_echo_probe (GstWebrtcEchoProbe * probe);
853 gint gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self,
854-    GstClockTime rec_time, gpointer frame, GstBuffer ** buf);
855+    GstClockTime rec_time, GstBuffer ** buf);
856
857 G_END_DECLS
858 #endif /* __GST_WEBRTC_ECHO_PROBE_H__ */
859diff --git a/ext/webrtcdsp/meson.build b/ext/webrtcdsp/meson.build
860index 5aeae69a44..09565e27c7 100644
861--- a/ext/webrtcdsp/meson.build
862+++ b/ext/webrtcdsp/meson.build
863@@ -4,7 +4,7 @@ webrtc_sources = [
864   'gstwebrtcdspplugin.cpp'
865 ]
866
867-webrtc_dep = dependency('webrtc-audio-processing', version : ['>= 0.2', '< 0.4'],
868+webrtc_dep = dependency('webrtc-audio-processing-1', version : ['>= 1.0'],
869                         required : get_option('webrtcdsp'))
870
871 if not gnustl_dep.found() and get_option('webrtcdsp').enabled()
872@@ -20,7 +20,7 @@ if webrtc_dep.found() and gnustl_dep.found()
873     dependencies : [gstbase_dep, gstaudio_dep, gstbadaudio_dep, webrtc_dep, gnustl_dep],
874     install : true,
875     install_dir : plugins_install_dir,
876-    override_options : ['cpp_std=c++11'],
877+    override_options : ['cpp_std=c++17'],
878   )
879   plugins += [gstwebrtcdsp]
880 endif
881--
8822.34.1
883
884