1From d5755744c3e2b70e9f04704ae9d18b928d9fa456 Mon Sep 17 00:00:00 2001 2From: Arun Raghavan <arun@asymptotic.io> 3Date: Wed, 2 Dec 2020 18:31:44 -0500 4Subject: [PATCH] webrtcdsp: Update code for webrtc-audio-processing-1 5 6Updated API usage appropriately, and now we have a versioned package to 7track breaking vs. non-breaking updates. 8 9Deprecates a number of properties (and we have to plug in our own values 10for related enums which are now gone): 11 12 * echo-suppression-level 13 * experimental-agc 14 * extended-filter 15 * delay-agnostic 16 * voice-detection-frame-size-ms 17 * voice-detection-likelihood 18 19Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/2943> 20Signed-off-by: James Hilliard <james.hilliard1@gmail.com> 21Upstream: https://gitlab.freedesktop.org/gstreamer/gstreamer/-/commit/d5755744c3e2b70e9f04704ae9d18b928d9fa456 22--- 23 .../ext/webrtcdsp/gstwebrtcdsp.cpp | 271 +++++++----------- 24 .../ext/webrtcdsp/gstwebrtcechoprobe.cpp | 87 +++--- 25 .../ext/webrtcdsp/gstwebrtcechoprobe.h | 9 +- 26 .../gst-plugins-bad/ext/webrtcdsp/meson.build | 4 +- 27 4 files changed, 164 insertions(+), 207 deletions(-) 28 29diff --git a/ext/webrtcdsp/gstwebrtcdsp.cpp b/ext/webrtcdsp/gstwebrtcdsp.cpp 30index 7ee09488fb..c9a7cdae2f 100644 31--- a/ext/webrtcdsp/gstwebrtcdsp.cpp 32+++ b/ext/webrtcdsp/gstwebrtcdsp.cpp 33@@ -71,9 +71,7 @@ 34 #include "gstwebrtcdsp.h" 35 #include "gstwebrtcechoprobe.h" 36 37-#include <webrtc/modules/audio_processing/include/audio_processing.h> 38-#include <webrtc/modules/interface/module_common_types.h> 39-#include <webrtc/system_wrappers/include/trace.h> 40+#include <modules/audio_processing/include/audio_processing.h> 41 42 GST_DEBUG_CATEGORY (webrtc_dsp_debug); 43 #define GST_CAT_DEFAULT (webrtc_dsp_debug) 44@@ -82,10 +80,9 @@ GST_DEBUG_CATEGORY (webrtc_dsp_debug); 45 #define DEFAULT_COMPRESSION_GAIN_DB 9 46 #define DEFAULT_STARTUP_MIN_VOLUME 12 47 #define DEFAULT_LIMITER TRUE 48-#define 
DEFAULT_GAIN_CONTROL_MODE webrtc::GainControl::kAdaptiveDigital 49+#define DEFAULT_GAIN_CONTROL_MODE webrtc::AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital 50 #define DEFAULT_VOICE_DETECTION FALSE 51 #define DEFAULT_VOICE_DETECTION_FRAME_SIZE_MS 10 52-#define DEFAULT_VOICE_DETECTION_LIKELIHOOD webrtc::VoiceDetection::kLowLikelihood 53 54 static GstStaticPadTemplate gst_webrtc_dsp_sink_template = 55 GST_STATIC_PAD_TEMPLATE ("sink", 56@@ -119,7 +116,7 @@ GST_STATIC_PAD_TEMPLATE ("src", 57 "channels = (int) [1, MAX]") 58 ); 59 60-typedef webrtc::EchoCancellation::SuppressionLevel GstWebrtcEchoSuppressionLevel; 61+typedef int GstWebrtcEchoSuppressionLevel; 62 #define GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL \ 63 (gst_webrtc_echo_suppression_level_get_type ()) 64 static GType 65@@ -127,10 +124,9 @@ gst_webrtc_echo_suppression_level_get_type (void) 66 { 67 static GType suppression_level_type = 0; 68 static const GEnumValue level_types[] = { 69- {webrtc::EchoCancellation::kLowSuppression, "Low Suppression", "low"}, 70- {webrtc::EchoCancellation::kModerateSuppression, 71- "Moderate Suppression", "moderate"}, 72- {webrtc::EchoCancellation::kHighSuppression, "high Suppression", "high"}, 73+ {1, "Low Suppression", "low"}, 74+ {2, "Moderate Suppression", "moderate"}, 75+ {3, "high Suppression", "high"}, 76 {0, NULL, NULL} 77 }; 78 79@@ -141,7 +137,7 @@ gst_webrtc_echo_suppression_level_get_type (void) 80 return suppression_level_type; 81 } 82 83-typedef webrtc::NoiseSuppression::Level GstWebrtcNoiseSuppressionLevel; 84+typedef webrtc::AudioProcessing::Config::NoiseSuppression::Level GstWebrtcNoiseSuppressionLevel; 85 #define GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL \ 86 (gst_webrtc_noise_suppression_level_get_type ()) 87 static GType 88@@ -149,10 +145,10 @@ gst_webrtc_noise_suppression_level_get_type (void) 89 { 90 static GType suppression_level_type = 0; 91 static const GEnumValue level_types[] = { 92- {webrtc::NoiseSuppression::kLow, "Low Suppression", "low"}, 
93- {webrtc::NoiseSuppression::kModerate, "Moderate Suppression", "moderate"}, 94- {webrtc::NoiseSuppression::kHigh, "High Suppression", "high"}, 95- {webrtc::NoiseSuppression::kVeryHigh, "Very High Suppression", 96+ {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kLow, "Low Suppression", "low"}, 97+ {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kModerate, "Moderate Suppression", "moderate"}, 98+ {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kHigh, "High Suppression", "high"}, 99+ {webrtc::AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh, "Very High Suppression", 100 "very-high"}, 101 {0, NULL, NULL} 102 }; 103@@ -164,7 +160,7 @@ gst_webrtc_noise_suppression_level_get_type (void) 104 return suppression_level_type; 105 } 106 107-typedef webrtc::GainControl::Mode GstWebrtcGainControlMode; 108+typedef webrtc::AudioProcessing::Config::GainController1::Mode GstWebrtcGainControlMode; 109 #define GST_TYPE_WEBRTC_GAIN_CONTROL_MODE \ 110 (gst_webrtc_gain_control_mode_get_type ()) 111 static GType 112@@ -172,8 +168,9 @@ gst_webrtc_gain_control_mode_get_type (void) 113 { 114 static GType gain_control_mode_type = 0; 115 static const GEnumValue mode_types[] = { 116- {webrtc::GainControl::kAdaptiveDigital, "Adaptive Digital", "adaptive-digital"}, 117- {webrtc::GainControl::kFixedDigital, "Fixed Digital", "fixed-digital"}, 118+ {webrtc::AudioProcessing::Config::GainController1::kAdaptiveDigital, "Adaptive Digital", "adaptive-digital"}, 119+ {webrtc::AudioProcessing::Config::GainController1::kFixedDigital, "Fixed Digital", "fixed-digital"}, 120+ {webrtc::AudioProcessing::Config::GainController1::kAdaptiveAnalog, "Adaptive Analog", "adaptive-analog"}, 121 {0, NULL, NULL} 122 }; 123 124@@ -184,7 +181,7 @@ gst_webrtc_gain_control_mode_get_type (void) 125 return gain_control_mode_type; 126 } 127 128-typedef webrtc::VoiceDetection::Likelihood GstWebrtcVoiceDetectionLikelihood; 129+typedef int GstWebrtcVoiceDetectionLikelihood; 130 #define 
GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD \ 131 (gst_webrtc_voice_detection_likelihood_get_type ()) 132 static GType 133@@ -192,10 +189,10 @@ gst_webrtc_voice_detection_likelihood_get_type (void) 134 { 135 static GType likelihood_type = 0; 136 static const GEnumValue likelihood_types[] = { 137- {webrtc::VoiceDetection::kVeryLowLikelihood, "Very Low Likelihood", "very-low"}, 138- {webrtc::VoiceDetection::kLowLikelihood, "Low Likelihood", "low"}, 139- {webrtc::VoiceDetection::kModerateLikelihood, "Moderate Likelihood", "moderate"}, 140- {webrtc::VoiceDetection::kHighLikelihood, "High Likelihood", "high"}, 141+ {1, "Very Low Likelihood", "very-low"}, 142+ {2, "Low Likelihood", "low"}, 143+ {3, "Moderate Likelihood", "moderate"}, 144+ {4, "High Likelihood", "high"}, 145 {0, NULL, NULL} 146 }; 147 148@@ -227,6 +224,7 @@ enum 149 PROP_VOICE_DETECTION, 150 PROP_VOICE_DETECTION_FRAME_SIZE_MS, 151 PROP_VOICE_DETECTION_LIKELIHOOD, 152+ PROP_EXTRA_DELAY_MS, 153 }; 154 155 /** 156@@ -248,7 +246,7 @@ struct _GstWebrtcDsp 157 /* Protected by the stream lock */ 158 GstAdapter *adapter; 159 GstPlanarAudioAdapter *padapter; 160- webrtc::AudioProcessing * apm; 161+ webrtc::AudioProcessing *apm; 162 163 /* Protected by the object lock */ 164 gchar *probe_name; 165@@ -257,21 +255,15 @@ struct _GstWebrtcDsp 166 /* Properties */ 167 gboolean high_pass_filter; 168 gboolean echo_cancel; 169- webrtc::EchoCancellation::SuppressionLevel echo_suppression_level; 170 gboolean noise_suppression; 171- webrtc::NoiseSuppression::Level noise_suppression_level; 172+ webrtc::AudioProcessing::Config::NoiseSuppression::Level noise_suppression_level; 173 gboolean gain_control; 174- gboolean experimental_agc; 175- gboolean extended_filter; 176- gboolean delay_agnostic; 177 gint target_level_dbfs; 178 gint compression_gain_db; 179 gint startup_min_volume; 180 gboolean limiter; 181- webrtc::GainControl::Mode gain_control_mode; 182+ webrtc::AudioProcessing::Config::GainController1::Mode gain_control_mode; 
183 gboolean voice_detection; 184- gint voice_detection_frame_size_ms; 185- webrtc::VoiceDetection::Likelihood voice_detection_likelihood; 186 }; 187 188 G_DEFINE_TYPE_WITH_CODE (GstWebrtcDsp, gst_webrtc_dsp, GST_TYPE_AUDIO_FILTER, 189@@ -376,9 +368,9 @@ gst_webrtc_dsp_analyze_reverse_stream (GstWebrtcDsp * self, 190 GstClockTime rec_time) 191 { 192 GstWebrtcEchoProbe *probe = NULL; 193- webrtc::AudioProcessing * apm; 194- webrtc::AudioFrame frame; 195+ webrtc::AudioProcessing *apm; 196 GstBuffer *buf = NULL; 197+ GstAudioBuffer abuf; 198 GstFlowReturn ret = GST_FLOW_OK; 199 gint err, delay; 200 201@@ -391,48 +383,44 @@ gst_webrtc_dsp_analyze_reverse_stream (GstWebrtcDsp * self, 202 if (!probe) 203 return GST_FLOW_OK; 204 205+ webrtc::StreamConfig config (probe->info.rate, probe->info.channels, 206+ false); 207 apm = self->apm; 208 209- if (self->delay_agnostic) 210- rec_time = GST_CLOCK_TIME_NONE; 211- 212-again: 213- delay = gst_webrtc_echo_probe_read (probe, rec_time, (gpointer) &frame, &buf); 214+ delay = gst_webrtc_echo_probe_read (probe, rec_time, &buf); 215 apm->set_stream_delay_ms (delay); 216 217+ g_return_val_if_fail (buf != NULL, GST_FLOW_ERROR); 218+ 219 if (delay < 0) 220 goto done; 221 222- if (frame.sample_rate_hz_ != self->info.rate) { 223+ if (probe->info.rate != self->info.rate) { 224 GST_ELEMENT_ERROR (self, STREAM, FORMAT, 225 ("Echo Probe has rate %i , while the DSP is running at rate %i," 226 " use a caps filter to ensure those are the same.", 227- frame.sample_rate_hz_, self->info.rate), (NULL)); 228+ probe->info.rate, self->info.rate), (NULL)); 229 ret = GST_FLOW_ERROR; 230 goto done; 231 } 232 233- if (buf) { 234- webrtc::StreamConfig config (frame.sample_rate_hz_, frame.num_channels_, 235- false); 236- GstAudioBuffer abuf; 237- float * const * data; 238+ gst_audio_buffer_map (&abuf, &self->info, buf, GST_MAP_READWRITE); 239+ 240+ if (probe->interleaved) { 241+ int16_t * const data = (int16_t * const) abuf.planes[0]; 242 243- 
gst_audio_buffer_map (&abuf, &self->info, buf, GST_MAP_READWRITE); 244- data = (float * const *) abuf.planes; 245 if ((err = apm->ProcessReverseStream (data, config, config, data)) < 0) 246 GST_WARNING_OBJECT (self, "Reverse stream analyses failed: %s.", 247 webrtc_error_to_string (err)); 248- gst_audio_buffer_unmap (&abuf); 249- gst_buffer_replace (&buf, NULL); 250 } else { 251- if ((err = apm->AnalyzeReverseStream (&frame)) < 0) 252+ float * const * data = (float * const *) abuf.planes; 253+ 254+ if ((err = apm->ProcessReverseStream (data, config, config, data)) < 0) 255 GST_WARNING_OBJECT (self, "Reverse stream analyses failed: %s.", 256 webrtc_error_to_string (err)); 257 } 258 259- if (self->delay_agnostic) 260- goto again; 261+ gst_audio_buffer_unmap (&abuf); 262 263 done: 264 gst_object_unref (probe); 265@@ -443,16 +431,14 @@ done: 266 267 static void 268 gst_webrtc_vad_post_activity (GstWebrtcDsp *self, GstBuffer *buffer, 269- gboolean stream_has_voice) 270+ gboolean stream_has_voice, guint8 level) 271 { 272 GstClockTime timestamp = GST_BUFFER_PTS (buffer); 273 GstBaseTransform *trans = GST_BASE_TRANSFORM_CAST (self); 274 GstStructure *s; 275 GstClockTime stream_time; 276 GstAudioLevelMeta *meta; 277- guint8 level; 278 279- level = self->apm->level_estimator ()->RMS (); 280 meta = gst_buffer_get_audio_level_meta (buffer); 281 if (meta) { 282 meta->voice_activity = stream_has_voice; 283@@ -481,6 +467,7 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self, 284 { 285 GstAudioBuffer abuf; 286 webrtc::AudioProcessing * apm = self->apm; 287+ webrtc::StreamConfig config (self->info.rate, self->info.channels, false); 288 gint err; 289 290 if (!gst_audio_buffer_map (&abuf, &self->info, buffer, 291@@ -490,19 +477,10 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self, 292 } 293 294 if (self->interleaved) { 295- webrtc::AudioFrame frame; 296- frame.num_channels_ = self->info.channels; 297- frame.sample_rate_hz_ = self->info.rate; 298- frame.samples_per_channel_ = 
self->period_samples; 299- 300- memcpy (frame.data_, abuf.planes[0], self->period_size); 301- err = apm->ProcessStream (&frame); 302- if (err >= 0) 303- memcpy (abuf.planes[0], frame.data_, self->period_size); 304+ int16_t * const data = (int16_t * const) abuf.planes[0]; 305+ err = apm->ProcessStream (data, config, config, data); 306 } else { 307 float * const * data = (float * const *) abuf.planes; 308- webrtc::StreamConfig config (self->info.rate, self->info.channels, false); 309- 310 err = apm->ProcessStream (data, config, config, data); 311 } 312 313@@ -511,10 +489,13 @@ gst_webrtc_dsp_process_stream (GstWebrtcDsp * self, 314 webrtc_error_to_string (err)); 315 } else { 316 if (self->voice_detection) { 317- gboolean stream_has_voice = apm->voice_detection ()->stream_has_voice (); 318+ webrtc::AudioProcessingStats stats = apm->GetStatistics (); 319+ gboolean stream_has_voice = stats.voice_detected && *stats.voice_detected; 320+ // The meta takes the value as -dbov, so we negate 321+ guint8 level = stats.output_rms_dbfs ? (guint8) -(*stats.output_rms_dbfs) : 127; 322 323 if (stream_has_voice != self->stream_has_voice) 324- gst_webrtc_vad_post_activity (self, buffer, stream_has_voice); 325+ gst_webrtc_vad_post_activity (self, buffer, stream_has_voice, level); 326 327 self->stream_has_voice = stream_has_voice; 328 } 329@@ -583,21 +564,9 @@ static gboolean 330 gst_webrtc_dsp_start (GstBaseTransform * btrans) 331 { 332 GstWebrtcDsp *self = GST_WEBRTC_DSP (btrans); 333- webrtc::Config config; 334 335 GST_OBJECT_LOCK (self); 336 337- config.Set < webrtc::ExtendedFilter > 338- (new webrtc::ExtendedFilter (self->extended_filter)); 339- config.Set < webrtc::ExperimentalAgc > 340- (new webrtc::ExperimentalAgc (self->experimental_agc, self->startup_min_volume)); 341- config.Set < webrtc::DelayAgnostic > 342- (new webrtc::DelayAgnostic (self->delay_agnostic)); 343- 344- /* TODO Intelligibility enhancer, Beamforming, etc. 
*/ 345- 346- self->apm = webrtc::AudioProcessing::Create (config); 347- 348 if (self->echo_cancel) { 349 self->probe = gst_webrtc_acquire_echo_probe (self->probe_name); 350 351@@ -618,10 +587,8 @@ static gboolean 352 gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) 353 { 354 GstWebrtcDsp *self = GST_WEBRTC_DSP (filter); 355- webrtc::AudioProcessing * apm; 356- webrtc::ProcessingConfig pconfig; 357+ webrtc::AudioProcessing::Config config; 358 GstAudioInfo probe_info = *info; 359- gint err = 0; 360 361 GST_LOG_OBJECT (self, "setting format to %s with %i Hz and %i channels", 362 info->finfo->description, info->rate, info->channels); 363@@ -633,7 +600,7 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) 364 365 self->info = *info; 366 self->interleaved = (info->layout == GST_AUDIO_LAYOUT_INTERLEAVED); 367- apm = self->apm; 368+ self->apm = webrtc::AudioProcessingBuilder().Create(); 369 370 if (!self->interleaved) 371 gst_planar_audio_adapter_configure (self->padapter, info); 372@@ -642,8 +609,7 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) 373 self->period_samples = info->rate / 100; 374 self->period_size = self->period_samples * info->bpf; 375 376- if (self->interleaved && 377- (webrtc::AudioFrame::kMaxDataSizeSamples * 2) < self->period_size) 378+ if (self->interleaved && (self->period_size > MAX_DATA_SIZE_SAMPLES * 2)) 379 goto period_too_big; 380 381 if (self->probe) { 382@@ -658,40 +624,31 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) 383 GST_WEBRTC_ECHO_PROBE_UNLOCK (self->probe); 384 } 385 386- /* input stream */ 387- pconfig.streams[webrtc::ProcessingConfig::kInputStream] = 388- webrtc::StreamConfig (info->rate, info->channels, false); 389- /* output stream */ 390- pconfig.streams[webrtc::ProcessingConfig::kOutputStream] = 391- webrtc::StreamConfig (info->rate, info->channels, false); 392- /* reverse input stream */ 393- 
pconfig.streams[webrtc::ProcessingConfig::kReverseInputStream] = 394- webrtc::StreamConfig (probe_info.rate, probe_info.channels, false); 395- /* reverse output stream */ 396- pconfig.streams[webrtc::ProcessingConfig::kReverseOutputStream] = 397- webrtc::StreamConfig (probe_info.rate, probe_info.channels, false); 398- 399- if ((err = apm->Initialize (pconfig)) < 0) 400- goto initialize_failed; 401- 402 /* Setup Filters */ 403+ // TODO: expose pre_amplifier 404+ 405 if (self->high_pass_filter) { 406 GST_DEBUG_OBJECT (self, "Enabling High Pass filter"); 407- apm->high_pass_filter ()->Enable (true); 408+ config.high_pass_filter.enabled = true; 409 } 410 411 if (self->echo_cancel) { 412 GST_DEBUG_OBJECT (self, "Enabling Echo Cancellation"); 413- apm->echo_cancellation ()->enable_drift_compensation (false); 414- apm->echo_cancellation () 415- ->set_suppression_level (self->echo_suppression_level); 416- apm->echo_cancellation ()->Enable (true); 417+ config.echo_canceller.enabled = true; 418 } 419 420 if (self->noise_suppression) { 421 GST_DEBUG_OBJECT (self, "Enabling Noise Suppression"); 422- apm->noise_suppression ()->set_level (self->noise_suppression_level); 423- apm->noise_suppression ()->Enable (true); 424+ config.noise_suppression.enabled = true; 425+ config.noise_suppression.level = self->noise_suppression_level; 426+ } 427+ 428+ // TODO: expose transient suppression 429+ 430+ if (self->voice_detection) { 431+ GST_DEBUG_OBJECT (self, "Enabling Voice Activity Detection"); 432+ config.voice_detection.enabled = true; 433+ self->stream_has_voice = FALSE; 434 } 435 436 if (self->gain_control) { 437@@ -706,30 +663,17 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) 438 439 g_type_class_unref (mode_class); 440 441- apm->gain_control ()->set_mode (self->gain_control_mode); 442- apm->gain_control ()->set_target_level_dbfs (self->target_level_dbfs); 443- apm->gain_control ()->set_compression_gain_db (self->compression_gain_db); 444- 
apm->gain_control ()->enable_limiter (self->limiter); 445- apm->gain_control ()->Enable (true); 446+ config.gain_controller1.enabled = true; 447+ config.gain_controller1.target_level_dbfs = self->target_level_dbfs; 448+ config.gain_controller1.compression_gain_db = self->compression_gain_db; 449+ config.gain_controller1.enable_limiter = self->limiter; 450+ config.level_estimation.enabled = true; 451 } 452 453- if (self->voice_detection) { 454- GEnumClass *likelihood_class = (GEnumClass *) 455- g_type_class_ref (GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD); 456- GST_DEBUG_OBJECT (self, "Enabling Voice Activity Detection, frame size " 457- "%d milliseconds, likelihood: %s", self->voice_detection_frame_size_ms, 458- g_enum_get_value (likelihood_class, 459- self->voice_detection_likelihood)->value_name); 460- g_type_class_unref (likelihood_class); 461+ // TODO: expose gain controller 2 462+ // TODO: expose residual echo detector 463 464- self->stream_has_voice = FALSE; 465- 466- apm->voice_detection ()->Enable (true); 467- apm->voice_detection ()->set_likelihood (self->voice_detection_likelihood); 468- apm->voice_detection ()->set_frame_size_ms ( 469- self->voice_detection_frame_size_ms); 470- apm->level_estimator ()->Enable (true); 471- } 472+ self->apm->ApplyConfig (config); 473 474 GST_OBJECT_UNLOCK (self); 475 476@@ -738,9 +682,9 @@ gst_webrtc_dsp_setup (GstAudioFilter * filter, const GstAudioInfo * info) 477 period_too_big: 478 GST_OBJECT_UNLOCK (self); 479 GST_WARNING_OBJECT (self, "webrtcdsp format produce too big period " 480- "(maximum is %" G_GSIZE_FORMAT " samples and we have %u samples), " 481+ "(maximum is %d samples and we have %u samples), " 482 "reduce the number of channels or the rate.", 483- webrtc::AudioFrame::kMaxDataSizeSamples, self->period_size / 2); 484+ MAX_DATA_SIZE_SAMPLES, self->period_size / 2); 485 return FALSE; 486 487 probe_has_wrong_rate: 488@@ -751,14 +695,6 @@ probe_has_wrong_rate: 489 " use a caps filter to ensure those are the 
same.", 490 probe_info.rate, info->rate), (NULL)); 491 return FALSE; 492- 493-initialize_failed: 494- GST_OBJECT_UNLOCK (self); 495- GST_ELEMENT_ERROR (self, LIBRARY, INIT, 496- ("Failed to initialize WebRTC Audio Processing library"), 497- ("webrtc::AudioProcessing::Initialize() failed: %s", 498- webrtc_error_to_string (err))); 499- return FALSE; 500 } 501 502 static gboolean 503@@ -803,8 +739,6 @@ gst_webrtc_dsp_set_property (GObject * object, 504 self->echo_cancel = g_value_get_boolean (value); 505 break; 506 case PROP_ECHO_SUPPRESSION_LEVEL: 507- self->echo_suppression_level = 508- (GstWebrtcEchoSuppressionLevel) g_value_get_enum (value); 509 break; 510 case PROP_NOISE_SUPPRESSION: 511 self->noise_suppression = g_value_get_boolean (value); 512@@ -817,13 +751,10 @@ gst_webrtc_dsp_set_property (GObject * object, 513 self->gain_control = g_value_get_boolean (value); 514 break; 515 case PROP_EXPERIMENTAL_AGC: 516- self->experimental_agc = g_value_get_boolean (value); 517 break; 518 case PROP_EXTENDED_FILTER: 519- self->extended_filter = g_value_get_boolean (value); 520 break; 521 case PROP_DELAY_AGNOSTIC: 522- self->delay_agnostic = g_value_get_boolean (value); 523 break; 524 case PROP_TARGET_LEVEL_DBFS: 525 self->target_level_dbfs = g_value_get_int (value); 526@@ -845,11 +776,8 @@ gst_webrtc_dsp_set_property (GObject * object, 527 self->voice_detection = g_value_get_boolean (value); 528 break; 529 case PROP_VOICE_DETECTION_FRAME_SIZE_MS: 530- self->voice_detection_frame_size_ms = g_value_get_int (value); 531 break; 532 case PROP_VOICE_DETECTION_LIKELIHOOD: 533- self->voice_detection_likelihood = 534- (GstWebrtcVoiceDetectionLikelihood) g_value_get_enum (value); 535 break; 536 default: 537 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); 538@@ -876,7 +804,7 @@ gst_webrtc_dsp_get_property (GObject * object, 539 g_value_set_boolean (value, self->echo_cancel); 540 break; 541 case PROP_ECHO_SUPPRESSION_LEVEL: 542- g_value_set_enum (value, 
self->echo_suppression_level); 543+ g_value_set_enum (value, (GstWebrtcEchoSuppressionLevel) 2); 544 break; 545 case PROP_NOISE_SUPPRESSION: 546 g_value_set_boolean (value, self->noise_suppression); 547@@ -888,13 +816,13 @@ gst_webrtc_dsp_get_property (GObject * object, 548 g_value_set_boolean (value, self->gain_control); 549 break; 550 case PROP_EXPERIMENTAL_AGC: 551- g_value_set_boolean (value, self->experimental_agc); 552+ g_value_set_boolean (value, false); 553 break; 554 case PROP_EXTENDED_FILTER: 555- g_value_set_boolean (value, self->extended_filter); 556+ g_value_set_boolean (value, false); 557 break; 558 case PROP_DELAY_AGNOSTIC: 559- g_value_set_boolean (value, self->delay_agnostic); 560+ g_value_set_boolean (value, false); 561 break; 562 case PROP_TARGET_LEVEL_DBFS: 563 g_value_set_int (value, self->target_level_dbfs); 564@@ -915,10 +843,10 @@ gst_webrtc_dsp_get_property (GObject * object, 565 g_value_set_boolean (value, self->voice_detection); 566 break; 567 case PROP_VOICE_DETECTION_FRAME_SIZE_MS: 568- g_value_set_int (value, self->voice_detection_frame_size_ms); 569+ g_value_set_int (value, 0); 570 break; 571 case PROP_VOICE_DETECTION_LIKELIHOOD: 572- g_value_set_enum (value, self->voice_detection_likelihood); 573+ g_value_set_enum (value, 2); 574 break; 575 default: 576 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); 577@@ -1005,13 +933,13 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass) 578 579 g_object_class_install_property (gobject_class, 580 PROP_ECHO_SUPPRESSION_LEVEL, 581- g_param_spec_enum ("echo-suppression-level", "Echo Suppression Level", 582+ g_param_spec_enum ("echo-suppression-level", 583+ "Echo Suppression Level (does nothing)", 584 "Controls the aggressiveness of the suppressor. 
A higher level " 585 "trades off double-talk performance for increased echo suppression.", 586- GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL, 587- webrtc::EchoCancellation::kModerateSuppression, 588+ GST_TYPE_WEBRTC_ECHO_SUPPRESSION_LEVEL, 2, 589 (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | 590- G_PARAM_CONSTRUCT))); 591+ G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); 592 593 g_object_class_install_property (gobject_class, 594 PROP_NOISE_SUPPRESSION, 595@@ -1026,7 +954,7 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass) 596 "Controls the aggressiveness of the suppression. Increasing the " 597 "level will reduce the noise level at the expense of a higher " 598 "speech distortion.", GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL, 599- webrtc::EchoCancellation::kModerateSuppression, 600+ webrtc::AudioProcessing::Config::NoiseSuppression::Level::kModerate, 601 (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | 602 G_PARAM_CONSTRUCT))); 603 604@@ -1039,24 +967,26 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass) 605 606 g_object_class_install_property (gobject_class, 607 PROP_EXPERIMENTAL_AGC, 608- g_param_spec_boolean ("experimental-agc", "Experimental AGC", 609+ g_param_spec_boolean ("experimental-agc", 610+ "Experimental AGC (does nothing)", 611 "Enable or disable experimental automatic gain control.", 612 FALSE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | 613- G_PARAM_CONSTRUCT))); 614+ G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); 615 616 g_object_class_install_property (gobject_class, 617 PROP_EXTENDED_FILTER, 618 g_param_spec_boolean ("extended-filter", "Extended Filter", 619 "Enable or disable the extended filter.", 620 TRUE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | 621- G_PARAM_CONSTRUCT))); 622+ G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); 623 624 g_object_class_install_property (gobject_class, 625 PROP_DELAY_AGNOSTIC, 626- g_param_spec_boolean ("delay-agnostic", "Delay Agnostic", 627+ g_param_spec_boolean 
("delay-agnostic", 628+ "Delay agnostic mode (does nothing)", 629 "Enable or disable the delay agnostic mode.", 630 FALSE, (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | 631- G_PARAM_CONSTRUCT))); 632+ G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); 633 634 g_object_class_install_property (gobject_class, 635 PROP_TARGET_LEVEL_DBFS, 636@@ -1111,24 +1041,23 @@ gst_webrtc_dsp_class_init (GstWebrtcDspClass * klass) 637 g_object_class_install_property (gobject_class, 638 PROP_VOICE_DETECTION_FRAME_SIZE_MS, 639 g_param_spec_int ("voice-detection-frame-size-ms", 640- "Voice Detection Frame Size Milliseconds", 641+ "Voice detection frame size in milliseconds (does nothing)", 642 "Sets the |size| of the frames in ms on which the VAD will operate. " 643 "Larger frames will improve detection accuracy, but reduce the " 644 "frequency of updates", 645 10, 30, DEFAULT_VOICE_DETECTION_FRAME_SIZE_MS, 646 (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | 647- G_PARAM_CONSTRUCT))); 648+ G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); 649 650 g_object_class_install_property (gobject_class, 651 PROP_VOICE_DETECTION_LIKELIHOOD, 652 g_param_spec_enum ("voice-detection-likelihood", 653- "Voice Detection Likelihood", 654+ "Voice detection likelihood (does nothing)", 655 "Specifies the likelihood that a frame will be declared to contain " 656 "voice.", 657- GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD, 658- DEFAULT_VOICE_DETECTION_LIKELIHOOD, 659+ GST_TYPE_WEBRTC_VOICE_DETECTION_LIKELIHOOD, 2, 660 (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | 661- G_PARAM_CONSTRUCT))); 662+ G_PARAM_CONSTRUCT | G_PARAM_DEPRECATED))); 663 664 gst_type_mark_as_plugin_api (GST_TYPE_WEBRTC_GAIN_CONTROL_MODE, (GstPluginAPIFlags) 0); 665 gst_type_mark_as_plugin_api (GST_TYPE_WEBRTC_NOISE_SUPPRESSION_LEVEL, (GstPluginAPIFlags) 0); 666diff --git a/ext/webrtcdsp/gstwebrtcechoprobe.cpp b/ext/webrtcdsp/gstwebrtcechoprobe.cpp 667index acdb3d8a7d..8e8ca064c4 100644 668--- 
a/ext/webrtcdsp/gstwebrtcechoprobe.cpp 669+++ b/ext/webrtcdsp/gstwebrtcechoprobe.cpp 670@@ -33,7 +33,8 @@ 671 672 #include "gstwebrtcechoprobe.h" 673 674-#include <webrtc/modules/interface/module_common_types.h> 675+#include <modules/audio_processing/include/audio_processing.h> 676+ 677 #include <gst/audio/audio.h> 678 679 GST_DEBUG_CATEGORY_EXTERN (webrtc_dsp_debug); 680@@ -102,7 +103,7 @@ gst_webrtc_echo_probe_setup (GstAudioFilter * filter, const GstAudioInfo * info) 681 self->period_size = self->period_samples * info->bpf; 682 683 if (self->interleaved && 684- (webrtc::AudioFrame::kMaxDataSizeSamples * 2) < self->period_size) 685+ (MAX_DATA_SIZE_SAMPLES * 2) < self->period_size) 686 goto period_too_big; 687 688 GST_WEBRTC_ECHO_PROBE_UNLOCK (self); 689@@ -112,9 +113,9 @@ gst_webrtc_echo_probe_setup (GstAudioFilter * filter, const GstAudioInfo * info) 690 period_too_big: 691 GST_WEBRTC_ECHO_PROBE_UNLOCK (self); 692 GST_WARNING_OBJECT (self, "webrtcdsp format produce too big period " 693- "(maximum is %" G_GSIZE_FORMAT " samples and we have %u samples), " 694+ "(maximum is %d samples and we have %u samples), " 695 "reduce the number of channels or the rate.", 696- webrtc::AudioFrame::kMaxDataSizeSamples, self->period_size / 2); 697+ MAX_DATA_SIZE_SAMPLES, self->period_size / 2); 698 return FALSE; 699 } 700 701@@ -303,18 +304,20 @@ gst_webrtc_release_echo_probe (GstWebrtcEchoProbe * probe) 702 703 gint 704 gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time, 705- gpointer _frame, GstBuffer ** buf) 706+ GstBuffer ** buf) 707 { 708- webrtc::AudioFrame * frame = (webrtc::AudioFrame *) _frame; 709 GstClockTimeDiff diff; 710- gsize avail, skip, offset, size; 711+ gsize avail, skip, offset, size = 0; 712 gint delay = -1; 713 714 GST_WEBRTC_ECHO_PROBE_LOCK (self); 715 716+ /* We always return a buffer -- if don't have data (size == 0), we generate a 717+ * silence buffer */ 718+ 719 if (!GST_CLOCK_TIME_IS_VALID (self->latency) || 720 
!GST_AUDIO_INFO_IS_VALID (&self->info)) 721- goto done; 722+ goto copy; 723 724 if (self->interleaved) 725 avail = gst_adapter_available (self->adapter) / self->info.bpf; 726@@ -324,7 +327,7 @@ gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time, 727 /* In delay agnostic mode, just return 10ms of data */ 728 if (!GST_CLOCK_TIME_IS_VALID (rec_time)) { 729 if (avail < self->period_samples) 730- goto done; 731+ goto copy; 732 733 size = self->period_samples; 734 skip = 0; 735@@ -371,23 +374,51 @@ gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, GstClockTime rec_time, 736 size = MIN (avail - offset, self->period_samples - skip); 737 738 copy: 739- if (self->interleaved) { 740- skip *= self->info.bpf; 741- offset *= self->info.bpf; 742- size *= self->info.bpf; 743- 744- if (size < self->period_size) 745- memset (frame->data_, 0, self->period_size); 746- 747- if (size) { 748- gst_adapter_copy (self->adapter, (guint8 *) frame->data_ + skip, 749- offset, size); 750- gst_adapter_flush (self->adapter, offset + size); 751- } 752+ if (!size) { 753+ /* No data, provide a period's worth of silence */ 754+ *buf = gst_buffer_new_allocate (NULL, self->period_size, NULL); 755+ gst_buffer_memset (*buf, 0, 0, self->period_size); 756+ gst_buffer_add_audio_meta (*buf, &self->info, self->period_samples, 757+ NULL); 758 } else { 759+ /* We have some actual data, pop period_samples' worth if have it, else pad 760+ * with silence and provide what we do have */ 761 GstBuffer *ret, *taken, *tmp; 762 763- if (size) { 764+ if (self->interleaved) { 765+ skip *= self->info.bpf; 766+ offset *= self->info.bpf; 767+ size *= self->info.bpf; 768+ 769+ gst_adapter_flush (self->adapter, offset); 770+ 771+ /* we need to fill silence at the beginning and/or the end of the 772+ * buffer in order to have period_samples in the buffer */ 773+ if (size < self->period_size) { 774+ gsize padding = self->period_size - (skip + size); 775+ 776+ taken = gst_adapter_take_buffer 
(self->adapter, size); 777+ ret = gst_buffer_new (); 778+ 779+ /* need some silence at the beginning */ 780+ if (skip) { 781+ tmp = gst_buffer_new_allocate (NULL, skip, NULL); 782+ gst_buffer_memset (tmp, 0, 0, skip); 783+ ret = gst_buffer_append (ret, tmp); 784+ } 785+ 786+ ret = gst_buffer_append (ret, taken); 787+ 788+ /* need some silence at the end */ 789+ if (padding) { 790+ tmp = gst_buffer_new_allocate (NULL, padding, NULL); 791+ gst_buffer_memset (tmp, 0, 0, padding); 792+ ret = gst_buffer_append (ret, tmp); 793+ } 794+ } else { 795+ ret = gst_adapter_take_buffer (self->adapter, size); 796+ } 797+ } else { 798 gst_planar_audio_adapter_flush (self->padapter, offset); 799 800 /* we need to fill silence at the beginning and/or the end of each 801@@ -430,23 +461,13 @@ copy: 802 ret = gst_planar_audio_adapter_take_buffer (self->padapter, size, 803 GST_MAP_READWRITE); 804 } 805- } else { 806- ret = gst_buffer_new_allocate (NULL, self->period_size, NULL); 807- gst_buffer_memset (ret, 0, 0, self->period_size); 808- gst_buffer_add_audio_meta (ret, &self->info, self->period_samples, 809- NULL); 810 } 811 812 *buf = ret; 813 } 814 815- frame->num_channels_ = self->info.channels; 816- frame->sample_rate_hz_ = self->info.rate; 817- frame->samples_per_channel_ = self->period_samples; 818- 819 delay = self->delay; 820 821-done: 822 GST_WEBRTC_ECHO_PROBE_UNLOCK (self); 823 824 return delay; 825diff --git a/ext/webrtcdsp/gstwebrtcechoprobe.h b/ext/webrtcdsp/gstwebrtcechoprobe.h 826index 36fd34f179..488c0e958f 100644 827--- a/ext/webrtcdsp/gstwebrtcechoprobe.h 828+++ b/ext/webrtcdsp/gstwebrtcechoprobe.h 829@@ -45,6 +45,12 @@ G_BEGIN_DECLS 830 #define GST_WEBRTC_ECHO_PROBE_LOCK(obj) g_mutex_lock (&GST_WEBRTC_ECHO_PROBE (obj)->lock) 831 #define GST_WEBRTC_ECHO_PROBE_UNLOCK(obj) g_mutex_unlock (&GST_WEBRTC_ECHO_PROBE (obj)->lock) 832 833+/* From the webrtc audio_frame.h definition of kMaxDataSizeSamples: 834+ * Stereo, 32 kHz, 120 ms (2 * 32 * 120) 835+ * Stereo, 192 kHz, 20 
ms (2 * 192 * 20) 836+ */ 837+#define MAX_DATA_SIZE_SAMPLES 7680 838+ 839 typedef struct _GstWebrtcEchoProbe GstWebrtcEchoProbe; 840 typedef struct _GstWebrtcEchoProbeClass GstWebrtcEchoProbeClass; 841 842@@ -71,6 +77,7 @@ struct _GstWebrtcEchoProbe 843 GstClockTime latency; 844 gint delay; 845 gboolean interleaved; 846+ gint extra_delay; 847 848 GstSegment segment; 849 GstAdapter *adapter; 850@@ -92,7 +99,7 @@ GST_ELEMENT_REGISTER_DECLARE (webrtcechoprobe); 851 GstWebrtcEchoProbe *gst_webrtc_acquire_echo_probe (const gchar * name); 852 void gst_webrtc_release_echo_probe (GstWebrtcEchoProbe * probe); 853 gint gst_webrtc_echo_probe_read (GstWebrtcEchoProbe * self, 854- GstClockTime rec_time, gpointer frame, GstBuffer ** buf); 855+ GstClockTime rec_time, GstBuffer ** buf); 856 857 G_END_DECLS 858 #endif /* __GST_WEBRTC_ECHO_PROBE_H__ */ 859diff --git a/ext/webrtcdsp/meson.build b/ext/webrtcdsp/meson.build 860index 5aeae69a44..09565e27c7 100644 861--- a/ext/webrtcdsp/meson.build 862+++ b/ext/webrtcdsp/meson.build 863@@ -4,7 +4,7 @@ webrtc_sources = [ 864 'gstwebrtcdspplugin.cpp' 865 ] 866 867-webrtc_dep = dependency('webrtc-audio-processing', version : ['>= 0.2', '< 0.4'], 868+webrtc_dep = dependency('webrtc-audio-processing-1', version : ['>= 1.0'], 869 required : get_option('webrtcdsp')) 870 871 if not gnustl_dep.found() and get_option('webrtcdsp').enabled() 872@@ -20,7 +20,7 @@ if webrtc_dep.found() and gnustl_dep.found() 873 dependencies : [gstbase_dep, gstaudio_dep, gstbadaudio_dep, webrtc_dep, gnustl_dep], 874 install : true, 875 install_dir : plugins_install_dir, 876- override_options : ['cpp_std=c++11'], 877+ override_options : ['cpp_std=c++17'], 878 ) 879 plugins += [gstwebrtcdsp] 880 endif 881-- 8822.34.1 883 884