1 /*
2 * Copyright (C) 2015-2020 Alibaba Group Holding Limited
3 *
4 */
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 #include "uvoice_common.h"
12
13 #include "uvoice_os.h"
14 #include "uvoice_audio.h"
15
16 #include "audio_common.h"
17 #include "audio_stream.h"
18 #include "audio_vad.h"
19
20 #include "opensource/webrtc/common_audio/vad/include/webrtc_vad.h"
21
22
23 typedef struct {
24 short *in[2];
25 short *out[2];
26 int32_t *filter_state[4];
27 int rate;
28 int samples;
29 int band_samples;
30 int band_num;
31 VadInst *inst;
32 } webrtc_vad_t;
33
34
/*
 * Allocate the per-band sample buffers and, for two-band operation, the
 * filter-state buffers of a VAD instance.
 *
 * webrtc_vad: instance whose band_num and band_samples are already set.
 *
 * Returns 0 on success, or -1 on a NULL instance, an out-of-range band
 * count or an allocation failure. On failure, buffers allocated so far
 * stay attached to the instance; the caller releases them with
 * vad_buffer_free().
 */
static int vad_buffer_alloc(webrtc_vad_t *webrtc_vad)
{
    int max_bands;
    int max_states;
    int i;

    if (!webrtc_vad) {
        snd_err("webrtc_vad null !\n");
        return -1;
    }

    max_bands = (int)(sizeof(webrtc_vad->in) / sizeof(webrtc_vad->in[0]));
    max_states = (int)(sizeof(webrtc_vad->filter_state) /
                       sizeof(webrtc_vad->filter_state[0]));

    /* in[] and out[] have a fixed number of slots; never index past them. */
    if (webrtc_vad->band_num < 0 || webrtc_vad->band_num > max_bands) {
        snd_err("invalid band num %d !\n", webrtc_vad->band_num);
        return -1;
    }

    for (i = 0; i < webrtc_vad->band_num; i++) {
        webrtc_vad->in[i] = snd_zalloc(
            webrtc_vad->band_samples * sizeof(short), AFM_MAIN);
        /* Test the slot just allocated; the array itself is never NULL. */
        if (!webrtc_vad->in[i]) {
            snd_err("alloc in buffer failed !\n");
            return -1;
        }

        webrtc_vad->out[i] = snd_zalloc(
            webrtc_vad->band_samples * sizeof(short), AFM_MAIN);
        if (!webrtc_vad->out[i]) {
            snd_err("alloc out buffer failed !\n");
            return -1;
        }
    }

    /* Filter states are only needed when the input is split in two bands. */
    if (webrtc_vad->band_num == 2) {
        for (i = 0; i < max_states; i++) {
            webrtc_vad->filter_state[i] = snd_zalloc(
                6 * sizeof(int32_t), AFM_MAIN);
            if (!webrtc_vad->filter_state[i]) {
                snd_err("alloc filter state failed !\n");
                return -1;
            }
        }
    }

    return 0;
}
73
/*
 * Release every buffer attached to a VAD instance and reset the
 * corresponding pointers to NULL.
 *
 * All four filter-state slots are visited; in[] / out[] are visited for
 * the first band_num slots. Slots that are already NULL are skipped.
 *
 * Returns 0 on success, -1 if webrtc_vad is NULL.
 */
static int vad_buffer_free(webrtc_vad_t *webrtc_vad)
{
    int state_idx;
    int band;

    if (webrtc_vad == NULL) {
        snd_err("webrtc_vad null !\n");
        return -1;
    }

    /* Filter states first. */
    for (state_idx = 0; state_idx < 4; state_idx++) {
        if (webrtc_vad->filter_state[state_idx] == NULL)
            continue;

        snd_free(webrtc_vad->filter_state[state_idx]);
        webrtc_vad->filter_state[state_idx] = NULL;
    }

    /* Then the input / output buffer of each band, in that order. */
    band = 0;
    while (band < webrtc_vad->band_num) {
        if (webrtc_vad->in[band] != NULL) {
            snd_free(webrtc_vad->in[band]);
            webrtc_vad->in[band] = NULL;
        }

        if (webrtc_vad->out[band] != NULL) {
            snd_free(webrtc_vad->out[band]);
            webrtc_vad->out[band] = NULL;
        }

        band++;
    }

    return 0;
}
104
105 /*
106 * return value: 0 - noise, 1 - speech
107 */
vad_process(struct voice_active_detect * vad,const uint8_t * buffer,int nbytes)108 int vad_process(struct voice_active_detect *vad,
109 const uint8_t *buffer, int nbytes)
110 {
111 webrtc_vad_t *webrtc_vad;
112 int proc_size;
113 int ret = 0;
114 int i;
115
116 if (!vad) {
117 snd_err("vad null !\n");
118 return -1;
119 }
120
121 webrtc_vad = vad->handler;
122 if (!webrtc_vad) {
123 snd_err("webrtc_vad null !\n");
124 return -1;
125 }
126
127 proc_size = webrtc_vad->samples * sizeof(short);
128
129 if (nbytes % proc_size != 0) {
130 snd_err("invalid input size !\n");
131 return -1;
132 }
133
134 if (webrtc_vad->band_num == 1) {
135 for (i = 0; i < nbytes; i += proc_size) {
136 memcpy(webrtc_vad->in[0], buffer + i, proc_size);
137 ret = WebRtcVad_Process(webrtc_vad->inst,
138 webrtc_vad->rate,
139 webrtc_vad->in[0], webrtc_vad->samples);
140 }
141 } else if (webrtc_vad->band_num == 2) {
142 for (i = 0; i < nbytes; i += proc_size) {
143 WebRtcSpl_AnalysisQMF(buffer + i,
144 webrtc_vad->samples,
145 webrtc_vad->in[0],
146 webrtc_vad->in[1],
147 webrtc_vad->filter_state[0],
148 webrtc_vad->filter_state[1]);
149
150 ret = WebRtcVad_Process(webrtc_vad->inst,
151 webrtc_vad->rate,
152 webrtc_vad->in[0], webrtc_vad->samples);
153 }
154 }
155
156 return ret;
157 }
158
vad_create(struct voice_active_detect * vad,int rate,int samples,int mode)159 int vad_create(struct voice_active_detect *vad,
160 int rate, int samples, int mode)
161 {
162 webrtc_vad_t *webrtc_vad;
163 int ret;
164
165 if (!vad) {
166 snd_err("vad null !\n");
167 return -1;
168 }
169
170 if (samples % (rate / 100) != 0) {
171 snd_err("samples and rate not mulitple !\n");
172 return -1;
173 }
174
175 webrtc_vad = snd_zalloc(sizeof(webrtc_vad_t), AFM_EXTN);
176 if (!webrtc_vad) {
177 snd_err("alloc webrtc ns failed !\n");
178 return -1;
179 }
180
181 webrtc_vad->inst = WebRtcVad_Create();
182 if (!webrtc_vad->inst) {
183 snd_err("create webrtc vad failed !\n");
184 snd_free(webrtc_vad);
185 return -1;
186 }
187
188 ret = WebRtcVad_Init(webrtc_vad->inst);
189 if (ret) {
190 snd_err("init webrtc vad failed %d!\n", ret);
191 WebRtcVad_Free(webrtc_vad->inst);
192 snd_free(webrtc_vad);
193 return -1;
194 }
195
196 webrtc_vad->samples = samples;
197 if (rate == 48000 || rate == 32000) {
198 webrtc_vad->band_num = 2;
199 webrtc_vad->band_samples = webrtc_vad->samples / 2;
200 } else {
201 webrtc_vad->band_num = 1;
202 webrtc_vad->band_samples = webrtc_vad->samples;
203 }
204 webrtc_vad->band_num = 1;
205 webrtc_vad->band_samples = webrtc_vad->samples;
206 webrtc_vad->rate = rate;
207
208 ret = WebRtcVad_set_mode(webrtc_vad->inst, mode);
209 if (ret) {
210 snd_err("set webrtc vad mode failed %d!\n", ret);
211 WebRtcVad_Free(webrtc_vad->inst);
212 snd_free(webrtc_vad);
213 return -1;
214 }
215
216 if (vad_buffer_alloc(webrtc_vad)) {
217 snd_err("alloc vad buffer failed !\n");
218 vad_buffer_free(webrtc_vad);
219 WebRtcVad_Free(webrtc_vad->inst);
220 snd_free(webrtc_vad);
221 return -1;
222 }
223
224 snd_debug("vad create\n");
225 vad->handler = webrtc_vad;
226 return 0;
227 }
228
vad_release(struct voice_active_detect * vad)229 int vad_release(struct voice_active_detect *vad)
230 {
231 webrtc_vad_t *webrtc_vad;
232
233 if (!vad) {
234 snd_err("vad null !\n");
235 return -1;
236 }
237
238 webrtc_vad = vad->handler;
239 if (!webrtc_vad) {
240 snd_err("webrtc_vad null !\n");
241 return -1;
242 }
243
244 vad_buffer_free(webrtc_vad);
245
246 WebRtcVad_Free(webrtc_vad->inst);
247 snd_free(webrtc_vad);
248 vad->handler = NULL;
249
250 snd_debug("vad release\n");
251 return 0;
252 }
253
254