1 /*
2  * Copyright (C) 2015-2020 Alibaba Group Holding Limited
3  *
4  */
5 
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <stdint.h>
9 #include <string.h>
10 
11 #include "uvoice_types.h"
12 #include "uvoice_player.h"
13 #include "uvoice_tts.h"
14 
15 #include "../../../internal/uvoice_os.h"
16 #include "../../../internal/uvoice_common.h"
17 #include "../../../internal/uvoice_play.h"
18 
19 #include "alicloudtts_intf.h"
20 #include "alicloudtts.h"
21 #include "httpclient.h"
22 
23 #define ALIYUN_TTS_HTTP_URL "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/tts?"
24 
25 /* https://help.aliyun.com/document_detail/84435.html?spm=a2c4g.11186623.6.581.16a75275MJHPrH */
26 static const voice_spec_t voice_spec[] = {
27     { "xiaoyun",
28       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
29       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, 0 } },
30     { "xiaogang",
31       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
32       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, 0 } },
33     { "ruoxi",
34       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
35       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
36     { "xiaomeng",
37       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
38       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, 0 } },
39     { "xiaowei",
40       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
41       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, 0 } },
42     { "amei",
43       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
44       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, 0 } },
45     { "xiaoxue",
46       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
47       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, 0 } },
48     { "siqi",
49       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
50       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
51     { "sijia",
52       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
53       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
54     { "sicheng",
55       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
56       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
57     { "siyue",
58       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
59       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
60     { "xiaomei",
61       { VOICE_CHINESE, VOICE_CN_EN_MIX, VOICE_NULL },
62       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
63     { "sitong",
64       { VOICE_CHINESE, VOICE_NULL, VOICE_NULL },
65       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
66     { "ninger",
67       { VOICE_CHINESE, VOICE_NULL, VOICE_NULL },
68       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
69     { "xiaobei",
70       { VOICE_CHINESE, VOICE_NULL, VOICE_NULL },
71       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
72     { "yina",
73       { VOICE_CHINESE, VOICE_NULL, VOICE_NULL },
74       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
75     { "sijing",
76       { VOICE_CHINESE, VOICE_NULL, VOICE_NULL },
77       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
78     { "wendy",
79       { VOICE_ENGLISH, VOICE_NULL, VOICE_NULL },
80       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
81     { "william",
82       { VOICE_ENGLISH, VOICE_NULL, VOICE_NULL },
83       { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, TTS_ALIYUN_SAMPLE_RATE_24K } },
84     { "halen", { VOICE_ENGLISH, VOICE_NULL, VOICE_NULL }, { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, 0 } },
85     { "harry", { VOICE_ENGLISH, VOICE_NULL, VOICE_NULL }, { TTS_ALIYUN_SAMPLE_RATE_8K, TTS_ALIYUN_SAMPLE_RATE_16K, 0 } },
86 };
87 
88 static aliyun_tts_config_t g_tts_config;
89 static int tts_state = ALIYUN_TTS_STATE_NULL;
90 static char *tts_format[4] = { "null", "pcm", "wav", "mp3" };
91 
/*
 * Validate a TTS configuration and store it for later requests.
 *
 * Checks, in order: config/app_key/token/voice are non-NULL, app_key and
 * token fit the internal buffers, format is PCM/WAV/MP3, sample rate is
 * 8K/16K/24K, speech rate / pitch rate / volume are inside their
 * TTS_*_MIN..TTS_*_MAX ranges, and the requested voice is listed in
 * voice_spec with the requested sample rate.
 *
 * The stored configuration is only overwritten after every check passed,
 * so a failed call leaves a previous successful initialization intact.
 *
 * Returns 0 on success (state becomes ALIYUN_TTS_STATE_INITED), -1 otherwise.
 */
int uvoice_tts_aliyun_init(tts_config_t *config)
{
    const size_t voice_count = sizeof(voice_spec) / sizeof(voice_spec[0]);
    const size_t rate_slots = sizeof(voice_spec[0].sample_rate) / sizeof(voice_spec[0].sample_rate[0]);
    size_t i;
    size_t j;
    int rate_matched = 0;

    if (!config) {
        M_LOGE("config is null!");
        return -1;
    }

    if (!config->app_key) {
        M_LOGE("app_key is null!");
        return -1;
    }

    if (!config->token) {
        M_LOGE("token is null!");
        return -1;
    }

    /* voice is handed to strcmp() below, so it must not be NULL either. */
    if (!config->voice) {
        M_LOGE("voice is null!");
        return -1;
    }

    /* A silently truncated credential would only fail later at the server. */
    if (strlen(config->app_key) >= sizeof(g_tts_config.app_key)) {
        M_LOGE("app_key is too long!");
        return -1;
    }

    if (strlen(config->token) >= sizeof(g_tts_config.token)) {
        M_LOGE("token is too long!");
        return -1;
    }

    if ((config->format != MEDIA_FMT_PCM) && (config->format != MEDIA_FMT_WAV) && (config->format != MEDIA_FMT_MP3)) {
        M_LOGE("format %d is not supported !", config->format);
        return -1;
    }

    if ((config->sample_rate != TTS_ALIYUN_SAMPLE_RATE_8K) && (config->sample_rate != TTS_ALIYUN_SAMPLE_RATE_16K) &&
        (config->sample_rate != TTS_ALIYUN_SAMPLE_RATE_24K)) {
        M_LOGE("sample rate %d is not supported !", config->sample_rate);
        return -1;
    }

    if ((config->speech_rate < TTS_SPEECH_RATE_MIN) || (config->speech_rate > TTS_SPEECH_RATE_MAX)) {
        M_LOGE("speech rate %d is not supported !", config->speech_rate);
        return -1;
    }

    if ((config->pitch_rate < TTS_PITCH_RATE_MIN) || (config->pitch_rate > TTS_PITCH_RATE_MAX)) {
        M_LOGE("pitch rate %d is not supported !", config->pitch_rate);
        return -1;
    }

    if ((config->volume < TTS_VOLUME_MIN) || (config->volume > TTS_VOLUME_MAX)) {
        M_LOGE("volume %d is not supported !", config->volume);
        return -1;
    }

    /* The voice must exist in the table and list the requested sample rate. */
    for (i = 0; i < voice_count && !rate_matched; i++) {
        if (strcmp(voice_spec[i].voice_people, config->voice) != 0)
            continue;

        for (j = 0; j < rate_slots; j++) {
            /* 0 marks an unused slot in the per-voice rate list. */
            if ((voice_spec[i].sample_rate[j] != 0) && (voice_spec[i].sample_rate[j] == config->sample_rate)) {
                rate_matched = 1;
                break;
            }
        }
    }

    if (!rate_matched) {
        M_LOGE("sample rate %d not match %s !", config->sample_rate, config->voice);
        return -1;
    }

    memset(&g_tts_config, 0, sizeof(aliyun_tts_config_t));
    snprintf(g_tts_config.app_key, sizeof(g_tts_config.app_key), "%s", config->app_key);
    snprintf(g_tts_config.token, sizeof(g_tts_config.token), "%s", config->token);
    snprintf(g_tts_config.voice, sizeof(g_tts_config.voice), "%s", config->voice); /* voice name from voice_spec */
    g_tts_config.format = config->format;           /* MEDIA_FMT_PCM, MEDIA_FMT_WAV or MEDIA_FMT_MP3 */
    g_tts_config.sample_rate = config->sample_rate; /* one of the TTS_ALIYUN_SAMPLE_RATE_* values checked above */
    g_tts_config.volume = config->volume;           /* TTS_VOLUME_MIN ~ TTS_VOLUME_MAX */
    g_tts_config.speech_rate = config->speech_rate; /* TTS_SPEECH_RATE_MIN ~ TTS_SPEECH_RATE_MAX */
    g_tts_config.pitch_rate = config->pitch_rate;   /* TTS_PITCH_RATE_MIN ~ TTS_PITCH_RATE_MAX */
    g_tts_config.text_encode_type = config->text_encode_type;

    tts_state = ALIYUN_TTS_STATE_INITED;
    return 0;
}
175 
/*
 * Fetch req_url with an HTTP GET and pass every received block to
 * recv_cb->recv_data() together with a running block index (0, 1, 2, ...).
 *
 * req_url - complete request URL, must not be NULL.
 * recv_cb - callback set; recv_cb->recv_data must not be NULL (the caller
 *           checks this before calling).
 *
 * The function returns nothing; allocation, connect, send and receive
 * failures are logged and end the transfer early.
 */
static void http_download(char *req_url, tts_recv_callback_t *recv_cb)
{
    const size_t rsp_len = 2048;
    const size_t req_len = 1024 + strlen(req_url);
    char *rsp_buf = NULL;
    char *req_buf = NULL;
    httpclient_t client = { 0 };
    httpclient_data_t client_data = { 0 };
    int num = 0;
    int ret = 0;
    int idx = 0;

    /* calloc() zeroes the whole buffer, not just sizeof(pointer) bytes. */
    rsp_buf = calloc(1, rsp_len);
    req_buf = calloc(1, req_len);
    if (!rsp_buf || !req_buf) {
        M_LOGE("alloc http buffer fail !\n");
        goto exit_free;
    }

    client_data.header_buf = req_buf;
    client_data.header_buf_len = req_len;
    client_data.response_buf = rsp_buf;
    client_data.response_buf_len = rsp_len;

    ret = httpclient_conn(&client, req_url);
    if (ret) {
        M_LOGE("http connect failed %d !\n", ret);
        goto exit_close;
    }

    /*
     * NOTE(review): negative return values are treated as errors for send
     * and recv; a positive value from recv is taken to mean "more data".
     * Confirm against the HTTPC_RESULT definition of the httpclient in use.
     */
    ret = httpclient_send(&client, req_url, HTTP_GET, &client_data);
    if (ret < 0) {
        M_LOGE("http send failed %d !\n", ret);
        goto exit_close;
    }

    do {
        ret = httpclient_recv(&client, &client_data);
        M_LOGD("response_content_len=%d, retrieve_len=%d,content_block_len=%d\n", client_data.response_content_len,
               client_data.retrieve_len, client_data.content_block_len);
        M_LOGD("ismore = %d\n", client_data.is_more);

        /* Stop on error so stale buffer contents never reach the callback
         * and a stale is_more flag cannot keep the loop spinning. */
        if (ret < 0) {
            M_LOGE("http recv failed %d !\n", ret);
            break;
        }

        num = recv_cb->recv_data(client_data.response_buf, client_data.content_block_len, idx++);
        if (num > 0) {
            printf("aos_write num=%d\n", num);
        }
    } while (client_data.is_more);

exit_close:
    httpclient_clse(&client);

exit_free:
    free(rsp_buf);
    free(req_buf);
}
218 
219 //by: alicloud_tts_download_task()
uvoice_tts_aliyun_request(char * text,tts_recv_type_e recv_type,tts_recv_callback_t * recv_cb)220 int uvoice_tts_aliyun_request(char *text, tts_recv_type_e recv_type, tts_recv_callback_t *recv_cb)
221 {
222     int i = 0;
223     int ret = 0;
224     char *tts_text = NULL;
225     char *utf8_text = NULL;
226     int tts_text_len;
227     char *tts_get_url = NULL;
228 
229     if (tts_state != ALIYUN_TTS_STATE_INITED) {
230         M_LOGE("aliyun tts: not initialized\n");
231         return -1;
232     }
233 
234     if (!recv_cb) {
235         M_LOGE("aliyun tts: recv callback is null\n");
236         return -1;
237     }
238 
239     tts_text_len = strlen(text);
240     if (tts_text_len > UVOICE_TTS_MAX_TEXT_LEN) {
241         tts_text_len = UVOICE_TTS_MAX_TEXT_LEN;
242         if (recv_cb->event) {
243             recv_cb->event(TTS_WARNING, "text length too long, cut to " UVOICE_TTS_MAX_TEXT_LEN_STR);
244         }
245     }
246 
247     tts_text = snd_zalloc(tts_text_len * 3 + 1, AFM_EXTN);
248     if (!tts_text) {
249         M_LOGE("alloc tts text buffer fail !\n");
250         return -1;
251     }
252 
253     snprintf(tts_text, tts_text_len * 3 + 1, "%s", text);
254 #ifdef UVOICE_TTS_SUPPORT_GBK_ENCODE
255     if (g_tts_config.text_encode_type == TTS_ENCODE_GBK) {
256         utf8_text = snd_zalloc(tts_text_len * 2 * 3 + 1, AFM_EXTN);
257         if (!utf8_text) {
258             M_LOGE("alloc utf8 text buffer fail !\n");
259             goto exit_free;
260         }
261         uvoice_gbk2utf8(tts_text, utf8_text, tts_text_len * 2);
262         M_LOGI("text encode type GBK\n");
263     } else
264 #endif
265     {
266         utf8_text = tts_text;
267         M_LOGI("text encode type UTF-8\n");
268     }
269 
270     uvoice_urlencode(utf8_text);
271 
272     tts_get_url = snd_zalloc(UVOICE_TTS_HTTPGET_URL_LENGTH, AFM_MAIN);
273     if (!tts_get_url) {
274         M_LOGE("alloc url buffer fail !\n");
275         goto exit_free;
276     }
277 
278     snprintf(tts_get_url, UVOICE_TTS_HTTPGET_URL_LENGTH,
279              "%sappkey=%s&token=%s&text=%s&format=%s&sample_rate=%u&voice=%s&volume=%d&speech_rate=%d&pitch_rate=%d",
280              ALIYUN_TTS_HTTP_URL, g_tts_config.app_key, g_tts_config.token, utf8_text, tts_format[g_tts_config.format],
281              g_tts_config.sample_rate, g_tts_config.voice, g_tts_config.volume, g_tts_config.speech_rate,
282              g_tts_config.pitch_rate);
283 
284     if (recv_type == TTS_RECV_URL) {
285         if (recv_cb->recv_url) {
286             printf("tts url is %s\n", tts_get_url);
287             recv_cb->recv_url(tts_get_url);
288         }
289         if (recv_cb->event)
290             recv_cb->event(TTS_TRANS_COMPLETE, "OK");
291         ret = 0;
292         goto exit_free;
293     }
294 
295     if (recv_type == TTS_RECV_DATA) {
296         media_loader_t *mloader;
297         uint8_t *buffer = NULL;
298         int size = 2048;
299         int ret_size = 0;
300         int index = 0;
301 
302         if (recv_cb->recv_data == NULL) {
303             M_LOGE("recv_cb->recv_data is null !\n");
304             ret = -1;
305             goto exit_free;
306         }
307 
308         http_download(tts_get_url, recv_cb);
309 
310         goto exit_free;
311         mloader = snd_zalloc(sizeof(media_loader_t), AFM_EXTN);
312         if (!mloader) {
313             M_LOGE("alloc mloader failed !\n");
314             ret = -1;
315             goto exit_free;
316         }
317 
318         M_LOGD("url %s\n", tts_get_url);
319         ret = http_loader_create(mloader, tts_get_url);
320         if (ret) {
321             M_LOGE("alloc mloader failed !\n");
322             ret = -1;
323             goto exit_free;
324         }
325 
326         buffer = snd_zalloc(size, AFM_EXTN);
327         if (!buffer) {
328             M_LOGE("alloc buffer fail !\n");
329             ret = -1;
330             http_loader_release(mloader);
331             goto exit_free;
332         }
333         if (recv_cb->event)
334             recv_cb->event(TTS_RECV_START, "");
335 
336         if (mloader->action(mloader, PLAYER_START, NULL)) {
337             M_LOGE("start http load failed !\n");
338             ret = -1;
339             snd_free(buffer);
340             http_loader_release(mloader);
341             snd_free(mloader);
342             goto exit_free;
343         }
344 
345         while (1) {
346             ret_size = mloader->read(mloader, buffer, size);
347             if (ret_size > 0) {
348                 recv_cb->recv_data(buffer, ret_size, index);
349                 index++;
350             } else {
351                 break;
352             }
353         }
354 
355         mloader->action(mloader, PLAYER_STOP, NULL);
356         if (recv_cb->event)
357             recv_cb->event(TTS_RECV_COMPLETE, NULL);
358         snd_free(buffer);
359         http_loader_release(mloader);
360         snd_free(mloader);
361     }
362 
363 exit_free:
364     if (tts_text)
365         snd_free(tts_text);
366 
367 #ifdef UVOICE_TTS_SUPPORT_GBK_ENCODE
368     if (utf8_text)
369         snd_free(utf8_text);
370 #endif
371     if (tts_get_url)
372         snd_free(tts_get_url);
373     return ret;
374 }
375 
/*
 * Stop hook of the Aliyun TTS backend.
 *
 * Currently a no-op: no state is changed and 0 is always returned.
 * Declared with (void) so the definition is a real prototype.
 */
int uvoice_tts_aliyun_stop(void)
{
    return 0;
}