1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "micro_speech/micro_features/micro_features_generator.h"
17
18 #include <cmath>
19 #include <cstring>
20
21 #include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
22 #include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
23 #include "micro_speech/micro_features/micro_model_settings.h"
24
25 // Configure FFT to output 16 bit fixed point.
26 #define FIXED_POINT 16
27
28 namespace {
29
30 FrontendState g_micro_features_state;
31 bool g_is_first_time = true;
32
33 } // namespace
34
InitializeMicroFeatures(tflite::ErrorReporter * error_reporter)35 TfLiteStatus InitializeMicroFeatures(tflite::ErrorReporter* error_reporter) {
36 FrontendConfig config;
37 config.window.size_ms = kFeatureSliceDurationMs;
38 config.window.step_size_ms = kFeatureSliceStrideMs;
39 config.noise_reduction.smoothing_bits = 10;
40 config.filterbank.num_channels = kFeatureSliceSize;
41 config.filterbank.lower_band_limit = 125.0;
42 config.filterbank.upper_band_limit = 7500.0;
43 config.noise_reduction.smoothing_bits = 10;
44 config.noise_reduction.even_smoothing = 0.025;
45 config.noise_reduction.odd_smoothing = 0.06;
46 config.noise_reduction.min_signal_remaining = 0.05;
47 config.pcan_gain_control.enable_pcan = 1;
48 config.pcan_gain_control.strength = 0.95;
49 config.pcan_gain_control.offset = 80.0;
50 config.pcan_gain_control.gain_bits = 21;
51 config.log_scale.enable_log = 1;
52 config.log_scale.scale_shift = 6;
53 if (!FrontendPopulateState(&config, &g_micro_features_state,
54 kAudioSampleFrequency)) {
55 TF_LITE_REPORT_ERROR(error_reporter, "FrontendPopulateState() failed");
56 return kTfLiteError;
57 }
58 g_is_first_time = true;
59 return kTfLiteOk;
60 }
61
62 // This is not exposed in any header, and is only used for testing, to ensure
63 // that the state is correctly set up before generating results.
SetMicroFeaturesNoiseEstimates(const uint32_t * estimate_presets)64 void SetMicroFeaturesNoiseEstimates(const uint32_t* estimate_presets) {
65 for (int i = 0; i < g_micro_features_state.filterbank.num_channels; ++i) {
66 g_micro_features_state.noise_reduction.estimate[i] = estimate_presets[i];
67 }
68 }
69
GenerateMicroFeatures(tflite::ErrorReporter * error_reporter,const int16_t * input,int input_size,int output_size,int8_t * output,size_t * num_samples_read)70 TfLiteStatus GenerateMicroFeatures(tflite::ErrorReporter* error_reporter,
71 const int16_t* input, int input_size,
72 int output_size, int8_t* output,
73 size_t* num_samples_read) {
74 const int16_t* frontend_input;
75 if (g_is_first_time) {
76 frontend_input = input;
77 g_is_first_time = false;
78 } else {
79 frontend_input = input + 160;
80 }
81 FrontendOutput frontend_output = FrontendProcessSamples(
82 &g_micro_features_state, frontend_input, input_size, num_samples_read);
83
84 for (size_t i = 0; i < frontend_output.size; ++i) {
85 // These scaling values are derived from those used in input_data.py in the
86 // training pipeline.
87 // The feature pipeline outputs 16-bit signed integers in roughly a 0 to 670
88 // range. In training, these are then arbitrarily divided by 25.6 to get
89 // float values in the rough range of 0.0 to 26.0. This scaling is performed
90 // for historical reasons, to match up with the output of other feature
91 // generators.
92 // The process is then further complicated when we quantize the model. This
93 // means we have to scale the 0.0 to 26.0 real values to the -128 to 127
94 // signed integer numbers.
95 // All this means that to get matching values from our integer feature
96 // output into the tensor input, we have to perform:
97 // input = (((feature / 25.6) / 26.0) * 256) - 128
98 // To simplify this and perform it in 32-bit integer math, we rearrange to:
99 // input = (feature * 256) / (25.6 * 26.0) - 128
100 constexpr int32_t value_scale = 256;
101 constexpr int32_t value_div = static_cast<int32_t>((25.6f * 26.0f) + 0.5f);
102 int32_t value =
103 ((frontend_output.values[i] * value_scale) + (value_div / 2)) /
104 value_div;
105 value -= 128;
106 if (value < -128) {
107 value = -128;
108 }
109 if (value > 127) {
110 value = 127;
111 }
112 output[i] = value;
113 }
114
115 return kTfLiteOk;
116 }
117