1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_RECOGNIZE_COMMANDS_H_ 17 #define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_RECOGNIZE_COMMANDS_H_ 18 19 #include <cstdint> 20 21 #include "tensorflow/lite/c/common.h" 22 #include "micro_speech/micro_features/micro_model_settings.h" 23 #include "tensorflow/lite/micro/micro_error_reporter.h" 24 25 // Partial implementation of std::dequeue, just providing the functionality 26 // that's needed to keep a record of previous neural network results over a 27 // short time period, so they can be averaged together to produce a more 28 // accurate overall prediction. This doesn't use any dynamic memory allocation 29 // so it's a better fit for microcontroller applications, but this does mean 30 // there are hard limits on the number of results it can store. 31 class PreviousResultsQueue { 32 public: PreviousResultsQueue(tflite::ErrorReporter * error_reporter)33 PreviousResultsQueue(tflite::ErrorReporter* error_reporter) 34 : error_reporter_(error_reporter), front_index_(0), size_(0) {} 35 36 // Data structure that holds an inference result, and the time when it 37 // was recorded. 38 struct Result { ResultResult39 Result() : time_(0), scores() {} ResultResult40 Result(int32_t time, int8_t* input_scores) : time_(time) { 41 for (int i = 0; i < kCategoryCount; ++i) { 42 scores[i] = input_scores[i]; 43 } 44 } 45 int32_t time_; 46 int8_t scores[kCategoryCount]; 47 }; 48 size()49 int size() { return size_; } empty()50 bool empty() { return size_ == 0; } front()51 Result& front() { return results_[front_index_]; } back()52 Result& back() { 53 int back_index = front_index_ + (size_ - 1); 54 if (back_index >= kMaxResults) { 55 back_index -= kMaxResults; 56 } 57 return results_[back_index]; 58 } 59 push_back(const Result & entry)60 void push_back(const Result& entry) { 61 if (size() >= kMaxResults) { 62 TF_LITE_REPORT_ERROR( 63 error_reporter_, 64 "Couldn't push_back latest result, too many already!"); 65 return; 66 } 67 size_ += 1; 68 back() = entry; 69 } 70 pop_front()71 Result pop_front() { 72 if (size() <= 0) { 73 TF_LITE_REPORT_ERROR(error_reporter_, 74 "Couldn't pop_front result, none present!"); 75 return Result(); 76 } 77 Result result = front(); 78 front_index_ += 1; 79 if (front_index_ >= kMaxResults) { 80 front_index_ = 0; 81 } 82 size_ -= 1; 83 return result; 84 } 85 86 // Most of the functions are duplicates of dequeue containers, but this 87 // is a helper that makes it easy to iterate through the contents of the 88 // queue. from_front(int offset)89 Result& from_front(int offset) { 90 if ((offset < 0) || (offset >= size_)) { 91 TF_LITE_REPORT_ERROR(error_reporter_, 92 "Attempt to read beyond the end of the queue!"); 93 offset = size_ - 1; 94 } 95 int index = front_index_ + offset; 96 if (index >= kMaxResults) { 97 index -= kMaxResults; 98 } 99 return results_[index]; 100 } 101 102 private: 103 tflite::ErrorReporter* error_reporter_; 104 static constexpr int kMaxResults = 50; 105 Result results_[kMaxResults]; 106 107 int front_index_; 108 int size_; 109 }; 110 111 // This class is designed to apply a very primitive decoding model on top of the 112 // instantaneous results from running an audio recognition model on a single 113 // window of samples. It applies smoothing over time so that noisy individual 114 // label scores are averaged, increasing the confidence that apparent matches 115 // are real. 116 // To use it, you should create a class object with the configuration you 117 // want, and then feed results from running a TensorFlow model into the 118 // processing method. The timestamp for each subsequent call should be 119 // increasing from the previous, since the class is designed to process a stream 120 // of data over time. 121 class RecognizeCommands { 122 public: 123 // labels should be a list of the strings associated with each one-hot score. 124 // The window duration controls the smoothing. Longer durations will give a 125 // higher confidence that the results are correct, but may miss some commands. 126 // The detection threshold has a similar effect, with high values increasing 127 // the precision at the cost of recall. The minimum count controls how many 128 // results need to be in the averaging window before it's seen as a reliable 129 // average. This prevents erroneous results when the averaging window is 130 // initially being populated for example. The suppression argument disables 131 // further recognitions for a set time after one has been triggered, which can 132 // help reduce spurious recognitions. 133 explicit RecognizeCommands(tflite::ErrorReporter* error_reporter, 134 int32_t average_window_duration_ms = 1000, 135 uint8_t detection_threshold = 200, 136 int32_t suppression_ms = 1500, 137 int32_t minimum_count = 3); 138 139 // Call this with the results of running a model on sample data. 140 TfLiteStatus ProcessLatestResults(const TfLiteTensor* latest_results, 141 const int32_t current_time_ms, 142 const char** found_command, uint8_t* score, 143 bool* is_new_command); 144 145 private: 146 // Configuration 147 tflite::ErrorReporter* error_reporter_; 148 int32_t average_window_duration_ms_; 149 uint8_t detection_threshold_; 150 int32_t suppression_ms_; 151 int32_t minimum_count_; 152 153 // Working variables 154 PreviousResultsQueue previous_results_; 155 const char* previous_top_label_; 156 int32_t previous_top_label_time_; 157 }; 158 159 #endif // TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_RECOGNIZE_COMMANDS_H_ 160