1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // Copied from tensorflow/core/util/ctc/ctc_decoder.h
17 // TODO(b/111524997): Remove this file.
18 #ifndef TENSORFLOW_LITE_EXPERIMENTAL_KERNELS_CTC_DECODER_H_
19 #define TENSORFLOW_LITE_EXPERIMENTAL_KERNELS_CTC_DECODER_H_
20 
21 #include <memory>
22 #include <vector>
23 
24 #include "third_party/eigen3/Eigen/Core"
25 
26 namespace tflite {
27 namespace experimental {
28 namespace ctc {
29 
30 // The CTCDecoder is an abstract interface to be implemented when providing a
31 // decoding method on the timestep output of a RNN trained with CTC loss.
32 //
33 // The two types of decoding available are:
34 //   - greedy path, through the CTCGreedyDecoder
35 //   - beam search, through the CTCBeamSearchDecoder
36 class CTCDecoder {
37  public:
38   typedef Eigen::Map<const Eigen::ArrayXi> SequenceLength;
39   typedef Eigen::Map<const Eigen::MatrixXf> Input;
40   typedef std::vector<std::vector<int>> Output;
41   typedef Eigen::Map<Eigen::MatrixXf> ScoreOutput;
42 
CTCDecoder(int num_classes,int batch_size,bool merge_repeated)43   CTCDecoder(int num_classes, int batch_size, bool merge_repeated)
44       : num_classes_(num_classes),
45         blank_index_(num_classes - 1),
46         batch_size_(batch_size),
47         merge_repeated_(merge_repeated) {}
48 
~CTCDecoder()49   virtual ~CTCDecoder() {}
50 
51   // Dimensionality of the input/output is expected to be:
52   //  - seq_len[b] - b = 0 to batch_size_
53   //  - input[t].rows(b) - t = 0 to timesteps; b = 0 t batch_size_
54   //  - output.size() specifies the number of beams to be returned.
55   //  - scores(b, i) - b = 0 to batch_size; i = 0 to output.size()
56   virtual bool Decode(const SequenceLength& seq_len,
57                       const std::vector<Input>& input,
58                       std::vector<Output>* output, ScoreOutput* scores) = 0;
59 
batch_size()60   int batch_size() { return batch_size_; }
num_classes()61   int num_classes() { return num_classes_; }
62 
63  protected:
64   int num_classes_;
65   int blank_index_;
66   int batch_size_;
67   bool merge_repeated_;
68 };
69 
70 // CTCGreedyDecoder is an implementation of the simple best path decoding
71 // algorithm, selecting at each timestep the most likely class at each timestep.
72 class CTCGreedyDecoder : public CTCDecoder {
73  public:
CTCGreedyDecoder(int num_classes,int batch_size,bool merge_repeated)74   CTCGreedyDecoder(int num_classes, int batch_size, bool merge_repeated)
75       : CTCDecoder(num_classes, batch_size, merge_repeated) {}
76 
Decode(const CTCDecoder::SequenceLength & seq_len,const std::vector<CTCDecoder::Input> & input,std::vector<CTCDecoder::Output> * output,CTCDecoder::ScoreOutput * scores)77   bool Decode(const CTCDecoder::SequenceLength& seq_len,
78               const std::vector<CTCDecoder::Input>& input,
79               std::vector<CTCDecoder::Output>* output,
80               CTCDecoder::ScoreOutput* scores) override {
81     if (output->empty() || (*output)[0].size() < batch_size_) {
82       return false;
83     }
84     if (scores->rows() < batch_size_ || scores->cols() == 0) {
85       return false;
86     }
87     // For each batch entry, identify the transitions
88     for (int b = 0; b < batch_size_; ++b) {
89       int seq_len_b = seq_len[b];
90       // Only writing to beam 0
91       std::vector<int>& output_b = (*output)[0][b];
92 
93       int prev_class_ix = -1;
94       (*scores)(b, 0) = 0;
95       for (int t = 0; t < seq_len_b; ++t) {
96         auto row = input[t].row(b);
97         int max_class_ix;
98         (*scores)(b, 0) += -row.maxCoeff(&max_class_ix);
99         if (max_class_ix != blank_index_ &&
100             !(merge_repeated_ && max_class_ix == prev_class_ix)) {
101           output_b.push_back(max_class_ix);
102         }
103         prev_class_ix = max_class_ix;
104       }
105     }
106     return true;
107   }
108 };
109 
110 }  // namespace ctc
111 }  // namespace experimental
112 }  // namespace tflite
113 
114 #endif  // TENSORFLOW_LITE_EXPERIMENTAL_KERNELS_CTC_DECODER_H_
115