Caffe2 - C++ API
A deep learning, cross platform ML framework
ctc_greedy_decoder_op.cc
1 #include "caffe2/operators/ctc_greedy_decoder_op.h"
2 
3 namespace caffe2 {
4 
5 namespace {
6 
7 const float* getTensorDataPtr(const Tensor& tensor, int t, int n) {
8  const auto dims = tensor.sizes();
9  CAFFE_ENFORCE_EQ(dims.size(), 3);
10  int offset = (t * dims[1] + n) * dims[2];
11  CAFFE_ENFORCE_LT(offset, tensor.numel());
12  return tensor.template data<float>() + offset;
13 }
14 
15 } // namespace
16 
17 template <>
18 bool CTCGreedyDecoderOp<CPUContext>::RunOnDevice() {
19  // [max_time_step, batch_size, num_classes]
20  auto& inputs = Input(INPUTS);
21  // [batch_size]
22 
23  // [total_decoded_output]
24 
25  const auto inputs_dims = inputs.sizes();
26  int32_t max_time_step = inputs_dims[0];
27  int32_t batch_size = inputs_dims[1];
28  int32_t num_classes = inputs_dims[2];
29  // [batch_size]
30  const int* seq_len_data =
31  (InputSize() == 2) ? Input(SEQ_LEN).data<int>() : nullptr;
32 
33  vector<int> values_cach;
34  auto* output_len =
35  Output(OUTPUT_LEN, vector<int64_t>{batch_size}, at::dtype<int>());
36  int* output_len_data = output_len->template mutable_data<int>();
37 
38  for (int32_t i = 0; i < batch_size; ++i) {
39  int previous_label = 0, t_dec = 0;
40  int32_t seq_len_i = (seq_len_data) ? seq_len_data[i] : max_time_step;
41  CAFFE_ENFORCE_LE(seq_len_i, max_time_step);
42  for (int32_t t = 0; t < seq_len_i; ++t) {
43  auto* prob_data = getTensorDataPtr(inputs, t, i);
44  int curr_label =
45  std::max_element(prob_data, prob_data + num_classes) - prob_data;
46  if (curr_label != 0 &&
47  (!merge_repeated_ || (previous_label != curr_label))) {
48  t_dec++;
49  values_cach.push_back(curr_label);
50  }
51  previous_label = curr_label;
52  }
53  output_len_data[i] = t_dec;
54  }
55 
56  int32_t values_cach_size = values_cach.size();
57  auto* values =
58  Output(VALUES, vector<int64_t>{values_cach_size}, at::dtype<int>());
59  int* values_data = values->mutable_data<int>();
60  for (size_t i = 0; i < values_cach.size(); ++i) {
61  values_data[i] = values_cach.at(i);
62  }
63  values_cach.clear();
64 
65  return true;
66 }
67 
68 REGISTER_CPU_OPERATOR(CTCGreedyDecoder, CTCGreedyDecoderOp<CPUContext>);
69 OPERATOR_SCHEMA(CTCGreedyDecoder)
70  .NumInputs(1, 2)
71  .NumOutputs(2)
72  .Arg(
73  "merge_repeated",
74  "When merge_repeated is true, merge repeated classes in output.")
75  .SetDoc("Greedy decoder for connectionist temporal classification.")
76  .Input(
77  0,
78  "INPUTS",
79  "3D float Tensor sized [max_time, batch_size, num_classes]")
80  .Input(
81  1,
82  "SEQ_LEN",
83  "(optional) 1D int vector containing sequence lengths, "
84  "having size [batch_size]"
85  "seq_len will be set to max_time if not provided")
86  .Output(
87  0,
88  "OUTPUT_LEN",
89  "Output_len matrix size (batch). "
90  "The row store: [decoded_length]")
91  .Output(
92  1,
93  "VALUES",
94  "Values vector, size (total_decoded_outputs). "
95  "The vector stores the decoded classes")
96  .InheritOnnxSchema();
97 SHOULD_NOT_DO_GRADIENT(CTCGreedyDecoder);
98 
99 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13