1 #include "caffe2/operators/ctc_greedy_decoder_op.h" 7 const float* getTensorDataPtr(
const Tensor& tensor,
int t,
int n) {
8 const auto dims = tensor.sizes();
9 CAFFE_ENFORCE_EQ(dims.size(), 3);
10 int offset = (t * dims[1] + n) * dims[2];
11 CAFFE_ENFORCE_LT(offset, tensor.numel());
12 return tensor.template data<float>() + offset;
18 bool CTCGreedyDecoderOp<CPUContext>::RunOnDevice() {
20 auto& inputs = Input(INPUTS);
25 const auto inputs_dims = inputs.sizes();
26 int32_t max_time_step = inputs_dims[0];
27 int32_t batch_size = inputs_dims[1];
28 int32_t num_classes = inputs_dims[2];
30 const int* seq_len_data =
31 (InputSize() == 2) ? Input(SEQ_LEN).data<
int>() :
nullptr;
33 vector<int> values_cach;
35 Output(OUTPUT_LEN, vector<int64_t>{batch_size}, at::dtype<int>());
36 int* output_len_data = output_len->template mutable_data<int>();
38 for (int32_t i = 0; i < batch_size; ++i) {
39 int previous_label = 0, t_dec = 0;
40 int32_t seq_len_i = (seq_len_data) ? seq_len_data[i] : max_time_step;
41 CAFFE_ENFORCE_LE(seq_len_i, max_time_step);
42 for (int32_t t = 0; t < seq_len_i; ++t) {
43 auto* prob_data = getTensorDataPtr(inputs, t, i);
45 std::max_element(prob_data, prob_data + num_classes) - prob_data;
46 if (curr_label != 0 &&
47 (!merge_repeated_ || (previous_label != curr_label))) {
49 values_cach.push_back(curr_label);
51 previous_label = curr_label;
53 output_len_data[i] = t_dec;
56 int32_t values_cach_size = values_cach.size();
58 Output(VALUES, vector<int64_t>{values_cach_size}, at::dtype<int>());
59 int* values_data = values->mutable_data<
int>();
60 for (
size_t i = 0; i < values_cach.size(); ++i) {
61 values_data[i] = values_cach.at(i);
// Registers CTCGreedyDecoderOp<CPUContext> as the CPU implementation of the
// CTCGreedyDecoder operator.
68 REGISTER_CPU_OPERATOR(CTCGreedyDecoder, CTCGreedyDecoderOp<CPUContext>);
// Schema for the CTCGreedyDecoder operator; the string literals below are its
// argument, documentation, input and output descriptions.
// NOTE(review): the calls that presumably wrap these strings (argument, input
// and output declarations) are not visible in this view -- confirm against the
// full file before editing.
69 OPERATOR_SCHEMA(CTCGreedyDecoder)
// Description of the merge_repeated behaviour.
74 "When merge_repeated is true, merge repeated classes in output.")
// Operator-level documentation string.
75 .SetDoc(
"Greedy decoder for connectionist temporal classification.")
// Description of the score tensor and its expected layout.
79 "3D float Tensor sized [max_time, batch_size, num_classes]")
// Description of the optional sequence-length vector.
// NOTE(review): the adjacent literals "having size [batch_size]" and
// "seq_len will be set to ..." are concatenated with no separator, so the
// resulting text reads "[batch_size]seq_len"; consider adding ". " between
// them.
83 "(optional) 1D int vector containing sequence lengths, " 84 "having size [batch_size]" 85 "seq_len will be set to max_time if not provided")
// Description of the per-batch decoded-length output.
89 "Output_len matrix size (batch). " 90 "The row store: [decoded_length]")
// Description of the decoded-classes output.
94 "Values vector, size (total_decoded_outputs). " 95 "The vector stores the decoded classes")
// Presumably marks the operator as having no gradient -- confirm against the
// macro's definition.
97 SHOULD_NOT_DO_GRADIENT(CTCGreedyDecoder);
// A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...