Caffe2 - C++ API
A deep learning, cross platform ML framework
crf_viterbi_op.cc
#include <algorithm>
#include <sstream>
#include <unordered_map>
#include <vector>
#include "caffe2/core/blob_serialization.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"

namespace caffe2 {
namespace {

void RowwiseMaxAndArg(
    const float* mat,
    int32_t N,
    int32_t D,
    float* rowMax,
    int32_t* argMax) {
  auto eigenMat = ConstEigenMatrixMap<float>(mat, D, N);
  for (auto i = 0; i < D; i++) {
    // eigenMat.row(i) is equivalent to column i in mat
    rowMax[i] = eigenMat.row(i).maxCoeff(argMax + i);
  }
}
void ColwiseMaxAndArg(
    const float* mat,
    int32_t N,
    int32_t D,
    float* colMax,
    int32_t* argMax) {
  auto eigenMat = ConstEigenMatrixMap<float>(mat, D, N);
  for (auto i = 0; i < N; i++) {
    // eigenMat.col(i) is equivalent to row i in mat
    colMax[i] = eigenMat.col(i).maxCoeff(argMax + i);
  }
}

class ViterbiPathOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit ViterbiPathOp(Args&&... args)
      : Operator(std::forward<Args>(args)...) {}

  void GatherRow(
      const TensorCPU& data,
      int32_t rowIndex,
      int32_t block_size,
      int32_t block_bytesize,
      TensorCPU* outRow) {
    CAFFE_ENFORCE(
        0 <= rowIndex && rowIndex < data.size(0),
        "rowIndex is out of DATA bounds");
    auto out = static_cast<char*>(outRow->raw_mutable_data(data.dtype()));
    auto src_base = static_cast<const char*>(data.raw_data());
    auto src = src_base + rowIndex * block_bytesize;
    context_.CopyItemsSameDevice(data.dtype(), block_size, src, out);
  }

  void
  AddColToMat(const TensorCPU& mat, const TensorCPU& col, TensorCPU* result) {
    float* resultData = result->template mutable_data<float>();
    const float* colData = col.template data<float>();
    // Initialize the columns of the result to be = the input col
    for (auto i = 0; i < result->dim32(1); i++) {
      for (auto j = 0; j < result->dim32(0); j++) {
        resultData[i * result->dim32(0) + j] = colData[i];
      }
    }
    // Element-wise add of the result and the input matrix
    math::Add<float, CPUContext>(
        mat.numel(),
        resultData,
        mat.template data<float>(),
        resultData,
        &context_);
  }

  bool RunOnDevice() override {
    auto& predictions = Input(0);
    auto& transitions = Input(1);

    CAFFE_ENFORCE(
        predictions.dim() == 2 && transitions.dim() == 2,
        "Predictions and transitions should be 2D matrices");

    CAFFE_ENFORCE(
        predictions.size(1) == transitions.size(0),
        "Predictions and transitions dimensions do not match");

    auto seqLen = predictions.dim32(0);

    auto* viterbiPath = Output(0, {seqLen}, at::dtype<int32_t>());
    auto block_size = predictions.numel() / predictions.size(0);
    auto block_bytesize =
        predictions.size_from_dim(1) * predictions.dtype().itemsize();
    Tensor backpointers(CPU);
    backpointers.ResizeLike(predictions);

    Tensor trellis(std::vector<int64_t>{block_size}, CPU);
    Tensor dpMat(CPU);
    dpMat.ResizeLike(transitions);
    Tensor dpMax(std::vector<int64_t>{block_size}, CPU);
    // Seed the trellis with the prediction scores of the first position.
    GatherRow(predictions, 0, block_size, block_bytesize, &trellis);
    for (auto i = 1; i < seqLen; i++) {
      // dpMat[prev][cur] = trellis[prev] + transitions[prev][cur]
      AddColToMat(transitions, trellis, &dpMat);
      // For every current tag, keep the best previous tag and its score.
      RowwiseMaxAndArg(
          dpMat.template data<float>(),
          dpMat.size(0),
          dpMat.size(1),
          dpMax.template mutable_data<float>(),
          backpointers.template mutable_data<int32_t>() + (i * block_size));

      // trellis = predictions[i] + best score reaching each tag.
      GatherRow(predictions, i, block_size, block_bytesize, &trellis);
      math::Add<float, CPUContext>(
          trellis.numel(),
          trellis.template data<float>(),
          dpMax.template data<float>(),
          trellis.template mutable_data<float>(),
          &context_);
    }

    // Pick the best final tag, then follow the backpointers to the front.
    Tensor tMax(std::vector<int64_t>{1}, CPU);
    Tensor tArgMax(std::vector<int64_t>{1}, CPU);
    ColwiseMaxAndArg(
        trellis.template data<float>(),
        1,
        trellis.numel(),
        tMax.template mutable_data<float>(),
        tArgMax.template mutable_data<int32_t>());

    std::vector<int32_t> viterbiVec;
    viterbiVec.push_back(tArgMax.template data<int32_t>()[0]);
    Tensor bpEntry(std::vector<int64_t>{block_size}, CPU);
    block_bytesize =
        backpointers.size_from_dim(1) * backpointers.dtype().itemsize();
    for (auto i = seqLen - 1; i > 0; i--) {
      GatherRow(backpointers, i, block_size, block_bytesize, &bpEntry);
      viterbiVec.push_back(bpEntry.template data<int32_t>()[viterbiVec.back()]);
    }
    // The path was built backwards; write it out in forward order.
    std::reverse_copy(
        viterbiVec.begin(),
        viterbiVec.end(),
        viterbiPath->template mutable_data<int32_t>());
    return true;
  }
};
class SwapBestPathOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit SwapBestPathOp(Args&&... args)
      : Operator(std::forward<Args>(args)...) {}
  bool RunOnDevice() override {
    auto& data = Input(0);
    auto& newBestIdicies = Input(1);

    CAFFE_ENFORCE(
        data.dim() == 2 && newBestIdicies.dim() == 1,
        "predictions should be a 2D matrix and bestPath should be a 1D vector");

    CAFFE_ENFORCE(
        data.size(0) == newBestIdicies.size(0),
        "predictions and bestPath dimensions do not match");

    auto* updatedData = Output(0, data.sizes(), at::dtype<float>());
    float* outData = updatedData->template mutable_data<float>();
    context_.CopyItemsSameDevice(
        data.dtype(), data.numel(), data.template data<float>(), outData);

    Tensor bestScores(CPU);
    bestScores.ResizeLike(newBestIdicies);
    Tensor oldBestIndices(CPU);
    oldBestIndices.ResizeLike(newBestIdicies);

    // For every row of the predictions, find the index of its current best
    // score.
    ColwiseMaxAndArg(
        data.template data<float>(),
        data.size(0),
        data.size(1),
        bestScores.template mutable_data<float>(),
        oldBestIndices.template mutable_data<int32_t>());

    auto block_size = data.numel() / data.size(0);

    const int32_t* oldBestIdx = oldBestIndices.template data<int32_t>();
    const int32_t* newIdx = newBestIdicies.template data<int32_t>();

    // Swap scores so that the provided index holds the best score in each row.
    for (auto i = 0; i < data.dim32(0); i++) {
      std::swap(
          outData[i * block_size + newIdx[i]],
          outData[i * block_size + oldBestIdx[i]]);
    }
    return true;
  }
};
REGISTER_CPU_OPERATOR(ViterbiPath, ViterbiPathOp);
OPERATOR_SCHEMA(ViterbiPath)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given a predictions matrix and a transitions matrix, get the path with the best
score.
)DOC")
    .Input(0, "predictions", "N*D predictions matrix")
    .Input(1, "transitions", "D*D transitions matrix")
    .Output(0, "viterbi_path", "N*1 vector holding the best path indices");
NO_GRADIENT(ViterbiPath);
REGISTER_CPU_OPERATOR(SwapBestPath, SwapBestPathOp);
OPERATOR_SCHEMA(SwapBestPath)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given a sequence of indices and a matrix, enforce that these indices have the
best columnwise scores.
)DOC")
    .Input(0, "predictions", "N*D predictions matrix")
    .Input(1, "bestPath", "N*1 vector holding the best path indices")
    .Output(0, "new_predictions", "N*D updated predictions matrix");
NO_GRADIENT(SwapBestPath);
} // namespace
} // namespace caffe2
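
For readers who want the algorithm without the Caffe2 tensor machinery: ViterbiPathOp runs the standard Viterbi dynamic program (seed the trellis with the first row of predictions; at each later step add the trellis column-wise onto the transitions, take a per-tag max while recording backpointers, then add that step's predictions; finally take the best last tag and trace the backpointers back to the front), and SwapBestPathOp swaps, per row, the current best score with the score at the supplied index. The sketch below is a minimal standalone illustration of that recurrence on plain row-major buffers; it is not part of crf_viterbi_op.cc, and the names viterbi_decode and swap_best_path are chosen here purely for illustration.

// Illustrative sketch only, not part of crf_viterbi_op.cc.
// predictions: N x D, row-major (N positions, D tags).
// transitions: D x D, row-major; transitions[prev * D + cur] scores prev -> cur.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

std::vector<int> viterbi_decode(
    const std::vector<float>& predictions,
    const std::vector<float>& transitions,
    int N,
    int D) {
  // Trellis starts as the scores of the first position.
  std::vector<float> trellis(predictions.begin(), predictions.begin() + D);
  std::vector<int> backpointers(N * D, 0);
  for (int i = 1; i < N; i++) {
    std::vector<float> next(D);
    for (int cur = 0; cur < D; cur++) {
      int bestPrev = 0;
      float bestScore = trellis[0] + transitions[0 * D + cur];
      for (int prev = 1; prev < D; prev++) {
        float score = trellis[prev] + transitions[prev * D + cur];
        if (score > bestScore) {
          bestScore = score;
          bestPrev = prev;
        }
      }
      backpointers[i * D + cur] = bestPrev;
      next[cur] = bestScore + predictions[i * D + cur];
    }
    trellis = next;
  }
  // Best final tag, then walk the backpointers towards the front.
  std::vector<int> path(N);
  path[N - 1] = static_cast<int>(
      std::max_element(trellis.begin(), trellis.end()) - trellis.begin());
  for (int i = N - 1; i > 0; i--) {
    path[i - 1] = backpointers[i * D + path[i]];
  }
  return path;
}

// Mirror of SwapBestPath: make newBest[i] the best-scoring column of row i by
// swapping its score with the row's current maximum.
void swap_best_path(
    std::vector<float>& data, const std::vector<int>& newBest, int N, int D) {
  for (int i = 0; i < N; i++) {
    int oldBest = static_cast<int>(
        std::max_element(data.begin() + i * D, data.begin() + (i + 1) * D) -
        (data.begin() + i * D));
    std::swap(data[i * D + newBest[i]], data[i * D + oldBest]);
  }
}

int main() {
  // Tiny example: 3 positions, 2 tags.
  std::vector<float> predictions = {1.0f, 0.2f, 0.1f, 0.9f, 0.8f, 0.3f};
  std::vector<float> transitions = {0.5f, -1.0f, -1.0f, 0.5f};
  std::vector<int> path = viterbi_decode(predictions, transitions, 3, 2);
  for (int tag : path) {
    std::printf("%d ", tag);  // the decoded tag sequence
  }
  std::printf("\n");
  return 0;
}

The operator pair is typically used together: viterbi_decode produces the best path, and swap_best_path rewrites a score matrix so that a chosen path (for example, the decoded one) holds the per-row maxima, which matches how SwapBestPath updates its predictions input.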