3 #include <unordered_map> 5 #include "caffe2/core/blob_serialization.h" 6 #include "caffe2/core/operator.h" 7 #include "caffe2/core/tensor.h" 8 #include "caffe2/utils/eigen_utils.h" 9 #include "caffe2/utils/math.h" 14 void RowwiseMaxAndArg(
20 auto eigenMat = ConstEigenMatrixMap<float>(mat, D, N);
21 for (
auto i = 0; i < D; i++) {
23 rowMax[i] = eigenMat.row(i).maxCoeff(argMax + i);
26 void ColwiseMaxAndArg(
32 auto eigenMat = ConstEigenMatrixMap<float>(mat, D, N);
33 for (
auto i = 0; i < N; i++) {
35 colMax[i] = eigenMat.col(i).maxCoeff(argMax + i);
39 class ViterbiPathOp :
public Operator<CPUContext> {
41 template <
class... Args>
42 explicit ViterbiPathOp(Args&&... args)
43 : Operator(
std::forward<Args>(args)...) {}
46 const TensorCPU& data,
49 int32_t block_bytesize,
52 0 <= rowIndex && rowIndex < data.size(0),
53 "rowIndex is out of DATA bounds");
54 auto out =
static_cast<char*
>(outRow->raw_mutable_data(data.dtype()));
55 auto src_base =
static_cast<const char*
>(data.raw_data());
56 auto src = src_base + rowIndex * block_bytesize;
57 context_.CopyItemsSameDevice(data.dtype(), block_size, src, out);
61 AddColToMat(
const TensorCPU& mat,
const TensorCPU& col, TensorCPU* result) {
62 float* resultData = result->template mutable_data<float>();
63 const float* colData = col.template data<float>();
65 for (
auto i = 0; i < result->dim32(1); i++) {
66 for (
auto j = 0; j < result->dim32(0); j++) {
67 resultData[i * result->dim32(0) + j] = colData[i];
71 math::Add<float, CPUContext>(
74 mat.template data<float>(),
79 bool RunOnDevice()
override {
80 auto& predictions = Input(0);
81 auto& transitions = Input(1);
84 predictions.dim() == 2 && transitions.dim() == 2,
85 "Predictions and transitions hould 2D matrices");
88 predictions.size(1) == transitions.size(0),
89 "Predictions and transitions dimensions not matching");
91 auto seqLen = predictions.dim32(0);
93 auto* viterbiPath = Output(0, {seqLen}, at::dtype<int32_t>());
94 auto block_size = predictions.numel() / predictions.size(0);
96 predictions.size_from_dim(1) * predictions.dtype().itemsize();
98 backpointers.ResizeLike(predictions);
100 Tensor trellis(std::vector<int64_t>{block_size}, CPU);
102 dpMat.ResizeLike(transitions);
103 Tensor dpMax(std::vector<int64_t>{block_size}, CPU);
104 GatherRow(predictions, 0, block_size, block_bytesize, &trellis);
105 for (
auto i = 1; i < seqLen; i++) {
106 AddColToMat(transitions, trellis, &dpMat);
108 dpMat.template data<float>(),
111 dpMax.template mutable_data<float>(),
112 backpointers.template mutable_data<int32_t>() + (i * block_size));
114 GatherRow(predictions, i, block_size, block_bytesize, &trellis);
115 math::Add<float, CPUContext>(
117 trellis.template data<float>(),
118 dpMax.template data<float>(),
119 trellis.template mutable_data<float>(),
123 Tensor tMax(std::vector<int64_t>{1}, CPU);
124 Tensor tArgMax(std::vector<int64_t>{1}, CPU);
126 trellis.template data<float>(),
129 tMax.template mutable_data<float>(),
130 tArgMax.template mutable_data<int32_t>());
132 std::vector<int32_t> viterbiVec;
133 viterbiVec.push_back(tArgMax.template data<int32_t>()[0]);
134 Tensor bpEntry(std::vector<int64_t>{block_size}, CPU);
136 backpointers.size_from_dim(1) * backpointers.dtype().itemsize();
137 for (
auto i = seqLen - 1; i > 0; i--) {
138 GatherRow(backpointers, i, block_size, block_bytesize, &bpEntry);
139 viterbiVec.push_back(bpEntry.template data<int32_t>()[viterbiVec.back()]);
144 viterbiPath->template mutable_data<int32_t>());
148 class SwapBestPathOp :
public Operator<CPUContext> {
150 template <
class... Args>
151 explicit SwapBestPathOp(Args&&... args)
152 : Operator(
std::forward<Args>(args)...) {}
153 bool RunOnDevice()
override {
154 auto& data = Input(0);
155 auto& newBestIdicies = Input(1);
158 data.dim() == 2 && newBestIdicies.dim() == 1,
159 "predictions should be a 2D matrix and bestPath should be 1D vector");
162 data.size(0) == newBestIdicies.size(0),
163 "predictions and bestPath dimensions not matching");
165 auto* updatedData = Output(0, data.sizes(), at::dtype<float>());
166 float* outData = updatedData->template mutable_data<float>();
167 context_.CopyItemsSameDevice(
168 data.dtype(), data.numel(), data.template data<float>(), outData);
171 bestScores.ResizeLike(newBestIdicies);
172 Tensor oldBestIndices(CPU);
173 oldBestIndices.ResizeLike(newBestIdicies);
176 data.template data<float>(),
179 bestScores.template mutable_data<float>(),
180 oldBestIndices.template mutable_data<int32_t>());
182 auto block_size = data.numel() / data.size(0);
184 const int32_t* oldBestIdx = oldBestIndices.template data<int32_t>();
185 const int32_t* newIdx = newBestIdicies.template data<int32_t>();
187 for (
auto i = 0; i < data.dim32(0); i++) {
189 outData[i * block_size + newIdx[i]],
190 outData[i * block_size + oldBestIdx[i]]);
195 REGISTER_CPU_OPERATOR(ViterbiPath, ViterbiPathOp);
196 OPERATOR_SCHEMA(ViterbiPath)
200 Given a predictions matrix and a transitions matrix, get the path with the best 203 .Input(0, "predictions",
"N*D predictions matrix")
204 .Input(1,
"transitions",
"D*D transitions matrix")
205 .Output(0,
"viterbi_path",
"N*1 vector holds the best path indices");
206 NO_GRADIENT(ViterbiPath);
207 REGISTER_CPU_OPERATOR(SwapBestPath, SwapBestPathOp);
208 OPERATOR_SCHEMA(SwapBestPath)
212 Given a sequence of idices and a matrix, enforce that these indices have the 213 best columnwise scores 216 .Input(0, "predictions",
"N*D predictions matrix")
217 .Input(1,
"bestPath",
"N*1 vector holds the best path indices ")
218 .Output(0,
"new_predictions",
"N*D updated predictions matrix");
219 NO_GRADIENT(SwapBestPath);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...