// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// integral_image_op.cc
1 #include "integral_image_op.h"
2 #include "caffe2/utils/eigen_utils.h"
3 
4 namespace caffe2 {
5 
namespace {
// Row-major Eigen matrix view over an externally owned buffer. Lets the
// integral-image passes below use Eigen 2D indexing without copying data.
template <typename T>
using EigenMatrixMapRowMajor = Eigen::Map<
    Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;

// Read-only variant of EigenMatrixMapRowMajor.
template <typename T>
using ConstEigenMatrixMapRowMajor = Eigen::Map<
    const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
} // namespace
15 
16 template <>
17 bool IntegralImageOp<float, CPUContext>::RunOnDevice() {
18  const auto& X = Input(0);
19 
20  CAFFE_ENFORCE_EQ(X.dim(), 4, "Only supports 4D tensors for the momement");
21 
22  vector<int64_t> out_shape(X.sizes().vec());
23  out_shape[2] += 1; // H + 1 output size
24  out_shape[3] += 1; // W + 1 output size
25  auto* Y = Output(0, out_shape, at::dtype<float>());
26  const int ind = X.dim32(0);
27  const int chans = X.dim32(1);
28  const int rows_in = X.dim32(2);
29  const int cols_in = X.dim32(3);
30  const int rows_out = Y->dim32(2);
31  const int cols_out = Y->dim32(3);
32 
33  const float* input_data = X.template data<float>();
34  float* output_data = Y->template mutable_data<float>();
35 
36  const int row_out_pass_size = ind * chans * rows_out;
37  const int row_in_pass_size = ind * chans * rows_in;
38  EigenMatrixMapRowMajor<float> Y_arr(output_data, row_out_pass_size, cols_out);
39  ConstEigenMatrixMapRowMajor<float> X_arr(
40  input_data, row_in_pass_size, cols_in);
41 
42  // Row Pass
43  for (int i = 0; i < row_out_pass_size; i++) {
44  int row = i % rows_out;
45  int diff = i / rows_out + 1;
46  Y_arr(i, 0) = 0.;
47  if (row == 0) {
48  for (int j = 1; j < cols_out; ++j) {
49  Y_arr(i, j) = 0.;
50  }
51  } else {
52  for (int j = 1; j < cols_out; ++j) {
53  Y_arr(i, j) = Y_arr(i, j - 1) + X_arr(i - diff, j - 1);
54  }
55  }
56  }
57 
58  // Col Pass
59  const int col_out_pass_size = X.dim32(0) * chans * cols_out;
60  for (int i = 0; i < col_out_pass_size; i++) {
61  int col = i % cols_out;
62  int row = i / cols_out;
63  for (int j = row * rows_out + 1; j < (row + 1) * rows_out; ++j) {
64  Y_arr(j, col) += Y_arr(j - 1, col);
65  }
66  }
67  return true;
68 }
69 
70 template <>
71 bool IntegralImageGradientOp<float, CPUContext>::RunOnDevice() {
72  auto& X = Input(0); // Original input to "forward" op
73  auto& dY = Input(1); // Gradient of net w.r.t. output of "forward" op
74  // (aka "gradOutput")
75  auto* dX = Output(
76  0, X.sizes(), at::dtype<float>()); // Gradient of net w.r.t. input to
77  // "forward" op (aka "gradInput")
78 
79  const int ind = X.dim32(0);
80  const int chans = X.dim32(1);
81  const int rows_in = dY.dim32(2);
82  const int cols_in = dY.dim32(3);
83  const int rows_out = dX->dim32(2);
84  const int cols_out = dX->dim32(3);
85 
86  const float* input_data = dY.template data<float>();
87  float* output_data = dX->template mutable_data<float>();
88 
89  const int row_out_pass_size = ind * chans * rows_out;
90  const int row_in_pass_size = ind * chans * rows_in;
91  EigenMatrixMapRowMajor<float> dX_arr(
92  output_data, row_out_pass_size, cols_out);
93  ConstEigenMatrixMapRowMajor<float> dY_arr(
94  input_data, row_in_pass_size, cols_in);
95  Eigen::MatrixXf tmp(row_in_pass_size, cols_out);
96 
97  // Row Pass dY(N, C, H+1, W+1) => tmp(N, C, H+1, W)
98  for (int i = 0; i < row_in_pass_size; i++) {
99  tmp(i, 0) = dY_arr(i, 0);
100  for (int j = 1; j < cols_out; ++j) {
101  tmp(i, j) = tmp(i, j - 1) + dY_arr(i, j);
102  }
103  }
104 
105  // Col Pass tmp(N, C, H+1, W)=>dX(N, C, H, W)
106  const int col_out_pass_size = X.dim32(0) * chans * cols_out;
107  for (int i = 0; i < col_out_pass_size; i++) {
108  int col = i % cols_out;
109  int row_out_start = (i / cols_out) * rows_out;
110  int row_in_start = (i / cols_out) * rows_in;
111  dX_arr(row_out_start, col) = tmp(row_in_start, col);
112  for (int j = 1; j < rows_out; ++j) {
113  dX_arr(row_out_start + j, col) =
114  dX_arr(row_out_start + j - 1, col) + tmp(row_in_start + j, col);
115  }
116  }
117  return true;
118 }
119 
// Register the CPU implementations of the forward and gradient operators.
REGISTER_CPU_OPERATOR(IntegralImage, IntegralImageOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    IntegralImageGradient,
    IntegralImageGradientOp<float, CPUContext>);
124 
// Input: X; Output: Y
// Schema: single 4D input, single output one row/column larger in H and W
// (see RunOnDevice above).
OPERATOR_SCHEMA(IntegralImage)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Computes an integral image, which contains the sum of pixel values within
an image vertically and horizontally. This integral image can then be used
with other detection and tracking techniques.
)DOC")
    .Input(0, "X", "Images tensor of the form (N, C, H, W)")
    .Output(0, "Y", "Integrated image of the form (N, C, H+1, W+1)");
136 
// Input: X, dY (aka "gradOutput"); Output: dX (aka "gradInput")
OPERATOR_SCHEMA(IntegralImageGradient).NumInputs(2).NumOutputs(1);
139 
141  using GradientMakerBase::GradientMakerBase;
142  vector<OperatorDef> GetGradientDefs() override {
143  return SingleGradientDef(
144  "IntegralImageGradient",
145  "",
146  vector<string>{I(0), GO(0)},
147  vector<string>{GI(0)});
148  }
149 };
150 
151 REGISTER_GRADIENT(IntegralImage, GetIntegralImageGradient);
152 
153 } // namespace caffe2
// Doxygen extraction residue (preserved as comments):
// - A global dictionary that holds information about what Caffe2 modules
//   have been loaded in the current runtime environment (blob.h:13).
// - SingleGradientDef(const Args&... args): a helper function to create one
//   single operator def, which is usually the case for many operators.