// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// roi_pool_op.cc
#include "roi_pool_op.h"

#include <cfloat>
#include <cmath>
5 namespace caffe2 {
6 
7 using std::max;
8 using std::min;
9 
10 template <>
11 bool RoIPoolOp<float, CPUContext>::RunOnDevice() {
12  const auto& X = Input(0); // Input data to pool
13  const auto& R = Input(1); // RoIs
14  auto* Y = Output(0); // RoI pooled data
15  auto* A = is_test_ ? nullptr : Output(1); // argmaxes
16 
17  // Each ROI is of the form [batch_index x1 y1 x2 y2]
18  CAFFE_ENFORCE_EQ(R.dim32(1), 5);
19 
20  // TODO: Handle the storage_order properly to get the NCWH.
21  int batch_size = X.dim32(0);
22  int channels = X.dim32(1);
23  int height = X.dim32(2);
24  int width = X.dim32(3);
25  int num_rois = R.dim32(0);
26 
27  Y->Resize(num_rois, channels, pooled_height_, pooled_width_);
28  if (!is_test_) {
29  A->Resize(Y->sizes());
30  }
31 
32  const float* Xdata = X.data<float>();
33  const float* rois = R.data<float>();
34  float* Ydata = Y->template mutable_data<float>();
35  int* argmax_data = is_test_ ? nullptr : A->template mutable_data<int>();
36 
37  // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
38  for (int n = 0; n < num_rois; ++n) {
39  int roi_batch_id = rois[0];
40  int roi_start_w = round(rois[1] * spatial_scale_);
41  int roi_start_h = round(rois[2] * spatial_scale_);
42  int roi_end_w = round(rois[3] * spatial_scale_);
43  int roi_end_h = round(rois[4] * spatial_scale_);
44  CAFFE_ENFORCE_GE(roi_batch_id, 0);
45  CAFFE_ENFORCE_LT(roi_batch_id, batch_size);
46 
47  // Force malformed ROIs to be 1x1
48  int roi_height = max(roi_end_h - roi_start_h + 1, 1);
49  int roi_width = max(roi_end_w - roi_start_w + 1, 1);
50 
51  const float bin_size_h =
52  static_cast<float>(roi_height) / static_cast<float>(pooled_height_);
53  const float bin_size_w =
54  static_cast<float>(roi_width) / static_cast<float>(pooled_width_);
55 
56  const float* batch_data = Xdata + roi_batch_id * X.size_from_dim(1);
57 
58  for (int c = 0; c < channels; ++c) {
59  for (int ph = 0; ph < pooled_height_; ++ph) {
60  for (int pw = 0; pw < pooled_width_; ++pw) {
61  // Compute pooling region for this output unit:
62  // start (included) = floor(ph * roi_height / pooled_height_)
63  // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_)
64  int hstart =
65  static_cast<int>(floor(static_cast<float>(ph) * bin_size_h));
66  int wstart =
67  static_cast<int>(floor(static_cast<float>(pw) * bin_size_w));
68  int hend =
69  static_cast<int>(ceil(static_cast<float>(ph + 1) * bin_size_h));
70  int wend =
71  static_cast<int>(ceil(static_cast<float>(pw + 1) * bin_size_w));
72 
73  // Add roi offsets and clip to input boundaries
74  hstart = min(max(hstart + roi_start_h, 0), height);
75  hend = min(max(hend + roi_start_h, 0), height);
76  wstart = min(max(wstart + roi_start_w, 0), width);
77  wend = min(max(wend + roi_start_w, 0), width);
78 
79  const int pool_index = ph * pooled_width_ + pw;
80 
81  // Define an empty pooling region to be zero
82  bool is_empty = (hend <= hstart) || (wend <= wstart);
83  Ydata[pool_index] = is_empty ? 0 : -FLT_MAX;
84  if (!is_test_) {
85  // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
86  argmax_data[pool_index] = -1;
87  }
88 
89  for (int h = hstart; h < hend; ++h) {
90  for (int w = wstart; w < wend; ++w) {
91  const int index = h * width + w;
92  if (batch_data[index] > Ydata[pool_index]) {
93  Ydata[pool_index] = batch_data[index];
94  if (!is_test_) {
95  argmax_data[pool_index] = index;
96  }
97  }
98  }
99  }
100  }
101  }
102  // Increment all data pointers by one channel
103  batch_data += X.size_from_dim(2);
104  Ydata += Y->size_from_dim(2);
105  if (!is_test_) {
106  argmax_data += A->size_from_dim(2);
107  }
108  }
109  // Increment ROI data pointer
110  rois += R.size_from_dim(1);
111  }
112 
113  return true;
114 }
115 
116 REGISTER_CPU_OPERATOR(RoIPool, RoIPoolOp<float, CPUContext>);
117 REGISTER_CPU_OPERATOR(RoIPoolGradient, RoIPoolGradientOp<float, CPUContext>);
118 
119 // Input: X, rois
120 // Output case #1: Y, argmaxes (train mode)
121 // Output case #2: Y (test mode)
122 OPERATOR_SCHEMA(RoIPool)
123  .NumInputs(2)
124  .NumOutputs({1, 2})
125  .TensorInferenceFunction([](const OperatorDef& def,
126  const vector<TensorShape>& in) {
127  ArgumentHelper helper(def);
128  const StorageOrder order = StringToStorageOrder(
129  helper.GetSingleArgument<string>("order", "NCHW"));
130  const TensorShape& X = in[0];
131  const int num_channels =
132  (order == StorageOrder::NCHW ? X.dims(1) : X.dims(3));
133  const TensorShape& R = in[1];
134  const int num_rois = R.dims(0);
135  const int pooled_height = helper.GetSingleArgument<int>("pooled_h", 1);
136  const int pooled_width = helper.GetSingleArgument<int>("pooled_w", 1);
137  TensorShape Y = CreateTensorShape(
138  vector<int>({num_rois, num_channels, pooled_height, pooled_width}),
139  X.data_type());
140 
141  bool is_test = helper.GetSingleArgument<int>(OpSchema::Arg_IsTest, 0);
142  if (!is_test) {
143  TensorShape argmaxes = Y;
144  argmaxes.set_data_type(TensorProto_DataType_INT32);
145  return vector<TensorShape>({Y, argmaxes});
146  } else {
147  return vector<TensorShape>({Y});
148  }
149  })
150  .SetDoc(R"DOC(
151 Carries out ROI Pooling for Faster-RCNN.
152 Depending on the mode, there are multiple output cases:
153 
154  Output case #1: Y, argmaxes (train mode)
155  Output case #2: Y (test mode)
156 )DOC")
157  .Arg(
158  "is_test",
159  "If set, run in test mode and skip computation of argmaxes (used for "
160  "gradient computation). Only one output tensor is produced. "
161  "(Default: false).")
162  .Arg("order", "A StorageOrder string (Default: \"NCHW\").")
163  .Arg("pooled_h", "The pooled output height (Default: 1).")
164  .Arg("pooled_w", "The pooled output width (Default: 1).")
165  .Arg(
166  "spatial_scale",
167  "Multiplicative spatial scale factor to translate ROI coords from "
168  "their input scale to the scale used when pooling (Default: 1.0).")
169  .Input(
170  0,
171  "X",
172  "The input 4-D tensor of data. Only NCHW order is currently supported.")
173  .Input(
174  1,
175  "rois",
176  "RoIs (Regions of Interest) to pool over. Should be a 2-D tensor of "
177  "shape (num_rois, 5) given as [[batch_id, x1, y1, x2, y2], ...].")
178  .Output(
179  0,
180  "Y",
181  "RoI pooled output 4-D tensor of shape "
182  "(num_rois, channels, pooled_h, pooled_w).")
183  .Output(
184  1,
185  "argmaxes",
186  "Argmaxes corresponding to indices in X used for gradient computation. "
187  "Only output if arg \"is_test\" is false.");
188 
189 // Input: X, rois, argmaxes, dY (aka "gradOutput")
190 // Output: dX (aka "gradInput")
191 OPERATOR_SCHEMA(RoIPoolGradient).NumInputs(4).NumOutputs(1);
192 
194  using GradientMakerBase::GradientMakerBase;
195  vector<OperatorDef> GetGradientDefs() override {
196  return SingleGradientDef(
197  "RoIPoolGradient",
198  "",
199  vector<string>{I(0), I(1), O(1), GO(0)},
200  vector<string>{GI(0)});
201  }
202 };
203 
204 REGISTER_GRADIENT(RoIPool, GetRoIPoolGradient);
205 
206 } // namespace caffe2
// NOTE(review): the lines below are documentation-tooltip residue from the
// page this file was extracted from; kept as comments, not code.
//   Definition: static.cpp:52
//   A global dictionary that holds information about what Caffe2 modules
//   have been loaded in the current runtime. Definition: blob.h:13
//   static vector<OperatorDef> SingleGradientDef(const Args&... args)
//   a helper function to allow one to create one single operator def,
//   which is usually the case for many simple operators.