1 #include "roi_pool_op.h" 11 bool RoIPoolOp<float, CPUContext>::RunOnDevice() {
12 const auto& X = Input(0);
13 const auto& R = Input(1);
15 auto*
A = is_test_ ?
nullptr : Output(1);
18 CAFFE_ENFORCE_EQ(R.dim32(1), 5);
21 int batch_size = X.dim32(0);
22 int channels = X.dim32(1);
23 int height = X.dim32(2);
24 int width = X.dim32(3);
25 int num_rois = R.dim32(0);
27 Y->Resize(num_rois, channels, pooled_height_, pooled_width_);
29 A->Resize(Y->sizes());
32 const float* Xdata = X.data<
float>();
33 const float* rois = R.data<
float>();
34 float* Ydata = Y->template mutable_data<float>();
35 int* argmax_data = is_test_ ?
nullptr :
A->template mutable_data<int>();
38 for (
int n = 0; n < num_rois; ++n) {
39 int roi_batch_id = rois[0];
40 int roi_start_w = round(rois[1] * spatial_scale_);
41 int roi_start_h = round(rois[2] * spatial_scale_);
42 int roi_end_w = round(rois[3] * spatial_scale_);
43 int roi_end_h = round(rois[4] * spatial_scale_);
44 CAFFE_ENFORCE_GE(roi_batch_id, 0);
45 CAFFE_ENFORCE_LT(roi_batch_id, batch_size);
48 int roi_height = max(roi_end_h - roi_start_h + 1, 1);
49 int roi_width = max(roi_end_w - roi_start_w + 1, 1);
51 const float bin_size_h =
52 static_cast<float>(roi_height) / static_cast<float>(pooled_height_);
53 const float bin_size_w =
54 static_cast<float>(roi_width) / static_cast<float>(pooled_width_);
56 const float* batch_data = Xdata + roi_batch_id * X.size_from_dim(1);
58 for (
int c = 0; c < channels; ++c) {
59 for (
int ph = 0; ph < pooled_height_; ++ph) {
60 for (
int pw = 0; pw < pooled_width_; ++pw) {
65 static_cast<int>(floor(static_cast<float>(ph) * bin_size_h));
67 static_cast<int>(floor(static_cast<float>(pw) * bin_size_w));
69 static_cast<int>(ceil(static_cast<float>(ph + 1) * bin_size_h));
71 static_cast<int>(ceil(static_cast<float>(pw + 1) * bin_size_w));
74 hstart = min(max(hstart + roi_start_h, 0), height);
75 hend = min(max(hend + roi_start_h, 0), height);
76 wstart = min(max(wstart + roi_start_w, 0), width);
77 wend = min(max(wend + roi_start_w, 0), width);
79 const int pool_index = ph * pooled_width_ + pw;
82 bool is_empty = (hend <= hstart) || (wend <= wstart);
83 Ydata[pool_index] = is_empty ? 0 : -FLT_MAX;
86 argmax_data[pool_index] = -1;
89 for (
int h = hstart; h < hend; ++h) {
90 for (
int w = wstart; w < wend; ++w) {
91 const int index = h * width + w;
92 if (batch_data[index] > Ydata[pool_index]) {
93 Ydata[pool_index] = batch_data[index];
95 argmax_data[pool_index] = index;
103 batch_data += X.size_from_dim(2);
104 Ydata += Y->size_from_dim(2);
106 argmax_data +=
A->size_from_dim(2);
110 rois += R.size_from_dim(1);
// Register the CPU implementations of the forward and backward operators.
REGISTER_CPU_OPERATOR(RoIPool, RoIPoolOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(RoIPoolGradient, RoIPoolGradientOp<float, CPUContext>);
122 OPERATOR_SCHEMA(RoIPool)
125 .TensorInferenceFunction([](
const OperatorDef& def,
126 const vector<TensorShape>& in) {
127 ArgumentHelper helper(def);
128 const StorageOrder order = StringToStorageOrder(
129 helper.GetSingleArgument<
string>(
"order",
"NCHW"));
130 const TensorShape& X = in[0];
131 const int num_channels =
132 (order == StorageOrder::NCHW ? X.dims(1) : X.dims(3));
133 const TensorShape& R = in[1];
134 const int num_rois = R.dims(0);
135 const int pooled_height = helper.GetSingleArgument<
int>(
"pooled_h", 1);
136 const int pooled_width = helper.GetSingleArgument<
int>(
"pooled_w", 1);
137 TensorShape Y = CreateTensorShape(
138 vector<int>({num_rois, num_channels, pooled_height, pooled_width}),
141 bool is_test = helper.GetSingleArgument<
int>(OpSchema::Arg_IsTest, 0);
143 TensorShape argmaxes = Y;
144 argmaxes.set_data_type(TensorProto_DataType_INT32);
145 return vector<TensorShape>({Y, argmaxes});
147 return vector<TensorShape>({Y});
151 Carries out ROI Pooling for Faster-RCNN. 152 Depending on the mode, there are multiple output cases: 154 Output case #1: Y, argmaxes (train mode) 155 Output case #2: Y (test mode) 159 "If set, run in test mode and skip computation of argmaxes (used for " 160 "gradient computation). Only one output tensor is produced. " 162 .Arg(
"order",
"A StorageOrder string (Default: \"NCHW\").")
163 .Arg(
"pooled_h",
"The pooled output height (Default: 1).")
164 .Arg(
"pooled_w",
"The pooled output width (Default: 1).")
167 "Multiplicative spatial scale factor to translate ROI coords from " 168 "their input scale to the scale used when pooling (Default: 1.0).")
172 "The input 4-D tensor of data. Only NCHW order is currently supported.")
176 "RoIs (Regions of Interest) to pool over. Should be a 2-D tensor of " 177 "shape (num_rois, 5) given as [[batch_id, x1, y1, x2, y2], ...].")
181 "RoI pooled output 4-D tensor of shape " 182 "(num_rois, channels, pooled_h, pooled_w).")
186 "Argmaxes corresponding to indices in X used for gradient computation. " 187 "Only output if arg \"is_test\" is false.");
// Backward op: takes X, RoIs, argmaxes, and dY; produces dX.
OPERATOR_SCHEMA(RoIPoolGradient).NumInputs(4).NumOutputs(1);
194 using GradientMakerBase::GradientMakerBase;
195 vector<OperatorDef> GetGradientDefs()
override {
199 vector<string>{I(0), I(1), O(1), GO(0)},
200 vector<string>{GI(0)});
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
static vector< OperatorDef > SingleGradientDef(const Args &...args)
a helper function to allow one to create one single operator def, which is usually the case for many ...