// pre_calc_for_bilinear_interpolate
//
// For every sampling point of every pooled output bin, works out where the
// point lands on the feature map and derives the four neighbouring flattened
// positions (pos1..pos4) and the bilinear weights (w1..w4) for it. Entries are
// written into `pre_calc` at a running index, in (ph, pw, iy, ix) loop order.
//
// Sample points are first expressed as offsets (xx, yy) in the RoI's own
// frame, then rotated by `theta` and translated by the RoI centre.
//
// NOTE(review): the embedded line numbers in this listing have gaps, so part
// of the signature and body is not visible here (the remaining parameters,
// the contents of the out-of-range branch, the definitions of y_low / x_low /
// ly / lx, and the place where pre_calc_index is advanced).
2 #define _USE_MATH_DEFINES // For M_PI 6 #include "roi_align_rotated_op.h" 8 #include "caffe2/utils/eigen_utils.h" 9 #include "caffe2/utils/math.h" 27 void pre_calc_for_bilinear_interpolate(
30 const int pooled_height,
31 const int pooled_width,
43 std::vector<PreCalc<T>>& pre_calc) {
// Running write position into pre_calc.
44 int pre_calc_index = 0;
// The rotation terms depend only on theta, so they are computed once up front.
45 T cosTheta = cos(theta);
46 T sinTheta = sin(theta);
// Outer two loops walk the pooled output bins, inner two walk the sample grid
// inside each bin.
47 for (
int ph = 0; ph < pooled_height; ph++) {
48 for (
int pw = 0; pw < pooled_width; pw++) {
49 for (
int iy = 0; iy < iy_upper; iy++) {
// Vertical offset of the sample: start of bin `ph` plus the centre
// ((iy + 0.5) / roi_bin_grid_h of a bin height) of sub-cell `iy`.
50 const T yy = roi_start_h + ph * bin_size_h +
51 static_cast<T>(iy + .5f) * bin_size_h /
52 static_cast<T>(roi_bin_grid_h);
53 for (
int ix = 0; ix < ix_upper; ix++) {
// Horizontal offset of the sample, built the same way as yy.
54 const T xx = roi_start_w + pw * bin_size_w +
55 static_cast<T>(ix + .5f) * bin_size_w /
56 static_cast<T>(roi_bin_grid_w);
// Rotate the offset by theta and translate by the RoI centre to get
// feature-map coordinates.
59 T x = xx * cosTheta + yy * sinTheta + roi_center_w;
60 T y = yy * cosTheta - xx * sinTheta + roi_center_h;
// Samples below -1 or beyond height / width take this branch; it stores an
// entry `pc` whose contents are set on lines not shown here.
63 if (y < -1.0 || y > height || x < -1.0 || x > width) {
74 pre_calc[pre_calc_index] = pc;
// At or past the last row: the low and high neighbours collapse onto row
// height - 1.
91 if (y_low >= height - 1) {
92 y_high = y_low = height - 1;
// Same collapse for the last column.
98 if (x_low >= width - 1) {
99 x_high = x_low = width - 1;
// hy / hx are the complements of ly / lx (defined on lines not shown;
// presumably the fractional parts of y / x - confirm). The four products are
// the bilinear weights of the four neighbours.
107 T hy = 1. - ly, hx = 1. - lx;
108 T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
// Flattened (row * width + column) indices of the four neighbours.
112 pc.pos1 = y_low * width + x_low;
113 pc.pos2 = y_low * width + x_high;
114 pc.pos3 = y_high * width + x_low;
115 pc.pos4 = y_high * width + x_high;
// Store the finished entry for this sample.
120 pre_calc[pre_calc_index] = pc;
// ROIAlignRotatedForward
//
// For each RoI, reads its centre, size and angle from `bottom_rois`, scales
// centre and size by `spatial_scale`, builds a table of bilinear sample
// positions / weights with pre_calc_for_bilinear_interpolate, and then
// accumulates the weighted feature values of every pooled output bin into
// `top_data`. `order` selects the NCHW (per-channel plane) or NHWC
// (per-pixel channel vector) traversal.
//
// NOTE(review): the embedded line numbers in this listing have gaps; some
// parameters (e.g. nthreads, channels, height, width, roi_cols, top_data) and
// several statements are used below but declared on lines not shown.
129 template <
typename T>
130 void ROIAlignRotatedForward(
132 const T* bottom_data,
133 const T& spatial_scale,
137 const int pooled_height,
138 const int pooled_width,
139 const int sampling_ratio,
140 const T* bottom_rois,
143 StorageOrder order) {
// Each RoI row has either 5 or 6 values.
144 DCHECK(roi_cols == 5 || roi_cols == 6);
// nthreads is the total number of output elements, so dividing by the
// per-RoI output size gives the RoI count.
146 int n_rois = nthreads / channels / pooled_width / pooled_height;
// OpenMP parallel-for pragma attached to the per-RoI loop.
149 #pragma omp parallel for 151 for (
int n = 0; n < n_rois; n++) {
// Offset of this RoI's first output element.
152 int index_n = n * channels * pooled_width * pooled_height;
154 const T* offset_bottom_rois = bottom_rois + n * roi_cols;
// Batch index defaults to 0. The next two statements read it from the first
// column and step past it; the condition guarding them is on a line not shown
// (presumably roi_cols == 6 - confirm).
155 int roi_batch_ind = 0;
157 roi_batch_ind = offset_bottom_rois[0];
158 offset_bottom_rois++;
// Centre and size are scaled by spatial_scale.
162 T roi_center_w = offset_bottom_rois[0] * spatial_scale;
163 T roi_center_h = offset_bottom_rois[1] * spatial_scale;
164 T roi_width = offset_bottom_rois[2] * spatial_scale;
165 T roi_height = offset_bottom_rois[3] * spatial_scale;
// Angle is given in degrees; convert to radians.
166 T theta = offset_bottom_rois[4] * M_PI / 180.0;
// Force the RoI to be at least 1 x 1 so the bin sizes are never below
// 1 / pooled size.
169 roi_width = std::max(roi_width, (
T)1.);
170 roi_height = std::max(roi_height, (
T)1.);
// Size of one pooled bin.
171 T bin_size_h =
static_cast<T>(roi_height) / static_cast<T>(pooled_height);
172 T bin_size_w =
static_cast<T>(roi_width) / static_cast<T>(pooled_width);
// Samples per bin along each axis: sampling_ratio when it is positive,
// otherwise ceil(roi size / pooled size).
175 int roi_bin_grid_h = (sampling_ratio > 0)
177 : ceil(roi_height / pooled_height);
179 (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
// Number of samples in one bin; used as the averaging divisor below.
182 const T count = roi_bin_grid_h * roi_bin_grid_w;
// One PreCalc entry per sample of every bin. The table is indexed only by bin
// and sample, so it is built once per RoI and reused for every channel.
186 std::vector<PreCalc<T>> pre_calc(
187 roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
// RoI start expressed relative to the RoI centre (half the size to the
// negative side); the rotation is applied to these centre-relative offsets.
191 T roi_start_h = -roi_height / 2.0;
192 T roi_start_w = -roi_width / 2.0;
193 pre_calc_for_bilinear_interpolate(
// NCHW: process one channel plane at a time.
211 if (order == StorageOrder::NCHW) {
212 for (
int c = 0; c < channels; c++) {
213 int index_n_c = index_n + c * pooled_width * pooled_height;
// Start of the (roi_batch_ind, c) feature plane.
214 const T* offset_bottom_data =
215 bottom_data + (roi_batch_ind * channels + c) * height * width;
// Restart at the beginning of the table for each channel.
216 int pre_calc_index = 0;
218 for (
int ph = 0; ph < pooled_height; ph++) {
219 for (
int pw = 0; pw < pooled_width; pw++) {
220 int index = index_n_c + ph * pooled_width + pw;
223 for (
int iy = 0; iy < roi_bin_grid_h; iy++) {
224 for (
int ix = 0; ix < roi_bin_grid_w; ix++) {
// Weighted sum of the four neighbours of this sample.
225 PreCalc<T> pc = pre_calc[pre_calc_index];
226 output_val += pc.w1 * offset_bottom_data[pc.pos1] +
227 pc.w2 * offset_bottom_data[pc.pos2] +
228 pc.w3 * offset_bottom_data[pc.pos3] +
229 pc.w4 * offset_bottom_data[pc.pos4];
// Any normalisation of output_val before this store is on lines not shown.
236 top_data[index] = output_val;
// NHWC: process all channels of a pixel at once as a vector.
242 if (order == StorageOrder::NHWC) {
// Start of the roi_batch_ind image; positions are multiplied by `channels`
// below to reach a pixel's channel vector.
243 const T* offset_bottom_data =
244 bottom_data + roi_batch_ind * channels * height * width;
245 int pre_calc_index = 0;
247 for (
int ph = 0; ph < pooled_height; ph++) {
248 for (
int pw = 0; pw < pooled_width; pw++) {
// NOTE(review): EVecXf looks like a float-typed vector while the copy below
// is sized with sizeof(T); confirm this is only instantiated with T = float.
249 EVecXf output_vals = EVecXf::Zero(channels);
251 for (
int iy = 0; iy < roi_bin_grid_h; iy++) {
252 for (
int ix = 0; ix < roi_bin_grid_w; ix++) {
253 PreCalc<T> pc = pre_calc[pre_calc_index];
// Channel vectors of the four neighbouring pixels.
255 ConstEigenVectorMap<T> data_1(
256 offset_bottom_data + channels * pc.pos1, channels);
257 ConstEigenVectorMap<T> data_2(
258 offset_bottom_data + channels * pc.pos2, channels);
259 ConstEigenVectorMap<T> data_3(
260 offset_bottom_data + channels * pc.pos3, channels);
261 ConstEigenVectorMap<T> data_4(
262 offset_bottom_data + channels * pc.pos4, channels);
264 output_vals += pc.w1 * data_1 + pc.w2 * data_2 + pc.w3 * data_3 +
// Average over the samples of the bin.
270 output_vals /= count;
// Offset of this bin's channel vector in the output.
272 int index_nhw = index_n + (ph * pooled_width + pw) * channels;
// Arguments of a copy of channels * sizeof(T) bytes from output_vals into
// top_data; the call name itself is on a line not shown.
274 top_data + index_nhw, output_vals.data(), channels *
sizeof(
T));
// RoIAlignRotatedOp<float, CPUContext>::RunOnDevice
//
// CPU float entry point. Produces an output with a zero-sized first dimension
// when there are no RoIs, otherwise validates the RoI tensor's shape, sizes
// the output according to the storage order and runs
// ROIAlignRotatedForward<float> into it.
//
// NOTE(review): the embedded line numbers in this listing have gaps; the
// lines that fetch X, R and Y and most of the call arguments are not shown.
284 bool RoIAlignRotatedOp<float, CPUContext>::RunOnDevice() {
// No RoIs: emit an output whose first dimension is 0 and whose other
// dimensions follow the storage order.
288 if (R.numel() == 0) {
289 std::vector<int64_t> sizes;
291 if (order_ == StorageOrder::NCHW) {
292 sizes = {0, X.dim32(1), pooled_height_, pooled_width_};
293 }
else if (order_ == StorageOrder::NHWC) {
294 sizes = {0, pooled_height_, pooled_width_, X.dim32(3)};
297 Output(0, sizes, at::dtype<float>());
// R must be a 2D tensor.
301 CAFFE_ENFORCE_EQ(R.dim(), 2);
// Each RoI row has 5 or 6 columns.
304 CAFFE_ENFORCE(R.dim32(1) == 5 || R.dim32(1) == 6);
// assert() is compiled out when NDEBUG is defined, so this check only runs in
// debug builds.
// NOTE(review): the operator schema text documents a default of -1 for the
// sampling ratio argument; if sampling_ratio_ holds that argument, the default
// would trip this assert - confirm.
306 assert(sampling_ratio_ >= 0);
// NCHW: channel count is X's dimension 1.
308 if (order_ == StorageOrder::NCHW) {
311 {R.dim32(0), X.dim32(1), pooled_height_, pooled_width_},
314 int output_size = Y->numel();
315 ROIAlignRotatedForward<float>(
327 Y->mutable_data<
float>(),
329 }
// NHWC: channel count is X's dimension 3.
else if (order_ == StorageOrder::NHWC) {
332 {R.dim32(0), pooled_height_, pooled_width_, X.dim32(3)},
334 int output_size = Y->numel();
335 ROIAlignRotatedForward<float>(
347 Y->mutable_data<
float>(),
// Registers the float / CPUContext specialisation under the operator name
// RoIAlignRotated.
354 REGISTER_CPU_OPERATOR(RoIAlignRotated, RoIAlignRotatedOp<float, CPUContext>);
// Operator schema for RoIAlignRotated: human-readable descriptions of the
// operator, its arguments (spatial scale, pooled_h, pooled_w, sampling ratio),
// its inputs (feature map X and the RoI tensor) and its pooled output.
//
// NOTE(review): the embedded line numbers in this listing have gaps; the
// input / output count declarations and some argument names are on lines not
// shown here.
357 OPERATOR_SCHEMA(RoIAlignRotated)
361 Similar to RoIAlign but can handle rotated region proposals. 362 Based on https://arxiv.org/abs/1703.01086. 366 "(float) default 1.0; Spatial scale of the input feature map X " 367 "relative to the input image. E.g., 0.0625 if X has a stride of 16 " 368 "w.r.t. the input image.")
369 .Arg(
"pooled_h",
"(int) default 1; Pooled output Y's height.")
370 .Arg(
"pooled_w",
"(int) default 1; Pooled output Y's width.")
373 "(int) default -1; number of sampling points in the interpolation grid " 374 "used to compute the output value of each pooled output bin. If > 0, " 375 "then exactly sampling_ratio x sampling_ratio grid points are used. If " 376 "<= 0, then an adaptive number of grid points are used (computed as " 377 "ceil(roi_width / pooled_w), and likewise for height).")
378 .Input(0,
"X",
"4D feature map input of shape (N, C, H, W).")
382 "2D input of shape (R, 5 or 6) specifying R RoIs " 383 "representing: batch index in [0, N - 1], center_x, center_y, width, " 384 "height, angle. The RoI coordinates are in the coordinate system of " 385 "the input image. `angle` should be specified in degrees and " 386 "represents the RoI rotated counter-clockwise. For inputs " 387 "corresponding to a single image, batch index can be excluded to " 388 "have just 5 columns.")
392 "4D output of shape (R, C, pooled_h, pooled_w). The r-th batch element " 393 "is a pooled feature map corresponding to the r-th RoI.");
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...