Caffe2 - C++ API
A deep learning, cross platform ML framework
generate_proposals_op_util_boxes.h
1 #ifndef CAFFE2_OPERATORS_UTILS_BOXES_H_
2 #define CAFFE2_OPERATORS_UTILS_BOXES_H_
3 
#include <cmath>

#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
6 
7 // Bounding box utils for generate_proposals_op
8 // Reference: facebookresearch/Detectron/detectron/utils/boxes.py
9 
10 namespace caffe2 {
11 namespace utils {
12 
// Default upper bound, in log-space, for the width/height scaling deltas
// (dw, dh) used by bbox_transform(). The deltas are capped at this value
// before they are exponentiated, so the default lets a side grow by at most
// a factor of 1000 / 16.
// The logarithm is evaluated in double precision and then narrowed to float.
const float BBOX_XFORM_CLIP_DEFAULT =
    static_cast<float>(std::log(1000.0 / 16.0));
// Single-precision pi; used to convert angle deltas from radians to degrees.
const float PI = 3.14159265358979323846;
17 
18 // Forward transform that maps proposal boxes to ground-truth boxes using
19 // bounding-box regression deltas.
20 // boxes: pixel coordinates of the bounding boxes
21 // size (M, 4), format [x1; y1; x2; y2], x2 >= x1, y2 >= y1
22 // deltas: bounding box translations and scales
23 // size (M, 4), format [dx; dy; dw; dh]
24 // dx, dy: scale-invariant translation of the center of the bounding box
25 // dw, dh: log-space scaling of the width and height of the bounding box
26 // weights: weights [wx, wy, ww, wh] for the deltas
27 // bbox_xform_clip: minimum bounding box width and height in log-space after
28 // transofmration
29 // correct_transform_coords: Correct bounding box transform coordates. Set to
30 // true to match the detectron code, set to false for backward compatibility
31 // return: pixel coordinates of the bounding boxes
32 // size (M, 4), format [x1; y1; x2; y2]
33 // see "Rich feature hierarchies for accurate object detection and semantic
34 // segmentation" Appendix C for more details
35 // reference: detectron/lib/utils/boxes.py bbox_transform()
36 template <class Derived1, class Derived2>
37 EArrXXt<typename Derived1::Scalar> bbox_transform_upright(
38  const Eigen::ArrayBase<Derived1>& boxes,
39  const Eigen::ArrayBase<Derived2>& deltas,
40  const std::vector<typename Derived2::Scalar>& weights =
41  std::vector<typename Derived2::Scalar>{1.0, 1.0, 1.0, 1.0},
42  const float bbox_xform_clip = BBOX_XFORM_CLIP_DEFAULT) {
43  using T = typename Derived1::Scalar;
44  using EArrXX = EArrXXt<T>;
45  using EArrX = EArrXt<T>;
46 
47  if (boxes.rows() == 0) {
48  return EArrXX::Zero(T(0), deltas.cols());
49  }
50 
51  CAFFE_ENFORCE_EQ(boxes.rows(), deltas.rows());
52  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
53  CAFFE_ENFORCE_EQ(deltas.cols(), 4);
54 
55  EArrX widths = boxes.col(2) - boxes.col(0) + T(1.0);
56  EArrX heights = boxes.col(3) - boxes.col(1) + T(1.0);
57  auto ctr_x = boxes.col(0) + T(0.5) * widths;
58  auto ctr_y = boxes.col(1) + T(0.5) * heights;
59 
60  auto dx = deltas.col(0).template cast<T>() / weights[0];
61  auto dy = deltas.col(1).template cast<T>() / weights[1];
62  auto dw =
63  (deltas.col(2).template cast<T>() / weights[2]).cwiseMin(bbox_xform_clip);
64  auto dh =
65  (deltas.col(3).template cast<T>() / weights[3]).cwiseMin(bbox_xform_clip);
66 
67  EArrX pred_ctr_x = dx * widths + ctr_x;
68  EArrX pred_ctr_y = dy * heights + ctr_y;
69  EArrX pred_w = dw.exp() * widths;
70  EArrX pred_h = dh.exp() * heights;
71 
72  EArrXX pred_boxes = EArrXX::Zero(deltas.rows(), deltas.cols());
73  // x1
74  pred_boxes.col(0) = pred_ctr_x - T(0.5) * pred_w;
75  // y1
76  pred_boxes.col(1) = pred_ctr_y - T(0.5) * pred_h;
77  // x2
78  pred_boxes.col(2) = pred_ctr_x + T(0.5) * pred_w - T(1.0);
79  // y2
80  pred_boxes.col(3) = pred_ctr_y + T(0.5) * pred_h - T(1.0);
81 
82  return pred_boxes;
83 }
84 
85 // Like bbox_transform_upright, but works on rotated boxes.
86 // boxes: pixel coordinates of the bounding boxes
87 // size (M, 5), format [ctr_x; ctr_y; width; height; angle (in degrees)]
88 // deltas: bounding box translations and scales
89 // size (M, 5), format [dx; dy; dw; dh; da]
90 // dx, dy: scale-invariant translation of the center of the bounding box
91 // dw, dh: log-space scaling of the width and height of the bounding box
92 // da: delta for angle in radians
93 // return: pixel coordinates of the bounding boxes
94 // size (M, 5), format [ctr_x; ctr_y; width; height; angle (in degrees)]
95 template <class Derived1, class Derived2>
96 EArrXXt<typename Derived1::Scalar> bbox_transform_rotated(
97  const Eigen::ArrayBase<Derived1>& boxes,
98  const Eigen::ArrayBase<Derived2>& deltas,
99  const std::vector<typename Derived2::Scalar>& weights =
100  std::vector<typename Derived2::Scalar>{1.0, 1.0, 1.0, 1.0},
101  const float bbox_xform_clip = BBOX_XFORM_CLIP_DEFAULT,
102  const bool angle_bound_on = true,
103  const int angle_bound_lo = -90,
104  const int angle_bound_hi = 90) {
105  using T = typename Derived1::Scalar;
106  using EArrXX = EArrXXt<T>;
107 
108  if (boxes.rows() == 0) {
109  return EArrXX::Zero(T(0), deltas.cols());
110  }
111 
112  CAFFE_ENFORCE_EQ(boxes.rows(), deltas.rows());
113  CAFFE_ENFORCE_EQ(boxes.cols(), 5);
114  CAFFE_ENFORCE_EQ(deltas.cols(), 5);
115 
116  const auto& ctr_x = boxes.col(0);
117  const auto& ctr_y = boxes.col(1);
118  const auto& widths = boxes.col(2);
119  const auto& heights = boxes.col(3);
120  const auto& angles = boxes.col(4);
121 
122  auto dx = deltas.col(0).template cast<T>() / weights[0];
123  auto dy = deltas.col(1).template cast<T>() / weights[1];
124  auto dw =
125  (deltas.col(2).template cast<T>() / weights[2]).cwiseMin(bbox_xform_clip);
126  auto dh =
127  (deltas.col(3).template cast<T>() / weights[3]).cwiseMin(bbox_xform_clip);
128  // Convert back to degrees
129  auto da = deltas.col(4).template cast<T>() * 180.0 / PI;
130 
131  EArrXX pred_boxes = EArrXX::Zero(deltas.rows(), deltas.cols());
132  // new ctr_x
133  pred_boxes.col(0) = dx * widths + ctr_x;
134  // new ctr_y
135  pred_boxes.col(1) = dy * heights + ctr_y;
136  // new width
137  pred_boxes.col(2) = dw.exp() * widths;
138  // new height
139  pred_boxes.col(3) = dh.exp() * heights;
140  // new angle
141  pred_boxes.col(4) = da + angles;
142 
143  if (angle_bound_on) {
144  // Normalize angle to be within [angle_bound_lo, angle_bound_hi].
145  // Deltas are guaranteed to be <= period / 2 while computing training
146  // targets by bbox_transform_inv.
147  const int period = angle_bound_hi - angle_bound_lo;
148  CAFFE_ENFORCE(period > 0 && period % 180 == 0);
149  auto angles = pred_boxes.col(4);
150  for (int i = 0; i < angles.size(); ++i) {
151  if (angles[i] < angle_bound_lo) {
152  angles[i] += T(period);
153  } else if (angles[i] > angle_bound_hi) {
154  angles[i] -= T(period);
155  }
156  }
157  }
158 
159  return pred_boxes;
160 }
161 
162 template <class Derived1, class Derived2>
163 EArrXXt<typename Derived1::Scalar> bbox_transform(
164  const Eigen::ArrayBase<Derived1>& boxes,
165  const Eigen::ArrayBase<Derived2>& deltas,
166  const std::vector<typename Derived2::Scalar>& weights =
167  std::vector<typename Derived2::Scalar>{1.0, 1.0, 1.0, 1.0},
168  const float bbox_xform_clip = BBOX_XFORM_CLIP_DEFAULT,
169  const bool angle_bound_on = true,
170  const int angle_bound_lo = -90,
171  const int angle_bound_hi = 90) {
172  CAFFE_ENFORCE(boxes.cols() == 4 || boxes.cols() == 5);
173  if (boxes.cols() == 4) {
174  // Upright boxes
175  return bbox_transform_upright(boxes, deltas, weights, bbox_xform_clip);
176  } else {
177  // Rotated boxes with angle info
178  return bbox_transform_rotated(
179  boxes,
180  deltas,
181  weights,
182  bbox_xform_clip,
183  angle_bound_on,
184  angle_bound_lo,
185  angle_bound_hi);
186  }
187 }
188 
189 template <class Derived>
190 EArrXXt<typename Derived::Scalar> bbox_xyxy_to_ctrwh(
191  const Eigen::ArrayBase<Derived>& boxes) {
192  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
193 
194  const auto& x1 = boxes.col(0);
195  const auto& y1 = boxes.col(1);
196  const auto& x2 = boxes.col(2);
197  const auto& y2 = boxes.col(3);
198 
199  EArrXXt<typename Derived::Scalar> ret(boxes.rows(), 4);
200  ret.col(0) = (x1 + x2) / 2.0; // x_ctr
201  ret.col(1) = (y1 + y2) / 2.0; // y_ctr
202  ret.col(2) = x2 - x1 + 1.0; // w
203  ret.col(3) = y2 - y1 + 1.0; // h
204  return ret;
205 }
206 
207 template <class Derived>
208 EArrXXt<typename Derived::Scalar> bbox_ctrwh_to_xyxy(
209  const Eigen::ArrayBase<Derived>& boxes) {
210  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
211 
212  const auto& x_ctr = boxes.col(0);
213  const auto& y_ctr = boxes.col(1);
214  const auto& w = boxes.col(2);
215  const auto& h = boxes.col(3);
216 
217  EArrXXt<typename Derived::Scalar> ret(boxes.rows(), 4);
218  ret.col(0) = x_ctr - (w - 1) / 2.0; // x1
219  ret.col(1) = y_ctr - (h - 1) / 2.0; // y1
220  ret.col(2) = x_ctr + (w - 1) / 2.0; // x2
221  ret.col(3) = y_ctr + (h - 1) / 2.0; // y2
222  return ret;
223 }
224 
225 // Clip boxes to image boundaries
226 // boxes: pixel coordinates of bounding box, size (M * 4)
227 template <class Derived>
228 EArrXXt<typename Derived::Scalar> clip_boxes_upright(
229  const Eigen::ArrayBase<Derived>& boxes,
230  int height,
231  int width) {
232  CAFFE_ENFORCE(boxes.cols() == 4);
233 
234  EArrXXt<typename Derived::Scalar> ret(boxes.rows(), boxes.cols());
235 
236  // x1 >= 0 && x1 < width
237  ret.col(0) = boxes.col(0).cwiseMin(width - 1).cwiseMax(0);
238  // y1 >= 0 && y1 < height
239  ret.col(1) = boxes.col(1).cwiseMin(height - 1).cwiseMax(0);
240  // x2 >= 0 && x2 < width
241  ret.col(2) = boxes.col(2).cwiseMin(width - 1).cwiseMax(0);
242  // y2 >= 0 && y2 < height
243  ret.col(3) = boxes.col(3).cwiseMin(height - 1).cwiseMax(0);
244 
245  return ret;
246 }
247 
248 // Similar to clip_boxes_upright but handles rotated boxes with angle info.
249 // boxes: size (M, 5), format [ctr_x; ctr_y; width; height; angle (in degrees)]
250 //
251 // Clipping is only performed for boxes that are almost upright
252 // (within a given `angle_thresh` tolerance) to maintain backward compatibility
253 // for non-rotated boxes.
254 //
255 // We don't clip rotated boxes due to a couple of reasons:
256 // (1) There are potentially multiple ways to clip a rotated box to make it
257 // fit within the image.
258 // (2) It's tricky to make the entire rectangular box fit within the image and
259 // still be able to not leave out pixels of interest.
260 // Therefore, we rely on upstream ops like RoIAlignRotated safely handling this.
261 template <class Derived>
262 EArrXXt<typename Derived::Scalar> clip_boxes_rotated(
263  const Eigen::ArrayBase<Derived>& boxes,
264  int height,
265  int width,
266  float angle_thresh = 1.0) {
267  CAFFE_ENFORCE(boxes.cols() == 5);
268 
269  const auto& angles = boxes.col(4);
270 
271  // Filter boxes that are upright (with a tolerance of angle_thresh)
272  EArrXXt<typename Derived::Scalar> upright_boxes;
273  const auto& indices = GetArrayIndices(angles.abs() <= angle_thresh);
274  GetSubArrayRows(boxes, AsEArrXt(indices), &upright_boxes);
275 
276  // Convert to [x1, y1, x2, y2] format and clip them
277  const auto& upright_boxes_xyxy =
278  bbox_ctrwh_to_xyxy(upright_boxes.leftCols(4));
279  const auto& clipped_upright_boxes_xyxy =
280  clip_boxes_upright(upright_boxes_xyxy, height, width);
281 
282  // Convert back to [x_ctr, y_ctr, w, h, angle] and update upright boxes
283  upright_boxes.block(0, 0, upright_boxes.rows(), 4) =
284  bbox_xyxy_to_ctrwh(clipped_upright_boxes_xyxy);
285 
286  EArrXXt<typename Derived::Scalar> ret(boxes.rows(), boxes.cols());
287  ret = boxes;
288  for (int i = 0; i < upright_boxes.rows(); ++i) {
289  ret.row(indices[i]) = upright_boxes.row(i);
290  }
291  return ret;
292 }
293 
294 // Clip boxes to image boundaries.
295 template <class Derived>
296 EArrXXt<typename Derived::Scalar> clip_boxes(
297  const Eigen::ArrayBase<Derived>& boxes,
298  int height,
299  int width,
300  float angle_thresh = 1.0) {
301  CAFFE_ENFORCE(boxes.cols() == 4 || boxes.cols() == 5);
302  if (boxes.cols() == 4) {
303  // Upright boxes
304  return clip_boxes_upright(boxes, height, width);
305  } else {
306  // Rotated boxes with angle info
307  return clip_boxes_rotated(boxes, height, width, angle_thresh);
308  }
309 }
310 
311 // Only keep boxes with both sides >= min_size and center within the image.
312 // boxes: pixel coordinates of bounding box, size (M * 4)
313 // im_info: [height, width, img_scale]
314 // return: row indices for 'boxes'
315 template <class Derived>
316 std::vector<int> filter_boxes_upright(
317  const Eigen::ArrayBase<Derived>& boxes,
318  double min_size,
319  const Eigen::Array3f& im_info) {
320  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
321 
322  // Scale min_size to match image scale
323  min_size *= im_info[2];
324 
325  using T = typename Derived::Scalar;
326  using EArrX = EArrXt<T>;
327 
328  EArrX ws = boxes.col(2) - boxes.col(0) + T(1);
329  EArrX hs = boxes.col(3) - boxes.col(1) + T(1);
330  EArrX x_ctr = boxes.col(0) + ws / T(2);
331  EArrX y_ctr = boxes.col(1) + hs / T(2);
332 
333  EArrXb keep = (ws >= min_size) && (hs >= min_size) &&
334  (x_ctr < T(im_info[1])) && (y_ctr < T(im_info[0]));
335 
336  return GetArrayIndices(keep);
337 }
338 
339 // Similar to filter_boxes_upright but works for rotated boxes.
340 // boxes: pixel coordinates of the bounding boxes
341 // size (M, 5), format [ctr_x; ctr_y; width; height; angle (in degrees)]
342 // im_info: [height, width, img_scale]
343 // return: row indices for 'boxes'
344 template <class Derived>
345 std::vector<int> filter_boxes_rotated(
346  const Eigen::ArrayBase<Derived>& boxes,
347  double min_size,
348  const Eigen::Array3f& im_info) {
349  CAFFE_ENFORCE_EQ(boxes.cols(), 5);
350 
351  // Scale min_size to match image scale
352  min_size *= im_info[2];
353 
354  using T = typename Derived::Scalar;
355 
356  const auto& x_ctr = boxes.col(0);
357  const auto& y_ctr = boxes.col(1);
358  const auto& ws = boxes.col(2);
359  const auto& hs = boxes.col(3);
360 
361  EArrXb keep = (ws >= min_size) && (hs >= min_size) &&
362  (x_ctr < T(im_info[1])) && (y_ctr < T(im_info[0]));
363 
364  return GetArrayIndices(keep);
365 }
366 
367 template <class Derived>
368 std::vector<int> filter_boxes(
369  const Eigen::ArrayBase<Derived>& boxes,
370  double min_size,
371  const Eigen::Array3f& im_info) {
372  CAFFE_ENFORCE(boxes.cols() == 4 || boxes.cols() == 5);
373  if (boxes.cols() == 4) {
374  // Upright boxes
375  return filter_boxes_upright(boxes, min_size, im_info);
376  } else {
377  // Rotated boxes with angle info
378  return filter_boxes_rotated(boxes, min_size, im_info);
379  }
380 }
381 
382 } // namespace utils
383 } // namespace caffe2
384 
385 #endif // CAFFE2_OPERATORS_UTILS_BOXES_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13