Caffe2 - C++ API
A deep learning, cross platform ML framework
generate_proposals_op_util_boxes.h
1 // Copyright 2004-present Facebook. All Rights Reserved.
2 
3 #ifndef CAFFE2_OPERATORS_UTILS_BOXES_H_
4 #define CAFFE2_OPERATORS_UTILS_BOXES_H_
5 
6 #include "caffe2/utils/eigen_utils.h"
7 #include "caffe2/utils/math.h"
8 
9 // Bounding box utils for generate_proposals_op
10 // Reference: detectron/lib/utils/boxes.py
11 
12 namespace caffe2 {
13 namespace utils {
14 
15 // Default upper bound for the log-space width/height deltas (dw, dh) used by
16 // bbox_transform(); deltas are clamped with cwiseMin, capping exp(dw) at 1000 / 16
17 const float BBOX_XFORM_CLIP_DEFAULT = log(1000.0 / 16.0);
18 
19 // Forward transform that maps proposal boxes to ground-truth boxes using
20 // bounding-box regression deltas.
21 // boxes: pixel coordinates of the bounding boxes
22 // size (M, 4), format [x1; y1; x2; y2], x2 >= x1, y2 >= y1
23 // deltas: bounding box translations and scales
24 // size (M, 4), format [dx; dy; dw; dh]
25 // dx, dy: scale-invariant translation of the center of the bounding box
26 // dw, dh: log-space sclaing of the width and height of the bounding box
27 // weights: weights [wx, wy, ww, wh] for the deltas
28 // bbox_xform_clip: minimum bounding box width and height in log-space after
29 // transofmration
30 // correct_transform_coords: Correct bounding box transform coordates. Set to
31 // true to match the detectron code, set to false for backward compatibility
32 // return: pixel coordinates of the bounding boxes
33 // size (M, 4), format [x1; y1; x2; y2]
34 // see "Rich feature hierarchies for accurate object detection and semantic
35 // segmentation" Appendix C for more details
36 // reference: detectron/lib/utils/boxes.py bbox_transform()
37 template <class Derived1, class Derived2>
38 EArrXXt<typename Derived1::Scalar> bbox_transform(
39  const Eigen::ArrayBase<Derived1>& boxes,
40  const Eigen::ArrayBase<Derived2>& deltas,
41  const std::vector<typename Derived2::Scalar>& weights =
42  std::vector<typename Derived2::Scalar>{1.0, 1.0, 1.0, 1.0},
43  const float bbox_xform_clip = BBOX_XFORM_CLIP_DEFAULT,
44  const bool correct_transform_coords = false) {
45  using T = typename Derived1::Scalar;
46  using EArrXX = EArrXXt<T>;
47  using EArrX = EArrXt<T>;
48 
49  if (boxes.rows() == 0) {
50  return EArrXX::Zero(T(0), deltas.cols());
51  }
52 
53  CAFFE_ENFORCE_EQ(boxes.rows(), deltas.rows());
54  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
55  CAFFE_ENFORCE_EQ(deltas.cols(), 4);
56 
57  EArrX widths = boxes.col(2) - boxes.col(0) + T(1.0);
58  EArrX heights = boxes.col(3) - boxes.col(1) + T(1.0);
59  auto ctr_x = boxes.col(0) + T(0.5) * widths;
60  auto ctr_y = boxes.col(1) + T(0.5) * heights;
61 
62  auto dx = deltas.col(0).template cast<T>() / weights[0];
63  auto dy = deltas.col(1).template cast<T>() / weights[1];
64  auto dw =
65  (deltas.col(2).template cast<T>() / weights[2]).cwiseMin(bbox_xform_clip);
66  auto dh =
67  (deltas.col(3).template cast<T>() / weights[3]).cwiseMin(bbox_xform_clip);
68 
69  EArrX pred_ctr_x = dx * widths + ctr_x;
70  EArrX pred_ctr_y = dy * heights + ctr_y;
71  EArrX pred_w = dw.exp() * widths;
72  EArrX pred_h = dh.exp() * heights;
73 
74  T offset(correct_transform_coords ? 1.0 : 0.0);
75 
76  EArrXX pred_boxes = EArrXX::Zero(deltas.rows(), deltas.cols());
77  // x1
78  pred_boxes.col(0) = pred_ctr_x - T(0.5) * pred_w;
79  // y1
80  pred_boxes.col(1) = pred_ctr_y - T(0.5) * pred_h;
81  // x2
82  pred_boxes.col(2) = pred_ctr_x + T(0.5) * pred_w - offset;
83  // y2
84  pred_boxes.col(3) = pred_ctr_y + T(0.5) * pred_h - offset;
85 
86  return pred_boxes;
87 }
88 
89 // Clip boxes to image boundaries
90 // boxes: pixel coordinates of bounding box, size (M * 4)
91 template <class Derived>
92 EArrXXt<typename Derived::Scalar>
93 clip_boxes(const Eigen::ArrayBase<Derived>& boxes, int height, int width) {
94  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
95 
96  EArrXXt<typename Derived::Scalar> ret(boxes.rows(), boxes.cols());
97 
98  // x1 >= 0 && x1 < width
99  ret.col(0) = boxes.col(0).cwiseMin(width - 1).cwiseMax(0);
100  // y1 >= 0 && y1 < height
101  ret.col(1) = boxes.col(1).cwiseMin(height - 1).cwiseMax(0);
102  // x2 >= 0 && x2 < width
103  ret.col(2) = boxes.col(2).cwiseMin(width - 1).cwiseMax(0);
104  // y2 >= 0 && y2 < height
105  ret.col(3) = boxes.col(3).cwiseMin(height - 1).cwiseMax(0);
106 
107  return ret;
108 }
109 
110 // Only keep boxes with both sides >= min_size and center within the image.
111 // boxes: pixel coordinates of bounding box, size (M * 4)
112 // im_info: [height, width, img_scale]
113 // return: row indices for 'boxes'
114 template <class Derived>
115 std::vector<int> filter_boxes(
116  const Eigen::ArrayBase<Derived>& boxes,
117  double min_size,
118  const Eigen::Array3f& im_info) {
119  CAFFE_ENFORCE_EQ(boxes.cols(), 4);
120 
121  // Scale min_size to match image scale
122  min_size *= im_info[2];
123 
124  using T = typename Derived::Scalar;
125  using EArrX = EArrXt<T>;
126 
127  EArrX ws = boxes.col(2) - boxes.col(0) + T(1);
128  EArrX hs = boxes.col(3) - boxes.col(1) + T(1);
129  EArrX x_ctr = boxes.col(0) + ws / T(2);
130  EArrX y_ctr = boxes.col(1) + hs / T(2);
131 
132  EArrXb keep = (ws >= min_size) && (hs >= min_size) &&
133  (x_ctr < T(im_info[1])) && (y_ctr < T(im_info[0]));
134 
135  return GetArrayIndices(keep);
136 }
137 
138 } // namespace utils
139 } // namespace caffe2
140 
141 #endif // CAFFE2_OPERATORS_UTILS_BOXES_H_
Copyright (c) 2016-present, Facebook, Inc.