Caffe2 - C++ API
A deep learning, cross platform ML framework
bbox_transform_op.cc
1 #include "bbox_transform_op.h"
2 #include "caffe2/operators/generate_proposals_op_util_boxes.h"
3 
4 namespace caffe2 {
5 namespace {
6 
7 REGISTER_CPU_OPERATOR(BBoxTransform, BBoxTransformOp<float, CPUContext>);
8 
9 // Input: box, delta Output: box
10 OPERATOR_SCHEMA(BBoxTransform)
11  .NumInputs(3)
12  .NumOutputs(1, 2)
13  .SetDoc(R"DOC(
14 Transform proposal bounding boxes to target bounding box using bounding box
15  regression deltas.
16 )DOC")
17  .Arg("weights", "vector<float> weights [wx, wy, ww, wh] for the deltas")
18  .Arg(
19  "apply_scale",
20  "bool (default true), transform the boxes to the scaled image space"
21  " after applying the bbox deltas."
22  "Set to false to match the detectron code, set to true for keypoint"
23  " models and for backward compatibility")
24  .Arg(
25  "rotated",
26  "bool (default false). If true, then boxes (rois and deltas) include "
27  "angle info to handle rotation. The format will be "
28  "[ctr_x, ctr_y, width, height, angle (in degrees)].")
29  .Arg(
30  "angle_bound_on",
31  "bool (default true). If set, for rotated boxes, angle is "
32  "normalized to be within [angle_bound_lo, angle_bound_hi].")
33  .Arg(
34  "angle_bound_lo",
35  "int (default -90 degrees). If set, for rotated boxes, angle is "
36  "normalized to be within [angle_bound_lo, angle_bound_hi].")
37  .Arg(
38  "angle_bound_hi",
39  "int (default 90 degrees). If set, for rotated boxes, angle is "
40  "normalized to be within [angle_bound_lo, angle_bound_hi].")
41  .Arg(
42  "clip_angle_thresh",
43  "float (default 1.0 degrees). For RRPN, clip almost horizontal boxes "
44  "within this threshold of tolerance for backward compatibility. "
45  "Set to negative value for no clipping.")
46  .Input(
47  0,
48  "rois",
49  "Bounding box proposals in pixel coordinates, "
50  "Size (M, 4), format [x1, y1, x2, y2], or"
51  "Size (M, 5), format [batch_index, x1, y1, x2, y2]. "
52  "If proposals from multiple images in a batch are present, they "
53  "should be grouped sequentially and in incremental order."
54  "For rotated boxes, this would have an additional angle (in degrees) "
55  "in the format [<optionaal_batch_id>, ctr_x, ctr_y, w, h, angle].")
56  .Input(
57  1,
58  "deltas",
59  "bounding box translations and scales,"
60  "size (M, 4*K), format [dx, dy, dw, dh], K = # classes. "
61  "For rotated boxes, size (M, 5*K, format [dx, dy, dw, dh, da].")
62  .Input(
63  2,
64  "im_info",
65  "Image dimensions, size (batch_size, 3), "
66  "format [img_height, img_width, img_scale]")
67  .Output(
68  0,
69  "box_out",
70  "Pixel coordinates of the transformed bounding boxes,"
71  "Size (M, 4*K), format [x1, y1, x2, y2]. "
72  "For rotated boxes, size (M, 5*K), "
73  "format [ctr_x, ctr_y, w, h, angle].")
74  .Output(
75  1,
76  "roi_batch_splits",
77  "Tensor of shape (batch_size) with each element denoting the number "
78  "of RoIs belonging to the corresponding image in batch");
79 
80 SHOULD_NOT_DO_GRADIENT(BBoxTransform);
81 } // namespace
82 
83 template <>
84 bool BBoxTransformOp<float, CPUContext>::RunOnDevice() {
85  const auto& roi_in = Input(0);
86  const auto& delta_in = Input(1);
87  const auto& iminfo_in = Input(2);
88 
89  const int box_dim = rotated_ ? 5 : 4;
90  const int N = roi_in.dim32(0);
91  CAFFE_ENFORCE_EQ(roi_in.dim(), 2);
92  CAFFE_ENFORCE(roi_in.dim32(1) == box_dim || roi_in.dim32(1) == box_dim + 1);
93 
94  CAFFE_ENFORCE_EQ(delta_in.dim(), 2);
95  CAFFE_ENFORCE_EQ(delta_in.dim32(0), N);
96  CAFFE_ENFORCE_EQ(delta_in.dim32(1) % box_dim, 0);
97  const int num_classes = delta_in.dim32(1) / box_dim;
98 
99  CAFFE_ENFORCE_EQ(iminfo_in.dim(), 2);
100  CAFFE_ENFORCE_EQ(iminfo_in.dim32(1), 3);
101  const int batch_size = iminfo_in.dim32(0);
102 
103  DCHECK_EQ(weights_.size(), 4);
104 
105  Eigen::Map<const ERArrXXf> boxes0(
106  roi_in.data<float>(), roi_in.dim32(0), roi_in.dim32(1));
107  Eigen::Map<const ERArrXXf> deltas0(
108  delta_in.data<float>(), delta_in.dim32(0), delta_in.dim32(1));
109 
110  // Count the number of RoIs per batch
111  vector<int> num_rois_per_batch(batch_size, 0);
112  if (roi_in.dim32(1) == box_dim) {
113  CAFFE_ENFORCE_EQ(batch_size, 1);
114  num_rois_per_batch[0] = N;
115  } else {
116  const auto& roi_batch_ids = boxes0.col(0);
117  for (int i = 0; i < roi_batch_ids.size(); ++i) {
118  const int roi_batch_id = roi_batch_ids(i);
119  CAFFE_ENFORCE_LT(roi_batch_id, batch_size);
120  num_rois_per_batch[roi_batch_id]++;
121  }
122  }
123 
124  CAFFE_ENFORCE_EQ(iminfo_in.sizes(), (at::IntArrayRef{batch_size, 3}));
125  Eigen::Map<const ERArrXXf> iminfo(
126  iminfo_in.data<float>(), iminfo_in.size(0), iminfo_in.size(1));
127 
128  auto* box_out = Output(0, delta_in.sizes(), at::dtype<float>());
129  Eigen::Map<ERArrXXf> new_boxes(
130  box_out->template mutable_data<float>(),
131  box_out->dim32(0),
132  box_out->dim32(1));
133 
134  // We assume roi_in and delta_in over multiple batches are grouped
135  // together in increasing order as generated by GenerateProposalsOp
136  int offset = 0;
137  for (int i = 0; i < batch_size; ++i) {
138  const int num_rois = num_rois_per_batch[i];
139  const auto& cur_iminfo = iminfo.row(i);
140  const float scale_before = cur_iminfo(2);
141  const float scale_after = apply_scale_ ? cur_iminfo(2) : 1.0;
142  int img_h = int(cur_iminfo(0) / scale_before + 0.5);
143  int img_w = int(cur_iminfo(1) / scale_before + 0.5);
144 
145  EArrXXf cur_boxes =
146  boxes0.rightCols(box_dim).block(offset, 0, num_rois, box_dim);
147  // Do not apply scale for angle in rotated boxes
148  cur_boxes.leftCols(4) /= scale_before;
149  for (int k = 0; k < num_classes; k++) {
150  const auto& cur_deltas =
151  deltas0.block(offset, k * box_dim, num_rois, box_dim);
152  const auto& trans_boxes = utils::bbox_transform(
153  cur_boxes,
154  cur_deltas,
155  weights_,
156  utils::BBOX_XFORM_CLIP_DEFAULT,
157  angle_bound_on_,
158  angle_bound_lo_,
159  angle_bound_hi_);
160  EArrXXf clip_boxes =
161  utils::clip_boxes(trans_boxes, img_h, img_w, clip_angle_thresh_);
162  // Do not apply scale for angle in rotated boxes
163  clip_boxes.leftCols(4) *= scale_after;
164  new_boxes.block(offset, k * box_dim, num_rois, box_dim) = clip_boxes;
165  }
166 
167  offset += num_rois;
168  }
169 
170  if (OutputSize() > 1) {
171  auto* roi_batch_splits = Output(1, {batch_size}, at::dtype<float>());
172  Eigen::Map<EArrXf> roi_batch_splits_map(
173  roi_batch_splits->template mutable_data<float>(), batch_size);
174  roi_batch_splits_map =
175  Eigen::Map<const EArrXi>(num_rois_per_batch.data(), batch_size)
176  .cast<float>();
177  }
178 
179  return true;
180 }
181 
182 } // namespace caffe2
183 
186 
187 C10_REGISTER_CAFFE2_OPERATOR_CPU(
188  BBoxTransform,
189  (std::vector<c10::Argument>{
190  c10::Argument("rois"),
191  c10::Argument("deltas"),
192  c10::Argument("im_info"),
193  c10::Argument("weights", ListType::create(FloatType::get())),
194  c10::Argument("apply_scale", BoolType::get()),
195  c10::Argument("rotated", BoolType::get()),
196  c10::Argument("angle_bound_on", BoolType::get()),
197  c10::Argument("angle_bound_lo", IntType::get()),
198  c10::Argument("angle_bound_hi", IntType::get()),
199  c10::Argument("clip_angle_thresh", FloatType::get()),
200  }),
201  (std::vector<c10::Argument>{
202  c10::Argument("output_0"),
203  c10::Argument("output_1"),
204  }),
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13