// Includes for this operator, followed by the CPU registration that binds the
// operator name BBoxTransform to the BBoxTransformOp<float, CPUContext>
// specialization.
1 #include "bbox_transform_op.h" 2 #include "caffe2/operators/generate_proposals_op_util_boxes.h" 7 REGISTER_CPU_OPERATOR(BBoxTransform, BBoxTransformOp<float, CPUContext>);
// Schema for the BBoxTransform operator. What is visible here are the help
// strings for the operator's arguments, inputs and outputs; most of the calls
// that own these strings are not visible in this excerpt, so the association
// of each string with a particular argument/input/output is not shown.
10 OPERATOR_SCHEMA(BBoxTransform)
14 Transform proposal bounding boxes to target bounding box using bounding box 17 .Arg("weights",
"vector<float> weights [wx, wy, ww, wh] for the deltas")
// NOTE(review): if the adjacent literals below are concatenated as written,
// the rendered text reads "deltas.Set to false" (no space after the period).
20 "bool (default true), transform the boxes to the scaled image space" 21 " after applying the bbox deltas." 22 "Set to false to match the detectron code, set to true for keypoint" 23 " models and for backward compatibility")
26 "bool (default false). If true, then boxes (rois and deltas) include " 27 "angle info to handle rotation. The format will be " 28 "[ctr_x, ctr_y, width, height, angle (in degrees)].")
31 "bool (default true). If set, for rotated boxes, angle is " 32 "normalized to be within [angle_bound_lo, angle_bound_hi].")
35 "int (default -90 degrees). If set, for rotated boxes, angle is " 36 "normalized to be within [angle_bound_lo, angle_bound_hi].")
39 "int (default 90 degrees). If set, for rotated boxes, angle is " 40 "normalized to be within [angle_bound_lo, angle_bound_hi].")
43 "float (default 1.0 degrees). For RRPN, clip almost horizontal boxes " 44 "within this threshold of tolerance for backward compatibility. " 45 "Set to negative value for no clipping.")
// NOTE(review): wording defects in the help text below, left untouched because
// the strings are emitted at runtime: "or" + "Size" and "order." + "For" join
// without a space, and "optionaal_batch_id" looks like a typo for
// "optional_batch_id".
49 "Bounding box proposals in pixel coordinates, " 50 "Size (M, 4), format [x1, y1, x2, y2], or" 51 "Size (M, 5), format [batch_index, x1, y1, x2, y2]. " 52 "If proposals from multiple images in a batch are present, they " 53 "should be grouped sequentially and in incremental order." 54 "For rotated boxes, this would have an additional angle (in degrees) " 55 "in the format [<optionaal_batch_id>, ctr_x, ctr_y, w, h, angle].")
// NOTE(review): "scales," + "size" joins without a space, and
// "size (M, 5*K, format" is missing its closing parenthesis.
59 "bounding box translations and scales," 60 "size (M, 4*K), format [dx, dy, dw, dh], K = # classes. " 61 "For rotated boxes, size (M, 5*K, format [dx, dy, dw, dh, da].")
65 "Image dimensions, size (batch_size, 3), " 66 "format [img_height, img_width, img_scale]")
// NOTE(review): "boxes," + "Size" joins without a space.
70 "Pixel coordinates of the transformed bounding boxes," 71 "Size (M, 4*K), format [x1, y1, x2, y2]. " 72 "For rotated boxes, size (M, 5*K), " 73 "format [ctr_x, ctr_y, w, h, angle].")
77 "Tensor of shape (batch_size) with each element denoting the number " 78 "of RoIs belonging to the corresponding image in batch");
// Gradient declaration for BBoxTransform; presumably marks the operator as one
// for which no gradient should be generated (inferred from the macro name —
// confirm against the macro's definition).
80 SHOULD_NOT_DO_GRADIENT(BBoxTransform);
// float/CPUContext implementation of BBoxTransformOp::RunOnDevice.
//
// Reads three inputs -- Input(0) RoIs, Input(1) box deltas, Input(2) per-image
// info -- applies utils::bbox_transform followed by utils::clip_boxes per image
// and per class, and writes the result to Output(0). When more than one output
// is requested, Output(1) receives the number of RoIs counted for each image.
//
// NOTE(review): this excerpt has gaps (some statements, branch headers and the
// end of the function are not visible), so the comments below describe only
// the lines that are shown.
84 bool BBoxTransformOp<float, CPUContext>::RunOnDevice() {
85 const auto& roi_in = Input(0);
86 const auto& delta_in = Input(1);
87 const auto& iminfo_in = Input(2);
// A box has 5 columns when rotated_ is set, otherwise 4.
89 const int box_dim = rotated_ ? 5 : 4;
// NOTE(review): dim32(0) is read here before the rank check on the next line.
90 const int N = roi_in.dim32(0);
// RoIs must be 2-D with either box_dim columns or box_dim + 1 columns.
91 CAFFE_ENFORCE_EQ(roi_in.dim(), 2);
92 CAFFE_ENFORCE(roi_in.dim32(1) == box_dim || roi_in.dim32(1) == box_dim + 1);
// Deltas must be 2-D, have one row per RoI, and a column count that is a
// multiple of box_dim; the quotient is used as the number of classes.
94 CAFFE_ENFORCE_EQ(delta_in.dim(), 2);
95 CAFFE_ENFORCE_EQ(delta_in.dim32(0), N);
96 CAFFE_ENFORCE_EQ(delta_in.dim32(1) % box_dim, 0);
97 const int num_classes = delta_in.dim32(1) / box_dim;
// Image info must be 2-D with 3 columns; its row count is the batch size.
99 CAFFE_ENFORCE_EQ(iminfo_in.dim(), 2);
100 CAFFE_ENFORCE_EQ(iminfo_in.dim32(1), 3);
101 const int batch_size = iminfo_in.dim32(0);
// NOTE(review): weights_ size is only checked with DCHECK_EQ, which is
// presumably compiled out in non-debug builds -- confirm whether the size is
// validated elsewhere before weights_ is passed on.
103 DCHECK_EQ(weights_.size(), 4);
// Wrap the raw float buffers of the RoI and delta tensors in Eigen maps using
// the tensors' own row/column counts.
105 Eigen::Map<const ERArrXXf> boxes0(
106 roi_in.data<
float>(), roi_in.dim32(0), roi_in.dim32(1));
107 Eigen::Map<const ERArrXXf> deltas0(
108 delta_in.data<
float>(), delta_in.dim32(0), delta_in.dim32(1));
// Per-image RoI counters, one slot per image, initialised to zero.
111 vector<int> num_rois_per_batch(batch_size, 0);
// RoIs with exactly box_dim columns: the batch must contain a single image,
// and all N RoIs are attributed to it.
112 if (roi_in.dim32(1) == box_dim) {
113 CAFFE_ENFORCE_EQ(batch_size, 1);
114 num_rois_per_batch[0] = N;
// Column 0 of the RoI array is read as each RoI's batch index and tallied.
// (The branch header introducing this path is not visible in this excerpt.)
116 const auto& roi_batch_ids = boxes0.col(0);
117 for (
int i = 0; i < roi_batch_ids.size(); ++i) {
// The batch index is stored as a float element and converted to int here.
118 const int roi_batch_id = roi_batch_ids(i);
// NOTE(review): only the upper bound is enforced; a negative batch index
// would index num_rois_per_batch out of range on the following line.
119 CAFFE_ENFORCE_LT(roi_batch_id, batch_size);
120 num_rois_per_batch[roi_batch_id]++;
// View the image-info tensor as a 2-D Eigen array.
125 Eigen::Map<const ERArrXXf> iminfo(
126 iminfo_in.data<
float>(), iminfo_in.size(0), iminfo_in.size(1));
// Output(0) is allocated as a float tensor with the same sizes as delta_in.
128 auto* box_out = Output(0, delta_in.sizes(), at::dtype<float>());
129 Eigen::Map<ERArrXXf> new_boxes(
130 box_out->template mutable_data<float>(),
// Process one image at a time, using that image's RoI count.
137 for (
int i = 0; i < batch_size; ++i) {
138 const int num_rois = num_rois_per_batch[i];
139 const auto& cur_iminfo = iminfo.row(i);
// Column 2 of the image-info row is the scale. Boxes are divided by it before
// the transform and multiplied by scale_after afterwards; scale_after is that
// same scale when apply_scale_ is set, otherwise 1.0.
140 const float scale_before = cur_iminfo(2);
141 const float scale_after = apply_scale_ ? cur_iminfo(2) : 1.0;
// Columns 0 and 1 divided by the scale, rounded by adding 0.5 and truncating.
142 int img_h = int(cur_iminfo(0) / scale_before + 0.5);
143 int img_w = int(cur_iminfo(1) / scale_before + 0.5);
// rightCols(box_dim) keeps only the last box_dim columns (dropping a leading
// batch-index column when one is present); block() selects this image's rows.
146 boxes0.rightCols(box_dim).block(offset, 0, num_rois, box_dim);
// Only the first four columns are rescaled, so a fifth column, when present,
// is left unchanged.
148 cur_boxes.leftCols(4) /= scale_before;
// For each class k: take that class's deltas for this image, transform the
// boxes, clip them against img_h/img_w, rescale the first four columns by
// scale_after, and store the result in the matching block of the output.
149 for (
int k = 0; k < num_classes; k++) {
150 const auto& cur_deltas =
151 deltas0.block(offset, k * box_dim, num_rois, box_dim);
152 const auto& trans_boxes = utils::bbox_transform(
156 utils::BBOX_XFORM_CLIP_DEFAULT,
161 utils::clip_boxes(trans_boxes, img_h, img_w, clip_angle_thresh_);
163 clip_boxes.leftCols(4) *= scale_after;
164 new_boxes.block(offset, k * box_dim, num_rois, box_dim) = clip_boxes;
// Optional second output: a float tensor of length batch_size populated from
// the per-image RoI counts (the remainder of the expression is not visible).
170 if (OutputSize() > 1) {
171 auto* roi_batch_splits = Output(1, {batch_size}, at::dtype<float>());
172 Eigen::Map<EArrXf> roi_batch_splits_map(
173 roi_batch_splits->template mutable_data<float>(), batch_size);
174 roi_batch_splits_map =
175 Eigen::Map<const EArrXi>(num_rois_per_batch.data(), batch_size)
// C10 registration macro invocation. The visible pieces are two
// std::vector<c10::Argument> lists, the first of which includes a "weights"
// argument typed as a list of floats. The remaining arguments and the end of
// the invocation are not visible in this excerpt.
187 C10_REGISTER_CAFFE2_OPERATOR_CPU(
189 (std::vector<c10::Argument>{
193 c10::Argument(
"weights", ListType::create(FloatType::get())),
201 (std::vector<c10::Argument>{
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...