// ComputeStartIndex: folds a per-dimension `index` into one flat offset for
// `tensor` by accumulating index[i] * tensor.size_from_dim(i + 1) into `ret`.
// NOTE(review): the declaration of `ret` and the return statement are not
// visible in this excerpt — presumably `ret` starts at 0 and is returned.
1 #include "caffe2/operators/generate_proposals_op.h" 2 #include "caffe2/operators/generate_proposals_op_util_boxes.h" 3 #include "generate_proposals_op_util_nms.h" 11 size_t ComputeStartIndex(
12 const TensorCPU& tensor,
13 const std::vector<int>& index) {
// `index` must supply exactly one entry per tensor dimension.
14 DCHECK_EQ(index.size(), tensor.dim());
// NOTE(review): `i` is an int while index.size() is unsigned, so the loop
// condition is a signed/unsigned comparison.
17 for (
int i = 0; i < index.size(); i++) {
// Presumably size_from_dim(i + 1) is the element count of all dimensions
// after i (i.e. the stride of dimension i) — confirm against the Tensor API.
18 ret += index[i] * tensor.size_from_dim(i + 1);
// GetSubTensorView: builds a read-only view over the part of `tensor` that
// starts at `dim0_start_index` along dimension 0. The view's dims are the
// tensor's sizes with the leading dimension dropped.
// NOTE(review): the template header, the closing braces and the final return
// are not visible in this excerpt; presumably `ret` is what gets returned.
26 utils::ConstTensorView<T> GetSubTensorView(
27 const TensorCPU& tensor,
28 int dim0_start_index) {
// The stored element size must equal sizeof(T). Only the size is compared
// here, not the element type itself.
29 DCHECK_EQ(tensor.dtype().itemsize(),
sizeof(
T));
// Empty tensor: hand back a view with a null data pointer and no dims.
31 if (tensor.numel() == 0) {
32 return utils::ConstTensorView<T>(
nullptr, {});
// Index that is dim0_start_index in dimension 0 and 0 in every other dimension.
35 std::vector<int> start_dims(tensor.dim(), 0);
36 start_dims.at(0) = dim0_start_index;
37 auto st_idx = ComputeStartIndex(tensor, start_dims);
// Advance the typed base pointer by the offset computed above.
38 auto ptr = tensor.data<
T>() + st_idx;
40 auto input_dims = tensor.sizes();
// Copy every size except the first, converting each one to int.
41 std::vector<int> ret_dims(input_dims.begin() + 1, input_dims.end());
43 utils::ConstTensorView<T> ret(ptr, ret_dims);
// ComputeAllAnchors: produces the anchors for every location of a
// height x width feature map by adding a per-location shift to each base
// anchor in `anchors`.
// NOTE(review): the remaining parameters (height, width and feat_stride are
// used below) and the branch that chooses between the two `shifts` fills are
// not visible in this excerpt.
51 ERMatXf ComputeAllAnchors(
52 const TensorCPU& anchors,
// K: number of feature-map locations. A: number of base anchors.
// box_dim: values per anchor, which must be 4 or 5.
56 const auto K = height * width;
57 const auto A = anchors.size(0);
58 const auto box_dim = anchors.size(1);
59 CAFFE_ENFORCE(box_dim == 4 || box_dim == 5);
// x shifts: `width` evenly spaced values from 0 to width - 1, scaled by
// feat_stride, with that row repeated `height` times.
61 ERMatXf shift_x = (ERVecXf::LinSpaced(width, 0.0, width - 1.0) * feat_stride)
62 .replicate(height, 1);
// y shifts: `height` evenly spaced values from 0 to height - 1, scaled by
// feat_stride. NOTE(review): the rest of this expression is not visible here.
63 ERMatXf shift_y = (EVecXf::LinSpaced(height, 0.0, height - 1.0) * feat_stride)
// One row of shifts per location, one column per box value.
65 Eigen::MatrixXf shifts(K, box_dim);
// Four-column fill (x, y, x, y). Presumably the box_dim == 4 case — the
// guarding condition is not visible.
68 shifts << ConstEigenVectorMap<float>(shift_x.data(), shift_x.size()),
69 ConstEigenVectorMap<float>(shift_y.data(), shift_y.size()),
70 ConstEigenVectorMap<float>(shift_x.data(), shift_x.size()),
71 ConstEigenVectorMap<float>(shift_y.data(), shift_y.size());
// Five-column fill (x, y, 0, 0, 0): only the first two columns are shifted.
// Presumably the box_dim == 5 case — the guarding condition is not visible.
75 ERMatXf shift_zero = ERMatXf::Constant(height, width, 0.0);
76 shifts << ConstEigenVectorMap<float>(shift_x.data(), shift_x.size()),
77 ConstEigenVectorMap<float>(shift_y.data(), shift_y.size()),
78 ConstEigenVectorMap<float>(shift_zero.data(), shift_zero.size()),
79 ConstEigenVectorMap<float>(shift_zero.data(), shift_zero.size()),
80 ConstEigenVectorMap<float>(shift_zero.data(), shift_zero.size());
// View the anchor data as a single row of A * box_dim floats.
89 ConstEigenMatrixMap<float> anchors_vec(
90 anchors.template data<float>(), 1,
A * box_dim);
// Repeat the anchor row for all K locations, tile each shift row A times so
// it lines up with the A anchors, and add the two.
97 ERMatXf all_anchors_vec =
98 anchors_vec.replicate(K, 1) + shifts.rowwise().replicate(
A);
104 return all_anchors_vec;
// ComputeSortedAnchors: builds shifted anchors only for the entries listed in
// `order`, one output row per entry and in the same sequence.
// NOTE(review): the height, width and feat_stride parameters used below, and
// the branch selecting between the two `shifts` fills, are not visible in
// this excerpt.
107 ERArrXXf ComputeSortedAnchors(
108 const Eigen::Map<const ERArrXXf>& anchors,
112 const vector<int>& order) {
113 const auto box_dim = anchors.cols();
114 CAFFE_ENFORCE(box_dim == 4 || box_dim == 5);
// Split each flat index in `order` into anchor (A), row (H) and column (W)
// parts, with W varying fastest: idx = (a * height + h) * width + w.
117 const auto& order_AHW = utils::AsEArrXt(order);
118 const auto& order_AH = order_AHW / width;
119 const auto& order_W = order_AHW - order_AH * width;
120 const auto& order_A = order_AH / height;
121 const auto& order_H = order_AH - order_A * height;
// Column and row positions scaled by feat_stride.
125 const auto& shift_x = order_W.cast<
float>() * feat_stride;
126 const auto& shift_y = order_H.cast<
float>() * feat_stride;
127 Eigen::MatrixXf shifts(order.size(), box_dim);
// Four-column fill (x, y, x, y). Presumably the box_dim == 4 case — the
// guarding condition is not visible.
130 shifts << shift_x, shift_y, shift_x, shift_y;
// Five-column fill (x, y, 0, 0, 0). Presumably the box_dim == 5 case.
134 const auto& shift_zero = EArrXf::Constant(order.size(), 0.0);
135 shifts << shift_x, shift_y, shift_zero, shift_zero, shift_zero;
// Presumably GetSubArrayRows copies the anchor row named by each order_A
// entry into anchors_sorted; the shifts are then added element-wise.
140 ERArrXXf anchors_sorted;
141 utils::GetSubArrayRows(anchors, order_A, &anchors_sorted);
142 const auto& all_anchors_sorted = anchors_sorted + shifts.array();
143 return all_anchors_sorted;
// ProposalsForOneImage: turns one image's scores and box deltas into the kept
// proposal boxes and their scores, written through the output pointers.
// Visible steps: rank the scores, gather deltas and anchors in that order,
// apply bbox_transform, clip_boxes, filter_boxes and nms_cpu, then gather the
// kept rows.
// NOTE(review): several lines are missing from this excerpt (for example the
// out_boxes parameter, the definition of K, and parts of the sort and
// bbox_transform calls), so the comments below cover only what is shown.
149 void GenerateProposalsOp<CPUContext>::ProposalsForOneImage(
150 const Eigen::Array3f& im_info,
151 const Eigen::Map<const ERArrXXf>& anchors,
152 const utils::ConstTensorView<float>& bbox_deltas_tensor,
153 const utils::ConstTensorView<float>& scores_tensor,
155 EArrXf* out_probs)
const {
// Local aliases for the operator's configured members.
156 const auto& post_nms_topN = rpn_post_nms_topN_;
157 const auto& nms_thresh = rpn_nms_thresh_;
158 const auto& min_size = rpn_min_size_;
159 const int box_dim =
static_cast<int>(anchors.cols());
160 CAFFE_ENFORCE(box_dim == 4 || box_dim == 5);
// Deltas must be a 3-D view whose first dim is a multiple of box_dim; that
// gives A anchors over an H x W map.
162 CAFFE_ENFORCE_EQ(bbox_deltas_tensor.ndim(), 3);
163 CAFFE_ENFORCE_EQ(bbox_deltas_tensor.dim(0) % box_dim, 0);
164 auto A = bbox_deltas_tensor.dim(0) / box_dim;
165 auto H = bbox_deltas_tensor.dim(1);
166 auto W = bbox_deltas_tensor.dim(2);
// The anchor count implied by the deltas must match the anchor rows.
168 CAFFE_ENFORCE_EQ(A, anchors.rows());
// Scores must be 3-D with dims exactly {A, H, W}; they are then viewed as a
// flat 1-D array.
173 CAFFE_ENFORCE_EQ(scores_tensor.ndim(), 3);
174 CAFFE_ENFORCE_EQ(scores_tensor.dims(), (vector<int>{A, H, W}));
175 Eigen::Map<const EArrXf> scores(scores_tensor.data(), scores_tensor.size());
// order starts as 0..N-1 and is rearranged by descending score.
177 std::vector<int> order(scores.size());
178 std::iota(order.begin(), order.end(), 0);
// Non-positive or too-large pre-NMS limit: rank every score with a full sort.
179 if (rpn_pre_nms_topN_ <= 0 || rpn_pre_nms_topN_ >= scores.size()) {
182 std::sort(order.begin(), order.end(), [&scores](
int lhs,
int rhs) {
183 return scores[lhs] > scores[rhs];
// Otherwise only the first rpn_pre_nms_topN_ entries are ranked and `order`
// is truncated to them. NOTE(review): the call name is not visible here;
// presumably std::partial_sort.
190 order.begin() + rpn_pre_nms_topN_,
192 [&scores](
int lhs,
int rhs) {
return scores[lhs] > scores[rhs]; });
193 order.resize(rpn_pre_nms_topN_);
// Scores rearranged to follow `order`.
196 EArrXf scores_sorted;
197 utils::GetSubArray(scores, utils::AsEArrXt(order), &scores_sorted);
// Gather the deltas into (order.size(), box_dim) rows. For each box value j,
// the strided A x K plane starting at data() + j * K is copied into a
// contiguous buffer, which is then indexed with `order`.
// NOTE(review): K is defined on a line not shown; presumably H * W.
207 ERArrXXf bbox_deltas_sorted(order.size(), box_dim);
208 EArrXf bbox_deltas_per_dim(A * K);
209 EigenOuterStride stride(box_dim * K);
210 for (
int j = 0; j < box_dim; ++j) {
211 Eigen::Map<ERMatXf>(bbox_deltas_per_dim.data(), A, K) =
212 Eigen::Map<const ERMatXf, 0, EigenOuterStride>(
213 bbox_deltas_tensor.data() + j * K, A, K, stride);
214 for (
int i = 0; i < order.size(); ++i) {
215 bbox_deltas_sorted(i, j) = bbox_deltas_per_dim[order[i]];
// Shifted anchors for the same entries, in the same sequence as `order`.
221 const auto& all_anchors_sorted =
222 utils::ComputeSortedAnchors(anchors, H, W, feat_stride_, order);
// All four weights are 1.0. NOTE(review): most bbox_transform arguments are
// not visible in this excerpt.
225 static const std::vector<float> bbox_weights{1.0, 1.0, 1.0, 1.0};
226 auto proposals = utils::bbox_transform(
230 utils::BBOX_XFORM_CLIP_DEFAULT,
// Clip using im_info[0] and im_info[1]; NOTE(review): the assignment target of
// this call is not visible here.
238 utils::clip_boxes(proposals, im_info[0], im_info[1], clip_angle_thresh_);
// Indices returned by filter_boxes for min_size; there can be no more of them
// than there are sorted scores.
241 auto keep = utils::filter_boxes(proposals, min_size, im_info);
242 DCHECK_LE(keep.size(), scores_sorted.size());
// When post_nms_topN is positive and smaller than the candidate count it is
// passed to nms_cpu; the second call omits it. NOTE(review): the else branch
// itself is not visible; presumably the second call is that branch.
247 if (post_nms_topN > 0 && post_nms_topN < keep.size()) {
248 keep = utils::nms_cpu(
249 proposals, scores_sorted, keep, nms_thresh, post_nms_topN);
251 keep = utils::nms_cpu(proposals, scores_sorted, keep, nms_thresh);
// Write the kept boxes and scores to the outputs.
255 utils::GetSubArrayRows(proposals, utils::AsEArrXt(keep), out_boxes);
256 utils::GetSubArray(scores_sorted, utils::AsEArrXt(keep), out_probs);
// RunOnDevice: validates the four inputs, computes proposals image by image,
// and packs every image's boxes and scores into the two outputs.
// NOTE(review): parts of this function are missing from the excerpt (some
// checks, the ProposalsForOneImage arguments, the roi_counts declaration and
// the return statement), so the comments below cover only what is shown.
260 bool GenerateProposalsOp<CPUContext>::RunOnDevice() {
// Inputs: 0 = scores, 1 = bbox deltas, 2 = image info, 3 = anchors.
261 const auto& scores = Input(0);
262 const auto& bbox_deltas = Input(1);
263 const auto& im_info_tensor = Input(2);
264 const auto& anchors_tensor = Input(3);
// Scores must be a 4-D float tensor; its sizes give the image count, A,
// height and width.
266 CAFFE_ENFORCE_EQ(scores.dim(), 4, scores.dim());
267 CAFFE_ENFORCE(scores.template IsType<float>(), scores.dtype().name());
268 const auto num_images = scores.size(0);
269 const auto A = scores.size(1);
270 const auto height = scores.size(2);
271 const auto width = scores.size(3);
272 const auto box_dim = anchors_tensor.size(1);
273 CAFFE_ENFORCE(box_dim == 4 || box_dim == 5);
// Image info must have shape (num_images, 3); the next visible line is the
// argument list of its float type check.
281 CAFFE_ENFORCE_EQ(im_info_tensor.sizes(), (vector<int64_t>{num_images, 3}));
283 im_info_tensor.template IsType<float>(), im_info_tensor.dtype().name());
// Anchors must have shape (A, box_dim); the next visible line is the argument
// list of its float type check.
286 CAFFE_ENFORCE_EQ(anchors_tensor.sizes(), (vector<int64_t>{A, box_dim}));
288 anchors_tensor.template IsType<float>(), anchors_tensor.dtype().name());
// 2-D array views over the image-info and anchor data.
290 Eigen::Map<const ERArrXXf> im_info(
291 im_info_tensor.data<
float>(),
292 im_info_tensor.size(0),
293 im_info_tensor.size(1));
295 Eigen::Map<const ERArrXXf> anchors(
296 anchors_tensor.data<
float>(),
297 anchors_tensor.size(0),
298 anchors_tensor.size(1));
// Per-image result slots, one entry per image.
300 std::vector<ERArrXXf> im_boxes(num_images);
301 std::vector<EArrXf> im_probs(num_images);
302 for (
int i = 0; i < num_images; i++) {
// Image i's info row plus its views of the deltas and scores.
303 auto cur_im_info = im_info.row(i);
304 auto cur_bbox_deltas = GetSubTensorView<float>(bbox_deltas, i);
305 auto cur_scores = GetSubTensorView<float>(scores, i);
// References to this image's result slots.
307 ERArrXXf& im_i_boxes = im_boxes[i];
308 EArrXf& im_i_probs = im_probs[i];
309 ProposalsForOneImage(
// Total number of proposal rows across all images.
319 for (
int i = 0; i < num_images; i++) {
320 roi_counts += im_boxes[i].rows();
// Each output row is one leading column followed by box_dim box values.
322 const int roi_col_count = box_dim + 1;
323 auto* out_rois = Output(0, {roi_counts, roi_col_count}, at::dtype<float>());
324 auto* out_rois_probs = Output(1, {roi_counts}, at::dtype<float>());
325 float* out_rois_ptr = out_rois->template mutable_data<float>();
326 float* out_rois_probs_ptr = out_rois_probs->template mutable_data<float>();
// Copy each image's results into the outputs, advancing both write pointers
// by that image's row count.
327 for (
int i = 0; i < num_images; i++) {
328 const ERArrXXf& im_i_boxes = im_boxes[i];
329 const EArrXf& im_i_probs = im_probs[i];
330 int csz = im_i_boxes.rows();
// Column 0 holds the image index i; the remaining box_dim columns hold the
// box values.
333 Eigen::Map<ERArrXXf> cur_rois(out_rois_ptr, csz, roi_col_count);
334 cur_rois.col(0).setConstant(i);
335 cur_rois.block(0, 1, csz, box_dim) = im_i_boxes;
338 Eigen::Map<EArrXf>(out_rois_probs_ptr, csz) = im_i_probs;
340 out_rois_ptr += csz * roi_col_count;
341 out_rois_probs_ptr += csz;
// Register the CPU implementation under the operator name GenerateProposals.
347 REGISTER_CPU_OPERATOR(GenerateProposals, GenerateProposalsOp<CPUContext>);
// The same implementation is also registered under the name GenerateProposalsCPP.
349 REGISTER_CPU_OPERATOR(GenerateProposalsCPP, GenerateProposalsOp<CPUContext>);
351 OPERATOR_SCHEMA(GenerateProposals)
355 Generate bounding box proposals for Faster RCNN. The proposals are generated for 356 a list of images based on image score 'score', bounding box regression result 357 'deltas' as well as predefined bounding box shapes 'anchors'. Greedy 358 non-maximum suppression is applied to generate the final bounding boxes. 360 .Arg("spatial_scale",
"(float) spatial scale")
361 .Arg(
"pre_nms_topN",
"(int) RPN_PRE_NMS_TOP_N")
362 .Arg(
"post_nms_topN",
"(int) RPN_POST_NMS_TOP_N")
363 .Arg(
"nms_thresh",
"(float) RPN_NMS_THRESH")
364 .Arg(
"min_size",
"(float) RPN_MIN_SIZE")
367 "bool (default true). If set, for rotated boxes, angle is " 368 "normalized to be within [angle_bound_lo, angle_bound_hi].")
371 "int (default -90 degrees). If set, for rotated boxes, angle is " 372 "normalized to be within [angle_bound_lo, angle_bound_hi].")
375 "int (default 90 degrees). If set, for rotated boxes, angle is " 376 "normalized to be within [angle_bound_lo, angle_bound_hi].")
379 "float (default 1.0 degrees). For RRPN, clip almost horizontal boxes " 380 "within this threshold of tolerance for backward compatibility. " 381 "Set to negative value for no clipping.")
382 .Input(0,
"scores",
"Scores from conv layer, size (img_count, A, H, W)")
386 "Bounding box deltas from conv layer, " 387 "size (img_count, 4 * A, H, W)")
391 "Image info, size (img_count, 3), " 392 "format (height, width, scale)")
393 .Input(3,
"anchors",
"Bounding box anchors, size (A, 4)")
397 "Proposals, size (n x 5), " 398 "format (image_index, x1, y1, x2, y2)")
399 .Output(1,
"rois_probs",
"scores of proposals, size (n)");
// Schema for the GenerateProposalsCPP name: 4 inputs, 2 outputs.
401 OPERATOR_SCHEMA(GenerateProposalsCPP).NumInputs(4).NumOutputs(2);
// Per the macro name, both operator names are marked as having no gradient.
403 SHOULD_NOT_DO_GRADIENT(GenerateProposals);
405 SHOULD_NOT_DO_GRADIENT(GenerateProposalsCPP);
409 C10_REGISTER_CAFFE2_OPERATOR_CPU(
411 (std::vector<c10::Argument>{
426 (std::vector<c10::Argument>{
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...