Caffe2 - C++ API
A deep learning, cross platform ML framework
generate_proposals_op.cc
1 #include "caffe2/operators/generate_proposals_op.h"
2 #include "caffe2/operators/generate_proposals_op_util_boxes.h"
3 #include "generate_proposals_op_util_nms.h"
4 
5 namespace caffe2 {
6 
7 namespace {
8 
9 // Compute the 1-d index of a n-dimensional contiguous row-major tensor for
10 // a given n-dimensional index 'index'
11 size_t ComputeStartIndex(
12  const TensorCPU& tensor,
13  const std::vector<int>& index) {
14  DCHECK_EQ(index.size(), tensor.dim());
15 
16  size_t ret = 0;
17  for (int i = 0; i < index.size(); i++) {
18  ret += index[i] * tensor.size_from_dim(i + 1);
19  }
20 
21  return ret;
22 }
23 
24 // Get a sub tensor view from 'tensor' using data pointer from 'tensor'
25 template <class T>
26 utils::ConstTensorView<T> GetSubTensorView(
27  const TensorCPU& tensor,
28  int dim0_start_index) {
29  DCHECK_EQ(tensor.dtype().itemsize(), sizeof(T));
30 
31  if (tensor.numel() == 0) {
32  return utils::ConstTensorView<T>(nullptr, {});
33  }
34 
35  std::vector<int> start_dims(tensor.dim(), 0);
36  start_dims.at(0) = dim0_start_index;
37  auto st_idx = ComputeStartIndex(tensor, start_dims);
38  auto ptr = tensor.data<T>() + st_idx;
39 
40  auto input_dims = tensor.sizes();
41  std::vector<int> ret_dims(input_dims.begin() + 1, input_dims.end());
42 
43  utils::ConstTensorView<T> ret(ptr, ret_dims);
44  return ret;
45 }
46 
47 } // namespace
48 
49 namespace utils {
50 
51 ERMatXf ComputeAllAnchors(
52  const TensorCPU& anchors,
53  int height,
54  int width,
55  float feat_stride) {
56  const auto K = height * width;
57  const auto A = anchors.size(0);
58  const auto box_dim = anchors.size(1);
59  CAFFE_ENFORCE(box_dim == 4 || box_dim == 5);
60 
61  ERMatXf shift_x = (ERVecXf::LinSpaced(width, 0.0, width - 1.0) * feat_stride)
62  .replicate(height, 1);
63  ERMatXf shift_y = (EVecXf::LinSpaced(height, 0.0, height - 1.0) * feat_stride)
64  .replicate(1, width);
65  Eigen::MatrixXf shifts(K, box_dim);
66  if (box_dim == 4) {
67  // Upright boxes in [x1, y1, x2, y2] format
68  shifts << ConstEigenVectorMap<float>(shift_x.data(), shift_x.size()),
69  ConstEigenVectorMap<float>(shift_y.data(), shift_y.size()),
70  ConstEigenVectorMap<float>(shift_x.data(), shift_x.size()),
71  ConstEigenVectorMap<float>(shift_y.data(), shift_y.size());
72  } else {
73  // Rotated boxes in [ctr_x, ctr_y, w, h, angle] format.
74  // Zero shift for width, height and angle.
75  ERMatXf shift_zero = ERMatXf::Constant(height, width, 0.0);
76  shifts << ConstEigenVectorMap<float>(shift_x.data(), shift_x.size()),
77  ConstEigenVectorMap<float>(shift_y.data(), shift_y.size()),
78  ConstEigenVectorMap<float>(shift_zero.data(), shift_zero.size()),
79  ConstEigenVectorMap<float>(shift_zero.data(), shift_zero.size()),
80  ConstEigenVectorMap<float>(shift_zero.data(), shift_zero.size());
81  }
82 
83  // Broacast anchors over shifts to enumerate all anchors at all positions
84  // in the (H, W) grid:
85  // - add A anchors of shape (1, A, box_dim) to
86  // - K shifts of shape (K, 1, box_dim) to get
87  // - all shifted anchors of shape (K, A, box_dim)
88  // - reshape to (K*A, box_dim) shifted anchors
89  ConstEigenMatrixMap<float> anchors_vec(
90  anchors.template data<float>(), 1, A * box_dim);
91  // equivalent to python code
92  // all_anchors = (
93  // self._model.anchors.reshape((1, A, box_dim)) +
94  // shifts.reshape((1, K, box_dim)).transpose((1, 0, 2)))
95  // all_anchors = all_anchors.reshape((K * A, box_dim))
96  // all_anchors_vec: (K, A * box_dim)
97  ERMatXf all_anchors_vec =
98  anchors_vec.replicate(K, 1) + shifts.rowwise().replicate(A);
99 
100  // use the following to reshape to (K * A, box_dim)
101  // Eigen::Map<const ERMatXf> all_anchors(
102  // all_anchors_vec.data(), K * A, box_dim);
103 
104  return all_anchors_vec;
105 }
106 
107 ERArrXXf ComputeSortedAnchors(
108  const Eigen::Map<const ERArrXXf>& anchors,
109  int height,
110  int width,
111  float feat_stride,
112  const vector<int>& order) {
113  const auto box_dim = anchors.cols();
114  CAFFE_ENFORCE(box_dim == 4 || box_dim == 5);
115 
116  // Order is flattened in (A, H, W) format. Unravel the indices.
117  const auto& order_AHW = utils::AsEArrXt(order);
118  const auto& order_AH = order_AHW / width;
119  const auto& order_W = order_AHW - order_AH * width;
120  const auto& order_A = order_AH / height;
121  const auto& order_H = order_AH - order_A * height;
122 
123  // Generate shifts for each location in the H * W grid corresponding
124  // to the sorted scores in (A, H, W) order.
125  const auto& shift_x = order_W.cast<float>() * feat_stride;
126  const auto& shift_y = order_H.cast<float>() * feat_stride;
127  Eigen::MatrixXf shifts(order.size(), box_dim);
128  if (box_dim == 4) {
129  // Upright boxes in [x1, y1, x2, y2] format
130  shifts << shift_x, shift_y, shift_x, shift_y;
131  } else {
132  // Rotated boxes in [ctr_x, ctr_y, w, h, angle] format.
133  // Zero shift for width, height and angle.
134  const auto& shift_zero = EArrXf::Constant(order.size(), 0.0);
135  shifts << shift_x, shift_y, shift_zero, shift_zero, shift_zero;
136  }
137 
138  // Apply shifts to the relevant anchors.
139  // Equivalent to python code `all_anchors = self._anchors[order_A] + shifts`
140  ERArrXXf anchors_sorted;
141  utils::GetSubArrayRows(anchors, order_A, &anchors_sorted);
142  const auto& all_anchors_sorted = anchors_sorted + shifts.array();
143  return all_anchors_sorted;
144 }
145 
146 } // namespace utils
147 
148 template <>
149 void GenerateProposalsOp<CPUContext>::ProposalsForOneImage(
150  const Eigen::Array3f& im_info,
151  const Eigen::Map<const ERArrXXf>& anchors,
152  const utils::ConstTensorView<float>& bbox_deltas_tensor,
153  const utils::ConstTensorView<float>& scores_tensor,
154  ERArrXXf* out_boxes,
155  EArrXf* out_probs) const {
156  const auto& post_nms_topN = rpn_post_nms_topN_;
157  const auto& nms_thresh = rpn_nms_thresh_;
158  const auto& min_size = rpn_min_size_;
159  const int box_dim = static_cast<int>(anchors.cols());
160  CAFFE_ENFORCE(box_dim == 4 || box_dim == 5);
161 
162  CAFFE_ENFORCE_EQ(bbox_deltas_tensor.ndim(), 3);
163  CAFFE_ENFORCE_EQ(bbox_deltas_tensor.dim(0) % box_dim, 0);
164  auto A = bbox_deltas_tensor.dim(0) / box_dim;
165  auto H = bbox_deltas_tensor.dim(1);
166  auto W = bbox_deltas_tensor.dim(2);
167  auto K = H * W;
168  CAFFE_ENFORCE_EQ(A, anchors.rows());
169 
170  // scores are (A, H, W) format from conv output.
171  // Maintain the same order without transposing (which is slow)
172  // and compute anchors accordingly.
173  CAFFE_ENFORCE_EQ(scores_tensor.ndim(), 3);
174  CAFFE_ENFORCE_EQ(scores_tensor.dims(), (vector<int>{A, H, W}));
175  Eigen::Map<const EArrXf> scores(scores_tensor.data(), scores_tensor.size());
176 
177  std::vector<int> order(scores.size());
178  std::iota(order.begin(), order.end(), 0);
179  if (rpn_pre_nms_topN_ <= 0 || rpn_pre_nms_topN_ >= scores.size()) {
180  // 4. sort all (proposal, score) pairs by score from highest to lowest
181  // 5. take top pre_nms_topN (e.g. 6000)
182  std::sort(order.begin(), order.end(), [&scores](int lhs, int rhs) {
183  return scores[lhs] > scores[rhs];
184  });
185  } else {
186  // Avoid sorting possibly large arrays; First partition to get top K
187  // unsorted and then sort just those (~20x faster for 200k scores)
188  std::partial_sort(
189  order.begin(),
190  order.begin() + rpn_pre_nms_topN_,
191  order.end(),
192  [&scores](int lhs, int rhs) { return scores[lhs] > scores[rhs]; });
193  order.resize(rpn_pre_nms_topN_);
194  }
195 
196  EArrXf scores_sorted;
197  utils::GetSubArray(scores, utils::AsEArrXt(order), &scores_sorted);
198 
199  // bbox_deltas are (A * box_dim, H, W) format from conv output.
200  // Order them based on scores maintaining the same format without
201  // expensive transpose.
202  // Note that order corresponds to (A, H * W) in row-major whereas
203  // bbox_deltas are in (A, box_dim, H * W) in row-major. Hence, we
204  // obtain a sub-view of bbox_deltas for each dim (4 for RPN, 5 for RRPN)
205  // in (A, H * W) with an outer stride of box_dim * H * W. Then we apply
206  // the ordering and filtering for each dim iteratively.
207  ERArrXXf bbox_deltas_sorted(order.size(), box_dim);
208  EArrXf bbox_deltas_per_dim(A * K);
209  EigenOuterStride stride(box_dim * K);
210  for (int j = 0; j < box_dim; ++j) {
211  Eigen::Map<ERMatXf>(bbox_deltas_per_dim.data(), A, K) =
212  Eigen::Map<const ERMatXf, 0, EigenOuterStride>(
213  bbox_deltas_tensor.data() + j * K, A, K, stride);
214  for (int i = 0; i < order.size(); ++i) {
215  bbox_deltas_sorted(i, j) = bbox_deltas_per_dim[order[i]];
216  }
217  }
218 
219  // Compute anchors specific to the ordered and pre-filtered indices
220  // in (A, H, W) format.
221  const auto& all_anchors_sorted =
222  utils::ComputeSortedAnchors(anchors, H, W, feat_stride_, order);
223 
224  // Transform anchors into proposals via bbox transformations
225  static const std::vector<float> bbox_weights{1.0, 1.0, 1.0, 1.0};
226  auto proposals = utils::bbox_transform(
227  all_anchors_sorted,
228  bbox_deltas_sorted,
229  bbox_weights,
230  utils::BBOX_XFORM_CLIP_DEFAULT,
231  angle_bound_on_,
232  angle_bound_lo_,
233  angle_bound_hi_);
234 
235  // 2. clip proposals to image (may result in proposals with zero area
236  // that will be removed in the next step)
237  proposals =
238  utils::clip_boxes(proposals, im_info[0], im_info[1], clip_angle_thresh_);
239 
240  // 3. remove predicted boxes with either height or width < min_size
241  auto keep = utils::filter_boxes(proposals, min_size, im_info);
242  DCHECK_LE(keep.size(), scores_sorted.size());
243 
244  // 6. apply loose nms (e.g. threshold = 0.7)
245  // 7. take after_nms_topN (e.g. 300)
246  // 8. return the top proposals (-> RoIs top)
247  if (post_nms_topN > 0 && post_nms_topN < keep.size()) {
248  keep = utils::nms_cpu(
249  proposals, scores_sorted, keep, nms_thresh, post_nms_topN);
250  } else {
251  keep = utils::nms_cpu(proposals, scores_sorted, keep, nms_thresh);
252  }
253 
254  // Generate outputs
255  utils::GetSubArrayRows(proposals, utils::AsEArrXt(keep), out_boxes);
256  utils::GetSubArray(scores_sorted, utils::AsEArrXt(keep), out_probs);
257 }
258 
259 template <>
260 bool GenerateProposalsOp<CPUContext>::RunOnDevice() {
261  const auto& scores = Input(0);
262  const auto& bbox_deltas = Input(1);
263  const auto& im_info_tensor = Input(2);
264  const auto& anchors_tensor = Input(3);
265 
266  CAFFE_ENFORCE_EQ(scores.dim(), 4, scores.dim());
267  CAFFE_ENFORCE(scores.template IsType<float>(), scores.dtype().name());
268  const auto num_images = scores.size(0);
269  const auto A = scores.size(1);
270  const auto height = scores.size(2);
271  const auto width = scores.size(3);
272  const auto box_dim = anchors_tensor.size(1);
273  CAFFE_ENFORCE(box_dim == 4 || box_dim == 5);
274 
275  // bbox_deltas: (num_images, A * box_dim, H, W)
276  CAFFE_ENFORCE_EQ(
277  bbox_deltas.sizes(),
278  (at::ArrayRef<int64_t>{num_images, box_dim * A, height, width}));
279 
280  // im_info_tensor: (num_images, 3), format [height, width, scale; ...]
281  CAFFE_ENFORCE_EQ(im_info_tensor.sizes(), (vector<int64_t>{num_images, 3}));
282  CAFFE_ENFORCE(
283  im_info_tensor.template IsType<float>(), im_info_tensor.dtype().name());
284 
285  // anchors: (A, box_dim)
286  CAFFE_ENFORCE_EQ(anchors_tensor.sizes(), (vector<int64_t>{A, box_dim}));
287  CAFFE_ENFORCE(
288  anchors_tensor.template IsType<float>(), anchors_tensor.dtype().name());
289 
290  Eigen::Map<const ERArrXXf> im_info(
291  im_info_tensor.data<float>(),
292  im_info_tensor.size(0),
293  im_info_tensor.size(1));
294 
295  Eigen::Map<const ERArrXXf> anchors(
296  anchors_tensor.data<float>(),
297  anchors_tensor.size(0),
298  anchors_tensor.size(1));
299 
300  std::vector<ERArrXXf> im_boxes(num_images);
301  std::vector<EArrXf> im_probs(num_images);
302  for (int i = 0; i < num_images; i++) {
303  auto cur_im_info = im_info.row(i);
304  auto cur_bbox_deltas = GetSubTensorView<float>(bbox_deltas, i);
305  auto cur_scores = GetSubTensorView<float>(scores, i);
306 
307  ERArrXXf& im_i_boxes = im_boxes[i];
308  EArrXf& im_i_probs = im_probs[i];
309  ProposalsForOneImage(
310  cur_im_info,
311  anchors,
312  cur_bbox_deltas,
313  cur_scores,
314  &im_i_boxes,
315  &im_i_probs);
316  }
317 
318  int roi_counts = 0;
319  for (int i = 0; i < num_images; i++) {
320  roi_counts += im_boxes[i].rows();
321  }
322  const int roi_col_count = box_dim + 1;
323  auto* out_rois = Output(0, {roi_counts, roi_col_count}, at::dtype<float>());
324  auto* out_rois_probs = Output(1, {roi_counts}, at::dtype<float>());
325  float* out_rois_ptr = out_rois->template mutable_data<float>();
326  float* out_rois_probs_ptr = out_rois_probs->template mutable_data<float>();
327  for (int i = 0; i < num_images; i++) {
328  const ERArrXXf& im_i_boxes = im_boxes[i];
329  const EArrXf& im_i_probs = im_probs[i];
330  int csz = im_i_boxes.rows();
331 
332  // write rois
333  Eigen::Map<ERArrXXf> cur_rois(out_rois_ptr, csz, roi_col_count);
334  cur_rois.col(0).setConstant(i);
335  cur_rois.block(0, 1, csz, box_dim) = im_i_boxes;
336 
337  // write rois_probs
338  Eigen::Map<EArrXf>(out_rois_probs_ptr, csz) = im_i_probs;
339 
340  out_rois_ptr += csz * roi_col_count;
341  out_rois_probs_ptr += csz;
342  }
343 
344  return true;
345 }
346 
347 REGISTER_CPU_OPERATOR(GenerateProposals, GenerateProposalsOp<CPUContext>);
348 // For backward compatibility
349 REGISTER_CPU_OPERATOR(GenerateProposalsCPP, GenerateProposalsOp<CPUContext>);
350 
351 OPERATOR_SCHEMA(GenerateProposals)
352  .NumInputs(4)
353  .NumOutputs(2)
354  .SetDoc(R"DOC(
355 Generate bounding box proposals for Faster RCNN. The propoasls are generated for
356 a list of images based on image score 'score', bounding box regression result
357 'deltas' as well as predefined bounding box shapes 'anchors'. Greedy
358 non-maximum suppression is applied to generate the final bounding boxes.
359 )DOC")
360  .Arg("spatial_scale", "(float) spatial scale")
361  .Arg("pre_nms_topN", "(int) RPN_PRE_NMS_TOP_N")
362  .Arg("post_nms_topN", "(int) RPN_POST_NMS_TOP_N")
363  .Arg("nms_thresh", "(float) RPN_NMS_THRESH")
364  .Arg("min_size", "(float) RPN_MIN_SIZE")
365  .Arg(
366  "angle_bound_on",
367  "bool (default true). If set, for rotated boxes, angle is "
368  "normalized to be within [angle_bound_lo, angle_bound_hi].")
369  .Arg(
370  "angle_bound_lo",
371  "int (default -90 degrees). If set, for rotated boxes, angle is "
372  "normalized to be within [angle_bound_lo, angle_bound_hi].")
373  .Arg(
374  "angle_bound_hi",
375  "int (default 90 degrees). If set, for rotated boxes, angle is "
376  "normalized to be within [angle_bound_lo, angle_bound_hi].")
377  .Arg(
378  "clip_angle_thresh",
379  "float (default 1.0 degrees). For RRPN, clip almost horizontal boxes "
380  "within this threshold of tolerance for backward compatibility. "
381  "Set to negative value for no clipping.")
382  .Input(0, "scores", "Scores from conv layer, size (img_count, A, H, W)")
383  .Input(
384  1,
385  "bbox_deltas",
386  "Bounding box deltas from conv layer, "
387  "size (img_count, 4 * A, H, W)")
388  .Input(
389  2,
390  "im_info",
391  "Image info, size (img_count, 3), "
392  "format (height, width, scale)")
393  .Input(3, "anchors", "Bounding box anchors, size (A, 4)")
394  .Output(
395  0,
396  "rois",
397  "Proposals, size (n x 5), "
398  "format (image_index, x1, y1, x2, y2)")
399  .Output(1, "rois_probs", "scores of proposals, size (n)");
400 // For backward compatibility
401 OPERATOR_SCHEMA(GenerateProposalsCPP).NumInputs(4).NumOutputs(2);
402 
403 SHOULD_NOT_DO_GRADIENT(GenerateProposals);
404 // For backward compatibility
405 SHOULD_NOT_DO_GRADIENT(GenerateProposalsCPP);
406 
407 } // namespace caffe2
408 
409 C10_REGISTER_CAFFE2_OPERATOR_CPU(
410  GenerateProposals,
411  (std::vector<c10::Argument>{
412  c10::Argument("scores"),
413  c10::Argument("bbox_deltas"),
414  c10::Argument("im_info"),
415  c10::Argument("anchors"),
416  c10::Argument("spatial_scale", FloatType::get()),
417  c10::Argument("pre_nms_topN", IntType::get()),
418  c10::Argument("post_nms_topN", IntType::get()),
419  c10::Argument("nms_thresh", FloatType::get()),
420  c10::Argument("min_size", FloatType::get()),
421  c10::Argument("angle_bound_on", BoolType::get()),
422  c10::Argument("angle_bound_lo", IntType::get()),
423  c10::Argument("angle_bound_hi", IntType::get()),
424  c10::Argument("clip_angle_thresh", FloatType::get()),
425  }),
426  (std::vector<c10::Argument>{
427  c10::Argument("output_0"),
428  c10::Argument("output_1"),
429  }),
Definition: static.cpp:52
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13