Caffe2 - C++ API
A deep learning, cross platform ML framework
collect_and_distribute_fpn_rpn_proposals_op.cc
1 #include "caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.h"
2 
3 namespace caffe2 {
4 
5 namespace utils {
6 
7 // Compute the area of an array of boxes.
8 ERArrXXf BoxesArea(const ERArrXXf& boxes) {
9  // equivalent to python code
10  // w = (boxes[:, 2] - boxes[:, 0] + 1)
11  // h = (boxes[:, 3] - boxes[:, 1] + 1)
12  // areas = w * h
13  // assert np.all(areas >= 0), 'Negative areas founds'
14  const auto w = boxes.col(2) - boxes.col(0) + 1;
15  const auto h = boxes.col(3) - boxes.col(1) + 1;
16  const ERArrXXf areas = w * h;
17  CAFFE_ENFORCE((areas >= 0).all(), "Negative areas founds: ", boxes);
18  return areas;
19 }
20 
21 // Determine which FPN level each RoI in a set of RoIs should map to based
22 // on the heuristic in the FPN paper.
23 ERArrXXf MapRoIsToFpnLevels(Eigen::Ref<const ERArrXXf> rois,
24  const float k_min, const float k_max,
25  const float s0, const float lvl0) {
26  // Compute level ids
27  ERArrXXf s = BoxesArea(rois).sqrt();
28  // s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
29  // lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
30 
31  // Eqn.(1) in FPN paper
32  // equivalent to python code
33  // target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
34  // target_lvls = np.clip(target_lvls, k_min, k_max)
35  auto target_lvls = (lvl0 + (s / s0 + 1e-6).log() / log(2)).floor();
36  auto target_lvls_clipped = target_lvls.min(k_max).max(k_min);
37  return target_lvls_clipped;
38 }
39 
40 // Sort RoIs from highest to lowest individual RoI score based on
41 // values from scores array and limit to n results
42 void SortAndLimitRoIsByScores(Eigen::Ref<const EArrXf> scores, int n,
43  ERArrXXf& rois) {
44  CAFFE_ENFORCE(rois.rows() == scores.size(), "RoIs and scores count mismatch");
45  // Create index array with 0, 1, ... N
46  std::vector<int> idxs(rois.rows());
47  std::iota(idxs.begin(), idxs.end(), 0);
48  // Reuse a comparator based on scores and store a copy of RoIs that
49  // will be truncated and manipulated below
50  auto comp = [&scores](int lhs, int rhs) {
51  if (scores(lhs) > scores(rhs)) return true;
52  if (scores(lhs) < scores(rhs)) return false;
53  // To ensure the sort is stable
54  return lhs < rhs;
55  };
56  ERArrXXf rois_copy = rois;
57  // Note that people have found nth_element + sort to be much faster
58  // than partial_sort so we use it here
59  if (n > 0 && n < rois.rows()) {
60  std::nth_element(idxs.begin(), idxs.begin() + n, idxs.end(), comp);
61  rois.resize(n, rois.cols());
62  } else {
63  n = rois.rows();
64  }
65  std::sort(idxs.begin(), idxs.begin() + n, comp);
66  // Update RoIs based on new order
67  for (int i = 0; i < n; i++) {
68  rois.row(i) = rois_copy.row(idxs[i]);
69  }
70 }
71 
72 // Updates arr to be indices that would sort the array. Implementation of
73 // https://docs.scipy.org/doc/numpy/reference/generated/numpy.argsort.html
74 void ArgSort(EArrXi& arr) {
75  // Create index array with 0, 1, ... N and sort based on array values
76  std::vector<int> idxs(arr.size());
77  std::iota(std::begin(idxs), std::end(idxs), 0);
78  std::sort(idxs.begin(), idxs.end(), [&arr](int lhs, int rhs) {
79  return arr(lhs) < arr(rhs);
80  });
81  // Update array to match new order
82  for (int i = 0; i < arr.size(); i++) {
83  arr(i) = idxs[i];
84  }
85 }
86 
87 // Update out_filtered and out_indices with rows from rois where lvl matches
88 // value in lvls passed in.
89 void RowsWhereRoILevelEquals(Eigen::Ref<const ERArrXXf> rois,
90  const ERArrXXf& lvls, const int lvl,
91  ERArrXXf* out_filtered, EArrXi* out_indices) {
92  CAFFE_ENFORCE(out_filtered != nullptr, "Output filtered required");
93  CAFFE_ENFORCE(out_indices != nullptr, "Output indices required");
94  CAFFE_ENFORCE(rois.rows() == lvls.rows(), "RoIs and lvls count mismatch");
95  // Calculate how many rows we need
96  int filtered_size = (lvls == lvl).rowwise().any().count();
97  // Fill in the rows and indices
98  out_filtered->resize(filtered_size, rois.cols());
99  out_indices->resize(filtered_size);
100  for (int i = 0, filtered_idx = 0; i < rois.rows(); i++) {
101  auto lvl_row = lvls.row(i);
102  if ((lvl_row == lvl).any()) {
103  out_filtered->row(filtered_idx) = rois.row(i);
104  (*out_indices)(filtered_idx) = i;
105  filtered_idx++;
106  }
107  }
108 }
109 
110 } // namespace utils
111 
112 template <>
113 bool CollectAndDistributeFpnRpnProposalsOp<CPUContext>::RunOnDevice() {
114  int num_rpn_lvls = rpn_max_level_ - rpn_min_level_ + 1;
115  CAFFE_ENFORCE_EQ(InputSize(), 2 * num_rpn_lvls);
116 
117  int num_roi_lvls = roi_max_level_ - roi_min_level_ + 1;
118  CAFFE_ENFORCE_EQ(OutputSize(), num_roi_lvls + 2);
119 
120  // Collect rois and scores in Eigen
121  // rois are in [[batch_idx, x0, y0, x1, y2], ...] format
122  // Combine predictions across all levels and retain the top scoring
123  //
124  // equivalent to python code
125  // roi_inputs = inputs[:num_rpn_lvls]
126  // score_inputs = inputs[num_rpn_lvls:]
127  // rois = np.concatenate([blob.data for blob in roi_inputs])
128  // scores = np.concatenate([blob.data for blob in score_inputs]).squeeze()
129  int proposal_num = 0;
130  for (int i = 0; i < num_rpn_lvls; i++) {
131  const auto& roi_in = Input(i);
132  proposal_num += roi_in.size(0);
133  }
134  ERArrXXf rois(proposal_num, 5);
135  EArrXf scores(proposal_num);
136  int len = 0;
137  for (int i = 0; i < num_rpn_lvls; i++) {
138  const auto& roi_in = Input(i);
139  const int n = roi_in.size(0);
140 
141  Eigen::Map<const ERArrXXf> roi(roi_in.data<float>(), n, 5);
142  rois.block(len, 0, n, 5) = roi;
143 
144  const auto& score_in = Input(num_rpn_lvls + i);
145 
146  // No need to squeeze, since we are reshaping when converting to Eigen
147  // https://docs.scipy.org/doc/numpy/reference/generated/numpy.squeeze.html
148  Eigen::Map<const EArrXf> score(score_in.data<float>(), n);
149  scores.segment(len, n) = score;
150 
151  len += n;
152  }
153 
154  // Grab only top rpn_post_nms_topN rois
155  // equivalent to python code
156  // inds = np.argsort(-scores)[:rpn_post_nms_topN]
157  // rois = rois[inds, :]
158  utils::SortAndLimitRoIsByScores(scores, rpn_post_nms_topN_, rois);
159 
160  // Distribute
161  // equivalent to python code
162  // lvl_min = cfg.FPN.ROI_MIN_LEVEL
163  // lvl_max = cfg.FPN.ROI_MAX_LEVEL
164  // lvls = fpn.map_rois_to_fpn_levels(rois[:, 1:5], lvl_min, lvl_max)
165  const int lvl_min = roi_min_level_;
166  const int lvl_max = roi_max_level_;
167  const int canon_scale = roi_canonical_scale_;
168  const int canon_level = roi_canonical_level_;
169  auto rois_block = rois.block(0, 1, rois.rows(), 4);
170  auto lvls = utils::MapRoIsToFpnLevels(rois_block,
171  lvl_min, lvl_max,
172  canon_scale, canon_level);
173 
174  // equivalent to python code
175  // outputs[0].reshape(rois.shape)
176  // outputs[0].data[...] = rois
177 
178  auto* rois_out = Output(0, {rois.rows(), rois.cols()}, at::dtype<float>());
179  Eigen::Map<ERArrXXf> rois_out_mat(
180  rois_out->template mutable_data<float>(), rois.rows(), rois.cols());
181  rois_out_mat = rois;
182 
183  // Create new roi blobs for each FPN level
184  // (See: modeling.FPN.add_multilevel_roi_blobs which is similar but annoying
185  // to generalize to support this particular case.)
186  //
187  // equivalent to python code
188  // rois_idx_order = np.empty((0, ))
189  // for (output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)))
190  // idx_lvl = np.where(lvls == lvl)[0]
191  // blob_roi_level = rois[idx_lvl, :]
192  // outputs[output_idx + 1].reshape(blob_roi_level.shape)
193  // outputs[output_idx + 1].data[...] = blob_roi_level
194  // rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
195  // rois_idx_restore = np.argsort(rois_idx_order)
196  // blob_utils.py_op_copy_blob(rois_idx_restore.astype(np.int32), outputs[-1])
197  EArrXi rois_idx_restore;
198  for (int i = 0, lvl = lvl_min; i < num_roi_lvls; i++, lvl++) {
199  ERArrXXf blob_roi_level;
200  EArrXi idx_lvl;
201  utils::RowsWhereRoILevelEquals(rois, lvls, lvl, &blob_roi_level, &idx_lvl);
202 
203  // Output blob_roi_level
204 
205  auto* roi_out = Output(
206  i + 1,
207  {blob_roi_level.rows(), blob_roi_level.cols()},
208  at::dtype<float>());
209  Eigen::Map<ERArrXXf> roi_out_mat(
210  roi_out->template mutable_data<float>(),
211  blob_roi_level.rows(),
212  blob_roi_level.cols());
213  roi_out_mat = blob_roi_level;
214 
215  // Append indices from idx_lvl to rois_idx_restore
216  rois_idx_restore.conservativeResize(rois_idx_restore.size() + idx_lvl.size());
217  rois_idx_restore.tail(idx_lvl.size()) = idx_lvl;
218  }
219  utils::ArgSort(rois_idx_restore);
220 
221  auto* rois_idx_restore_out =
222  Output(OutputSize() - 1, {rois_idx_restore.size()}, at::dtype<int>());
223  Eigen::Map<EArrXi> rois_idx_restore_out_mat(
224  rois_idx_restore_out->template mutable_data<int>(),
225  rois_idx_restore.size());
226  rois_idx_restore_out_mat = rois_idx_restore;
227 
228  return true;
229 }
230 
// File-local registration of the operator: CPU implementation binding,
// operator schema (arguments, example inputs/outputs, documentation) and
// gradient marker.
231 namespace {
232 
// Bind the operator name to the CPUContext specialization of the op class.
233 REGISTER_CPU_OPERATOR(CollectAndDistributeFpnRpnProposals, CollectAndDistributeFpnRpnProposalsOp<CPUContext>);
234 
// Schema: at least 2 inputs and at least 3 outputs, with no fixed upper bound
// (INT_MAX). RunOnDevice enforces the exact counts from the configured
// RPN/RoI level ranges.
235 OPERATOR_SCHEMA(CollectAndDistributeFpnRpnProposals)
236  .NumInputs(2, INT_MAX)
237  .NumOutputs(3, INT_MAX)
238  .SetDoc(R"DOC(
239 Merge RPN proposals generated at multiple FPN levels and then
240 distribute those proposals to their appropriate FPN levels for Faster RCNN.
241 An anchor at one FPN level may predict an RoI that will map to another level,
242 hence the need to redistribute the proposals.
243 
244 Only inference is supported. To train, please use the original Python
245 operator in Detectron.
246 
247 Inputs and outputs are examples only; if min/max levels change,
248 the number of inputs and outputs, as well as their level numbering,
249 will change.
250 )DOC")
// Operator arguments; all are documented as integers.
251  .Arg("roi_canonical_scale", "(int) ROI_CANONICAL_SCALE")
252  .Arg("roi_canonical_level", "(int) ROI_CANONICAL_LEVEL")
253  .Arg("roi_max_level", "(int) ROI_MAX_LEVEL")
254  .Arg("roi_min_level", "(int) ROI_MIN_LEVEL")
255  .Arg("rpn_max_level", "(int) RPN_MAX_LEVEL")
256  .Arg("rpn_min_level", "(int) RPN_MIN_LEVEL")
257  .Arg("rpn_post_nms_topN", "(int) RPN_POST_NMS_TOP_N")
// Example input layout for RPN levels 2..6: inputs 0-4 are the per-level RoI
// blobs, inputs 5-9 are the matching per-level score blobs.
258  .Input(
259  0,
260  "rpn_rois_fpn2",
261  "RPN proposals for FPN level 2, "
262  "format (image_index, x1, y1, x2, y2). See rpn_rois "
263  "documentation from GenerateProposals.")
264  .Input(
265  1,
266  "rpn_rois_fpn3",
267  "RPN proposals for FPN level 3, "
268  "format (image_index, x1, y1, x2, y2). See rpn_rois "
269  "documentation from GenerateProposals.")
270  .Input(
271  2,
272  "rpn_rois_fpn4",
273  "RPN proposals for FPN level 4, "
274  "format (image_index, x1, y1, x2, y2). See rpn_rois "
275  "documentation from GenerateProposals.")
276  .Input(
277  3,
278  "rpn_rois_fpn5",
279  "RPN proposals for FPN level 5, "
280  "format (image_index, x1, y1, x2, y2). See rpn_rois "
281  "documentation from GenerateProposals.")
282  .Input(
283  4,
284  "rpn_rois_fpn6",
285  "RPN proposals for FPN level 6, "
286  "format (image_index, x1, y1, x2, y2). See rpn_rois "
287  "documentation from GenerateProposals.")
288  .Input(
289  5,
290  "rpn_roi_probs_fpn2",
291  "RPN objectness probabilities for FPN level 2. "
292  "See rpn_roi_probs documentation from GenerateProposals.")
293  .Input(
294  6,
295  "rpn_roi_probs_fpn3",
296  "RPN objectness probabilities for FPN level 3. "
297  "See rpn_roi_probs documentation from GenerateProposals.")
298  .Input(
299  7,
300  "rpn_roi_probs_fpn4",
301  "RPN objectness probabilities for FPN level 4. "
302  "See rpn_roi_probs documentation from GenerateProposals.")
303  .Input(
304  8,
305  "rpn_roi_probs_fpn5",
306  "RPN objectness probabilities for FPN level 5. "
307  "See rpn_roi_probs documentation from GenerateProposals.")
308  .Input(
309  9,
310  "rpn_roi_probs_fpn6",
311  "RPN objectness probabilities for FPN level 6. "
312  "See rpn_roi_probs documentation from GenerateProposals.")
// Example output layout for RoI levels 2..5: output 0 is the combined RoI
// list, outputs 1-4 are the per-level RoI blobs, and the final output is the
// restore-index array.
313  .Output(
314  0,
315  "rois",
316  "Top proposals limited to rpn_post_nms_topN total, "
317  "format (image_index, x1, y1, x2, y2)")
318  .Output(
319  1,
320  "rois_fpn2",
321  "RPN proposals for ROI level 2, "
322  "format (image_index, x1, y1, x2, y2)")
323  .Output(
324  2,
325  "rois_fpn3",
326  "RPN proposals for ROI level 3, "
327  "format (image_index, x1, y1, x2, y2)")
328  .Output(
329  3,
330  "rois_fpn4",
331  "RPN proposals for ROI level 4, "
332  "format (image_index, x1, y1, x2, y2)")
333  .Output(
334  4,
335  "rois_fpn5",
336  "RPN proposals for ROI level 5, "
337  "format (image_index, x1, y1, x2, y2)")
338  .Output(
339  5,
340  "rois_idx_restore",
341  "Permutation on the concatenation of all "
342  "rois_fpni, i=min...max, such that when applied the RPN RoIs are "
343  "restored to their original order in the input blobs.");
344 
// Mark the operator as having no gradient, in line with the schema doc's
// "Only inference is supported".
345 SHOULD_NOT_DO_GRADIENT(CollectAndDistributeFpnRpnProposals);
346 
347 } // namespace
348 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13