Caffe2 - C++ API
A deep learning, cross platform ML framework
bbox_transform_op.cc
1 #include "bbox_transform_op.h"
2 #include "caffe2/operators/generate_proposals_op_util_boxes.h"
3 
4 #ifdef CAFFE2_USE_MKL
5 #include "caffe2/mkl/operators/operator_fallback_mkl.h"
6 #endif // CAFFE2_USE_MKL
7 
8 namespace caffe2 {
9 namespace {
10 
11 REGISTER_CPU_OPERATOR(BBoxTransform, BBoxTransformOp<float, CPUContext>);
12 
13 #ifdef CAFFE2_HAS_MKL_DNN
14 REGISTER_MKL_OPERATOR(
15  BBoxTransform,
16  mkl::MKLFallbackOp<BBoxTransformOp<float, CPUContext>>);
17 #endif // CAFFE2_HAS_MKL_DNN
18 
19 // Input: box, delta Output: box
20 OPERATOR_SCHEMA(BBoxTransform)
21  .NumInputs(3)
22  .NumOutputs(1)
23  .SetDoc(R"DOC(
24 Transform proposal bounding boxes to target bounding box using bounding box
25  regression deltas.
26 )DOC")
27  .Arg("weights", "vector<float> weights [wx, wy, ww, wh] for the deltas")
28  .Arg(
29  "apply_scale",
30  "bool (default true), transform the boxes to the scaled image space"
31  " after applying the bbox deltas."
32  "Set to false to match the detectron code, set to true for keypoint"
33  " models and for backward compatibility")
34  .Arg(
35  "correct_transform_coords",
36  "bool (default false), Correct bounding box transform coordates,"
37  " see bbox_transform() in boxes.py "
38  "Set to true to match the detectron code, set to false for backward"
39  " compatibility")
40  .Input(
41  0,
42  "rois",
43  "Bounding box proposals in pixel coordinates, "
44  "Size (M, 4), format [x1, y1, x2, y2], or"
45  "Size (M, 5), format [img_index_IGNORED, x1, y1, x2, y2]")
46  .Input(
47  1,
48  "deltas",
49  "bounding box translations and scales,"
50  "size (M, 4*K), format [dx, dy, dw, dh], K = # classes")
51  .Input(
52  2,
53  "im_info",
54  "Image dimensions, size (1, 3), "
55  "format [img_height, img_width, img_scale_IGNORED]")
56  .Output(
57  0,
58  "box_out",
59  "Pixel coordinates of the transformed bounding boxes,"
60  "Size (M, 4*K), format [x1, y1, x2, y2]");
61 
62 SHOULD_NOT_DO_GRADIENT(BBoxTransform);
63 } // namespace
64 
65 template <>
66 bool BBoxTransformOp<float, CPUContext>::RunOnDevice() {
67  const auto& roi_in = Input(0);
68  const auto& delta_in = Input(1);
69  const auto& iminfo_in = Input(2);
70  auto* box_out = Output(0);
71 
72  const int N = roi_in.dim32(0);
73  CAFFE_ENFORCE_EQ(roi_in.ndim(), 2);
74  CAFFE_ENFORCE_GE(roi_in.dim32(1), 4);
75 
76  CAFFE_ENFORCE_EQ(roi_in.ndim(), 2);
77  CAFFE_ENFORCE_EQ(delta_in.dim32(0), N);
78  CAFFE_ENFORCE_EQ(delta_in.dim32(1) % 4, 0);
79 
80  DCHECK_EQ(weights_.size(), 4);
81 
82  CAFFE_ENFORCE_EQ(iminfo_in.size(), 3);
83  ConstEigenVectorArrayMap<float> iminfo(iminfo_in.data<float>(), 3);
84  const float scale_before = iminfo(2);
85  const float scale_after = apply_scale_ ? iminfo(2) : 1.0;
86  int img_h = int(iminfo(0) / scale_before + 0.5);
87  int img_w = int(iminfo(1) / scale_before + 0.5);
88 
89  Eigen::Map<const ERArrXXf> boxes0(
90  roi_in.data<float>(), roi_in.dim32(0), roi_in.dim32(1));
91  auto boxes = boxes0.rightCols(4) / scale_before;
92 
93  Eigen::Map<const ERArrXXf> deltas0(
94  delta_in.data<float>(), delta_in.dim32(0), delta_in.dim32(1));
95 
96  box_out->ResizeLike(delta_in);
97  Eigen::Map<ERArrXXf> new_boxes(
98  box_out->mutable_data<float>(), box_out->dim32(0), box_out->dim32(1));
99 
100  int num_classes = deltas0.cols() / 4;
101  for (int k = 0; k < num_classes; k++) {
102  auto deltas = deltas0.block(0, k * 4, N, 4);
103  auto trans_boxes = utils::bbox_transform(
104  boxes,
105  deltas,
106  weights_,
107  utils::BBOX_XFORM_CLIP_DEFAULT,
108  correct_transform_coords_);
109  auto clip_boxes = utils::clip_boxes(trans_boxes, img_h, img_w);
110  new_boxes.block(0, k * 4, N, 4) = clip_boxes * scale_after;
111  }
112 
113  return true;
114 }
115 
116 } // namespace caffe2
Copyright (c) 2016-present, Facebook, Inc.