Caffe2 - C++ API
A deep-learning, cross-platform ML framework
spatial_batch_norm_op.cc
#include "caffe2/operators/spatial_batch_norm_op.h"

namespace caffe2 {

template <>
bool SpatialBNOp<CPUContext>::RunOnDevice() {
  const auto& X = Input(INPUT);
  const auto& scale = Input(SCALE);
  const auto& bias = Input(BIAS);

  CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
  const int N = X.dim32(0);
  const int C =
      (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(X.ndim() - 1));
  const int H = (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1));
  const int W = X.ndim() > 3
      ? (order_ == StorageOrder::NCHW ? X.dim32(3) : X.dim32(2))
      : 1;
  const int D = X.ndim() > 4
      ? (order_ == StorageOrder::NCHW ? X.dim32(4) : X.dim32(3))
      : 1;

  const int sample_size = H * W * D;
  CAFFE_ENFORCE_EQ(scale.ndim(), 1);
  CAFFE_ENFORCE_EQ(bias.ndim(), 1);
  CAFFE_ENFORCE_EQ(scale.dim32(0), C);
  CAFFE_ENFORCE_EQ(bias.dim32(0), C);

  ConstEigenVectorArrayMap<float> scale_arr(scale.data<float>(), C);
  ConstEigenVectorArrayMap<float> bias_arr(bias.data<float>(), C);

  auto* Y = Output(OUTPUT);
  Y->ResizeLike(X);

  if (!is_test_) {
    // Training mode: compute the mean and variance of this batch.
    // Note that, to be consistent with cuDNN, we output the saved inverse
    // std as the fifth output, but we still use the same storage to hold
    // the variance while it is being computed. The inversion is carried
    // out at the end of the op.
    Output(SAVED_MEAN)->Resize(C);
    Output(SAVED_INV_VAR)->Resize(C);
    EigenVectorArrayMap<float> mean(
        Output(SAVED_MEAN)->mutable_data<float>(), C);
    EigenVectorArrayMap<float> var(
        Output(SAVED_INV_VAR)->mutable_data<float>(), C);

    if (num_batches_ > 1) {
      ConstEigenVectorArrayMap<float> sums(Input(SUMS).data<float>(), C);
      ConstEigenVectorArrayMap<float> sumsq(Input(SUMSQ).data<float>(), C);
      const auto multi_batch_size = N * num_batches_ * sample_size;
      mean = sums / multi_batch_size;
      var = (sumsq - (sums * sums) / multi_batch_size) / multi_batch_size;
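      // Sketch of the identity used above: with n = multi_batch_size and
      // per-channel accumulators s = sum(x) and q = sum(x^2),
      //   Var(x) = E[x^2] - E[x]^2 = q / n - (s / n)^2 = (q - s * s / n) / n,
      // which is exactly the expression computed for `var`.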
    } else {
      mean.setZero();
      var.setZero();
      switch (order_) {
        case StorageOrder::NCHW: {
          ConstEigenArrayMap<float> X_arr(X.data<float>(), sample_size, N * C);
          for (int nc = 0; nc < N * C; ++nc) {
            mean(nc % C) += X_arr.col(nc).sum();
          }
          mean /= N * sample_size;
          for (int nc = 0; nc < N * C; ++nc) {
            var(nc % C) +=
                (X_arr.col(nc) - mean(nc % C)).matrix().squaredNorm();
          }
          var /= N * sample_size;
          break;
        }
        case StorageOrder::NHWC: {
          ConstEigenArrayMap<float> X_arr(X.data<float>(), C, N * sample_size);
          for (int i = 0; i < N * sample_size; ++i) {
            mean += X_arr.col(i);
          }
          mean /= N * sample_size;
          for (int i = 0; i < N * sample_size; ++i) {
            var += (X_arr.col(i) - mean) * (X_arr.col(i) - mean);
          }
          var /= N * sample_size;
          break;
        }
        default:
          CAFFE_THROW("Unknown storage order: ", order_);
      }
    }
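    // Both layouts above use the classic two-pass computation: one pass over
    // the data for the per-channel mean, a second pass for the centered
    // second moment. The layouts differ only in how a channel's elements are
    // mapped to Eigen columns: for NCHW each column is one (n, c) plane of
    // sample_size elements; for NHWC each column holds the C channel values
    // of a single spatial position.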

    // Compute the running mean and running variance.
    auto* running_mean = Output(RUNNING_MEAN);
    auto* running_var = Output(RUNNING_VAR);
    // Check if they are initialized.
    if (!running_mean->size()) {
      running_mean->Resize(C);
      EigenVectorArrayMap<float> running_mean_map(
          running_mean->mutable_data<float>(), C);
      running_mean_map.setZero();
    }
    if (!running_var->size()) {
      running_var->Resize(C);
      EigenVectorArrayMap<float> running_var_map(
          running_var->mutable_data<float>(), C);
      running_var_map.setZero();
    }
    EigenVectorArrayMap<float> running_mean_arr(
        running_mean->mutable_data<float>(), C);
    EigenVectorArrayMap<float> running_var_arr(
        running_var->mutable_data<float>(), C);
    running_mean_arr = running_mean_arr * momentum_ + mean * (1. - momentum_);
    running_var_arr = running_var_arr * momentum_ + var * (1. - momentum_);
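    // Worked example with made-up numbers: momentum_ = 0.9, a previous
    // running mean of 1.0, and a batch mean of 0.5 give
    //   1.0 * 0.9 + 0.5 * 0.1 = 0.95,
    // i.e. the running statistics drift slowly toward each batch's values.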
  }

  // Regardless of training or testing, we will apply the estimated mean
  // and standard deviation to the input. For testing, they are
  // specified directly by the input, and for training, they are computed
  // by the op.
  Eigen::Array<float, Eigen::Dynamic, 1> inv_std(C);
  if (is_test_) {
    ConstEigenVectorArrayMap<float> var_arr(Input(EST_VAR).data<float>(), C);
    inv_std = (var_arr + epsilon_).sqrt().inverse();
  } else {
    EigenVectorArrayMap<float> saved_inv_std(
        Output(SAVED_INV_VAR)->mutable_data<float>(), C);
    saved_inv_std = (saved_inv_std + epsilon_).inverse().sqrt();
    inv_std = saved_inv_std;
  }
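  // Note that the two branches are numerically equivalent:
  // (v + eps).sqrt().inverse() and (v + eps).inverse().sqrt() both evaluate
  // 1 / sqrt(v + eps), since inversion and square root commute for
  // nonnegative values (up to floating-point rounding).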
  ConstEigenVectorArrayMap<float> mean_arr(
      is_test_ ? Input(EST_MEAN).data<float>()
               : Output(SAVED_MEAN)->data<float>(),
      C);
  // We can fuse the output computation as follows:
  //   (x - est_mean) * inv_std * scale + bias
  // becomes
  //   x * (inv_std * scale) + (bias - est_mean * inv_std * scale)
  Eigen::Array<float, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
  Eigen::Array<float, Eigen::Dynamic, 1> new_bias =
      bias_arr - mean_arr * inv_std * scale_arr;
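  // Worked check with made-up numbers: scale = 2, bias = 1, est_mean = 3,
  // inv_std = 0.5 give new_scale = 1 and new_bias = 1 - 3 * 1 = -2. For
  // x = 5, the direct form (5 - 3) * 0.5 * 2 + 1 = 3 matches the fused form
  // 5 * 1 + (-2) = 3.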
  switch (order_) {
    case StorageOrder::NHWC: {
      EigenArrayMap<float>(Y->mutable_data<float>(), C, N * sample_size) =
          (ConstEigenArrayMap<float>(X.data<float>(), C, N * sample_size)
               .colwise() *
           new_scale)
              .colwise() +
          new_bias;
      break;
    }
    case StorageOrder::NCHW: {
      EigenArrayMap<float> Y_arr(Y->mutable_data<float>(), sample_size, N * C);
      ConstEigenArrayMap<float> X_arr(X.data<float>(), sample_size, N * C);
      for (int nc = 0; nc < N * C; ++nc) {
        Y_arr.col(nc) = X_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
      }
      break;
    }
    default:
      CAFFE_THROW("Unknown storage order: ", order_);
  }
  return true;
}

namespace {
OpSchema::Cost CostInferenceForSpatialBN(
    const OperatorDef& def,
    const vector<TensorShape>& in) {
  struct OpSchema::Cost cost = PointwiseCostInference<2>(def, in);
  ArgumentHelper helper(def);
  auto order =
      StringToStorageOrder(helper.GetSingleArgument<string>("order", "NCHW"));
  const TensorShape X = in[0];
  const int C =
      (order == StorageOrder::NCHW ? X.dims(1) : X.dims(X.dims_size() - 1));
  cost.params_bytes = 2 * C * sizeof(float);
  return cost;
}
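// The only learned parameters of SpatialBN are the per-channel scale and
// bias vectors, hence params_bytes = 2 * C * sizeof(float) above; the
// pointwise cost covers the normalization itself.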
} // namespace

REGISTER_CPU_OPERATOR(SpatialBN, SpatialBNOp<CPUContext>);

OPERATOR_SCHEMA(SpatialBN)
    .NumInputs({5, 7})
    .NumOutputs({1, 5})
    .AllowInplace({{0, 0}})
    .CostInferenceFunction(CostInferenceForSpatialBN)
    .EnforceInplace({{3, 1}, {4, 2}})
    .TensorInferenceFunction(
        [](const OperatorDef& def, const vector<TensorShape>& in) {
          ArgumentHelper helper(def);
          bool is_test = helper.GetSingleArgument<int>(OpSchema::Arg_IsTest, 0);

          if (!is_test) {
            vector<TensorShape> out;
            StorageOrder order = StringToStorageOrder(
                helper.GetSingleArgument<string>("order", "NCHW"));
            const TensorShape& X = in[0];
            const int C =
                (order == StorageOrder::NCHW ? X.dims(1)
                                             : X.dims(X.dims_size() - 1));

            out.push_back(in[0]);
            TensorShape meanvar_tp =
                CreateTensorShape(vector<int>{C}, TensorProto::FLOAT);
            out.push_back(meanvar_tp); // RUNNING_MEAN
            out.push_back(meanvar_tp); // RUNNING_VAR
            out.push_back(meanvar_tp); // SAVED_MEAN
            out.push_back(meanvar_tp); // SAVED_VAR
            return out;
          } else {
            return vector<TensorShape>{in[0]};
          }
        })
    .SetDoc(R"DOC(
Carries out spatial batch normalization as described in the paper
https://arxiv.org/abs/1502.03167 . Depending on the mode in which it is run,
there are multiple cases for the number of outputs, which we list below:


Output case #1:
  Y, mean, var, saved_mean, saved_var (training mode)


Output case #2:
  Y (test mode)
)DOC")
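    // Reading the schema together with the implementation above: the op
    // always takes X, scale, bias, mean, and var (5 inputs), plus the
    // optional sums/sumsq statistics (7 inputs) when num_batches > 1. Test
    // mode emits only Y; training mode also emits the four per-channel
    // statistics outputs, with the running mean/var updated in place per
    // the EnforceInplace constraint.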
    .ArgIsTest(
        "If set to nonzero, run spatial batch normalization in test mode.")
    .Arg("epsilon", "The epsilon value to use to avoid division by zero.")
    .Arg("order", "A StorageOrder string.")
    .Arg(
        "momentum",
        "Factor used in computing the running mean and variance, "
        "e.g., running_mean = running_mean * momentum + mean * (1 - momentum)")
    .Arg(
        "num_batches",
        "(Optional) Specifies the number of batches to apply normalization "
        "on. Requires specifying the optional sums and sumsq inputs that "
        "provide statistics across multiple batches from which mean and "
        "variance can be determined.")
    .Input(
        0,
        "X",
        "The input 4-dimensional tensor of shape NCHW or NHWC depending "
        "on the order parameter.")
    .Input(
        1,
        "scale",
        "The scale as a 1-dimensional tensor of size C to be applied to the "
        "output.")
    .Input(
        2,
        "bias",
        "The bias as a 1-dimensional tensor of size C to be applied to the "
        "output.")
    .Input(
        3,
        "mean",
        "The running mean (training) or the estimated mean (testing) "
        "as a 1-dimensional tensor of size C.")
    .Input(
        4,
        "var",
        "The running variance (training) or the estimated "
        "variance (testing) as a 1-dimensional tensor of size C.")
    .Input(
        5,
        "sums",
        "(Optional) Per-channel sums of elements to be used to determine "
        "the mean and variance for this batch.")
    .Input(
        6,
        "sumsq",
        "(Optional) Per-channel sums of squared elements to be used to "
        "determine the variance for this batch.")
    .Output(0, "Y", "The output 4-dimensional tensor of the same shape as X.")
    .Output(
        1,
        "mean",
        "The running mean after the spatial BN operator. Must be in-place "
        "with the input mean. Should not be used for testing.")
    .Output(
        2,
        "var",
        "The running variance after the spatial BN operator. Must be "
        "in-place with the input var. Should not be used for testing.")
    .Output(
        3,
        "saved_mean",
        "Saved mean used during training to speed up gradient "
        "computation. Should not be used for testing.")
    .Output(
        4,
        "saved_var",
        "Saved variance used during training to speed up "
        "gradient computation. Should not be used for testing.");

} // namespace caffe2
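
To make the fused normalization concrete, here is a minimal, self-contained sketch (not part of Caffe2) that applies the same per-channel transform with plain Eigen, mirroring the NHWC path where each column holds one pixel's channels. All names and values are made up for illustration; it checks that the fused affine form agrees with the unfused (x - mean) * inv_std * scale + bias formula.

#include <Eigen/Core>
#include <cassert>
#include <iostream>

int main() {
  // Tiny made-up NHWC problem: C = 2 channels, 3 spatial positions
  // (N * sample_size = 3), so each column of X is one pixel's channels.
  const int C = 2, num_pixels = 3;
  Eigen::ArrayXXf X(C, num_pixels);
  X << 1.f, 2.f, 3.f,
       4.f, 5.f, 6.f;

  Eigen::ArrayXf scale(C), bias(C);
  scale << 2.f, 0.5f;
  bias << 1.f, -1.f;

  // Per-channel mean and variance, as the op computes in training mode.
  Eigen::ArrayXf mean = X.rowwise().mean();
  Eigen::ArrayXf var = (X.colwise() - mean).square().rowwise().mean();

  const float epsilon = 1e-5f;
  Eigen::ArrayXf inv_std = (var + epsilon).sqrt().inverse();

  // The fused affine from the op: Y = X * new_scale + new_bias per channel.
  Eigen::ArrayXf new_scale = inv_std * scale;
  Eigen::ArrayXf new_bias = bias - mean * inv_std * scale;
  Eigen::ArrayXXf Y_fused = (X.colwise() * new_scale).colwise() + new_bias;

  // Unfused reference: (x - mean) * inv_std * scale + bias.
  Eigen::ArrayXXf Y_direct =
      (((X.colwise() - mean).colwise() * inv_std).colwise() * scale)
          .colwise() +
      bias;

  assert(((Y_fused - Y_direct).abs() < 1e-5f).all());
  std::cout << "fused and unfused forms agree:\n" << Y_fused << std::endl;
  return 0;
}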