Caffe2 - C++ API
A deep learning, cross-platform ML framework
spatial_batch_norm_gradient_op.cc
// Copyright (c) 2016-present, Facebook, Inc.
#include "caffe2/operators/spatial_batch_norm_op.h"

namespace caffe2 {

template <>
bool SpatialBNGradientOp<CPUContext>::RunOnDevice() {
  const auto& X = Input(INPUT);
  const auto& dY = Input(OUTPUT_GRAD);
  const auto& scale = Input(SCALE);

  CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
  const int N = X.dim32(0);
  const int C =
      (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(X.ndim() - 1));
  const int H = (order_ == StorageOrder::NCHW ? X.dim32(2) : X.dim32(1));
  const int W = X.ndim() > 3
      ? (order_ == StorageOrder::NCHW ? X.dim32(3) : X.dim32(2))
      : 1;
  const int D = X.ndim() > 4
      ? (order_ == StorageOrder::NCHW ? X.dim32(4) : X.dim32(3))
      : 1;

  const int sample_size = H * W * D;
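  // Number of spatial locations per (n, c) slice; 1-D, 2-D, and 3-D spatial
  // dimensions are supported (hence 3 <= X.ndim() <= 5 above).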

  CAFFE_ENFORCE_EQ(scale.ndim(), 1);
  CAFFE_ENFORCE_EQ(scale.dim32(0), C);

  ConstEigenVectorArrayMap<float> scale_arr(scale.data<float>(), C);
  ConstEigenVectorArrayMap<float> mean_arr(Input(SAVED_MEAN).data<float>(), C);
  ConstEigenVectorArrayMap<float> inv_var_arr(
      Input(SAVED_INV_VAR).data<float>(), C);

  auto* dX = Output(INPUT_GRAD);
  dX->ResizeLike(X);

  auto* dScale = Output(SCALE_GRAD);
  auto* dBias = Output(BIAS_GRAD);

  if (num_batches_ == 1) {
    dScale->ResizeLike(scale);
    dBias->ResizeLike(scale);
  }

  // dBias = np.sum(dY, axis=0)
  // dScale = np.sum((X - mean) * inv_std * dY, axis=0)
  // dX = (1. / m) * scale * inv_std * (m * dY - np.sum(dY, axis=0) - (X - mean)
  //     * inv_std * inv_std * np.sum(dY * (X - mean), axis=0))
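  // Note: despite its name, inv_var holds the *inverse standard deviation*
  // saved by the forward pass (1 / sqrt(var + epsilon)), so
  // x_hat = (X - mean) * inv_var, and the formulas above are the standard
  // batch-norm backward pass over m = N * sample_size elements per channel.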

  EigenVectorArrayMap<float> dBias_arr(dBias->mutable_data<float>(), C);
  EigenVectorArrayMap<float> dScale_arr(dScale->mutable_data<float>(), C);

  if (num_batches_ == 1) {
    dBias_arr.setZero();
    dScale_arr.setZero();
  }

  const auto scaleInvVarNHW = scale_arr * inv_var_arr / (N * sample_size);
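  // Per-channel factor scale * inv_std / m. Because it is captured with
  // `auto`, this remains a lazily evaluated Eigen expression rather than a
  // materialized vector, and is re-evaluated wherever it is used below.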

  switch (order_) {
    case StorageOrder::NCHW: {
      ConstEigenArrayMap<float> X_arr(X.data<float>(), sample_size, N * C);
      ConstEigenArrayMap<float> dY_arr(dY.data<float>(), sample_size, N * C);
      EigenArrayMap<float> dX_arr(
          dX->mutable_data<float>(), sample_size, N * C);
      dX_arr.setZero();

      if (num_batches_ == 1) {
        for (int nc = 0; nc < N * C; ++nc) {
          int c = nc % C;
          dBias_arr(c) += dY_arr.col(nc).sum();
          dScale_arr(c) +=
              ((X_arr.col(nc) - mean_arr(c)) * inv_var_arr(c) * dY_arr.col(nc))
                  .sum();
        }
      } else {
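        // num_batches_ > 1: dBias/dScale were pre-accumulated across
        // sub-batches and passed in as (in-place) inputs, so they only need
        // to be normalized here.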
        for (int c = 0; c < C; ++c) {
          dBias_arr(c) /= num_batches_;
          dScale_arr(c) /= num_batches_;
        }
      }
      for (int nc = 0; nc < N * C; ++nc) {
        int c = nc % C;
        dX_arr.col(nc) += scaleInvVarNHW(c) *
            (dY_arr.col(nc) * N * sample_size - dBias_arr(c) -
             (X_arr.col(nc) - mean_arr(c)) * dScale_arr(c) * inv_var_arr(c));
      }
      break;
    }
    case StorageOrder::NHWC: {
      ConstEigenArrayMap<float> X_arr(X.data<float>(), C, N * sample_size);
      ConstEigenArrayMap<float> dY_arr(dY.data<float>(), C, N * sample_size);
      EigenArrayMap<float> dX_arr(
          dX->mutable_data<float>(), C, N * sample_size);
      dX_arr.setZero();

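      // Row-wise (per-channel) reductions over all N * sample_size columns;
      // as above, `auto` keeps these as lazily evaluated Eigen expressions.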
      const auto dYRowSum = dY_arr.rowwise().sum();
      const auto XMinusMean = X_arr.colwise() - mean_arr;
      const auto dYMulXMinusMeanRowSum = (dY_arr * XMinusMean).rowwise().sum();
      const auto invVarSqr = inv_var_arr * inv_var_arr;
      for (int nhw = 0; nhw < N * sample_size; ++nhw) {
        dBias_arr += dY_arr.col(nhw);
        dScale_arr +=
            (X_arr.col(nhw) - mean_arr) * inv_var_arr * dY_arr.col(nhw);
        dX_arr.col(nhw) += scaleInvVarNHW *
            (dY_arr.col(nhw) * N * sample_size - dYRowSum -
             XMinusMean.col(nhw) * invVarSqr * dYMulXMinusMeanRowSum);
      }
      break;
    }
    default:
      CAFFE_THROW("Unknown storage order: ", order_);
  }
  return true;
}

REGISTER_CPU_OPERATOR(SpatialBNGradient, SpatialBNGradientOp<CPUContext>);

// Input: X, scale, dY, mean, variance, dscale, dbias
// Output: dX, dscale, dbias
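// Inputs 5 and 6 (dscale, dbias) are only present in the multi-batch case and
// may share their buffers with outputs 1 and 2 (see AllowInplace below).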
OPERATOR_SCHEMA(SpatialBNGradient)
    .NumInputs({5, 7})
    .NumOutputs(3)
    .AllowInplace({{5, 1}, {6, 2}});

// Spatial batch normalization's gradient, depending on the various input
// sizes, is a bit more complex than that of the usual gradient operators.
class GetSpatialBNGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // Check if we are in training or testing mode.
    bool is_test =
        ArgumentHelper::GetSingleArgument(def_, OpSchema::Arg_IsTest, 0);
    int num_batches = ArgumentHelper::GetSingleArgument(def_, "num_batches", 1);
    vector<string> grad_outputs{GI(0), GI(1), GI(2)};
    vector<string> grad_inputs;
    if (is_test) {
      // This is in testing mode. The operator should have five inputs:
      //     X, scale, bias, estimated_mean, estimated_variance
      // The gradient inputs are:
      //     X, scale, dY, estimated_mean, estimated_variance
      CAFFE_ENFORCE_EQ(def_.input_size(), 5);
      CAFFE_ENFORCE_EQ(def_.output_size(), 1);
      grad_inputs = vector<string>{I(0), I(1), GO(0), I(3), I(4)};
    } else if (num_batches > 1) {
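      // Multi-batch mode: the partially accumulated dscale/dbias gradients
      // are fed back in as extra inputs (GI(1), GI(2)), aliased in place with
      // the corresponding outputs.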
      CAFFE_ENFORCE_EQ(def_.input_size(), 7);
      CAFFE_ENFORCE_EQ(def_.output_size(), 5);
      grad_inputs = vector<string>{I(0), I(1), GO(0), O(3), O(4), GI(1), GI(2)};
    } else {
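      // Training mode with a single batch. The gradient inputs are:
      //     X, scale, dY, saved_mean, saved_inv_var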
      CAFFE_ENFORCE_EQ(def_.input_size(), 5);
      CAFFE_ENFORCE_EQ(def_.output_size(), 5);
      grad_inputs = vector<string>{I(0), I(1), GO(0), O(3), O(4)};
    }
    return SingleGradientDef(
        "SpatialBNGradient", "", grad_inputs, grad_outputs);
  }
};
REGISTER_GRADIENT(SpatialBN, GetSpatialBNGradient);
} // namespace caffe2
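
For readers who want to sanity-check the gradient formulas outside of Caffe2,
the following is a minimal self-contained sketch (not part of the operator)
that mirrors the NCHW, num_batches_ == 1 path above using plain Eigen on a
tiny random tensor. The tensor sizes and the 1e-5 epsilon are arbitrary
assumptions for illustration.

#include <Eigen/Core>
#include <iostream>

int main() {
  const int N = 2, C = 3, HW = 4; // tiny NCHW tensor with D == 1
  const float m = static_cast<float>(N * HW); // elements per channel

  // Columns are (n, c) slices, matching the operator's NCHW mapping.
  Eigen::ArrayXXf X = Eigen::ArrayXXf::Random(HW, N * C);
  Eigen::ArrayXXf dY = Eigen::ArrayXXf::Random(HW, N * C);
  Eigen::ArrayXf scale = Eigen::ArrayXf::Random(C);

  // Per-channel mean and inverse std, as the forward pass would save them.
  Eigen::ArrayXf mean = Eigen::ArrayXf::Zero(C);
  Eigen::ArrayXf var = Eigen::ArrayXf::Zero(C);
  for (int nc = 0; nc < N * C; ++nc) {
    mean(nc % C) += X.col(nc).sum() / m;
  }
  for (int nc = 0; nc < N * C; ++nc) {
    var(nc % C) += (X.col(nc) - mean(nc % C)).square().sum() / m;
  }
  Eigen::ArrayXf inv_std = (var + 1e-5f).rsqrt();

  // Backward pass: the same reductions as the operator's single-batch path.
  Eigen::ArrayXf dBias = Eigen::ArrayXf::Zero(C);
  Eigen::ArrayXf dScale = Eigen::ArrayXf::Zero(C);
  for (int nc = 0; nc < N * C; ++nc) {
    const int c = nc % C;
    dBias(c) += dY.col(nc).sum();
    dScale(c) += ((X.col(nc) - mean(c)) * inv_std(c) * dY.col(nc)).sum();
  }
  Eigen::ArrayXXf dX(HW, N * C);
  for (int nc = 0; nc < N * C; ++nc) {
    const int c = nc % C;
    dX.col(nc) = (scale(c) * inv_std(c) / m) *
        (m * dY.col(nc) - dBias(c) -
         (X.col(nc) - mean(c)) * inv_std(c) * dScale(c));
  }

  std::cout << "dBias:  " << dBias.transpose() << "\n"
            << "dScale: " << dScale.transpose() << "\n";
  return 0;
}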