Caffe2 - C++ API
A deep learning, cross-platform ML framework
spatial_batch_norm_gradient_op.cc
1 #include "caffe2/operators/spatial_batch_norm_op.h"
2 
3 #include <string>
4 
5 #include "caffe2/utils/eigen_utils.h"
6 
7 namespace caffe2 {
8 
9 template <>
10 template <typename T>
11 void SpatialBNGradientOp<CPUContext>::
12  ComputeMultiBatchScaleBiasGradientsAndFusedParams(
13  const int N,
14  const int C,
15  const int HxW,
16  const T* scale,
17  const T* mean,
18  const T* rstd,
19  const T* dscale_sum,
20  const T* dbias_sum,
21  T* dscale,
22  T* dbias,
23  T* alpha,
24  T* beta,
25  T* gamma) {
26  ConstEigenVectorArrayMap<T> scale_arr(scale, C);
27  ConstEigenVectorArrayMap<T> mean_arr(mean, C);
28  ConstEigenVectorArrayMap<T> rstd_arr(rstd, C);
29  EigenVectorArrayMap<T> dscale_arr(dscale, C);
30  EigenVectorArrayMap<T> dbias_arr(dbias, C);
31  EigenVectorArrayMap<T> alpha_arr(alpha, C);
32  EigenVectorArrayMap<T> beta_arr(beta, C);
33  EigenVectorArrayMap<T> gamma_arr(gamma, C);
34  const T inv_num_batches = T(1) / static_cast<T>(num_batches_);
35  math::Scale<T, T, CPUContext>(
36  C, inv_num_batches, dscale_sum, dscale, &context_);
37  math::Scale<T, T, CPUContext>(
38  C, inv_num_batches, dbias_sum, dbias, &context_);
39  const T inv_nhw = T(1) / static_cast<T>(N * HxW);
40  alpha_arr = scale_arr * rstd_arr;
41  beta_arr = dscale_arr * rstd_arr;
42  gamma_arr = alpha_arr * (mean_arr * beta_arr - dbias_arr) * inv_nhw;
43  beta_arr *= -alpha_arr * inv_nhw;
44 }
45 
46 template <>
47 template <typename T>
48 void SpatialBNGradientOp<CPUContext>::ComputeScaleBiasGradientsAndFusedParams(
49  const int N,
50  const int C,
51  const int HxW,
52  const T* dY,
53  const T* X,
54  const T* scale,
55  const T* mean,
56  const T* rstd,
57  T* dscale,
58  T* dbias,
59  T* alpha,
60  T* beta,
61  T* gamma,
62  T* /* scratch */) {
63  ConstEigenVectorArrayMap<T> scale_arr(scale, C);
64  ConstEigenVectorArrayMap<T> mean_arr(mean, C);
65  ConstEigenVectorArrayMap<T> rstd_arr(rstd, C);
66  EigenVectorArrayMap<T> dscale_arr(dscale, C);
67  EigenVectorArrayMap<T> dbias_arr(dbias, C);
68  EigenVectorArrayMap<T> alpha_arr(alpha, C);
69  EigenVectorArrayMap<T> beta_arr(beta, C);
70  EigenVectorArrayMap<T> gamma_arr(gamma, C);
71  math::Set<T, CPUContext>(C, T(0), dscale, &context_);
72  math::Set<T, CPUContext>(C, T(0), dbias, &context_);
73  if (order_ == StorageOrder::NCHW) {
74  ConstEigenArrayMap<T> dY_arr(dY, HxW, N * C);
75  ConstEigenArrayMap<T> X_arr(X, HxW, N * C);
76  for (int i = 0; i < N; ++i) {
77  for (int j = 0; j < C; ++j) {
78  const int c = i * C + j;
79  dscale_arr(j) +=
80  (dY_arr.col(c) * (X_arr.col(c) - mean_arr(j)) * rstd_arr(j)).sum();
81  dbias_arr(j) += dY_arr.col(c).sum();
82  }
83  }
84  } else {
85  const int outer_size = N * HxW;
86  ConstEigenArrayMap<T> dY_arr(dY, C, outer_size);
87  ConstEigenArrayMap<T> X_arr(X, C, outer_size);
88  for (int i = 0; i < outer_size; ++i) {
89  dscale_arr += dY_arr.col(i) * (X_arr.col(i) - mean_arr) * rstd_arr;
90  dbias_arr += dY_arr.col(i);
91  }
92  }
93  const T inv_nhw = T(1) / static_cast<T>(N * HxW);
94  alpha_arr = scale_arr * rstd_arr;
95  beta_arr = dscale_arr * rstd_arr;
96  gamma_arr = alpha_arr * (mean_arr * beta_arr - dbias_arr) * inv_nhw;
97  beta_arr *= -alpha_arr * inv_nhw;
98 }
99 
100 template <>
101 template <typename T>
102 void SpatialBNGradientOp<CPUContext>::ComputeXGradient(
103  const int N,
104  const int C,
105  const int HxW,
106  const T* dY,
107  const T* X,
108  const T* alpha,
109  const T* beta,
110  const T* gamma,
111  T* dX) {
112  ConstEigenVectorArrayMap<T> alpha_arr(alpha, C);
113  ConstEigenVectorArrayMap<T> beta_arr(beta, C);
114  ConstEigenVectorArrayMap<T> gamma_arr(gamma, C);
115  if (order_ == NCHW) {
116  const int stride = C * HxW;
117  const T* dY_ptr = dY;
118  const T* X_ptr = X;
119  T* dX_ptr = dX;
120  for (int i = 0; i < N; ++i) {
121  EigenArrayMap<T>(dX_ptr, HxW, C) =
122  (ConstEigenArrayMap<T>(dY_ptr, HxW, C).rowwise() *
123  alpha_arr.transpose() +
124  ConstEigenArrayMap<T>(X_ptr, HxW, C).rowwise() *
125  beta_arr.transpose())
126  .rowwise() +
127  gamma_arr.transpose();
128  dY_ptr += stride;
129  X_ptr += stride;
130  dX_ptr += stride;
131  }
132  } else {
133  EigenArrayMap<T>(dX, C, N * HxW) =
134  (ConstEigenArrayMap<T>(dY, C, N * HxW).colwise() * alpha_arr +
135  ConstEigenArrayMap<T>(X, C, N * HxW).colwise() * beta_arr)
136  .colwise() +
137  gamma_arr;
138  }
139 }
140 
141 REGISTER_CPU_OPERATOR(SpatialBNGradient, SpatialBNGradientOp<CPUContext>);
142 
143 // Input: X, scale, dY, mean, variance, dscale, dbias
144 // Output: dX, dscale, dbias
145 OPERATOR_SCHEMA(SpatialBNGradient)
146  .NumInputs({5, 7})
147  .NumOutputs(3)
148  .AllowInplace({{5, 1}, {6, 2}});
149 
150 namespace {
151 
152 // Spatial batch normalization's gradient, depending on the various input sizes,
153 // is a bit more complex than usual gradient operators.
154 class GetSpatialBNGradient : public GradientMakerBase {
155  using GradientMakerBase::GradientMakerBase;
156  std::vector<OperatorDef> GetGradientDefs() override {
157  // Check if we are in training or testing mode.
158  const bool is_test =
159  ArgumentHelper::GetSingleArgument(def_, OpSchema::Arg_IsTest, 0);
160  const int num_batches =
161  ArgumentHelper::GetSingleArgument(def_, "num_batches", 1);
162  const std::vector<string> grad_outputs = {GI(0), GI(1), GI(2)};
163  std::vector<string> grad_inputs;
164  if (is_test) {
165  // This is in testing mode. The operator should have five inputs:
166  // X, scale, bias, estimated_mean, estimated_variance
167  // The gradient inputs are:
168  // X, scale, dY, estimated_mean, estimated_variance
169  CAFFE_ENFORCE_EQ(def_.input_size(), 5);
170  CAFFE_ENFORCE_EQ(def_.output_size(), 1);
171  grad_inputs = std::vector<std::string>{I(0), I(1), GO(0), I(3), I(4)};
172  } else if (num_batches > 1) {
173  CAFFE_ENFORCE_EQ(def_.input_size(), 7);
174  CAFFE_ENFORCE_EQ(def_.output_size(), 5);
175  grad_inputs =
176  std::vector<std::string>{I(0), I(1), GO(0), O(3), O(4), GI(1), GI(2)};
177  } else {
178  CAFFE_ENFORCE_EQ(def_.input_size(), 5);
179  CAFFE_ENFORCE_EQ(def_.output_size(), 5);
180  grad_inputs = std::vector<std::string>{I(0), I(1), GO(0), O(3), O(4)};
181  }
182  return SingleGradientDef(
183  "SpatialBNGradient", "", grad_inputs, grad_outputs);
184  }
185 };
186 
187 } // namespace
188 
189 REGISTER_GRADIENT(SpatialBN, GetSpatialBNGradient);
190 
191 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: static.cpp:64