Caffe2 - C++ API
A deep learning, cross platform ML framework
channel_backprop_stats_op.cc
1 #include "caffe2/operators/channel_backprop_stats_op.h"
2 #include "caffe2/utils/eigen_utils.h"
3 
4 namespace caffe2 {
5 
6 template <>
7 bool ChannelBackpropStatsOp<CPUContext>::RunOnDevice() {
8  const auto& X = Input(INPUT);
9  const auto& dY = Input(OUTPUT_GRAD);
10  CAFFE_ENFORCE(X.dim() >= 3 && X.dim() <= 5);
11  const int N = X.dim32(0);
12  const int C = X.dim32(1);
13  const int H = X.dim32(2);
14  const int W = X.dim() > 3 ? X.dim32(3) : 1;
15  const int D = X.dim() > 4 ? X.dim32(4) : 1;
16 
17  const int sampleSize = H * W * D;
18 
19  Output(SCALE_GRAD)->Resize(C);
20  Output(BIAS_GRAD)->Resize(C);
21  auto* dScale = Output(SCALE_GRAD);
22  auto* dBias = Output(BIAS_GRAD);
23 
24  ConstEigenArrayMap<float> X_arr(X.data<float>(), sampleSize, N * C);
25  ConstEigenArrayMap<float> dY_arr(dY.data<float>(), sampleSize, N * C);
26  ConstEigenVectorArrayMap<float> mean_arr(Input(SAVED_MEAN).data<float>(), C);
27  ConstEigenVectorArrayMap<float> inv_stddev_arr(
28  Input(SAVED_INV_STDDEV).data<float>(), C);
29  EigenVectorArrayMap<float> dBias_arr(
30  dBias->template mutable_data<float>(), C);
31  EigenVectorArrayMap<float> dScale_arr(
32  dScale->template mutable_data<float>(), C);
33 
34  dBias_arr.setZero();
35  dScale_arr.setZero();
36 
37  for (int nc = 0; nc < N * C; ++nc) {
38  int c = nc % C;
39  dBias_arr(c) += dY_arr.col(nc).sum();
40  dScale_arr(c) +=
41  ((X_arr.col(nc) - mean_arr(c)) * inv_stddev_arr(c) * dY_arr.col(nc))
42  .sum();
43  }
44  return true;
45 }
46 
47 REGISTER_CPU_OPERATOR(ChannelBackpropStats, ChannelBackpropStatsOp<CPUContext>);
48 
49 OPERATOR_SCHEMA(ChannelBackpropStats)
50  .NumInputs(4)
51  .NumOutputs(2)
52  .SetDoc(R"DOC(
53 Given an input tensor in NCHW format, the gradient for the output of SpatialBN
54 and the per-channel mean and inverse std var vectors for the input, computes the
55 per-channel bias and scale gradient to be used during the backward pass for
56 subsequent spatial batch normalization gradient calculation. Typically, the
57 results of this op are subsequently reduced over multiple devices to obtain
58 statistics over a larger batch size in cases where the batch size for a single
59 model copy is too low to yield the full benefit of batch normalization. The
60 resulting bias and scale can then be plugged back into SpatialBNGradient to get
61 results over the larger batch size )DOC")
62  .Input(0, "X", "The input 4-dimensional tensor of shape NCHW")
63  .Input(
64  1,
65  "mean",
66  "The mean saved from the forward pass as a 1-dimensional "
67  "tensor of size C.")
68  .Input(
69  2,
70  "inv_std",
71  "The saved inverse standard deviation as a 1-dimensional tensor "
72  "of size C.")
73  .Input(
74  3,
75  "output_grad",
76  "Gradient for the output layer of SpatialBN, here used as input "
77  "because we are on the backward pass")
78  .Output(0, "scale_grad", "Gradient for the scale vector")
79  .Output(1, "bias_grad", "Gradient for the bias vector");
80 SHOULD_NOT_DO_GRADIENT(ChannelBackpropStats);
81 
82 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: static.cpp:64
Definition: static.cpp:70