Caffe2 - C++ API
A deep learning, cross-platform ML framework
channel_backprop_stats_op.cc
#include "caffe2/operators/channel_backprop_stats_op.h"

namespace caffe2 {

template <>
bool ChannelBackpropStatsOp<CPUContext>::RunOnDevice() {
  const auto& X = Input(INPUT);
  const auto& dY = Input(OUTPUT_GRAD);
  CAFFE_ENFORCE(X.ndim() >= 3 && X.ndim() <= 5);
  const int N = X.dim32(0);
  const int C = X.dim32(1);
  const int H = X.dim32(2);
  const int W = X.ndim() > 3 ? X.dim32(3) : 1;
  const int D = X.ndim() > 4 ? X.dim32(4) : 1;

  // Number of elements in one (sample, channel) slice.
  const int sampleSize = H * W * D;

  Output(SCALE_GRAD)->Resize(C);
  Output(BIAS_GRAD)->Resize(C);
  auto* dScale = Output(SCALE_GRAD);
  auto* dBias = Output(BIAS_GRAD);

  // View the NCHW(D) tensors as (sampleSize x N*C) arrays so that each column
  // holds one channel slice of one sample.
  ConstEigenArrayMap<float> X_arr(X.data<float>(), sampleSize, N * C);
  ConstEigenArrayMap<float> dY_arr(dY.data<float>(), sampleSize, N * C);
  ConstEigenVectorArrayMap<float> mean_arr(Input(SAVED_MEAN).data<float>(), C);
  ConstEigenVectorArrayMap<float> inv_stddev_arr(
      Input(SAVED_INV_STDDEV).data<float>(), C);
  EigenVectorArrayMap<float> dBias_arr(dBias->mutable_data<float>(), C);
  EigenVectorArrayMap<float> dScale_arr(dScale->mutable_data<float>(), C);

  dBias_arr.setZero();
  dScale_arr.setZero();

  // Accumulate the per-channel bias and scale gradients over all samples and
  // spatial positions.
  for (int nc = 0; nc < N * C; ++nc) {
    int c = nc % C;
    dBias_arr(c) += dY_arr.col(nc).sum();
    dScale_arr(c) +=
        ((X_arr.col(nc) - mean_arr(c)) * inv_stddev_arr(c) * dY_arr.col(nc))
            .sum();
  }
  return true;
}
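
Written out per channel c, with the reduction running over the batch index n and the spatial positions (h, w, d), the loop above accumulates

    dBias_c  = \sum_{n,h,w,d} dY_{n,c,h,w,d}
    dScale_c = \sum_{n,h,w,d} \left(X_{n,c,h,w,d} - \mu_c\right) \, \hat{\sigma}_c^{-1} \, dY_{n,c,h,w,d}

where \mu_c is the saved per-channel mean and \hat{\sigma}_c^{-1} the saved per-channel inverse standard deviation from the SpatialBN forward pass.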

REGISTER_CPU_OPERATOR(ChannelBackpropStats, ChannelBackpropStatsOp<CPUContext>);

OPERATOR_SCHEMA(ChannelBackpropStats)
    .NumInputs(4)
    .NumOutputs(2)
    .SetDoc(R"DOC(
Given an input tensor in NCHW format, the gradient for the output of SpatialBN
and the per-channel mean and inverse standard deviation vectors for the input,
computes the per-channel bias and scale gradient to be used during the backward
pass for subsequent spatial batch normalization gradient calculation. Typically,
the results of this op are subsequently reduced over multiple devices to obtain
statistics over a larger batch size in cases where the batch size for a single
model copy is too low to yield the full benefit of batch normalization. The
resulting bias and scale can then be plugged back into SpatialBNGradient to get
results over the larger batch size.)DOC")
    .Input(0, "X", "The input 4-dimensional tensor of shape NCHW")
    .Input(
        1,
        "mean",
        "The mean saved from the forward pass as a 1-dimensional "
        "tensor of size C.")
    .Input(
        2,
        "inv_std",
        "The saved inverse standard deviation as a 1-dimensional tensor "
        "of size C.")
    .Input(
        3,
        "output_grad",
        "Gradient for the output layer of SpatialBN, here used as input "
        "because we are on the backward pass")
    .Output(0, "scale_grad", "Gradient for the scale vector")
    .Output(1, "bias_grad", "Gradient for the bias vector");
SHOULD_NOT_DO_GRADIENT(ChannelBackpropStats);

} // namespace caffe2
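
As a rough illustration (not part of the file above), the snippet below sketches how this operator could be driven from C++ through a Caffe2 Workspace. The blob names, shapes, and fill values are made up for the example, and it assumes the pre-c10 tensor API (TensorCPU, TIndex, Resize/mutable_data) that this file is written against.

#include <algorithm>
#include <string>
#include <vector>

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

int main() {
  caffe2::Workspace ws;

  // Illustrative shapes: N=2 samples, C=3 channels, 4x4 spatial extent.
  const int N = 2, C = 3, H = 4, W = 4;

  // Create a float CPU tensor blob with the given dims, filled with a constant.
  auto feed = [&](const std::string& name,
                  const std::vector<caffe2::TIndex>& dims) {
    auto* t = ws.CreateBlob(name)->GetMutable<caffe2::TensorCPU>();
    t->Resize(dims);
    std::fill(
        t->mutable_data<float>(), t->mutable_data<float>() + t->size(), 0.5f);
  };
  feed("X", {N, C, H, W});
  feed("mean", {C});
  feed("inv_std", {C});
  feed("output_grad", {N, C, H, W});

  // Input/output names and order follow the schema registered above.
  caffe2::OperatorDef def;
  def.set_type("ChannelBackpropStats");
  for (const char* in : {"X", "mean", "inv_std", "output_grad"}) {
    def.add_input(in);
  }
  for (const char* out : {"scale_grad", "bias_grad"}) {
    def.add_output(out);
  }

  auto op = caffe2::CreateOperator(def, &ws);
  op->Run();

  // scale_grad and bias_grad each hold one accumulated value per channel.
  const auto& dScale = ws.GetBlob("scale_grad")->Get<caffe2::TensorCPU>();
  return dScale.size() == C ? 0 : 1;
}

In the multi-device scenario described in the operator schema, the scale_grad and bias_grad blobs produced on each model copy would then be summed across devices before being fed into SpatialBNGradient.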