1 #include "caffe2/operators/channel_backprop_stats_op.h" 2 #include "caffe2/utils/eigen_utils.h" 7 bool ChannelBackpropStatsOp<CPUContext>::RunOnDevice() {
// CPU implementation: reads X, the output gradient dY, and the saved
// per-channel mean / inverse stddev, and accumulates per-channel values into
// the BIAS_GRAD and SCALE_GRAD outputs.
// NOTE(review): this excerpt is missing several lines of the function (the
// definition of `c`, the target of the scale accumulation, and the function's
// closing lines); comments below describe only what is visible.
8 const auto& X = Input(INPUT);
9 const auto& dY = Input(OUTPUT_GRAD);
// Only inputs with 3, 4 or 5 dimensions are accepted.
10 CAFFE_ENFORCE(X.dim() >= 3 && X.dim() <= 5);
11 const int N = X.dim32(0);
12 const int C = X.dim32(1);
13 const int H = X.dim32(2);
// Dimensions 3 and 4 are optional and are treated as size 1 when absent.
14 const int W = X.dim() > 3 ? X.dim32(3) : 1;
15 const int D = X.dim() > 4 ? X.dim32(4) : 1;
// Element count of everything after the first two dimensions.
17 const int sampleSize = H * W * D;
// Both outputs hold C elements (one per index of dimension 1).
19 Output(SCALE_GRAD)->Resize(C);
20 Output(BIAS_GRAD)->Resize(C);
21 auto* dScale = Output(SCALE_GRAD);
22 auto* dBias = Output(BIAS_GRAD);
// View X and dY as 2-D arrays of sampleSize rows by N * C columns; the loop
// below addresses them one column at a time.
// NOTE(review): dY is assumed to have the same element count as X; no check
// is visible here — TODO confirm.
24 ConstEigenArrayMap<float> X_arr(X.data<
float>(), sampleSize, N * C);
25 ConstEigenArrayMap<float> dY_arr(dY.data<
float>(), sampleSize, N * C);
// Saved statistics, each viewed as a vector of C floats.
26 ConstEigenVectorArrayMap<float> mean_arr(Input(SAVED_MEAN).data<float>(), C);
27 ConstEigenVectorArrayMap<float> inv_stddev_arr(
28 Input(SAVED_INV_STDDEV).data<float>(), C);
// Writable views of C floats over the two output buffers.
29 EigenVectorArrayMap<float> dBias_arr(
30 dBias->template mutable_data<float>(), C);
31 EigenVectorArrayMap<float> dScale_arr(
32 dScale->template mutable_data<float>(), C);
// Visit every one of the N * C columns.
// NOTE(review): `c` is defined on a line not visible here; presumably the
// channel index derived from nc — TODO confirm. Any zero-initialization of
// dBias_arr / dScale_arr before this loop is likewise not visible.
37 for (
int nc = 0; nc < N * C; ++nc) {
// Add the sum of this column of dY to entry c of the bias gradient.
39 dBias_arr(c) += dY_arr.col(nc).sum();
// Element-wise over the column: (X - mean[c]) * inv_stddev[c] * dY.
// NOTE(review): the statement this expression belongs to is cut off;
// presumably its sum is accumulated into dScale_arr(c) — TODO confirm.
41 ((X_arr.col(nc) - mean_arr(c)) * inv_stddev_arr(c) * dY_arr.col(nc))
// Register ChannelBackpropStatsOp<CPUContext> as the CPU operator named
// ChannelBackpropStats.
47 REGISTER_CPU_OPERATOR(ChannelBackpropStats, ChannelBackpropStatsOp<CPUContext>);
// Schema for ChannelBackpropStats: documentation text, input X (plus further
// inputs whose declarations are only partially visible in this excerpt), and
// the two outputs scale_grad (index 0) and bias_grad (index 1).
// NOTE(review): the X description below says "4-dimensional", but
// RunOnDevice enforces 3 <= X.dim() <= 5 — consider updating the text.
49 OPERATOR_SCHEMA(ChannelBackpropStats)
53 Given an input tensor in NCHW format, the gradient for the output of SpatialBN 54 and the per-channel mean and inverse std var vectors for the input, computes the 55 per-channel bias and scale gradient to be used during the backward pass for 56 subsequent spatial batch normalization gradient calculation. Typically, the 57 results of this op are subsequently reduced over multiple devices to obtain 58 statistics over a larger batch size in cases where the batch size for a single 59 model copy is too low to yield the full benefit of batch normalization. The 60 resulting bias and scale can then be plugged back into SpatialBNGradient to get 61 results over the larger batch size )DOC") 62 .Input(0, "X",
"The input 4-dimensional tensor of shape NCHW")
66 "The mean saved from the forward pass as a 1-dimensional " 71 "The saved inverse standard deviation as a 1-dimensional tensor " 76 "Gradient for the output layer of SpatialBN, here used as input " 77 "because we are on the backward pass")
78 .Output(0,
"scale_grad",
"Gradient for the scale vector")
79 .Output(1,
"bias_grad",
"Gradient for the bias vector");
// Marks ChannelBackpropStats as an operator for which no gradient should be
// generated.
80 SHOULD_NOT_DO_GRADIENT(ChannelBackpropStats);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...