1 #include "caffe2/operators/instance_norm_op.h" 2 #include "caffe2/utils/eigen_utils.h" 11 template <
typename T,
typename Context>
12 bool InstanceNormOp<T, Context>::RunOnDeviceWithOrderNHWC() {
13 const auto& X = Input(INPUT);
16 !IsInputOutputAlias(INPUT, OUTPUT),
17 "Can't run InstanceNorm NHWC in-place");
18 auto* mean = OutputSize() > 1 ? Output(MEAN) : &mean_;
19 auto* inv_stdev = OutputSize() > 1 ? Output(INV_STDEV) : &inv_stdev_;
20 const int N = X.dim32(0);
21 const int H = X.dim32(1);
22 const int W = X.dim32(2);
23 const int C = X.dim32(3);
24 const size_t offset = H * W * C;
26 CAFFE_ENFORCE_EQ(Input(SCALE).numel(), C);
27 CAFFE_ENFORCE_EQ(Input(BIAS).numel(), C);
29 auto* Y = Output(OUTPUT, X.sizes(), at::dtype<T>());
31 inv_stdev->Resize(N, C);
32 ConstEigenVectorArrayMap<T> scale(Input(SCALE).
template data<T>(), C);
33 ConstEigenVectorArrayMap<T> bias(Input(BIAS).
template data<T>(), C);
34 for (
int n = 0; n < N; ++n) {
35 ConstEigenArrayMap<T> Xmat(X.template data<T>() + offset * n, C, H * W);
36 EigenArrayMap<T> Ymat(Y->template mutable_data<T>() + offset * n, C, H * W);
37 EigenVectorArrayMap<T> mean_arr(
38 mean->template mutable_data<T>() + n * C, C);
39 EigenVectorArrayMap<T> inv_stdev_arr(
40 inv_stdev->template mutable_data<T>() + n * C, C);
45 mean_arr = Xmat.col(0);
46 for (
int i = 1; i < H * W; ++i) {
47 mean_arr += Xmat.col(i);
49 mean_arr *= 1. / (H * W);
50 Ymat = Xmat.colwise() - mean_arr;
53 inv_stdev_arr = Ymat.col(0) * Ymat.col(0);
54 for (
int i = 1; i < H * W; ++i) {
55 inv_stdev_arr += Ymat.col(i) * Ymat.col(i);
57 inv_stdev_arr = (inv_stdev_arr / (H * W) + epsilon_).sqrt().inverse();
58 Ymat = (Ymat.colwise() * (inv_stdev_arr * scale)).colwise() + bias;
63 template <
typename T,
typename Context>
64 bool InstanceNormOp<T, Context>::RunOnDeviceWithOrderNCHW() {
65 const auto& X = Input(INPUT);
66 const auto& scale = Input(SCALE);
67 const auto& bias = Input(BIAS);
69 auto* mean = OutputSize() > 1 ? Output(MEAN) : &mean_;
70 auto* inv_stdev = OutputSize() > 1 ? Output(INV_STDEV) : &inv_stdev_;
71 const int N = X.dim32(0);
72 const int C = X.dim32(1);
73 const int H = X.dim32(2);
74 const int W = X.dim32(3);
76 CAFFE_ENFORCE_EQ(scale.numel(), C);
77 CAFFE_ENFORCE_EQ(bias.numel(), C);
79 auto* Y = Output(OUTPUT, X.sizes(), at::dtype<T>());
81 inv_stdev->Resize(N, C);
83 const auto* Xdata = X.template data<T>();
84 auto* Ydata = Y->template mutable_data<T>();
85 const auto* scale_data = scale.template data<T>();
86 const auto* bias_data = bias.template data<T>();
87 auto* mean_data = mean->template mutable_data<T>();
88 auto* inv_stdev_data = inv_stdev->template mutable_data<T>();
91 for (
auto i = 0; i < N * C; ++i) {
92 ConstEigenVectorArrayMap<T> Xi(Xdata + H * W * i, H * W);
93 const T Xi_mean = Xi.mean();
94 const T squared_norm = (Xi - Xi_mean).matrix().squaredNorm();
95 const T inv_stdev = 1.0 / std::sqrt(squared_norm / (H * W) + epsilon_);
96 mean_data[i] = Xi_mean;
97 inv_stdev_data[i] = inv_stdev;
98 EigenVectorArrayMap<T> Yi(Ydata + H * W * i, H * W);
99 const T channel_scale = inv_stdev * scale_data[i % C];
100 const T channel_shift = bias_data[i % C] - Xi_mean * channel_scale;
101 Yi = Xi * channel_scale + channel_shift;
// Registers InstanceNormOp<float, CPUContext> as the CPU implementation of
// the InstanceNorm operator.
107 REGISTER_CPU_OPERATOR(InstanceNorm, InstanceNormOp<float, CPUContext>);
// Schema for the InstanceNorm operator.
109 OPERATOR_SCHEMA(InstanceNorm)
// Output 0 may reuse the storage of input 0. The NHWC implementation still
// rejects aliased input/output at run time with "Can't run InstanceNorm NHWC
// in-place".
112 .AllowInplace({{0,0}})
114 The *InstanceNorm* op applies Instance Normalization over a 4D input as described in [Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022). 116 $$output = \frac{input-\mu_{input}}{\sqrt{\sigma_{input}^2 + \epsilon}}*scale + bias$$ 118 Notice, two of the outputs are optional so there are three output cases for this op. Case 1: output; Case 2: output, saved_mean; Case 3: output, saved_mean, saved_inv_stdev. 122 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/instance_norm_op.h 123 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/instance_norm_op.cc 128 <summary> <b>Example</b> </summary> 134 workspace.ResetWorkspace() 136 op = core.CreateOperator( 138 ["input", "scale", "bias"], 143 workspace.FeedBlob("input", np.random.randn(2, 1, 3, 3).astype(np.float32)) 144 print("input:\n", workspace.FetchBlob("input"), "\n") 146 workspace.FeedBlob("scale", np.array([1.5]).astype(np.float32)) 147 print("scale: ", workspace.FetchBlob("scale")) 149 workspace.FeedBlob("bias", np.array([1.]).astype(np.float32)) 150 print("bias: ", workspace.FetchBlob("bias")) 152 workspace.RunOperatorOnce(op) 153 print("output:\n", workspace.FetchBlob("output")) 162 [[[[ 0.97856593 -1.1832817 -0.2540021 ] 163 [-1.3315694 -0.7485018 0.3787225 ] 164 [-0.6826597 -1.4637762 0.57116514]]] 167 [[[-0.44948956 0.85544354 -0.9315333 ] 168 [-0.37202677 -0.22266895 -0.27194235] 169 [ 0.4948163 -0.7296504 1.3393803 ]]]] 174 [[[[ 3.5017493 -0.3791256 1.2890853 ] 175 [-0.6453266 0.40137637 2.4249308 ] 176 [ 0.5195738 -0.8826599 2.7703972 ]]] 179 [[[ 0.12639964 2.856744 -0.8821926 ] 180 [ 0.28847694 0.60098207 0.49788612] 181 [ 2.1021945 -0.45978796 3.869297 ]]]] 188 .Arg("epsilon",
"*(type: float; default: 1e-5)* The epsilon value to use to avoid division by zero.")
// Data-layout argument: the description names "NCHW" as the default and
// "NHWC" as the only other valid value.
189 .Arg(
"*(type: string; default: \"NCHW\")* Specifies the order of the input data blob, where $N$ is batch size, $C$ is number of channels, $H$ is spatial height, and $W$ is spatial width. The only other valid option is \"NHWC\".")
// Inputs: 0 is the 4-D data tensor, 1 the per-channel scale and 2 the
// per-channel bias (both of size C).
190 .Input(0,
"The input 4-dimensional NCHW tensor to be operated on.")
191 .Input(1,
"The input 1-dimensional scale tensor of size *C*.")
192 .Input(2,
"The input 1-dimensional bias tensor of size *C*.")
// Descriptions of the normalized output and of the two optional saved
// statistics (mean and inverse standard deviation).
196 "The output 4-dimensional tensor of the same shape as input.")
200 "(Optional) Saved mean used during training to speed up gradient computation. Should not be used for testing.")
204 "(Optional) Saved inverse stdev used during training to speed up gradient computation. Should not be used for testing.");
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...