Caffe2 - C++ API
A deep learning, cross platform ML framework
spatial_batch_norm_dnnlowp_op.cc
1 #include "caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.h"
2 
3 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
4 
5 namespace caffe2 {
6 
7 template <typename T>
8 SpatialBNDNNLowPOp<T>::SpatialBNDNNLowPOp(
9  const OperatorDef& operator_def,
10  Workspace* ws)
11  : DNNLowPOp<T, SpatialBNOp<CPUContext>>(operator_def, ws),
12  OP_SINGLE_ARG(double, "epsilon", epsilon_, 1e-5),
13  order_(StringToStorageOrder(
14  this->template GetSingleArgument<std::string>("order", "NCHW"))) {
15  bool is_test = this->template GetSingleArgument<bool>("is_test", false);
16  OPERATOR_NEEDS_FEATURE(
17  is_test, "SpatialBN DNNLOWP op only works for inference.");
18  CAFFE_ENFORCE_NE(
19  order_,
20  StorageOrder::UNKNOWN,
21  "order should be either \"NCHW\" or \"NHWC\".");
22  CAFFE_ENFORCE(OutputSize() == 1);
23  CAFFE_ENFORCE_GT(epsilon_, 0);
24 }
25 
26 template <typename T>
27 void SpatialBNDNNLowPOp<T>::ComputeFusedParam_(
28  const int C,
29  const float* scale,
30  const float* bias,
31  const float* mean,
32  const float* var,
33  float* alpha,
34  float* beta) {
35  EigenVectorArrayMap<float> alpha_arr(alpha, C);
36  EigenVectorArrayMap<float> beta_arr(beta, C);
37  alpha_arr = ConstEigenVectorArrayMap<float>(scale, C) *
38  (ConstEigenVectorArrayMap<float>(var, C) + epsilon_).rsqrt();
39  beta_arr = ConstEigenVectorArrayMap<float>(bias, C) -
40  alpha_arr * ConstEigenVectorArrayMap<float>(mean, C);
41 
42  // Adjust alpha and beta considering quantization scales
43  alpha_arr = alpha_arr * (in_qparams_[0].scale / out_qparams_.scale);
44  beta_arr = beta_arr / out_qparams_.scale;
45 }
46 
47 template <typename T>
48 bool SpatialBNDNNLowPOp<T>::RunOnDevice() {
49  const auto& X = InputTensorCPU_(INPUT);
50  const auto& scale = Input(SCALE);
51  const auto& bias = Input(BIAS);
52 
53  const int ndim = X.dim();
54  CAFFE_ENFORCE_GE(ndim, 3);
55  const int N = X.dim32(0);
56  const int C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(ndim - 1));
57  const std::vector<int> X_dims(X.sizes().cbegin(), X.sizes().cend());
58  const int HxW =
59  std::accumulate(
60  X_dims.cbegin() + 1, X_dims.cend(), 1, std::multiplies<int>()) /
61  C;
62  CAFFE_ENFORCE_EQ(scale.numel(), C);
63  CAFFE_ENFORCE_EQ(bias.numel(), C);
64 
65  GetOutputQuantizationParams_();
66 
67  in_qparams_[0] = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
68 
69  const float* scale_data = scale.template data<float>();
70  const float* bias_data = bias.template data<float>();
72  &alpha_, {C}, at::dtype<float>().device(CPUContext::GetDeviceType()));
74  &beta_, {C}, at::dtype<float>().device(CPUContext::GetDeviceType()));
75  float* alpha_data = alpha_.template mutable_data<float>();
76  float* beta_data = beta_.template mutable_data<float>();
77  if (N == 0) {
78  return true;
79  }
80  const auto& mean = Input(EST_MEAN);
81  const auto& var = Input(EST_VAR);
82  CAFFE_ENFORCE_EQ(mean.numel(), C);
83  CAFFE_ENFORCE_EQ(var.numel(), C);
84  ComputeFusedParam_(
85  C,
86  scale_data,
87  bias_data,
88  mean.template data<float>(),
89  var.template data<float>(),
90  alpha_data,
91  beta_data);
92 
93  vector<T> X_temp;
94  const T* X_data =
95  dnnlowp::QuantizeInputIfNeeded(this, 0, in_qparams_[0], X_temp);
96  auto* Y = OutputTensorCPU_(OUTPUT);
97  Y->Resize(X.sizes());
98  T* Y_data = GetQuantizedOutputData_();
99 
100  if (order_ == StorageOrder::NCHW) {
101  for (int c = 0; c < C; ++c) {
102  for (int i = 0; i < N; ++i) {
103  for (int j = 0; j < HxW; ++j) {
104  long quantized_down = out_qparams_.zero_point +
105  std::lrintf(alpha_data[c] *
106  (X_data[(i * C + c) * HxW + j] -
107  in_qparams_[0].zero_point) +
108  beta_data[c]);
109  Y_data[(i * C + c) * HxW + j] =
110  fbgemm::clamp<long, T>(quantized_down, 8);
111  }
112  }
113  }
114  } else {
115  for (int i = 0; i < N * HxW; ++i) {
116  for (int c = 0; c < C; ++c) {
117  long quantized_down = out_qparams_.zero_point +
118  std::lrintf(alpha_data[c] *
119  (X_data[i * C + c] - in_qparams_[0].zero_point) +
120  beta_data[c]);
121  Y_data[i * C + c] = fbgemm::clamp<long, T>(quantized_down, 8);
122  }
123  }
124  }
125 
126  RunOnDeviceEpilogue_();
127 
128  return true;
129 }
130 
// Register the uint8 quantized implementation under the DNNLOWP engine for
// both the generic SpatialBN op and its explicit Int8 alias.
REGISTER_CPU_OPERATOR_WITH_ENGINE(
    SpatialBN,
    DNNLOWP,
    SpatialBNDNNLowPOp<uint8_t>);

REGISTER_CPU_OPERATOR_WITH_ENGINE(
    Int8SpatialBN,
    DNNLOWP,
    SpatialBNDNNLowPOp<uint8_t>);
140 
141 } // namespace caffe2
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to the given dims and options if necessary; note that this does nothing if the tensor already matches them.
Definition: tensor.cc:127
const Tensor & Input(int idx, DeviceType type=CPUContext::GetDeviceType())
Retrieve a non-owning reference to the input at position 'idx' for this operator.
Definition: operator.h:702
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: static.cpp:64