// Constructor for the DNNLOWP (quantized) SpatialBN operator.
// Reads the "epsilon" (default 1e-5) and "order" (default "NCHW") arguments
// and validates that the op is configured for inference only.
// NOTE(review): this extract is garbled -- original source line numbers are
// fused into the text and several lines are missing (e.g. the enforce against
// StorageOrder::UNKNOWN below appears without its CAFFE_ENFORCE_NE head).
1 #include "caffe2/quantization/server/spatial_batch_norm_dnnlowp_op.h" 3 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h" 8 SpatialBNDNNLowPOp<T>::SpatialBNDNNLowPOp(
9 const OperatorDef& operator_def,
11 : DNNLowPOp<
T, SpatialBNOp<CPUContext>>(operator_def, ws),
// "epsilon" guards the 1/sqrt(var + epsilon) computation in
// ComputeFusedParam_ against division by zero.
12 OP_SINGLE_ARG(double,
"epsilon", epsilon_, 1e-5),
// Storage order ("NCHW" or "NHWC") selects the indexing scheme used by
// RunOnDevice.
13 order_(StringToStorageOrder(
14 this->template GetSingleArgument<
std::string>(
"order",
"NCHW"))) {
15 bool is_test = this->
template GetSingleArgument<bool>(
"is_test",
false);
// Quantized SpatialBN folds mean/var into fixed per-channel scale/shift,
// so only inference mode (is_test == true) is supported.
16 OPERATOR_NEEDS_FEATURE(
17 is_test,
"SpatialBN DNNLOWP op only works for inference.");
// The parsed order must not be UNKNOWN (the enforce macro's first lines are
// not visible in this extract).
20 StorageOrder::UNKNOWN,
21 "order should be either \"NCHW\" or \"NHWC\".");
// Inference-mode BN produces exactly one output tensor (Y).
22 CAFFE_ENFORCE(OutputSize() == 1);
23 CAFFE_ENFORCE_GT(epsilon_, 0);
// ComputeFusedParam_: fold the BN statistics into one per-channel affine map
//   alpha = scale / sqrt(var + epsilon)
//   beta  = bias - alpha * mean
// then rescale both into the quantized domain so RunOnDevice can compute
//   Y_q = out_zero_point + alpha * (X_q - in_zero_point) + beta.
// NOTE(review): the parameter list (C, scale, bias, mean, var, alpha, beta)
// is elided from this extract; the body reads those names.
27 void SpatialBNDNNLowPOp<T>::ComputeFusedParam_(
35 EigenVectorArrayMap<float> alpha_arr(alpha, C);
36 EigenVectorArrayMap<float> beta_arr(beta, C);
// Coefficient-wise over the C channels: alpha = scale * rsqrt(var + eps).
37 alpha_arr = ConstEigenVectorArrayMap<float>(scale, C) *
38 (ConstEigenVectorArrayMap<float>(var, C) + epsilon_).rsqrt();
39 beta_arr = ConstEigenVectorArrayMap<float>(bias, C) -
40 alpha_arr * ConstEigenVectorArrayMap<float>(mean, C);
// Requantization: alpha absorbs the input->output quantization scale ratio,
// and beta is expressed in units of the output scale.
43 alpha_arr = alpha_arr * (in_qparams_[0].scale / out_qparams_.scale);
44 beta_arr = beta_arr / out_qparams_.scale;
// RunOnDevice: quantized SpatialBN inference.
// Validates shapes, fuses (scale, bias, est_mean, est_var) into per-channel
// alpha/beta via ComputeFusedParam_, then applies the affine transform to the
// quantized input element-wise, rounding and clamping into the 8-bit range.
// NOTE(review): this extract is garbled -- several statements are truncated
// (the HxW accumulate, the ReinitializeTensor calls, the ComputeFusedParam_
// call site, and most closing braces are missing from view).
48 bool SpatialBNDNNLowPOp<T>::RunOnDevice() {
49 const auto& X = InputTensorCPU_(INPUT);
50 const auto& scale =
Input(SCALE);
51 const auto& bias =
Input(BIAS);
// Input must be at least 3-D: batch, channel, and >=1 spatial dimension.
53 const int ndim = X.dim();
54 CAFFE_ENFORCE_GE(ndim, 3);
55 const int N = X.dim32(0);
// Channel axis depends on layout: dim 1 for NCHW, last dim for NHWC.
56 const int C = (order_ == StorageOrder::NCHW ? X.dim32(1) : X.dim32(ndim - 1));
57 const std::vector<int> X_dims(X.sizes().cbegin(), X.sizes().cend());
// HxW = product of all non-batch dims divided by C (head of the accumulate
// expression is not visible in this extract).
60 X_dims.cbegin() + 1, X_dims.cend(), 1, std::multiplies<int>()) /
// scale/bias are per-channel vectors of length C.
62 CAFFE_ENFORCE_EQ(scale.numel(), C);
63 CAFFE_ENFORCE_EQ(bias.numel(), C);
65 GetOutputQuantizationParams_();
67 in_qparams_[0] = GetInputTensorQuantizationParamsOf(
this, 0, qfactory_.get());
69 const float* scale_data = scale.template data<float>();
70 const float* bias_data = bias.template data<float>();
// Scratch buffers for the fused per-channel affine parameters; the
// ReinitializeTensor call heads are elided in this extract.
72 &alpha_, {C}, at::dtype<float>().device(CPUContext::GetDeviceType()));
74 &beta_, {C}, at::dtype<float>().device(CPUContext::GetDeviceType()));
75 float* alpha_data = alpha_.template mutable_data<float>();
76 float* beta_data = beta_.template mutable_data<float>();
// Inference path: consume the precomputed running statistics.
80 const auto& mean =
Input(EST_MEAN);
81 const auto& var =
Input(EST_VAR);
82 CAFFE_ENFORCE_EQ(mean.numel(), C);
83 CAFFE_ENFORCE_EQ(var.numel(), C);
// Arguments feeding the (elided) ComputeFusedParam_ call.
88 mean.template data<float>(),
89 var.template data<float>(),
// Quantize the input lazily if it arrived as fp32.
95 dnnlowp::QuantizeInputIfNeeded(
this, 0, in_qparams_[0], X_temp);
96 auto* Y = OutputTensorCPU_(OUTPUT);
98 T* Y_data = GetQuantizedOutputData_();
// NCHW: channel-major traversal so alpha/beta lookups stay hot per channel.
100 if (order_ == StorageOrder::NCHW) {
101 for (
int c = 0; c < C; ++c) {
102 for (
int i = 0; i < N; ++i) {
103 for (
int j = 0; j < HxW; ++j) {
// Affine transform in the integer domain, rounded to nearest
// (the trailing beta term of the expression is elided here).
104 long quantized_down = out_qparams_.zero_point +
105 std::lrintf(alpha_data[c] *
106 (X_data[(i * C + c) * HxW + j] -
107 in_qparams_[0].zero_point) +
// Saturate the rounded value into the 8-bit representable range of T.
109 Y_data[(i * C + c) * HxW + j] =
110 fbgemm::clamp<long, T>(quantized_down, 8);
// NHWC branch: channels are innermost, so iterate spatial-major.
115 for (
int i = 0; i < N * HxW; ++i) {
116 for (
int c = 0; c < C; ++c) {
117 long quantized_down = out_qparams_.zero_point +
118 std::lrintf(alpha_data[c] *
119 (X_data[i * C + c] - in_qparams_[0].zero_point) +
121 Y_data[i * C + c] = fbgemm::clamp<long, T>(quantized_down, 8);
// Dequantize/copy out as required by the base class contract.
126 RunOnDeviceEpilogue_();
// Register the uint8 implementation under the DNNLOWP engine. Two
// registrations are present; their operator-name and engine arguments are
// elided in this extract (presumably SpatialBN and Int8SpatialBN -- confirm
// against the full source).
131 REGISTER_CPU_OPERATOR_WITH_ENGINE(
134 SpatialBNDNNLowPOp<uint8_t>);
136 REGISTER_CPU_OPERATOR_WITH_ENGINE(
139 SpatialBNDNNLowPOp<uint8_t>);
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to given dims and options if necessary, note that this will not do anything if ...
const Tensor & Input(int idx, DeviceType type=CPUContext::GetDeviceType())
Retrieve a non-owning reference to the input at position 'idx' for this operator. ...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...