5 #include "caffe2/operators/group_norm_op.h" 6 #include "caffe2/quantization/server/dnnlowp_op.h" 10 using GroupNormFP32Op = GroupNormOp<float, CPUContext>;
20 bool RunOnDevice()
override;
23 bool GetQuantizationParameters();
27 void QuantizeGammaImpl();
31 bool RunOnDeviceWithOrderNCHW();
33 bool RunOnDeviceWithOrderNHWC();
35 void QuantizedGroupMomentsNCHW(
44 void QuantizedGroupMomentsNHWC(
53 void DequantizedGroupMomentsNCHW(
62 void DequantizedGroupMomentsNHWC(
71 void ComputeQuantizedInvStd(
75 int32_t* rsig_quantized);
77 void ComputeQuantizedFusedParams(
88 void ComputeDequantizedFusedParams(
99 void AffineBatchChannelQuantizedNCHW(
104 const int32_t* scale,
108 void AffineBatchChannelQuantizedNHWC(
113 const int32_t* scale,
117 void AffineBatchChannelDequantizedNCHW(
126 void AffineBatchChannelDequantizedNHWC(
137 const float epsilon_;
138 const StorageOrder order_;
139 const bool is_param_constant_;
141 std::vector<int32_t> mu_quantized_;
142 std::vector<int32_t> rsig_quantized_;
143 std::vector<float> mu_dequantized_;
144 std::vector<float> rsig_dequantized_;
145 dnnlowp::TensorQuantizationParams rsig_qparams_;
147 std::vector<int32_t> gamma_quantized_;
148 std::vector<int32_t> beta_quantized_;
149 std::vector<float> gamma_dequantized_;
150 std::vector<float> beta_dequantized_;
151 const int32_t* gamma_quantized_data_ =
nullptr;
152 const int32_t* beta_quantized_data_ =
nullptr;
153 const float* gamma_dequantized_data_ =
nullptr;
154 const float* beta_dequantized_data_ =
nullptr;
156 std::vector<int32_t> scale_quantized_;
157 std::vector<int32_t> bias_quantized_;
158 std::vector<float> scale_dequantized_;
159 std::vector<float> bias_dequantized_;
160 dnnlowp::TensorQuantizationParams internal_qparams_;
162 std::vector<float> X_dequantized_;
163 std::vector<int32_t> Y_int32_;
165 float cached_X_qparams_scale_ = 0.0f;
169 INPUT_TAGS(INPUT, GAMMA, BETA);
170 OUTPUT_TAGS(OUTPUT, MU, INV_SIGMA);
175 template <
typename T>
176 void VectorMomentsAVX2(
const int N,
const T* src, int64_t* sum, int64_t* sumsq);
178 void ComputeQuantizedFusedParamsAVX2(
182 const int32_t X_zero_point,
185 const int32_t* gamma,
189 template <
typename T>
190 void AffineBatchChannelAndRequantizeNCHWAVX2(
194 const dnnlowp::RequantizationParams& params,
196 const int32_t* scale,
200 template <
typename T>
201 void AffineBatchChannelAndRequantizeNHWCAVX2(
205 const dnnlowp::RequantizationParams& params,
207 const int32_t* scale,
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
A convenient base class for C2 operators with DNNLOWP engine.