1 #include "dequantize_dnnlowp_op.h" 3 #include "caffe2/core/tensor_int8.h" 4 #include "caffe2_dnnlowp_utils.h" 9 DequantizeDNNLowPOp<T>::DequantizeDNNLowPOp(
10 const OperatorDef& operator_def,
12 : Operator<CPUContext>(operator_def, ws),
13 qfactory_(
dnnlowp::GetQuantizationFactoryOf(this)) {
14 if (this->debug_def().engine() ==
"DNNLOWP_16" ||
15 this->debug_def().engine() ==
"DNNLOWP_ROWWISE_16") {
17 << this->debug_def().engine()
18 <<
" is an experimental feature mostly for testing accuracy with " 19 "fixed-point precision higher than 8 and performance is very slow";
24 bool DequantizeDNNLowPOp<T>::RunOnDevice() {
26 TensorQuantizationParams in_qparams =
27 GetInputTensorQuantizationParamsOf(
this, 0, qfactory_.get());
29 const TensorCPU& input = InputIsType<int8::Int8TensorCPU>(0)
30 ? this->
template Input<int8::Int8TensorCPU>(0).t
33 CAFFE_ENFORCE(input.template IsType<T>());
34 Output(0)->ResizeLike(input);
35 fbgemm::Dequantize<T>(
36 input.template data<T>(),
37 Output(0)->template mutable_data<float>(),
44 OPERATOR_SCHEMA(Dequantize)
47 .IdenticalTypeAndShapeOfInput(0);
49 REGISTER_CPU_OPERATOR_WITH_ENGINE(
52 DequantizeDNNLowPOp<std::uint8_t>);
53 REGISTER_CPU_OPERATOR_WITH_ENGINE(
56 DequantizeDNNLowPOp<std::uint8_t>);
58 REGISTER_CPU_OPERATOR_WITH_ENGINE(
61 DequantizeDNNLowPOp<std::uint16_t>);
62 REGISTER_CPU_OPERATOR_WITH_ENGINE(
65 DequantizeDNNLowPOp<std::uint16_t>);
67 REGISTER_CPU_OPERATOR_WITH_ENGINE(
70 DequantizeDNNLowPOp<std::uint8_t>);
71 REGISTER_CPU_OPERATOR_WITH_ENGINE(
74 DequantizeDNNLowPOp<std::uint8_t>);
75 REGISTER_CPU_OPERATOR_WITH_ENGINE(
76 Int8DequantizeRowWise,
78 DequantizeDNNLowPOp<std::uint8_t>);
// NOTE(review): the following text is IDE/doc-tooltip output that was
// accidentally pasted into the source; kept here as comments so it no longer
// reads as stray code:
//   const Tensor& Input(int idx, DeviceType type = Context::GetDeviceType())
//     Retrieves a non-owning reference to the input at position 'idx' for
//     this operator.
//   (Registry note) A global dictionary holds information about which Caffe2
//   modules have been loaded in the current process.