3 #include <fbgemm/Fbgemm.h> 4 #include <fbgemm/src/FbgemmI8DepthwiseAvx2.h> 5 #include "caffe2/operators/conv_op.h" 6 #include "caffe2/operators/conv_pool_op_base.h" 7 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h" 8 #include "caffe2/quantization/server/conv_pool_dnnlowp_op_base.h" 9 #include "caffe2/quantization/server/dnnlowp.h" 10 #include "caffe2/quantization/server/op_wrapper.h" 14 using ConvFp32Op = ConvOp<float, CPUContext>;
17 template <
typename T,
bool ReluFused = false>
21 USE_CONV_POOL_DNNLOWP_OPERATOR_BASE_FUNCTIONS(
T,
ConvFp32Op);
26 bool RunOnDeviceWithOrderNCHW()
override;
27 bool RunOnDeviceWithOrderNHWC()
override;
39 const T* Im2ColNHWC_(
Tensor* col_buffer);
41 dnnlowp::TensorQuantizationParams& FilterQuantizationParams(
int group_id);
42 dnnlowp::RequantizationParams& RequantizationParams(
int group_id);
44 static void PartitionGroupedNHWCConv_(
54 virtual bool Acc16()
const {
59 Tensor img_shape_device_{CPU};
60 Tensor col_buffer_shape_device_{CPU};
64 INPUT_TAGS(INPUT, FILTER, BIAS);
68 using T_signed =
typename std::make_signed<T>::type;
71 std::vector<T_signed> W_quantized_;
74 std::shared_ptr<std::vector<std::int32_t>> column_offsets_;
75 std::vector<std::int32_t> row_offsets_;
76 const std::int32_t* b_quantized_data_{
nullptr};
78 std::vector<std::uint8_t> X_pack_buf_;
80 void RunOnDeviceEpilogueNCHW_(
81 const T* col_buffer_data,
82 std::int32_t* Y_int32,
86 void RunOnDeviceEpilogueNHWC_(
87 const T* col_buffer_data,
88 std::int32_t* Y_int32);
90 std::vector<std::int32_t> Y_int32_;
91 std::vector<dnnlowp::TensorQuantizationParams> filter_qparams_;
92 std::vector<std::int32_t> filter_zero_points_;
94 std::vector<float> requantization_multipliers_;
95 bool quantize_groupwise_;
98 void QuantizeWeight_();
99 void PreComputeRowColumnOffsets_();
100 void QuantizeBias_();
102 bool TakeDepthWise3x3FastPath_();
103 bool TakeDepthWise3x3x3FastPath_();
104 bool TakeGConvFastPath_();
106 template <
typename PackAMatrix, fbgemm::QuantizationGranularity Q_GRAN>
107 void DispatchFBGEMM_(
109 vector<std::int32_t>* Y_int32,
110 uint8_t* Y_uint8_data);
112 void ConvNHWCCore_(
const T* col_buffer_data, vector<std::int32_t>* Y_int32);
114 std::vector<dnnlowp::RequantizationParams> requantization_params_;
117 std::shared_ptr<fbgemm::PackBMatrix<std::int8_t>> Wq_packed_;
120 std::shared_ptr<fbgemm::Packed3x3ConvMatrix> Wq_depthwise_3x3_packed_;
122 std::shared_ptr<fbgemm::Packed3x3x3ConvMatrix> Wq_depthwise_3x3x3_packed_;
124 std::shared_ptr<fbgemm::PackWeightMatrixForGConv<std::int8_t>>
128 std::shared_ptr<std::vector<std::int32_t>> b_quantized_;
130 float in_qparams_scale_old_ = 0;
bool GetQuantizationParameters_()
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...