Caffe2 - C++ API
A deep learning, cross platform ML framework
fully_connected_dnnlowp_op.h
1 #pragma once
2 
3 #include <fbgemm/Fbgemm.h>
4 #include "caffe2/operators/fully_connected_op.h"
5 #include "caffe2/quantization/server/dnnlowp_op.h"
6 
7 namespace caffe2 {
8 
9 template <typename T>
11  : public DNNLowPOp<T, FullyConnectedOp<CPUContext>> {
12  public:
13  FullyConnectedDNNLowPOp(const OperatorDef& operator_def, Workspace* ws);
14  bool RunOnDevice() override;
15 
16  USE_OPERATOR_FUNCTIONS(CPUContext);
17  USE_DNNLOWP_OPERATOR_BASE_FUNCTIONS(T, FullyConnectedOp<CPUContext>);
18 
19  protected:
20  bool GetQuantizationParameters_();
21 
22  std::size_t axis_{1};
23  std::size_t axis_w_{1};
24  vector<std::int64_t> Y_shape_cache_;
25 
26  std::vector<dnnlowp::RequantizationParams> requantization_params_;
27  bool requantization_param_selected_{false};
28 
29  // x86 only provides SIMD instructions that multiply a signed integer with an
30  // unsigned integer. We use signed for weights.
31  using T_signed = typename std::make_signed<T>::type;
32 
33  // used in fast path for T == uint8_t
34  std::shared_ptr<fbgemm::PackBMatrix<std::int8_t>> Wq_packed_;
35  std::vector<std::uint8_t> X_pack_buf_;
36 
37  std::vector<std::int32_t> Y_int32_;
38  std::vector<dnnlowp::TensorQuantizationParams> filter_qparams_;
39  std::vector<float> filter_scales_;
40  std::vector<std::int32_t> filter_zero_points_;
41 
42  std::vector<float> requantization_multipliers_;
43  bool quantize_channelwise_;
44 
45  // used in slow path for T != uint8_t
46  std::vector<T_signed> W_quantized_;
47 
48  // pre-computed biases and offsets
49  std::shared_ptr<std::vector<std::int32_t>> b_quantized_;
50  const std::int32_t* b_quantized_data_{nullptr};
51  std::vector<std::int32_t> row_offsets_;
52  std::shared_ptr<std::vector<std::int32_t>> column_offsets_;
53 
54  // Dequantized bias populated when input bias is quantized and
55  // dequantized_output_ == true
56  std::vector<float> b_dequantized_;
57  const float* b_dequantized_data_{nullptr};
58 
59  bool is_weight_constant_{true};
60 
61  float in_qparams0_scale_old_ = 0;
62  std::int32_t in_qparams0_zero_point_old_ = 0;
63 }; // class FullyConnectedDNNLowPOp
64 
65 } // namespace caffe2
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Definition: context.h:40
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
A convenient base class for C2 operators with DNNLOWP engine.
Definition: dnnlowp_op.h:77