Caffe2 - C++ API
A deep learning, cross platform ML framework
fbgemm_pack_blob.h
#pragma once

#include <memory>

#include <fbgemm/Fbgemm.h>
#include <fbgemm/src/FbgemmI8DepthwiseAvx2.h>

#include "caffe2/quantization/server/dnnlowp.h"

namespace caffe2 {

/**
 * Packed weight matrix for DNNLOWP Int8FC operator
 */
struct Int8FCDNNLowPPackedWeightBlob {
  std::vector<dnnlowp::TensorQuantizationParams> qparams;
  std::shared_ptr<std::vector<std::int32_t>> column_offsets;

  // The original tensor before packing
  Tensor original_tensor{CPU};

  std::shared_ptr<std::vector<std::int32_t>> bias;

  // Only for 32-bit accumulation
  std::shared_ptr<fbgemm::PackBMatrix<std::int8_t>> W;

  // Only for 16-bit accumulation
  // Dense matrix holding common values
  std::shared_ptr<fbgemm::PackBMatrix<std::int8_t, std::int16_t>> W_acc16;
  // Sparse matrix holding outliers
  std::shared_ptr<fbgemm::CompressedSparseColumn> W_outlier;
  int nbits_in_non_outlier;
};

/**
 * Packed weight matrix for DNNLOWP Int8Conv operator
 */
struct Int8ConvDNNLowPPackedWeightBlob : public Int8FCDNNLowPPackedWeightBlob {
  // Only for 32-bit accumulation
  std::shared_ptr<fbgemm::Packed3x3ConvMatrix> W_depthwise_3x3;
  std::shared_ptr<fbgemm::Packed3x3x3ConvMatrix> W_depthwise_3x3x3;
  std::shared_ptr<fbgemm::PackWeightMatrixForGConv<std::int8_t>> W_gconv;
};

} // namespace caffe2
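For orientation, the sketch below shows how the 32-bit-accumulation field W of an Int8FCDNNLowPPackedWeightBlob can be populated with fbgemm::PackBMatrix. It is a minimal example, not part of this header: the helper name PackFCWeight and the shapes are illustrative, and the constructor arguments (transpose flag, logical B dimensions, source pointer, leading dimension) assume fbgemm's public PackBMatrix API, where the quantized N x K FC weight is packed as the transposed B operand of the int8 GEMM.

#include <cstdint>
#include <memory>
#include <vector>

#include <fbgemm/Fbgemm.h>

// Hypothetical helper: packs a row-major N x K int8 FC weight matrix
// (N output channels, K input channels) as the B operand of Y = X * W^T.
std::shared_ptr<fbgemm::PackBMatrix<std::int8_t>> PackFCWeight(
    const std::vector<std::int8_t>& w_quantized, // row-major, size N * K
    int N,
    int K) {
  return std::make_shared<fbgemm::PackBMatrix<std::int8_t>>(
      fbgemm::matrix_op_t::Transpose, // the source matrix is B^T (i.e. W)
      /*nRow=*/K, // logical rows of B
      /*nCol=*/N, // logical columns of B
      w_quantized.data(),
      /*ld=*/K); // leading dimension of the row-major source
  // The remaining fields (qparams, column_offsets, bias) would be filled
  // from the quantization parameters of the same weight tensor.
}

A 16-bit-accumulation packing would instead fill W_acc16 (and, when outliers are split out, W_outlier and nbits_in_non_outlier) rather than W.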