doxygen-c/html/fbgemm__pack__op_8h_source.html

 #pragma once

 #include "caffe2/core/operator.h"
 #include "caffe2/operators/conv_op.h"
 #include "caffe2/quantization/server/conv_pool_dnnlowp_op_base.h"
 #include "caffe2/quantization/server/fbgemm_pack_blob.h"
 #include "caffe2/quantization/server/fully_connected_dnnlowp_op.h"

 namespace caffe2 {

 // Alias for the CPU instantiation of FullyConnectedOp. It is passed as the
 // second template argument of the DNNLowPOp base class below.
 using FCFp32Op = FullyConnectedOp<CPUContext>;

 // Operator built on DNNLowPOp<std::uint8_t, FCFp32Op>.
 // NOTE(review): judging by its name this is the fully-connected counterpart of
 // ConvDNNLowPPackWeightOp (i.e. it pre-packs FC weights for DNNLOWP); the
 // implementation is not in this header, so confirm against the definition.
 class FullyConnectedDNNLowPPackWeightOp final
     : public DNNLowPOp<std::uint8_t, FCFp32Op> {
  public:
   // Builds the operator from its definition and the workspace it runs in.
   // Defined out of line.
   FullyConnectedDNNLowPPackWeightOp(
       const OperatorDef& operator_def,
       Workspace* ws);
   USE_OPERATOR_FUNCTIONS(CPUContext);

   // Operator entry point; overrides the base-class virtual. Defined out of
   // line. Returns bool (presumably true on success -- caffe2 convention,
   // confirm against the base class).
   bool RunOnDevice() override;

  private:
   // Presumably the axis at which the weight tensor is split into a 2-D
   // matrix -- TODO confirm against the constructor in the .cc file.
   int axis_w_;
   // Presumably selects per-output-channel quantization parameters instead of
   // a single set for the whole tensor -- TODO confirm.
   bool quantize_channelwise_;
   int nbits_in_non_outlier_; // only for DNNLOWP_ACC16

   // Declares the input tags FILTER and BIAS. The macro is defined outside
   // this file; presumably it names the operator inputs in positional order.
   INPUT_TAGS(FILTER, BIAS);
 };

 // Alias for the float/CPU instantiation of ConvOp. It is passed as the second
 // template argument of the ConvPoolDNNLowPOpBase base class below.
 using ConvFp32Op = ConvOp<float, CPUContext>;

 // Pack a weight matrix that can be used by DNNLOWP Int8Conv operators.
 // Built on ConvPoolDNNLowPOpBase<std::uint8_t, ConvFp32Op>; all member
 // functions are defined out of line.
 class ConvDNNLowPPackWeightOp final
     : public ConvPoolDNNLowPOpBase<std::uint8_t, ConvFp32Op> {
  public:
   USE_CONV_POOL_BASE_FUNCTIONS(CPUContext);
   USE_CONV_POOL_DNNLOWP_OPERATOR_BASE_FUNCTIONS(std::uint8_t, ConvFp32Op);
   // Builds the operator from its definition and the workspace it runs in.
   ConvDNNLowPPackWeightOp(const OperatorDef& operator_def, Workspace* ws);

   // Operator entry point; overrides the base-class virtual. Returns bool
   // (presumably true on success -- caffe2 convention, confirm).
   bool RunOnDevice() override;

  private:
   // Predicates named after specialized kernel paths (depthwise 3x3,
   // depthwise 3x3x3, and group convolution). Presumably each reports whether
   // the corresponding fast path applies to this operator's configuration --
   // the conditions live in the definitions, not in this header.
   bool TakeDepthWise3x3FastPath_();
   bool TakeDepthWise3x3x3FastPath_();
   bool TakeGConvFastPath_();

   // Presumably selects per-group quantization parameters instead of a single
   // set for the whole filter -- TODO confirm against the constructor.
   bool quantize_groupwise_;
   int nbits_in_non_outlier_; // only for DNNLOWP_ACC16

   // Declares the input tags FILTER and BIAS. The macro is defined outside
   // this file; presumably it names the operator inputs in positional order.
   INPUT_TAGS(FILTER, BIAS);
 };

 // Helper functions for packing weights that can be used by
 // ConvDNNLowPPackWeightOp, ConvDNNLowPOp, and ConvDNNLowPAcc16Op

 // Declaration only; the definition is elsewhere.
 // The signature suggests it quantizes the weight stored in `blob`, writing
 // the signed counterpart of T (std::make_signed<T>::type) into `w_quantized`
 // and the quantization parameters into `qparams` -- confirm against the
 // definition.
 //
 //   blob        - source blob holding the weight (read-only).
 //   kernel_dim  - presumably the number of weight elements per output
 //                 channel -- TODO confirm.
 //   M           - presumably the number of output channels -- TODO confirm.
 //   qparams     - non-const reference; presumably read for its size (number
 //                 of quantization groups) and filled in by the call.
 //   w_quantized - non-const reference receiving the quantized weights.
 //   qfactory    - quantization factory passed by pointer; whether it may be
 //                 null is not visible here.
 template <typename T>
 void QuantizeWeight(
     const Blob& blob,
     int kernel_dim,
     int M,
     vector<dnnlowp::TensorQuantizationParams>& qparams,
     vector<typename std::make_signed<T>::type>& w_quantized,
     dnnlowp::QuantizationFactory* qfactory);

 // Declaration only; the definition is elsewhere.
 // The signature suggests it derives one int32 offset per column of the
 // num_rows x num_cols matrix `W` and stores the results in `col_offsets`,
 // taking `qparams` into account -- the exact formula and the memory layout of
 // `W` are not visible in this header; confirm against the definition.
 //
 //   num_rows, num_cols - dimensions of W.
 //   W                  - pointer to the (read-only) matrix elements of type T.
 //   qparams            - read-only quantization parameters.
 //   col_offsets        - non-const reference receiving the offsets.
 template <typename T>
 void ComputeColumnOffsets(
     int num_rows,
     int num_cols,
     const T* W,
     const vector<dnnlowp::TensorQuantizationParams>& qparams,
     vector<int32_t>& col_offsets);

 // Declaration only; the definition is elsewhere.
 // Returns a pointer to an fbgemm::CompressedSparseColumn. The name suggests
 // it holds the "outlier" entries of `W_quantized`, i.e. values that do not
 // fit in `nbits_in_non_outlier` bits -- confirm against the definition.
 //
 // NOTE(review): the raw-pointer return does not express ownership; check
 // whether callers are expected to take ownership of the result.
 // NOTE(review): `W_quantized` is taken by non-const reference, so the call
 // may modify it (presumably removing the extracted outliers) -- confirm.
 //
 //   groups               - presumably the number of convolution groups.
 //   kernel_dim           - presumably weight elements per output channel.
 //   M                    - presumably the number of output channels.
 //   nbits_in_non_outlier - bit-width threshold for non-outlier values; the
 //                          class members of the same name are marked
 //                          "only for DNNLOWP_ACC16".
 fbgemm::CompressedSparseColumn* ExtractOutlierMatrix(
     int groups,
     int kernel_dim,
     int M,
     int nbits_in_non_outlier,
     vector<std::int8_t>& W_quantized);

 } // namespace caffe2
caffe2::Blob
Blob is a general container that hosts a typed pointer.
Definition: blob.h:24

caffe2::ConvOp
Definition: conv_op.h:14

M
Definition: any.cpp:108

T
Definition: dataloader.cpp:482

caffe2::ConvPoolDNNLowPOpBase
Definition: conv_pool_dnnlowp_op_base.h:23

caffe2::CPUContext
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Definition: context.h:40

caffe2::Workspace
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47

caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13

caffe2::ConvDNNLowPPackWeightOp
Pack a weight matrix that can be used by DNNLOWP Int8Conv operators.
Definition: fbgemm_pack_op.h:45

caffe2::FullyConnectedDNNLowPPackWeightOp
Definition: fbgemm_pack_op.h:13

dnnlowp::QuantizationFactory
Definition: dnnlowp.h:23

caffe2::DNNLowPOp
A convenient base class for C2 operators with DNNLOWP engine.
Definition: dnnlowp_op.h:77