doxygen-c/html/conv__dnnlowp__op_8h_source.html

 #pragma once

 #include <fbgemm/Fbgemm.h>
 #include <fbgemm/src/FbgemmI8DepthwiseAvx2.h>
 #include "caffe2/operators/conv_op.h"
 #include "caffe2/operators/conv_pool_op_base.h"
 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
 #include "caffe2/quantization/server/conv_pool_dnnlowp_op_base.h"
 #include "caffe2/quantization/server/dnnlowp.h"
 #include "caffe2/quantization/server/op_wrapper.h"

 namespace caffe2 {

 // Alias for the float / CPUContext instantiation of ConvOp. It is passed as
 // the second template argument to ConvPoolDNNLowPOpBase by ConvDNNLowPOp
 // below. NOTE(review): presumably this is the fp32 operator the quantized
 // operator wraps or falls back to -- confirm against
 // conv_pool_dnnlowp_op_base.h.
 using ConvFp32Op = ConvOp<float, CPUContext>;

 // Convolutional layer computed in integer with quantization.
 //
 // Template parameters:
 //   T         - element type of the quantized data; a signed counterpart
 //               (T_signed) is derived from it for the weights.
 //   ReluFused - defaults to false; not referenced in this header.
 //               NOTE(review): presumably selects a fused-ReLU epilogue in
 //               the out-of-line definitions -- confirm in the .cc file.
 //
 // Apart from Acc16(), this class only declares its member functions; their
 // definitions live outside this header.
 template <typename T, bool ReluFused = false>
 class ConvDNNLowPOp : public ConvPoolDNNLowPOpBase<T, ConvFp32Op> {
  public:
   USE_CONV_POOL_BASE_FUNCTIONS(CPUContext);
   USE_CONV_POOL_DNNLOWP_OPERATOR_BASE_FUNCTIONS(T, ConvFp32Op);
   ConvDNNLowPOp(const OperatorDef& operator_def, Workspace* ws);
   virtual ~ConvDNNLowPOp();

  protected:
   // Per-storage-order entry points; both override virtuals from a base class.
   bool RunOnDeviceWithOrderNCHW() override;
   bool RunOnDeviceWithOrderNHWC() override;

   bool GetQuantizationParameters_();

   bool IsConvGEMM_() const;
   bool NoIm2ColNHWC_();
   int KernelDim_();

   const T* Im2ColNHWC_(Tensor* col_buffer);

   // Accessors returning mutable references to per-group parameters.
   // NOTE(review): presumably these index into filter_qparams_ and
   // requantization_params_ respectively -- confirm in the definitions.
   dnnlowp::TensorQuantizationParams& FilterQuantizationParams(int group_id);
   dnnlowp::RequantizationParams& RequantizationParams(int group_id);

   // Writes a [begin, end) range for groups and for i through the four output
   // pointers, given the calling thread's id and the total thread count.
   // NOTE(review): the exact partitioning policy is defined out of line.
   static void PartitionGroupedNHWCConv_(
       int* group_begin,
       int* group_end,
       int* i_begin,
       int* i_end,
       int num_groups,
       int m,
       int nthreads,
       int thread_id);

   // Returns false in this class; virtual so that derived operators can
   // override it. NOTE(review): presumably reports whether 16-bit
   // accumulation is used -- confirm against the derived classes.
   virtual bool Acc16() const {
     return false;
   }

   // Scratch tensors, all constructed for the CPU device.
   Tensor col_buffer_{CPU};
   Tensor img_shape_device_{CPU};
   Tensor col_buffer_shape_device_{CPU};

   // Input: X, W, b
   // Output: Y
   INPUT_TAGS(INPUT, FILTER, BIAS);

   // x86 only provides SIMD instructions that multiply a signed integer with an
   // unsigned integer. We use signed for weights.
   using T_signed = typename std::make_signed<T>::type;

   // used in slow path for T != uint8_t
   std::vector<T_signed> W_quantized_;

   // pre-computed biases and offsets
   std::shared_ptr<std::vector<std::int32_t>> column_offsets_;
   std::vector<std::int32_t> row_offsets_;
   // Non-owning view of the quantized bias values; null until assigned.
   const std::int32_t* b_quantized_data_{nullptr};

   std::vector<std::uint8_t> X_pack_buf_;

   void RunOnDeviceEpilogueNCHW_(
       const T* col_buffer_data,
       std::int32_t* Y_int32,
       T* Y_data,
       std::size_t i_offset,
       int group_id);
   void RunOnDeviceEpilogueNHWC_(
       const T* col_buffer_data,
       std::int32_t* Y_int32);

   std::vector<std::int32_t> Y_int32_;
   std::vector<dnnlowp::TensorQuantizationParams> filter_qparams_;
   std::vector<std::int32_t> filter_zero_points_;

   std::vector<float> requantization_multipliers_;
   // Given an in-class default, like the other scalar members of this class,
   // so the flag never holds an indeterminate value if a constructor path
   // does not assign it.
   bool quantize_groupwise_{false};

  private:
   void QuantizeWeight_();
   void PreComputeRowColumnOffsets_();
   void QuantizeBias_();

   // Predicates for the specialised kernels whose packed weights are declared
   // further down (depthwise 3x3, depthwise 3x3x3, small grouped conv).
   bool TakeDepthWise3x3FastPath_();
   bool TakeDepthWise3x3x3FastPath_();
   bool TakeGConvFastPath_();

   // Templated on the packed-A matrix type and on the fbgemm quantization
   // granularity. Parameter types are spelled with std:: to match the rest of
   // this header instead of relying on using-declarations from other headers.
   template <typename PackAMatrix, fbgemm::QuantizationGranularity Q_GRAN>
   void DispatchFBGEMM_(
       PackAMatrix& packA,
       std::vector<std::int32_t>* Y_int32,
       std::uint8_t* Y_uint8_data);

   void ConvNHWCCore_(
       const T* col_buffer_data,
       std::vector<std::int32_t>* Y_int32);

   std::vector<dnnlowp::RequantizationParams> requantization_params_;

   // used in fast path for T == uint8_t
   std::shared_ptr<fbgemm::PackBMatrix<std::int8_t>> Wq_packed_;

   // For depthwise 3x3 conv
   std::shared_ptr<fbgemm::Packed3x3ConvMatrix> Wq_depthwise_3x3_packed_;
   // For depthwise 3x3x3 conv
   std::shared_ptr<fbgemm::Packed3x3x3ConvMatrix> Wq_depthwise_3x3x3_packed_;
   // For small gconv
   std::shared_ptr<fbgemm::PackWeightMatrixForGConv<std::int8_t>>
       Wq_gconv_packed_;

   // pre-computed biases and offsets
   std::shared_ptr<std::vector<std::int32_t>> b_quantized_;

   // NOTE(review): presumably the input scale seen on the previous run, used
   // to detect when scale-dependent values must be recomputed -- confirm.
   float in_qparams_scale_old_ = 0;
 }; // class ConvDNNLowPOp

 } // namespace caffe2
caffe2::ConvOp
Definition: conv_op.h:14

caffe2::ConvDNNLowPOp::IsConvGEMM_
bool IsConvGEMM_() const
Definition: conv_dnnlowp_op.cc:162

caffe2::ConvDNNLowPOp::GetQuantizationParameters_
bool GetQuantizationParameters_()
Definition: conv_dnnlowp_op.cc:440

T
Definition: dataloader.cpp:482

caffe2::ConvPoolDNNLowPOpBase
Definition: conv_pool_dnnlowp_op_base.h:23

caffe2::CPUContext
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Definition: context.h:40

nom::repr::Tensor
Definition: NeuralNet.h:158

caffe2::Workspace
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47

caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13

caffe2::ConvDNNLowPOp
Definition: conv_dnnlowp_op.h:18