#include "caffe2/core/operator.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
#include "caffe2/quantization/server/dnnlowp.h"
#include "caffe2/quantization/server/fbgemm_pack_blob.h"
#include "caffe2/quantization/server/op_wrapper.h"
#include "caffe2/quantization/server/sigmoid.h"
#include "caffe2/quantization/server/tanh.h"

C10_DECLARE_int(caffe2_omp_num_threads);

namespace caffe2 {
/**
 * A convenient base class for C2 operators with DNNLOWP engine.
 */
template <typename T, typename FP32_OP>
class DNNLowPOp : public Operator<CPUContext> {
  static_assert(std::is_integral<T>::value, "Integral required.");
 public:
  USE_OPERATOR_FUNCTIONS(CPUContext);

  DNNLowPOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<CPUContext>(operator_def, ws),
        in_qparams_(InputSize()),
        qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {
#ifdef _OPENMP
    if (FLAGS_caffe2_omp_num_threads > 0) {
      omp_set_num_threads(FLAGS_caffe2_omp_num_threads);
    }
#endif

    if (this->debug_def().engine() == "DNNLOWP_16" ||
        this->debug_def().engine() == "DNNLOWP_ROWWISE_16") {
      LOG(WARNING)
          << this->debug_def().engine()
          << " is an experimental feature mostly for testing accuracy with "
             "fixed-point precision higher than 8 and performance is very slow";
    }
  }
  virtual ~DNNLowPOp() {
    if (measure_quantization_error_) {
      dnnlowp::ReportQuantizationError(this, quantization_error_stats_);
    }
  }
 protected:
  const TensorCPU& InputTensorCPU_(int idx) {
    if (InputIsType<int8::Int8TensorCPU>(idx)) {
      return this->Input<int8::Int8TensorCPU>(idx).t;
    } else if (InputIsType<Int8FCDNNLowPPackedWeightBlob>(idx)) {
      return this->Input<Int8FCDNNLowPPackedWeightBlob>(idx).original_tensor;
    } else {
      return Input(idx);
    }
  }
  TensorCPU* OutputTensorCPU_(int idx) {
    if (dequantize_output_) {
      return Output(idx);
    } else {
      return &Outputs()[idx]->template GetMutable<int8::Int8TensorCPU>()->t;
    }
  }

  TensorCPU* OutputTensorCPU_(int idx, at::IntArrayRef dims, at::TensorOptions options) {
    if (dequantize_output_) {
      return Output(idx, dims, options.device(CPU));
    } else {
      auto* t = &Outputs()[idx]->template GetMutable<int8::Int8TensorCPU>()->t;
      ReinitializeTensor(t, dims, options.device(CPU));
      return t;
    }
  }
  T* GetQuantizedOutputData_() {
    if (dequantize_output_) {
      // When dequantizing the output, compute into a temporary quantized
      // buffer; RunOnDeviceEpilogue_ dequantizes it into the fp32 output.
      out_temp_.resize(Output(0)->numel());
      return out_temp_.data();
    } else {
      return OutputTensorCPU_(0)->template mutable_data<T>();
    }
  }
  void MeasureQuantizationError_() {
    if (!measure_quantization_error_ || !Fp32Op_()) {
      return;
    }

    const float* actual = nullptr;
    vector<float> actual_temp;
    if (OutputTensorCPU_(0)->template IsType<float>()) {
      actual = OutputTensorCPU_(0)->template data<float>();
    } else {
      actual_temp.resize(OutputTensorCPU_(0)->numel());
      fbgemm::Dequantize<T>(
          OutputTensorCPU_(0)->template data<T>(),
          actual_temp.data(),
          OutputTensorCPU_(0)->numel(),
          out_qparams_);
      actual = actual_temp.data();
    }

    float* ref = Fp32Op_()->Get()->Output(0)->template mutable_data<float>();
    if (followed_by_ == "Relu") {
      for (int i = 0; i < Output(0)->numel(); ++i) {
        ref[i] = std::max(0.f, ref[i]);
      }
    }

    dnnlowp::MeasureQuantizationError(
        actual, ref, OutputTensorCPU_(0)->numel(), &quantization_error_stats_);
  }
  void RunOnDeviceEpilogue_() {
    if (dequantize_output_) {
      fbgemm::Dequantize<T>(
          out_temp_.data(),
          OutputTensorCPU_(0)->template mutable_data<float>(),
          OutputTensorCPU_(0)->numel(),
          out_qparams_);
    } else {
      dnnlowp::PropagateOutputTensorQuantizationParams(this, 0, out_qparams_);
    }

    MeasureQuantizationError_();
  }
  void ParseDNNLowPOperatorArguments_() {
    if (!arguments_parsed_) {
      dnnlowp::ParseDNNLowPOperatorArguments(
          this,
          &dequantize_output_,
          &measure_quantization_error_,
          &followed_by_);
      arguments_parsed_ = true;
    }
  }
  void GetOutputQuantizationParams_() {
    using namespace dnnlowp;

    ParseDNNLowPOperatorArguments_();

    if (HasStaticQuantization(this)) {
      out_qparams_ = GetStaticQuantizationParamsOf(this, 0);
      if (measure_quantization_error_) {
        // Run the fp32 reference so its output can be compared against the
        // quantized output even when static quantization parameters are given.
        Fp32Op_()->DequantizeInput();
        Fp32Op_()->Get()->RunOnDevice();
      }
    } else {
      Fp32Op_()->DequantizeInput();
      Fp32Op_()->Get()->RunOnDevice();
      out_qparams_ = Fp32Op_()->GetOutputQuantizationParams(qfactory_.get());
    }
  }
  // Lazily wraps the floating-point operator with quantized inputs of type T
  // so it can serve as a reference implementation.
  OpWrapper<FP32_OP, T>* Fp32Op_() {
    if (!fp32_op_) {
      fp32_op_.reset(new OpWrapper<FP32_OP, T>(this, qfactory_.get()));
    }
    return fp32_op_.get();
  }

  bool dequantize_output_{false}, measure_quantization_error_{false};
  std::string followed_by_;

  std::vector<dnnlowp::TensorQuantizationParams> in_qparams_;
  dnnlowp::TensorQuantizationParams out_qparams_;

  std::unique_ptr<OpWrapper<FP32_OP, T>> fp32_op_;
  std::unique_ptr<dnnlowp::QuantizationFactory> qfactory_;

  // Temporary buffer for the quantized output when dequantize_output_ is set.
  std::vector<T> out_temp_;

  dnnlowp::QuantizationErrorStats quantization_error_stats_;

  bool arguments_parsed_{false};
}; // class DNNLowPOp
#define USE_DNNLOWP_OPERATOR_BASE_FUNCTIONS(T, FP32_OP) \
  using BaseType = DNNLowPOp<T, FP32_OP>;               \
  using BaseType::GetOutputQuantizationParams_;         \
  using BaseType::GetQuantizedOutputData_;              \
  using BaseType::Fp32Op_;                              \
  using BaseType::InputTensorCPU_;                      \
  using BaseType::MeasureQuantizationError_;            \
  using BaseType::OutputTensorCPU_;                     \
  using BaseType::RunOnDeviceEpilogue_;                 \
  using BaseType::dequantize_output_;                   \
  using BaseType::followed_by_;                         \
  using BaseType::in_qparams_;                          \
  using BaseType::measure_quantization_error_;          \
  using BaseType::out_qparams_;                         \
  using BaseType::qfactory_;

inline int dnnlowp_get_num_threads() {
#ifdef _OPENMP
  return omp_get_num_threads();
#else
  return 1;
#endif
}
inline int dnnlowp_get_max_threads() {
#ifdef _OPENMP
  return omp_get_max_threads();
#else
  return 1;
#endif
}
inline int dnnlowp_get_thread_num() {
#ifdef _OPENMP
  return omp_get_thread_num();
#else
  return 0;
#endif
}

} // namespace caffe2
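// Usage sketch (illustrative only, not part of the original header): a
// hypothetical derived operator "MyDNNLowPOp" with a hypothetical fp32
// counterpart "MyFP32Op". The typical pattern is to pick the output
// quantization parameters, compute into the quantized output buffer, and
// finish with RunOnDeviceEpilogue_():
//
//   template <typename T>
//   class MyDNNLowPOp final : public DNNLowPOp<T, MyFP32Op> {
//    public:
//     USE_DNNLOWP_OPERATOR_BASE_FUNCTIONS(T, MyFP32Op);
//
//     MyDNNLowPOp(const OperatorDef& def, Workspace* ws)
//         : DNNLowPOp<T, MyFP32Op>(def, ws) {}
//
//     bool RunOnDevice() override {
//       GetOutputQuantizationParams_();   // static qparams, or derived from an fp32 run
//       const TensorCPU& X = InputTensorCPU_(0);
//       OutputTensorCPU_(0)->ResizeLike(X);
//       T* Y = GetQuantizedOutputData_(); // quantized buffer (temporary if dequantizing)
//       // ... compute Y from X in fixed point ...
//       RunOnDeviceEpilogue_();           // dequantize and/or propagate out_qparams_
//       return true;
//     }
//   };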