doxygen-c/html/op__wrapper_8h_source.html

 #pragma once

 #include "caffe2/core/operator.h"
 #include "caffe2/core/tensor_int8.h"
 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
 #include "caffe2/quantization/server/dnnlowp.h"

 namespace caffe2 {

 /**
  * Wraps a floating-point operator of type OpType so it can be run on the
  * inputs of a quantized operator whose quantized element type is T.
  *
  * The wrapper owns a private Workspace. On construction it creates one local
  * blob per input name of the containing operator's definition, constructs the
  * OpType instance against that private workspace, and looks up one local blob
  * per output name.
  *
  * Typical use, as far as this header shows: call DequantizeInput() to fill the
  * local input blobs, run the operator returned by Get(), then call
  * GetOutputQuantizationParams() to derive quantization parameters from the
  * float output.
  *
  * @tparam OpType  floating-point operator type; must be constructible from
  *                 (OperatorDef, Workspace*).
  * @tparam T       element type of the quantized input tensors, as passed to
  *                 fbgemm::Dequantize<T>.
  */
 template <typename OpType, typename T>
 class OpWrapper {
  public:
   /**
    * @param op        containing quantized operator. Stored as a raw pointer
    *                  and never deleted here, so the caller keeps ownership
    *                  and must keep it alive while this wrapper is used.
    * @param qfactory  quantization factory forwarded to
    *                  dnnlowp::GetInputTensorQuantizationParamsOf() in
    *                  DequantizeInput(). Also stored as a raw, non-owned
    *                  pointer.
    */
   OpWrapper(OperatorBase* op, dnnlowp::QuantizationFactory* qfactory)
       : op_(op), qfactory_(qfactory) {
     // The definition is only read here, so bind it by reference instead of
     // copying the whole message.
     const OperatorDef& def = op->debug_def();

     // Input blobs are created before the wrapped operator is constructed.
     // Names are bound by const reference to avoid copying each string.
     for (const auto& name : def.input()) {
       local_input_blobs_.push_back(local_ws_.CreateBlob(name));
       CHECK_NOTNULL(local_input_blobs_.back());
     }

     local_op_.reset(new OpType(def, &local_ws_));

     // Output blobs are looked up rather than created here.
     // NOTE(review): presumably the OpType constructor creates them in
     // local_ws_ -- confirm. A missing blob trips the CHECK_NOTNULL below.
     for (const auto& name : def.output()) {
       local_output_blobs_.push_back(local_ws_.GetBlob(name));
       CHECK_NOTNULL(local_output_blobs_.back());
     }
   }

   /**
    * Fills the local input blobs from the inputs of the containing operator.
    *
    * Inputs holding an int8::Int8TensorCPU are dequantized into a float tensor
    * in the matching local blob, using the parameters returned by
    * dnnlowp::GetInputTensorQuantizationParamsOf(). Every other input is
    * attached to the matching local blob through Blob::ShareExternal().
    */
   void DequantizeInput() {
     const OperatorDef& def = op_->debug_def();
     // `context` is not referenced below.
     // NOTE(review): kept because constructing a CPUContext from the device
     // option may have effects not visible in this file -- confirm before
     // removing.
     CPUContext context(def.device_option());

     const int num_inputs = op_->InputSize();
     for (int i = 0; i < num_inputs; ++i) {
       if (!op_->InputIsType<int8::Int8TensorCPU>(i)) {
         // Not a quantized tensor: expose the caller's object through the
         // local blob. The const_cast is required by ShareExternal's
         // signature.
         local_input_blobs_[i]->ShareExternal(
             const_cast<void*>(op_->Inputs()[i]->GetRaw()),
             op_->Inputs()[i]->meta());
         continue;
       }

       const TensorCPU& qtensor = op_->Input<int8::Int8TensorCPU>(i).t;
       TensorCPU* float_tensor =
           BlobGetMutableTensor(local_input_blobs_[i], CPU);
       // FIXME: doesn't work for bias so we shouldn't quantize bias before
       // model loading when we're running a shadow operator in fp32 for
       // example for measuring quantization error.
       float_tensor->ResizeLike(qtensor);
       fbgemm::Dequantize<T>(
           qtensor.data<T>(),
           float_tensor->template mutable_data<float>(),
           qtensor.numel(),
           dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
     }
   }

   /**
    * @return non-owning pointer to the wrapped floating-point operator; the
    *         wrapper retains ownership.
    */
   OpType* Get() {
     return local_op_.get();
   }

   /**
    * Chooses quantization parameters for one output of the wrapped operator
    * from the min/max of its current float contents.
    *
    * If the containing operator has the string argument "followed_by" set to
    * "Relu", both bounds are clamped to be non-negative before the parameters
    * are chosen.
    *
    * @param qfactory  factory whose ChooseQuantizationParams(min, max) is
    *                  called. This is the argument, not the factory passed to
    *                  the constructor.
    * @param index     position of the output in the operator definition's
    *                  output list (default 0).
    * @return the parameters chosen by qfactory for the observed range.
    * @throws std::out_of_range if index is not a valid output position.
    */
   dnnlowp::TensorQuantizationParams GetOutputQuantizationParams(
       dnnlowp::QuantizationFactory* qfactory,
       int index = 0) {
     using namespace dnnlowp;

     float min = 0.0f;
     float max = 0.0f;
     // at() rejects an out-of-range index instead of reading past the end of
     // the vector.
     const auto& out_tensor =
         local_output_blobs_.at(index)->template Get<TensorCPU>();
     fbgemm::FindMinMax(
         out_tensor.template data<float>(), &min, &max, out_tensor.numel());

     const bool followed_by_relu =
         op_->OperatorBase::GetSingleArgument<std::string>("followed_by", "") ==
         "Relu";
     if (followed_by_relu) {
       min = std::max(0.0f, min);
       max = std::max(0.0f, max);
     }

     return qfactory->ChooseQuantizationParams(min, max);
   }

  private:
   OperatorBase* op_; /* container quantized op (not owned) */
   Workspace local_ws_; /* private workspace holding the local blobs */
   std::vector<Blob*> local_input_blobs_; /* one per input name, in order */
   std::vector<Blob*> local_output_blobs_; /* one per output name, in order */
   std::unique_ptr<OpType> local_op_; /* contained fp32 reference op */
   dnnlowp::QuantizationFactory* qfactory_; /* not owned */
 };

 } // namespace caffe2
caffe2::Workspace::CreateBlob
Blob * CreateBlob(const string &name)
Creates a blob of the given name.
Definition: workspace.cc:100

dnnlowp
Definition: caffe2_dnnlowp_utils.cc:21

dnnlowp::QuantizationFactory::ChooseQuantizationParams
TensorQuantizationParams ChooseQuantizationParams(float min, float max, int precision, bool preserve_sparsity, bool is_signed=false) const
Choose quantization scale and zero_point that maps floating-point range [min, max] to the integer range of the specified precision.
Definition: dnnlowp.h:46

T
Definition: dataloader.cpp:482

caffe2::CPUContext
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement.
Definition: context.h:40

nom::repr::Tensor
Definition: NeuralNet.h:158

caffe2::Workspace
Workspace is a class that holds all the related objects created during runtime: (1) all blobs, and (2) all instantiated networks.
Definition: workspace.h:47

caffe2::Workspace::GetBlob
const Blob * GetBlob(const string &name) const
Gets the blob with the given name as a const pointer.
Definition: workspace.cc:160

caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.
Definition: blob.h:13

dnnlowp::QuantizationFactory
Definition: dnnlowp.h:23

caffe2::int8::Int8TensorCPU
Definition: tensor_int8.h:11

caffe2::OperatorBase
Definition: operator.h:38

caffe2::OpWrapper
Wrap a floating-point operator with quantized inputs with type T.
Definition: op_wrapper.h:15