Caffe2 - C++ API
A deep learning, cross-platform ML framework
op_wrapper.h
#pragma once

#include "caffe2/core/operator.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
#include "caffe2/quantization/server/dnnlowp.h"

namespace caffe2 {

/**
 * Wrap a floating-point operator with quantized inputs with type T.
 * This runs an fp32 "shadow" copy of a quantized operator, e.g., for
 * measuring quantization error.
 */
template <typename OpType, typename T>
class OpWrapper {
 public:
  OpWrapper(OperatorBase* op, dnnlowp::QuantizationFactory* qfactory)
      : op_(op), qfactory_(qfactory) {
    // Mirror the wrapped operator's inputs and outputs in a local workspace.
    for (auto name : op->debug_def().input()) {
      local_input_blobs_.push_back(local_ws_.CreateBlob(name));
      CHECK_NOTNULL(local_input_blobs_.back());
    }
    OperatorDef def = op->debug_def();
    local_op_.reset(new OpType(def, &local_ws_));
    for (auto name : def.output()) {
      local_output_blobs_.push_back(local_ws_.GetBlob(name));
      CHECK_NOTNULL(local_output_blobs_.back());
    }
  }

  void DequantizeInput() {
    const OperatorDef& def = op_->debug_def();
    CPUContext context(def.device_option());

    for (int i = 0; i < op_->InputSize(); ++i) {
      if (op_->InputIsType<int8::Int8TensorCPU>(i)) {
        const TensorCPU& qtensor = op_->Input<int8::Int8TensorCPU>(i).t;
        TensorCPU* float_tensor =
            BlobGetMutableTensor(local_input_blobs_[i], CPU);
        // FIXME: doesn't work for bias, so we shouldn't quantize bias before
        // model loading when we're running a shadow operator in fp32, for
        // example for measuring quantization error.
        float_tensor->ResizeLike(qtensor);
        fbgemm::Dequantize<T>(
            qtensor.data<T>(),
            float_tensor->template mutable_data<float>(),
            qtensor.numel(),
            dnnlowp::GetInputTensorQuantizationParamsOf(op_, i, qfactory_));
      } else {
        // Non-quantized inputs are shared into the local workspace as-is.
        local_input_blobs_[i]->ShareExternal(
            const_cast<void*>(op_->Inputs()[i]->GetRaw()),
            op_->Inputs()[i]->meta());
      }
    }
  }

  OpType* Get() {
    return local_op_.get();
  }

  dnnlowp::TensorQuantizationParams GetOutputQuantizationParams(
      dnnlowp::QuantizationFactory* qfactory,
      int index = 0) {
    using namespace dnnlowp;

    float min, max;
    auto& out_tensor = local_output_blobs_[index]->template Get<TensorCPU>();
    fbgemm::FindMinMax(
        out_tensor.template data<float>(), &min, &max, out_tensor.numel());
    // An op followed by a Relu clips negative outputs anyway, so narrow
    // the range accordingly.
    if (op_->OperatorBase::GetSingleArgument<std::string>("followed_by", "") ==
        "Relu") {
      min = std::max(0.0f, min);
      max = std::max(0.0f, max);
    }

    return qfactory->ChooseQuantizationParams(min, max);
  }

 private:
  OperatorBase* op_; /* containing quantized op */
  Workspace local_ws_;
  std::vector<Blob*> local_input_blobs_;
  std::vector<Blob*> local_output_blobs_;
  std::unique_ptr<OpType> local_op_; /* contained fp32 reference op */
  dnnlowp::QuantizationFactory* qfactory_;
};

} // namespace caffe2
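
A minimal usage sketch, along the lines of how the DNNLOWP operators drive this wrapper: wrap a quantized operator together with its fp32 reference type, dequantize its inputs into the local workspace, run the fp32 shadow op, and derive output quantization parameters. The function name MeasureAgainstFp32, the choice of FullyConnectedOp<CPUContext> as the reference type, and the quant_op argument are illustrative assumptions, not part of this header.

// Sketch only: quant_op is a placeholder for an already-constructed DNNLOWP
// operator whose fp32 reference is FullyConnectedOp<CPUContext>.
#include "caffe2/operators/fully_connected_op.h"
#include "caffe2/quantization/server/dnnlowp.h"
#include "caffe2/quantization/server/op_wrapper.h"

using namespace caffe2;

void MeasureAgainstFp32(OperatorBase* quant_op) {
  dnnlowp::QuantizationFactory* qfactory =
      dnnlowp::QuantizationFactory::GetDefaultInstance();

  // uint8_t matches the Int8TensorCPU element type of the quantized inputs.
  OpWrapper<FullyConnectedOp<CPUContext>, uint8_t> wrapper(quant_op, qfactory);

  wrapper.DequantizeInput();            // copy/dequantize inputs locally
  CAFFE_ENFORCE(wrapper.Get()->Run());  // run the fp32 shadow operator
  dnnlowp::TensorQuantizationParams out_qparams =
      wrapper.GetOutputQuantizationParams(qfactory);
  // out_qparams.scale / out_qparams.zero_point now describe the fp32 output
  // range and can be compared against the quantized op's output.
}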
Symbols referenced in this file:

Blob* CreateBlob(const string& name)
  Creates a blob of the given name.
  Definition: workspace.cc:100

TensorQuantizationParams ChooseQuantizationParams(float min, float max, int precision, bool preserve_sparsity, bool is_signed=false) const
  Choose quantization scale and zero_point that maps floating-point range [min, max] to the integer range…
  Definition: dnnlowp.h:46

CPUContext
  The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement…
  Definition: context.h:40

Workspace
  Workspace is a class that holds all the related objects created during runtime: (1) all blobs…
  Definition: workspace.h:47

const Blob* GetBlob(const string& name) const
  Gets the blob with the given name as a const pointer.
  Definition: workspace.cc:160

  A global dictionary that holds information about what Caffe2 modules have been loaded in the current…
  Definition: blob.h:13

OpWrapper
  Wrap a floating-point operator with quantized inputs with type T.
  Definition: op_wrapper.h:15
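
For intuition about the ChooseQuantizationParams entry above: it picks a scale and zero_point that map the floating-point range [min, max] onto the integer range. The sketch below shows the standard affine-quantization arithmetic only; it is a simplification, not dnnlowp's implementation, which additionally handles the precision, preserve_sparsity, and is_signed parameters shown in the signature above.

// Illustrative only: the textbook affine mapping behind quantization
// parameter choice. The struct and function names here are placeholders.
#include <algorithm>
#include <cmath>
#include <cstdint>

struct QParams {
  float scale;
  int32_t zero_point;
};

QParams ChooseAffineParams(float min, float max, int precision = 8) {
  // Extend the range to include 0 so that real zero is exactly representable.
  min = std::min(min, 0.0f);
  max = std::max(max, 0.0f);

  const int32_t qmin = 0;
  const int32_t qmax = (1 << precision) - 1;  // 255 for 8 bits

  float scale = (max - min) / static_cast<float>(qmax - qmin);
  if (scale == 0.0f) scale = 1.0f;  // degenerate all-zero tensor

  // zero_point is the integer that real value 0 maps to.
  int32_t zero_point =
      static_cast<int32_t>(std::nearbyint(qmin - min / scale));
  return {scale, zero_point};
}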