Caffe2 - C++ API
A deep learning, cross platform ML framework
quantize_dnnlowp_op.cc
1 #include "quantize_dnnlowp_op.h"
2 #include "dnnlowp_op.h"
3 
4 #ifdef _OPENMP
5 #include <omp.h>
6 #endif
7 
8 #include "caffe2/core/tensor_int8.h"
9 #include "caffe2_dnnlowp_utils.h"
10 #include "dnnlowp_partition.h"
11 
12 namespace caffe2 {
13 
14 using namespace std;
15 
16 template <typename T>
17 QuantizeDNNLowPOp<T>::QuantizeDNNLowPOp(
18  const OperatorDef& operator_def,
19  Workspace* ws)
20  : Operator<CPUContext>(operator_def, ws),
21  qfactory_(dnnlowp::GetQuantizationFactoryOf(this)) {}
22 
23 template <typename T>
24 bool QuantizeDNNLowPOp<T>::RunOnDevice() {
25  using namespace dnnlowp;
26 
27  if (!arguments_parsed_) {
28  dnnlowp::ParseDNNLowPOperatorArguments(this);
29  arguments_parsed_ = true;
30  }
31 
32  CAFFE_ENFORCE(Input(0).template IsType<float>());
33 
34  TensorQuantizationParams in_qparams;
35  if (HasStaticQuantization(this)) {
36  in_qparams = GetStaticQuantizationParamsOf(this, 0);
37  } else {
38  in_qparams = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
39  }
40  int8::Int8TensorCPU* output =
41  Outputs()[0]->template GetMutable<int8::Int8TensorCPU>();
42  output->t.ResizeLike(Input(0));
43 
44  const float* in_data = Input(0).template data<float>();
45  T* out_data = output->t.template mutable_data<T>();
46 
47 #ifdef _OPENMP
48 #pragma omp parallel
49 #endif
50  {
51  int i_begin, i_end;
52  tie(i_begin, i_end) = Get1DPartition(
53  Input(0).numel(), dnnlowp_get_num_threads(), dnnlowp_get_thread_num());
54  fbgemm::Quantize<T>(
55  in_data + i_begin, out_data + i_begin, i_end - i_begin, in_qparams);
56  }
57 
58  PropagateOutputTensorQuantizationParams(this, 0, in_qparams);
59 
60  return true;
61 }
62 
63 OPERATOR_SCHEMA(Quantize)
64  .NumInputs(1)
65  .NumOutputs(1)
66  .IdenticalTypeAndShapeOfInput(0);
67 
68 REGISTER_CPU_OPERATOR_WITH_ENGINE(
69  Quantize,
70  DNNLOWP,
71  QuantizeDNNLowPOp<uint8_t>);
72 REGISTER_CPU_OPERATOR_WITH_ENGINE(
73  Quantize,
74  DNNLOWP_ROWWISE,
75  QuantizeDNNLowPOp<uint8_t>);
76 
77 REGISTER_CPU_OPERATOR_WITH_ENGINE(
78  Quantize,
79  DNNLOWP_16,
80  QuantizeDNNLowPOp<uint16_t>);
81 REGISTER_CPU_OPERATOR_WITH_ENGINE(
82  Quantize,
83  DNNLOWP_ROWWISE_16,
84  QuantizeDNNLowPOp<uint16_t>);
85 
86 REGISTER_CPU_OPERATOR_WITH_ENGINE(
87  Int8Quantize,
88  DNNLOWP,
89  QuantizeDNNLowPOp<uint8_t>);
90 REGISTER_CPU_OPERATOR_WITH_ENGINE(
91  Int8Quantize,
92  DNNLOWP_ROWWISE,
93  QuantizeDNNLowPOp<uint8_t>);
94 
95 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13