Caffe2 - C++ API
A deep learning, cross platform ML framework
elementwise_linear_dnnlowp_op.cc
1 
17 #include "elementwise_linear_dnnlowp_op.h"
18 
19 namespace caffe2 {
20 
21 using namespace dnnlowp;
22 
23 template <typename T>
24 ElementwiseLinearDNNLowPOp<T>::ElementwiseLinearDNNLowPOp(
25  const OperatorDef& operator_def,
26  Workspace* ws)
27  : BaseType(operator_def, ws),
28  axis_(this->template GetSingleArgument<int>("axis", 1)) {}
29 
30 template <typename T>
31 bool ElementwiseLinearDNNLowPOp<T>::RunOnDevice() {
32  if (!GetQuantizationParameters_()) {
33  return false;
34  }
35 
36  const auto& X = InputTensorCPU_(0);
37  const auto& a = InputTensorCPU_(1);
38  const auto& b = InputTensorCPU_(2);
39  auto* Y = OutputTensorCPU_(0);
40 
41  const auto canonical_axis = X.canonical_axis_index(axis_);
42  const int N = X.size_to_dim(canonical_axis);
43  const int D = X.size_from_dim(canonical_axis);
44 
45  CAFFE_ENFORCE_EQ(a.ndim(), 1, a.ndim());
46  CAFFE_ENFORCE_EQ(a.size(0), D, a.ndim());
47  CAFFE_ENFORCE_EQ(b.ndim(), 1, b.ndim());
48  CAFFE_ENFORCE_EQ(b.size(0), D, b.ndim());
49 
50  Y->ResizeLike(X);
51 
52  // Quantize X
53  vector<T> X_temp;
54  const T* X_quantized =
55  QuantizeInputIfNeeded<T>(this, 0, in_qparams_[0], X_temp);
56 
57  // Quantize b
58  vector<int32_t> b_quantized(b.numel());
59  const float* b_data = b.template data<float>();
60 #ifdef _OPENMP
61 #pragma omp parallel for
62 #endif
63  for (int i = 0; i < b.numel(); ++i) {
64  b_quantized[i] = fbgemm::Quantize<int32_t>(
65  b_data[i],
66  0,
67  in_qparams_[0].scale * in_qparams_[1].scale,
68  32,
69  true /* signed */);
70  }
71 
72  T* Y_quantized = GetQuantizedOutputData_();
73 #ifdef _OPENMP
74 #pragma omp parallel for
75 #endif
76  for (int n = 0; n < N; ++n) {
77  for (int d = 0; d < D; ++d) {
78  int32_t raw = (X_quantized[n * D + d] - in_qparams_[0].zero_point) *
79  (a_quantized_[d] - in_qparams_[1].zero_point) +
80  b_quantized[d];
81  Y_quantized[n * D + d] =
82  fbgemm::Requantize<T>(raw, requantization_params_);
83  }
84  }
85 
86  RunOnDeviceEpilogue_();
87 
88  return true;
89 }
90 
91 template <typename T>
92 bool ElementwiseLinearDNNLowPOp<T>::GetQuantizationParameters_() {
93  using namespace dnnlowp;
94 
95  // Choose quantization for X
96  in_qparams_[0] = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
97 
98  // Quantize a
99  if (a_quantized_.empty()) {
100  const auto& a = InputTensorCPU_(1);
101  in_qparams_[1] = qfactory_->ChooseQuantizationParams(
102  a.template data<float>(), a.numel(), true /*weight*/);
103 
104  a_quantized_.resize(a.numel());
105  fbgemm::Quantize<T>(
106  a.template data<float>(),
107  a_quantized_.data(),
108  a_quantized_.size(),
109  in_qparams_[1]);
110  }
111 
112  GetOutputQuantizationParams_();
113 
114  float real_multiplier =
115  in_qparams_[0].scale * in_qparams_[1].scale / out_qparams_.scale;
116  requantization_params_ =
117  qfactory_->ChooseRequantizationMultiplier(real_multiplier, out_qparams_);
118 
119  return true;
120 }
121 
122 REGISTER_CPU_OPERATOR_WITH_ENGINE(
123  ElementwiseLinear,
124  DNNLOWP,
125  ElementwiseLinearDNNLowPOp<uint8_t>);
126 REGISTER_CPU_OPERATOR_WITH_ENGINE(
127  Int8ElementwiseLinear,
128  DNNLOWP,
129  ElementwiseLinearDNNLowPOp<uint8_t>);
130 
131 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime, and also utility functions to load modules.
Definition: blob.h:13
Definition: static.cpp:70