Caffe2 - C++ API
A deep learning, cross platform ML framework
elementwise_dnnlowp_op.h
1 #pragma once
2 
3 #include "caffe2/core/tensor_int8.h"
4 #include "caffe2/operators/elementwise_ops.h"
5 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
6 #include "caffe2/quantization/server/dnnlowp_op.h"
7 #include "caffe2/quantization/server/sigmoid.h"
8 
9 namespace caffe2 {
10 
11 template <typename T, class Functor>
12 class UnaryElementwiseWithArgsDNNLowPOp : public Operator<CPUContext> {
13  public:
14  USE_OPERATOR_FUNCTIONS(CPUContext);
16  const OperatorDef& operator_def,
17  Workspace* ws)
18  : Operator<CPUContext>(operator_def, ws), functor_() {}
19 
20  bool RunOnDevice() override {
21  if (!arguments_parsed_) {
22  dnnlowp::ParseDNNLowPOperatorArguments(this);
23  dnnlowp::SetStaticQuantizationParams(
24  this, 0, functor_.GetOutputQuantizationParams());
25  arguments_parsed_ = true;
26  }
27 
28  auto& input = this->template Input<int8::Int8TensorCPU>(0).t;
29  auto& output = Outputs()[0]->template GetMutable<int8::Int8TensorCPU>()->t;
30  output.ResizeLike(input);
31  functor_(
32  input.size(),
33  input.template data<T>(),
34  output.template mutable_data<T>());
35 
36  dnnlowp::PropagateOutputTensorQuantizationParams(
37  this, 0, functor_.GetOutputQuantizationParams());
38  return true;
39  }
40 
41  private:
42  Functor functor_;
43  bool arguments_parsed_{false};
44 };
45 
46 template <typename T, typename FP32_OP>
48  public:
49  USE_OPERATOR_FUNCTIONS(CPUContext);
50  BinaryElementwiseDNNLowPOp(const OperatorDef& operator_def, Workspace* ws)
51  : DNNLowPOp<T, FP32_OP>(operator_def, ws),
52  OP_SINGLE_ARG(bool, "broadcast", enable_broadcast_, 0),
53  OP_SINGLE_ARG(int, "axis", axis_, -1),
54  OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
55  OP_SINGLE_ARG(string, "order", order_, "NCHW") {
56  // Figure out the correct axis to use.
57  if (enable_broadcast_) {
58  if (axis_ != -1) {
59  // Get axis from an explicit axis argument.
60  CAFFE_ENFORCE_EQ(
61  axis_str_.size(),
62  0,
63  "Args axis and axis_str cannot be used simultaneously.");
64  } else if (axis_str_.size()) {
65  // Get the axis index semantically.
66  CAFFE_ENFORCE_EQ(
67  axis_str_.size(), 1, "Unsupported axis string", axis_str_);
68  size_t semantic_axis_ = order_.find(axis_str_);
69  CAFFE_ENFORCE_NE(
70  semantic_axis_,
71  string::npos,
72  "Unrecognizable axis string ",
73  axis_str_,
74  " from order string ",
75  order_);
76  axis_ = semantic_axis_;
77  }
78  } else {
79  CAFFE_ENFORCE(
80  axis_ == -1 && axis_str_.size() == 0,
81  "Do not specify axis or axis_str if broadcast is not enabled.");
82  }
83  }
84 
85  protected:
86  bool enable_broadcast_;
87  int axis_;
88  string axis_str_;
89  string order_;
90 
91  dnnlowp::RequantizationParams requantization_params_;
92 }; // BinaryElementwiseDNNLowPOp
93 
// For arithmetic operators, Eigen provides a good way to vectorize even
// when broadcasting.
//
// DECLARE_EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) defines a
// struct Eigen<name>Functor with three member templates, each of which
// applies eigen_op through Eigen array maps:
//
//   Run<b_is_scalar>(n, a, b, out, ctx)
//       out[0..n) = eigen_op(a[0..n), b[0])    when b_is_scalar is non-zero,
//       out[0..n) = eigen_op(a[0..n), b[0..n)) otherwise.
//
//   RunWithBroadcast(a, b, out, pre, n, ctx)
//       Maps a and out as n x pre arrays and combines every column of a
//       with the length-n vector b.
//
//   RunWithBroadcast2(a, b, out, pre, n, post, ctx)
//       For each of the pre consecutive slabs of n * post elements, maps the
//       slab as a post x n array and combines every row with the length-n
//       row vector b.
//
// The input_type and output_type macro parameters are not referenced by the
// expansion; the element types come from the template parameters T and R.
// The CPUContext* argument is unused by all three methods.
#define DECLARE_EIGEN_FUNCTOR(name, eigen_op, input_type, output_type)       \
  struct Eigen##name##Functor {                                              \
    template <int b_is_scalar, typename T, typename R>                       \
    inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
      if (b_is_scalar) {                                                     \
        EigenVectorArrayMap<R>(out, n) =                                     \
            eigen_op((ConstEigenVectorArrayMap<T>(a, n)), (b[0]));           \
      } else {                                                               \
        EigenVectorArrayMap<R>(out, n) = eigen_op(                           \
            (ConstEigenVectorArrayMap<T>(a, n)),                             \
            (ConstEigenVectorArrayMap<T>(b, n)));                            \
      }                                                                      \
    }                                                                        \
    template <typename T, typename R>                                        \
    void RunWithBroadcast(                                                   \
        const T* a,                                                          \
        const T* b,                                                          \
        R* out,                                                              \
        size_t pre,                                                          \
        size_t n,                                                            \
        CPUContext*) {                                                       \
      EigenArrayMap<R>(out, n, pre) = eigen_op(                              \
          (ConstEigenArrayMap<T>(a, n, pre).colwise()),                      \
          (ConstEigenVectorArrayMap<T>(b, n)));                              \
    }                                                                        \
    template <typename T, typename R>                                        \
    void RunWithBroadcast2(                                                  \
        const T* a,                                                          \
        const T* b,                                                          \
        R* out,                                                              \
        size_t pre,                                                          \
        size_t n,                                                            \
        size_t post,                                                         \
        CPUContext*) {                                                       \
      /* size_t index: matches the type of pre and cannot overflow. */       \
      for (size_t i = 0; i < pre; ++i) {                                     \
        EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op(            \
            (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()),    \
            (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n)));   \
      }                                                                      \
    }                                                                        \
  };
137 } // namespace caffe2
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Definition: context.h:40
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
A convenient base class for C2 operators with DNNLOWP engine.
Definition: dnnlowp_op.h:77