3 #include "caffe2/core/tensor_int8.h" 4 #include "caffe2/operators/elementwise_ops.h" 5 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h" 6 #include "caffe2/quantization/server/dnnlowp_op.h" 7 #include "caffe2/quantization/server/sigmoid.h" 11 template <
typename T,
class Functor>
16 const OperatorDef& operator_def,
// Operator entry point (declared `override`).
// NOTE(review): this listing has gaps -- the embedded original line numbers
// skip 26-27, 31-32, 35 and everything after 37 -- so the closing braces, the
// head of the call that consumes the data pointers, and the return statement
// are not visible here.
20 bool RunOnDevice()
override {
// One-time setup, guarded by arguments_parsed_: parse the DNNLOWP operator
// arguments, then set static quantization params for index 0 from the
// functor's output quantization params.
21 if (!arguments_parsed_) {
22 dnnlowp::ParseDNNLowPOperatorArguments(
this);
23 dnnlowp::SetStaticQuantizationParams(
24 this, 0, functor_.GetOutputQuantizationParams());
// Remember that setup has been done so later calls skip this branch.
25 arguments_parsed_ =
true;
// Input 0 is read as an int8::Int8TensorCPU; its `.t` member is bound by
// reference (presumably the wrapped tensor -- confirm in tensor_int8.h).
28 auto& input = this->
template Input<int8::Int8TensorCPU>(0).t;
// Output 0 is obtained mutably as an int8::Int8TensorCPU, again via `.t`.
29 auto& output = Outputs()[0]->template GetMutable<int8::Int8TensorCPU>()->t;
// Make the output match the input before writing into it.
30 output.ResizeLike(input);
// Trailing arguments of a call whose head is not visible (presumably a
// functor_ invocation -- TODO confirm): the input data read as T and the
// output data written as T.
33 input.template data<T>(),
34 output.template mutable_data<T>());
// Propagate the functor's output quantization params to output index 0.
36 dnnlowp::PropagateOutputTensorQuantizationParams(
37 this, 0, functor_.GetOutputQuantizationParams());
// Starts false; RunOnDevice() sets it to true after its one-time argument
// parsing / static quantization setup so that work is not repeated.
43 bool arguments_parsed_{
false};
46 template <
typename T,
typename FP32_OP>
// Constructor fragment: member initializers followed by validation of the
// broadcast/axis arguments.
// NOTE(review): the constructor name, the heads of several check macros, and
// some closing braces fall in gaps of this listing (embedded original line
// numbers skip 56, 58-62, 65-66, 69-71, 73, 75, 77-79, 82+).
//
// Each OP_SINGLE_ARG names a type, an argument key, a member and a default
// (presumably it initializes the member from the operator argument of that
// name, falling back to the default -- confirm against the macro definition):
//   "broadcast" -> enable_broadcast_ (default 0)
//   "axis"      -> axis_             (default -1)
//   "axis_str"  -> axis_str_         (default "")
//   "order"     -> order_            (default "NCHW")
52 OP_SINGLE_ARG(
bool,
"broadcast", enable_broadcast_, 0),
53 OP_SINGLE_ARG(
int,
"axis", axis_, -1),
54 OP_SINGLE_ARG(
string,
"axis_str", axis_str_,
""),
55 OP_SINGLE_ARG(
string,
"order", order_,
"NCHW") {
// Broadcast enabled: axis and axis_str are mutually exclusive according to
// the message below (the condition that triggers it is not visible here).
57 if (enable_broadcast_) {
63 "Args axis and axis_str cannot be used simultaneously.");
64 }
// axis_str given: it is checked against a size of 1 (check head not visible),
// looked up inside order_, and the resulting position becomes axis_.
else if (axis_str_.size()) {
67 axis_str_.size(), 1,
"Unsupported axis string", axis_str_);
68 size_t semantic_axis_ = order_.find(axis_str_);
// Message of a check whose condition is not visible (presumably that find()
// succeeded -- TODO confirm).
72 "Unrecognizable axis string ",
74 " from order string ",
// NOTE(review): assigns a size_t to axis_, which is initialized above as an
// int argument; this is an implicit narrowing conversion.
76 axis_ = semantic_axis_;
// Remaining check (its head and the enclosing branch are not visible): axis
// and axis_str must both be left at their defaults, per the message below.
80 axis_ == -1 && axis_str_.size() == 0,
81 "Do not specify axis or axis_str if broadcast is not enabled.");
// Data members. NOTE(review): the embedded original line numbers jump from 86
// to 91, so other members declared in between are not visible in this listing.
// Initialized from the "broadcast" operator argument in the constructor's
// initializer list and tested there to choose the axis validation path.
86 bool enable_broadcast_;
// Requantization parameters; not referenced anywhere in the visible portion
// of this listing, so its exact use cannot be stated from here.
91 dnnlowp::RequantizationParams requantization_params_;
// DECLARE_EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) declares a
// struct Eigen<name>Functor whose member templates apply eigen_op to Eigen
// views over raw pointers and assign the result to a view over `out`:
//   - Run: two visible forms, one combining the length-n view of `a` with the
//     single value b[0], the other combining it element-wise with a length-n
//     view of `b` (presumably selected by b_is_scalar -- the branch itself is
//     not visible here).
//   - RunWithBroadcast: `a` and `out` viewed as n x pre arrays; eigen_op is
//     applied colwise against a length-n view of `b`.
//   - RunWithBroadcast2: loops i over [0, pre); each slice at offset
//     i * n * post is viewed as a post x n array and combined rowwise with `b`
//     mapped as a 1 x n row.
// input_type and output_type are not used in the visible portion.
// NOTE(review): the macro is collapsed onto one line with gaps (embedded
// original line numbers skip 100, 103, 107-108, 111-116, 120, 123-129, 134+),
// and unrelated tooltip text from the rendered listing is fused onto its end.
96 #define DECLARE_EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) \ 97 struct Eigen##name##Functor { \ 98 template <int b_is_scalar, typename T, typename R> \ 99 inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \ 101 EigenVectorArrayMap<R>(out, n) = \ 102 eigen_op((ConstEigenVectorArrayMap<T>(a, n)), (b[0])); \ 104 EigenVectorArrayMap<R>(out, n) = eigen_op( \ 105 (ConstEigenVectorArrayMap<T>(a, n)), \ 106 (ConstEigenVectorArrayMap<T>(b, n))); \ 109 template <typename T, typename R> \ 110 void RunWithBroadcast( \ 117 EigenArrayMap<R>(out, n, pre) = eigen_op( \ 118 (ConstEigenArrayMap<T>(a, n, pre).colwise()), \ 119 (ConstEigenVectorArrayMap<T>(b, n))); \ 121 template <typename T, typename R> \ 122 void RunWithBroadcast2( \ 130 for (int i = 0; i < pre; ++i) { \ 131 EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op( \ 132 (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()), \ 133 (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n))); \ The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
A convenient base class for C2 operators with DNNLOWP engine.