3 #include "caffe2/operators/utility_ops.h" 4 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h" 5 #include "caffe2/quantization/server/dnnlowp.h" 6 #include "caffe2/quantization/server/dnnlowp_op.h" 10 template <
typename T,
bool ReluFused = false>
14 bool RunOnDevice()
override;
20 bool GetQuantizationParameters_();
22 dnnlowp::TensorQuantizationParams intermediate_qparams_;
24 dnnlowp::RequantizationParams out_requantization_params_;
29 static_assert(std::is_integral<T>::value,
"Integral required.");
34 bool RunOnDevice()
override;
36 template <
typename Index>
37 bool DoRunWithType() {
40 auto& data = (this->
template Input<int8::Int8TensorCPU>(DATA)).t;
41 auto& indices =
Input(INDICES);
42 auto* output = &Outputs()[0]->template GetMutable<int8::Int8TensorCPU>()->t;
44 CAFFE_ENFORCE_GE(data.ndim(), 1,
"DATA should be at least 1-D");
45 auto shape = indices.sizes().vec();
46 shape.insert(shape.end(), data.sizes().begin() + 1, data.sizes().end());
47 output->Resize(shape);
49 int block_size = data.size_from_dim(1);
50 auto block_bytesize = data.size_from_dim(1) * data.dtype().itemsize();
51 int N = indices.numel();
53 auto src_base =
static_cast<const char*
>(data.raw_data());
54 const Index* idxs = indices.template data<Index>();
55 auto out =
static_cast<char*
>(output->raw_mutable_data(data.dtype()));
57 for (
int i = 0; i < N; ++i) {
60 0 <= idx && idx < data.size(0),
61 "INDICES element is out of DATA bounds, id=",
65 auto src = src_base + idx * block_bytesize;
66 context_.CopyItemsSameDevice(
67 data.dtype(), block_size, src, out + block_bytesize * i);
80 return fp32_op_.get();
83 std::unique_ptr<OpWrapper<GatherOp<CPUContext>,
T>> fp32_op_;
84 bool dequantize_output_{
false}, measure_quantization_error_{
false};
86 std::unique_ptr<dnnlowp::QuantizationFactory> qfactory_;
90 bool arguments_parsed_{
false};
95 template <
typename T,
bool ReluFused>
96 void ElementWiseSumAVX2(
102 int32_t a_zero_point,
104 int32_t b_zero_point,
106 int32_t c_zero_points);
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
const Tensor & Input(int idx, DeviceType type=CPUContext::GetDeviceType())
Retrieve a non-owning reference to the input at position 'idx' for this operator. ...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
A convenient base class for C2 operators with DNNLOWP engine.
Wrap a floating-point operator with quantized inputs with type T.