1 #include "caffe2/operators/elementwise_mul_op.h"     2 #include "caffe2/quantization/server/elementwise_dnnlowp_op.h"     3 #include "caffe2/quantization/server/op_wrapper.h"     4 #include "caffe2/quantization/server/sigmoid.h"    12     BinaryElementwiseOp<NumericTypes, CPUContext, MulFunctor<CPUContext>>;
    18   USE_DNNLOWP_OPERATOR_BASE_FUNCTIONS(
T, 
MulFp32Op);
    26   bool RunOnDevice()
 override {
    27     if (!GetQuantizationParameters_()) {
    31     const auto& 
A = InputTensorCPU_(0);
    32     const auto& 
B = InputTensorCPU_(1);
    33     auto* 
C = OutputTensorCPU_(0);
    35         &
B != 
C || !enable_broadcast_,
    36         "In-place is allowed only with the first tensor when broadcasting");
    40     vector<T> A_temp, B_temp;
    41     const T* A_quantized =
    42         QuantizeInputIfNeeded<T>(
this, 0, in_qparams_[0], A_temp);
    43     const T* B_quantized =
    44         QuantizeInputIfNeeded<T>(
this, 1, in_qparams_[1], B_temp);
    46     T* C_quantized = GetQuantizedOutputData_();
    48     if (!enable_broadcast_) {
    52           "Dimension mismatch - did you forget to set broadcast=1?");
    54 #pragma omp parallel for    56       for (
int i = 0; i < 
C->size(); ++i) {
    57         int32_t raw = (A_quantized[i] - in_qparams_[0].zero_point) *
    58             (B_quantized[i] - in_qparams_[1].zero_point);
    59         C_quantized[i] = fbgemm::Requantize<T>(raw, requantization_params_);
    61     } 
else if (
B.size() == 1) {
    63 #pragma omp parallel for    65       for (
int i = 0; i < 
C->size(); ++i) {
    66         int32_t raw = (A_quantized[i] - in_qparams_[0].zero_point) *
    67             (B_quantized[0] - in_qparams_[1].zero_point);
    68         C_quantized[i] = fbgemm::Requantize<T>(raw, requantization_params_);
    72       std::tie(pre, n, post) =
    73           elementwise_ops_utils::ComputeLegacyBroadcastSizes(
A, 
B, axis_);
    75 #pragma omp parallel for    77       for (
int i = 0; i < pre; ++i) {
    78         for (
int j = 0; j < n; ++j) {
    79           for (
int k = 0; k < post; ++k) {
    80             int32_t raw = (A_quantized[((i * n) + j) * post + k] -
    81                            in_qparams_[0].zero_point) *
    82                 (B_quantized[j] - in_qparams_[1].zero_point);
    83             C_quantized[((i * n) + j) * post + k] =
    84                 fbgemm::Requantize<T>(raw, requantization_params_);
    90     RunOnDeviceEpilogue_();
    96   bool GetQuantizationParameters_() {
    99         GetInputTensorQuantizationParamsOf(
this, 0, qfactory_.get());
   101         GetInputTensorQuantizationParamsOf(
this, 1, qfactory_.get());
   103     GetOutputQuantizationParams_();
   105     float real_multiplier =
   106         in_qparams_[0].scale * in_qparams_[1].scale / out_qparams_.scale;
   107     requantization_params_ = qfactory_->ChooseRequantizationMultiplier(
   108         real_multiplier, out_qparams_);
 
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
 
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
 
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
 
Performs bound shape inference given a Caffe2 (C2) net.