#include "caffe2_dnnlowp_utils.h"

#include <cmath>
#include <cstring>

#include "caffe2/core/tensor_int8.h"
#include "caffe2/quantization/server/sigmoid.h"
#include "caffe2/quantization/server/tanh.h"

C10_DECLARE_int32(caffe2_dnnlowp_activation_quantization_precision);
    12 C10_DECLARE_int32(caffe2_dnnlowp_weight_quantization_precision);
    13 C10_DECLARE_int32(caffe2_dnnlowp_requantization_multiplier_precision);
    14 C10_DECLARE_int32(caffe2_dnnlowp_eltwise_quantization_precision);
    15 C10_DECLARE_bool(caffe2_dnnlowp_force_scale_power_of_two);
    16 C10_DECLARE_bool(caffe2_dnnlowp_preserve_activation_sparsity);
    17 C10_DECLARE_bool(caffe2_dnnlowp_preserve_weight_sparsity);
    18 C10_DECLARE_string(caffe2_dnnlowp_activation_quantization_kind);
    19 C10_DECLARE_string(caffe2_dnnlowp_weight_quantization_kind);
    27 static bool HasDNNLowPEngine_(
const OperatorDef& op_def) {
    28   const string ENGINE_PREFIX = 
"DNNLOWP";
    30              op_def.engine().c_str(),
    31              ENGINE_PREFIX.c_str(),
    32              ENGINE_PREFIX.size()) == 0;
    36   return HasDNNLowPEngine_(op.debug_def());
    39 void PropagateOutputTensorQuantizationParams(
    42     const TensorQuantizationParams& qparams) {
    43   LOG_IF(WARNING, !HasDNNLowPEngine_(*op));
    44   Int8TensorCPU* output =
    45       op->Outputs()[idx]->template GetMutable<Int8TensorCPU>();
    46   output->scale = qparams.scale;
    47   output->zero_point = qparams.zero_point;
    50 TensorQuantizationParams GetInputTensorQuantizationParamsOf(
    53     const QuantizationFactory* qfactory,
    55   LOG_IF(WARNING, !HasDNNLowPEngine_(*op));
    57   if (op->InputIsType<Int8TensorCPU>(idx)) {
    58     const Int8TensorCPU& int8_tensor = op->Input<Int8TensorCPU>(idx);
    59     TensorQuantizationParams qparams;
    60     qparams.scale = int8_tensor.scale;
    61     qparams.zero_point = int8_tensor.zero_point;
    62     qparams.precision = qfactory->GetActivationPrecision();
    65     const TensorCPU* tensor = &op->template Input<Tensor>(idx, CPU);
    66     CAFFE_ENFORCE(tensor->template IsType<float>());
    67     CAFFE_ENFORCE(tensor->numel() == 0 || tensor->template data<float>());
    71         tensor->template data<float>(), &min, &max, tensor->numel());
    73     return qfactory->ChooseQuantizationParams(min, max, is_weight);
    77 static string OutputArgumentIdxString_(
int idx) {
    78   return idx == 0 ? 
"" : to_string(idx + 1);
    81 static string OutputScaleArgumentName(
int idx) {
    82   return "Y" + OutputArgumentIdxString_(idx) + 
"_scale";
    85 static string OutputZeroPointArgumentName(
int idx) {
    86   return "Y" + OutputArgumentIdxString_(idx) + 
"_zero_point";
    89 static void SetStaticQuantizationParams_(
    92     const TensorQuantizationParams& qparams) {
    94       OutputScaleArgumentName(output_index), qparams.scale, op_def);
    96       OutputZeroPointArgumentName(output_index), qparams.zero_point, op_def);
    99 void SetStaticQuantizationParams(
   102     const TensorQuantizationParams& qparams) {
   103   LOG_IF(WARNING, !HasDNNLowPEngine_(*op));
   104   auto op_def = make_shared<OperatorDef>();
   105   *op_def = op->debug_def();
   106   SetStaticQuantizationParams_(op_def.get(), output_index, qparams);
   107   op->set_debug_def(op_def);
   110 bool HasStaticQuantization(
   113   LOG_IF(WARNING, !HasDNNLowPEngine_(*op));
   114   return op->HasSingleArgumentOfType<
float>(
   115       OutputScaleArgumentName(output_index));
   118 TensorQuantizationParams GetStaticQuantizationParamsOf(
   121   LOG_IF(WARNING, !HasDNNLowPEngine_(*op));
   122   unique_ptr<QuantizationFactory> qfactory = GetQuantizationFactoryOf(op);
   124   TensorQuantizationParams qparams;
   125   qparams.scale = op->GetSingleArgument<
float>(OutputScaleArgumentName(idx), 0);
   127       op->GetSingleArgument<int32_t>(OutputZeroPointArgumentName(idx), 0);
   128   qparams.precision = qfactory->GetActivationPrecision();
   133 template <
typename T>
   134 const T* QuantizeInputIfNeeded(
   137     const TensorQuantizationParams& qparams,
   145     temp.resize(tensor.numel());
   147         tensor.data<
float>(), temp.data(), temp.size(), qparams);
   152 template <
typename T>
   153 const T* RowWiseQuantizeInputIfNeeded(
   156     const std::vector<TensorQuantizationParams>& qparams,
   164     temp.resize(tensor.numel());
   166     int N = qparams.size();
   167     int rowwidth = temp.size() / N;
   169     for (
int i = 0; i < N; i++) {
   171           tensor.data<
float>() + rowwidth * i,
   172           temp.data() + rowwidth * i,
   180 template const uint8_t* QuantizeInputIfNeeded<uint8_t>(
   183     const TensorQuantizationParams& qparams,
   184     vector<uint8_t>& temp);
   186 template const int8_t* QuantizeInputIfNeeded<int8_t>(
   189     const TensorQuantizationParams& qparams,
   190     vector<int8_t>& temp);
   192 template const uint16_t* QuantizeInputIfNeeded<uint16_t>(
   195     const TensorQuantizationParams& qparams,
   196     vector<uint16_t>& temp);
   198 template const int16_t* QuantizeInputIfNeeded<int16_t>(
   201     const TensorQuantizationParams& qparams,
   202     vector<int16_t>& temp);
   204 template const uint8_t* RowWiseQuantizeInputIfNeeded<uint8_t>(
   207     const std::vector<TensorQuantizationParams>& qparams,
   208     vector<uint8_t>& temp);
   210 template const uint16_t* RowWiseQuantizeInputIfNeeded<uint16_t>(
   213     const std::vector<TensorQuantizationParams>& qparams,
   214     vector<uint16_t>& temp);
   216 void MeasureQuantizationError(
   220     QuantizationErrorStats* stat) {
   221   for (
int i = 0; i < len; ++i) {
   222     stat->sum_sq += ref[i] * ref[i];
   223     float err = actual[i] - ref[i];
   224     stat->sum_err_sq += err * err;
   226     if (fabs(err) > stat->max_abs_err) {
   227       stat->max_abs_err = fabs(err);
   228       stat->max_err_actual = actual[i];
   229       stat->max_err_ref = ref[i];
   235 void ReportQuantizationError(
   237     const QuantizationErrorStats& stat) {
   238   if (stat.sum_sq == 0) {
   239     LOG(INFO) << 
" output " << op->debug_def().output(0) << 
" of operator "   240               << op << 
" with type " << op->debug_def().type()
   241               << 
" has l2 relative error nan (stat.sum_err_sq "   242               << stat.sum_err_sq << 
" stat.sum_sq 0)"   243               << 
" and max abs error " << stat.max_abs_err << 
" (reference is "   244               << stat.max_err_ref << 
" and actual is " << stat.max_err_actual
   246               << 
" sum_err_sq " << stat.sum_err_sq << 
" sum_sq_ " << stat.sum_sq
   247               << 
" cnt " << stat.measure_cnt;
   249     LOG(INFO) << 
" output " << op->debug_def().output(0) << 
" of operator "   250               << op << 
" with type " << op->debug_def().type()
   251               << 
" has l2 relative error "   252               << std::sqrt(stat.sum_err_sq) / std::sqrt(stat.sum_sq)
   253               << 
" and max abs error " << stat.max_abs_err << 
" (reference is "   254               << stat.max_err_ref << 
" and actual is " << stat.max_err_actual
   256               << 
" sum_err_sq " << stat.sum_err_sq << 
" sum_sq_ " << stat.sum_sq
   257               << 
" cnt " << stat.measure_cnt;
   261 static unique_ptr<QuantizationFactory> GetQuantizationFactoryOf_(
   262     const OperatorDef& op_def) {
   263   int activation_precision =
   264       ArgumentHelper::GetSingleArgument<OperatorDef, int>(
   266           "activation_precision",
   267           FLAGS_caffe2_dnnlowp_activation_quantization_precision);
   268   int weight_precision = ArgumentHelper::GetSingleArgument<OperatorDef, int>(
   271       FLAGS_caffe2_dnnlowp_weight_quantization_precision);
   272   int requantization_multiplier_precision =
   273       ArgumentHelper::GetSingleArgument<OperatorDef, int>(
   275           "requantization_multiplier_precision",
   276           FLAGS_caffe2_dnnlowp_requantization_multiplier_precision);
   277   int eltwise_quantization_precision =
   278       ArgumentHelper::GetSingleArgument<OperatorDef, int>(
   280           "eltwise_quantization_precision",
   281           FLAGS_caffe2_dnnlowp_eltwise_quantization_precision);
   282   bool preserve_activation_sparsity =
   283       ArgumentHelper::GetSingleArgument<OperatorDef, bool>(
   285           "preserve_activation_sparsity",
   286           FLAGS_caffe2_dnnlowp_preserve_activation_sparsity);
   287   bool preserve_weight_sparsity =
   288       ArgumentHelper::GetSingleArgument<OperatorDef, bool>(
   290           "preserve_weight_sparsity",
   291           FLAGS_caffe2_dnnlowp_preserve_weight_sparsity);
   292   bool force_scale_power_of_two =
   293       ArgumentHelper::GetSingleArgument<OperatorDef, bool>(
   295           "force_scale_power_of_two",
   296           FLAGS_caffe2_dnnlowp_force_scale_power_of_two);
   297   string activation_quantization_kind =
   298       ArgumentHelper::GetSingleArgument<OperatorDef, string>(
   300           "activation_quantization_kind",
   301           FLAGS_caffe2_dnnlowp_activation_quantization_kind);
   302   string weight_quantization_kind =
   303       ArgumentHelper::GetSingleArgument<OperatorDef, string>(
   305           "weight_quantization_kind",
   306           FLAGS_caffe2_dnnlowp_weight_quantization_kind);
   308   VLOG(2) << 
"Quantization method for op with output " << op_def.output(0)
   309           << 
" activation_precision " << activation_precision
   310           << 
" weight_precision " << weight_precision
   311           << 
" requantization_multiplier_precision "   312           << requantization_multiplier_precision
   313           << 
" eltwise_quantization_precision "   314           << eltwise_quantization_precision << 
" preserve_activation_sparsity "   315           << preserve_activation_sparsity << 
" preserve_weight_sparsity "   316           << preserve_weight_sparsity << 
" force_scale_power_of_two "   317           << force_scale_power_of_two << 
" activation_quantization_kind "   318           << activation_quantization_kind << 
" weight_quantization_kind "   319           << weight_quantization_kind;
   321   return unique_ptr<QuantizationFactory>(
new QuantizationFactory(
   322       activation_precision,
   324       requantization_multiplier_precision,
   325       eltwise_quantization_precision,
   326       preserve_activation_sparsity,
   327       preserve_weight_sparsity,
   328       force_scale_power_of_two,
   329       StringToKind(activation_quantization_kind),
   330       StringToKind(weight_quantization_kind)));
   333 unique_ptr<QuantizationFactory> GetQuantizationFactoryOf(
   335   return GetQuantizationFactoryOf_(op->debug_def());
   338 void AdjustOutputTensorQuantizationParamsWithFollowedBy(
   340     const string& followed_by) {
   341   LOG_IF(WARNING, !HasDNNLowPEngine_(*op));
   343   auto op_def = make_shared<OperatorDef>();
   344   *op_def = op->debug_def();
   345   AddArgument<string>(
"followed_by", followed_by, op_def.get());
   346   op->set_debug_def(op_def);
   348   if (followed_by == 
"Sigmoid") {
   349     SetStaticQuantizationParams(
   350         op, 0, Sigmoid<uint8_t>().GetInputQuantizationParams());
   351   } 
else if (followed_by == 
"Tanh") {
   352     SetStaticQuantizationParams(
   353         op, 0, Tanh<uint8_t>().GetInputQuantizationParams());
   354   } 
else if (followed_by == 
"Relu") {
   355     if (HasStaticQuantization(op)) {
   356       unique_ptr<QuantizationFactory> qfactory = GetQuantizationFactoryOf(op);
   357       TensorQuantizationParams qparams = GetStaticQuantizationParamsOf(op, 0);
   358       qparams = qfactory->ChooseQuantizationParams(0, qparams.Max());
   359       SetStaticQuantizationParams(op, 0, qparams);
   362     LOG(WARNING) << 
"Unknown followed_by " << followed_by;
   366 void ParseDNNLowPOperatorArguments(
   368     bool* dequantize_output,
   369     bool* measure_quantization_error,
   370     string* followed_by) {
   373   if (dequantize_output) {
   375         op->GetSingleArgument<
bool>(
"dequantize_output", 
false);
   376     if (*dequantize_output) {
   377       VLOG(2) << 
"Dequantize output " << op->debug_def().output(0)
   378               << 
" of operator type " << op->debug_def().type();
   383   if (measure_quantization_error) {
   384     *measure_quantization_error =
   385         op->GetSingleArgument<
bool>(
"measure_quantization_error", 
false);
   391   if (HasStaticQuantization(op)) {
   392     TensorQuantizationParams qparams = GetStaticQuantizationParamsOf(op, 0);
   393     unique_ptr<QuantizationFactory> qfactory = GetQuantizationFactoryOf(op);
   394     if (qparams.zero_point != (1 << (qfactory->GetActivationPrecision() - 1)) &&
   395         qparams.zero_point != 0 && qfactory->GetPreserveActivationSparsity()) {
   396       LOG(WARNING) << 
"Symmetric quantization is used for activation but "   398                    << qparams.zero_point << 
" for " << op->debug_def().output(0)
   399                    << 
" output activation of an operator with type "   400                    << op->debug_def().type();
   403     if (op->HasSingleArgumentOfType<
int>(
"Y_zero_point")) {
   404       LOG(WARNING) << 
"Y_zero_point without Y_scale for "   405                    << op->debug_def().output(0)
   406                    << 
" (an output of operator type " << op->debug_def().type()
   407                    << 
") doesn't make sense";
   413   if (op->HasSingleArgumentOfType<
string>(
"followed_by")) {
   414     string followed_by_ = op->GetSingleArgument<
string>(
"followed_by", 
"");
   415     VLOG(2) << 
"Operator with type " << op->debug_def().type() << 
" and output "   416             << op->debug_def().output(0) << 
" is followed by " << followed_by_;
   418     AdjustOutputTensorQuantizationParamsWithFollowedBy(op, followed_by_);
   420       *followed_by = followed_by_;
// Reads per-output value histograms from `histogram_file_name` and stamps
// static quantization arguments (Y_scale / Y_zero_point) onto every
// DNNLOWP op in net_def, chosen by each op's QuantizationFactory.
// NOTE(review): this extraction is missing several original lines (the
// leading numbers below are residue from the upstream file); the comments
// describe only the logic that is visible here — confirm against the
// upstream source before relying on them.
   425 NetDef AddScaleZeroOffsetArgumentsWithHistogram(
   427     const string& histogram_file_name) {
   428   ifstream f(histogram_file_name);
// Inspect the first line to detect which of the two file formats is used.
   431   string first_line, word;
   432   getline(f, first_line);
   434   istringstream ist(first_line);
   435   int nwords_first_line = 0;
   436   while (ist >> word) {
// New-format lines are: op_index op_type output_index tensor_name min max
// nbins b_0..b_{nbins-1} (nbins + 7 words); the old format omits op_type
// (nbins + 6 words).
   443   bool new_format = 
 true;
   444   int op_index, i, nbins;
   445   string op_type, tensor_name;
   447   ist >> op_index >> op_type >> i >> tensor_name >> min >> max >> nbins;
   448   if (nwords_first_line != nbins + 7) {
// Not new-format: re-parse assuming the old (no op_type) layout.
   451     ist >> op_index >> i >> tensor_name >> min >> max >> nbins;
   452     if (nwords_first_line == nbins + 6) {
// Neither word count matched: the file is unusable.
   455       LOG(WARNING) << 
 "histogram file " << histogram_file_name
   456                    << 
 " has an invalid format";
// Walk every (op, output) pair, consuming one histogram line per output.
   463   for (
 auto& op_def : *net_def.mutable_op()) {
   466     for (i = 0; i < op_def.output().size(); ++i) {
   470         f >> op_index2 >> op_type >> i2 >> tensor_name >> min >> max >> nbins;
   472         f >> op_index2 >> i2 >> tensor_name >> min >> max >> nbins;
// Cross-check the histogram line against the net: mismatches are only
// warnings, processing continues.
   474       LOG_IF(WARNING, op_index2 != op_index)
   475           << 
 "op index " << op_index2 << 
 " doesn't match with " << op_index;
   476       LOG_IF(WARNING, tensor_name != op_def.output(i))
   477           << tensor_name << 
 " in histogram file line " << op_index
   478           << 
 " doesn't match with operation def " << op_def.output(i);
   479       LOG_IF(WARNING, i2 != i)
   480           << 
 "output tensor index " << i2 << 
 " doesn't match with " << i;
// op_type is only present (and only checked) in the new format.
   482         LOG_IF(WARNING, op_type != op_def.type())
   483             << 
 "operator type " << op_type << 
 " in histogram file line "   484             << op_index << 
 " doesn't match with operation def "   488       vector<uint64_t> bins;
// Read the nbins histogram bucket counts for this output.
   489       for (
 int j = 0; j < nbins; ++j) {
// Skip ops that are not DNNLOWP or that dequantize their output — they
// do not need static output quantization parameters.
   495       if (!HasDNNLowPEngine_(op_def) ||
   496           arg_helper.GetSingleArgument<
 int>(
 "dequantize_output", 0) != 0 ||
   498         LOG(INFO) << 
 "Skip " << op_def.type() << 
 " " << op_def.output(0);
// Choose quantization params from the histogram and stamp them on the op.
   502       Histogram hist = Histogram(min, max, bins);
   504       unique_ptr<QuantizationFactory> qfactory =
   505           GetQuantizationFactoryOf_(op_def);
   506       TensorQuantizationParams qparams =
   507           qfactory->ChooseQuantizationParams(hist);
   509       SetStaticQuantizationParams_(&op_def, 0, qparams);
 
 Tensor class holds a shared pointer to the implementation TensorImpl, and redirects API calls to TensorImpl.
 
A helper class to index into arguments. 
 
 A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.