1 #include "caffe2/quantization/server/activation_distribution_observer.h" 2 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h" 3 #include "caffe2/quantization/server/dnnlowp.h" 10 OutputMinMaxObserver::OutputMinMaxObserver(
OperatorBase* op)
11 : ObserverBase<
OperatorBase>(op), info_(make_shared<OperatorInfo>()) {
12 for (
int i = 0; i < op->OutputSize(); ++i) {
13 info_->tensor_infos.emplace_back(op->debug_def().output(i));
14 info_->type = op->debug_def().type();
20 static map<string, pair<float, float>> min_max_map_;
22 OutputMinMaxObserver::~OutputMinMaxObserver() {
38 void FindMinMax(
const T* data,
float* min,
float* max,
int len) {
39 vector<float> temp(len);
40 for (
int i = 0; i < len; ++i) {
43 fbgemm::FindMinMax(temp.data(), min, max, len);
47 void FindMinMax<float>(
const float* data,
float* min,
float* max,
int len) {
48 fbgemm::FindMinMax(data, min, max, len);
51 void OutputMinMaxObserver::Stop() {
52 for (
int i = 0; i < subject_->OutputSize(); ++i) {
53 if (!subject_->OutputIsTensorType(i, CPU)) {
56 Tensor* tensor = subject_->template Output<Tensor>(i, CPU);
57 if (tensor->numel() == 0 || tensor->numel() == -1)
59 string out_name(subject_->debug_def().output(i));
61 float min = numeric_limits<float>::lowest(),
62 max = numeric_limits<float>::max();
64 if (tensor->IsType<
float>()) {
65 if (!tensor->data<
float>()) {
68 FindMinMax(tensor->data<
float>(), &min, &max, tensor->numel());
69 }
else if (tensor->IsType<
int>()) {
70 if (!tensor->data<
int>()) {
73 FindMinMax(tensor->data<
int>(), &min, &max, tensor->numel());
74 }
else if (tensor->IsType<
long>()) {
75 if (!tensor->data<
long>()) {
78 FindMinMax(tensor->data<
long>(), &min, &max, tensor->numel());
80 if (!warning_printed_) {
81 LOG(INFO) <<
"Tensor " << out_name <<
" has unsupported type " 82 << tensor->meta().name() <<
" with size " << tensor->numel();
83 warning_printed_ =
true;
92 if (min_max_map_.find(out_name) == min_max_map_.end()) {
93 min_max_map_[out_name] = make_pair(
94 numeric_limits<float>::max(), numeric_limits<float>::lowest());
97 info_->tensor_infos[i].Update(min, max);
99 min_max_map_[out_name].first =
100 std::min(min_max_map_[out_name].first, min);
101 min_max_map_[out_name].second =
102 std::max(min_max_map_[out_name].second, max);
103 assert(min_max_map_[out_name].second >= min_max_map_[out_name].first);
104 assert(min_max_map_[out_name].first < 1e38);
106 VLOG(2) <<
this <<
" " << info_->type <<
" " << i <<
" " << out_name
107 <<
" " << info_->tensor_infos[i].min <<
" " 108 << info_->tensor_infos[i].max <<
" " 109 << min_max_map_[out_name].first <<
" " 110 << min_max_map_[out_name].second;
119 const string& out_file_name,
122 dump_freq_(dump_freq),
124 out_file_name_(out_file_name) {
125 VLOG(2) << out_file_name;
126 min_max_infos_.resize(subject->GetOperators().size());
128 for (
auto* op : subject->GetOperators()) {
130 op->AttachObserver(std::unique_ptr<OutputMinMaxObserver>(observer));
131 min_max_infos_[i] = observer->GetInfo();
136 void OutputMinMaxNetObserver::DumpAndReset_(
137 const std::string& out_file_name,
138 bool print_total_min_max) {
139 ofstream f(out_file_name);
141 LOG(WARNING) <<
this <<
": can't open " << out_file_name;
144 for (
int op_index = 0; op_index < min_max_infos_.size(); ++op_index) {
146 min_max_infos_[op_index].get();
148 for (
int i = 0; i < op_info->tensor_infos.size(); ++i) {
150 op_info->tensor_infos[i];
153 ost << op_index <<
" " << op_info->type <<
" " << i <<
" " 154 << tensor_info.name <<
" ";
155 if (print_total_min_max) {
156 ost << tensor_info.total_min <<
" " << tensor_info.total_max;
158 ost << tensor_info.min <<
" " << tensor_info.max;
161 LOG(INFO) <<
this <<
" " << ost.str();
162 f << ost.str() << endl;
164 op_info->tensor_infos[i].min = numeric_limits<float>::max();
165 op_info->tensor_infos[i].max = numeric_limits<float>::lowest();
172 OutputMinMaxNetObserver::~OutputMinMaxNetObserver() {
173 DumpAndReset_(out_file_name_,
true);
183 localtime_r(&rawtime, &timeinfo);
184 char buffer[128] = {};
185 strftime(buffer,
sizeof(buffer),
"%Y-%m-%d-%H-%M-%S", &timeinfo);
186 char buffer2[256] = {};
187 snprintf(buffer2,
sizeof(buffer2),
"global_%s.minmax", buffer);
191 for (
auto key_value : min_max_map_) {
193 assert(key_value.second.first <= key_value.second.second);
194 assert(key_value.second.first < 1e38);
195 ost << op_index <<
" 0 " << key_value.first <<
" " 196 << key_value.second.first <<
" " << key_value.second.second;
197 f << ost.str() << endl;
205 void OutputMinMaxNetObserver::Stop() {
207 if (dump_freq_ == -1 || (cnt_ % dump_freq_) != 0) {
212 size_t last_dot = out_file_name_.rfind(
'.');
213 size_t last_slash = out_file_name_.rfind(
'/');
214 if (last_dot != string::npos &&
215 (last_slash == string::npos || last_slash < last_dot)) {
216 ost << out_file_name_.substr(0, last_dot) <<
"_" << cnt_ / dump_freq_
217 << out_file_name_.substr(last_dot);
219 ost << out_file_name_ <<
"_" << cnt_ / dump_freq_;
222 DumpAndReset_(ost.str());
226 HistogramObserver::HistogramObserver(
OperatorBase* op, shared_ptr<Info> info)
229 void HistogramObserver::Stop() {
230 for (
int i = 0; i < subject_->OutputSize(); ++i) {
231 if (!subject_->OutputIsTensorType(i, CPU)) {
234 Tensor* tensor = subject_->template Output<Tensor>(i, CPU);
235 if (tensor->numel() == 0 || tensor->numel() == -1) {
239 string out_name(subject_->debug_def().output(i));
241 const float* data =
nullptr;
242 vector<float> data_temp;
244 if (tensor->IsType<
float>()) {
245 if (!tensor->data<
float>()) {
248 data = tensor->template data<float>();
249 }
else if (tensor->IsType<
int>()) {
250 if (!tensor->data<
int>()) {
253 const int* data_orig = tensor->data<
int>();
254 data_temp.resize(tensor->numel());
255 for (
int j = 0; j < tensor->numel(); ++j) {
256 data_temp[j] = data_orig[j];
258 data = data_temp.data();
259 }
else if (tensor->IsType<
long>()) {
260 if (!tensor->data<
long>()) {
263 const long* data_orig = tensor->data<
long>();
264 data_temp.resize(tensor->numel());
265 for (
int j = 0; j < tensor->numel(); ++j) {
266 data_temp[j] = data_orig[j];
268 data = data_temp.data();
270 if (!warning_printed_) {
271 LOG(INFO) <<
"Tensor " << out_name <<
" has unsupported type " 272 << tensor->meta().name() <<
" with size " << tensor->numel();
273 warning_printed_ =
true;
278 info_->histograms[i].Add(data, tensor->numel());
279 info_->total_histograms[i].Add(data, tensor->numel());
// HistogramNetObserver constructor: attaches a HistogramObserver to every
// operator of the net and pre-allocates, per operator output, one
// per-window histogram and one lifetime ("total") histogram, each with
// `nbins` bins.
// NOTE(review): this listing is an extraction artifact — several original
// lines are missing (parts of the signature and initializer list, the
// creation of `info` and `observer`, and the loop counter `i`). Restore
// them from the upstream file before compiling.
284 HistogramNetObserver::HistogramNetObserver(
286 const string& out_file_name,
291 dump_freq_(dump_freq),
294 out_file_name_(out_file_name) {
// One info slot per operator, filled in net order below.
295 hist_infos_.resize(subject->GetOperators().size());
298 for (
auto* op : subject->GetOperators()) {
300 info->min_max_info.type = op->debug_def().type();
// One histogram pair and one TensorInfo per operator output.
302 for (
int j = 0; j < op->OutputSize(); ++j) {
303 info->histograms.emplace_back(nbins);
304 info->total_histograms.emplace_back(nbins);
305 info->min_max_info.tensor_infos.emplace_back(op->debug_def().output(j));
// The op owns the observer; `info` is shared so the histograms remain
// readable from this net observer — presumably via shared_ptr (TODO confirm).
309 op->AttachObserver(unique_ptr<HistogramObserver>(observer));
310 hist_infos_[i] = info;
// Writes one line per (operator, output) to the output file:
//   op_index op_type output_index tensor_name min max nbins bin_0 ... bin_{n-1}
// choosing the lifetime histograms when print_total_min_max is true, and
// the per-window histograms otherwise.
// NOTE(review): extraction artifact — missing lines include the mul-nets
// file-name suffix logic (orig. 320-323), the `info`/`hist` declarations,
// the per-bin append loop body (orig. 352-354), and the window-histogram
// reset at the end. Restore from the upstream file before compiling.
315 void HistogramNetObserver::DumpAndReset_(
316 const string& out_file_name,
317 bool print_total_min_max) {
318 stringstream file_name;
319 file_name << out_file_name;
324 ofstream f(file_name.str());
// Best-effort: warn but keep formatting so LOG mirrors still happen.
326 LOG(WARNING) <<
this <<
": can't open " << file_name.str();
329 for (
int op_index = 0; op_index < hist_infos_.size(); ++op_index) {
335 for (
int i = 0; i < info->histograms.size(); ++i) {
// Select lifetime vs. window histogram for this output.
337 (print_total_min_max ? info->total_histograms : info->histograms)[i]
// An empty range means the tensor was never observed (or was constant).
339 if (hist->Min() >= hist->Max()) {
340 LOG(WARNING) <<
"Histogram of " 341 << info->min_max_info.tensor_infos[i].name
342 <<
" has an empty range: min " << hist->Min()
343 <<
" and max " << hist->Max();
347 ost << op_index <<
" " << info->min_max_info.type <<
" " << i <<
" " 348 << info->min_max_info.tensor_infos[i].name <<
" " << hist->Min()
349 <<
" " << hist->Max() <<
" " << hist->GetHistogram()->size();
// Append every bin count to the same line.
351 for (uint64_t c : *hist->GetHistogram()) {
355 f << ost.str() << endl;
356 if (print_total_min_max) {
357 LOG(INFO) <<
this <<
" " << ost.str();
360 if (hist->GetHistogram()->empty()) {
361 LOG(WARNING) <<
"Histogram of " 362 << info->min_max_info.tensor_infos[i].name <<
" is empty";
// Window histograms are reset after a periodic dump; lifetime ones are not.
365 if (!print_total_min_max) {
373 HistogramNetObserver::~HistogramNetObserver() {
374 DumpAndReset_(out_file_name_,
true);
377 void HistogramNetObserver::Stop() {
379 if (dump_freq_ == -1 || (cnt_ % dump_freq_) != 0) {
384 size_t last_dot = out_file_name_.rfind(
'.');
385 size_t last_slash = out_file_name_.rfind(
'/');
386 if (last_dot != string::npos &&
387 (last_slash == string::npos || last_slash < last_dot)) {
388 ost << out_file_name_.substr(0, last_dot) <<
"_" << cnt_ / dump_freq_
389 << out_file_name_.substr(last_dot);
391 ost << out_file_name_ <<
"_" << cnt_ / dump_freq_;
394 DumpAndReset_(ost.str());
398 static bool HasDNNLowPEngine_(
const OperatorDef& op_def) {
399 const string ENGINE_PREFIX =
"DNNLOWP";
401 op_def.engine().c_str(),
402 ENGINE_PREFIX.c_str(),
403 ENGINE_PREFIX.size()) == 0;
407 return HasDNNLowPEngine_(op.debug_def());
// Reads per-tensor min/max values from `min_max_file_name` (one row per
// operator output, in net order), chooses quantization parameters for each
// output, registers them on DNNLOWP-engine operators, and optionally echoes
// the chosen parameters to `qparams_output_file_name`.
// NOTE(review): extraction artifact — missing lines include parts of the
// signature/initializer list, the rewind of `f` after format sniffing, the
// declarations of op_index/op_index2/i2/min/max/fout, the format branch
// around the two `f >>` reads, the condition guarding the fallback qparams
// (orig. 461-467), and closing braces. Restore from the upstream file.
410 RegisterQuantizationParamsNetObserver::RegisterQuantizationParamsNetObserver(
412 const string& min_max_file_name,
414 const string& qparams_output_file_name)
416 ifstream f(min_max_file_name);
// Sniff the format from the first line: 6 columns means the "new" format
// (op_index op_type output_index tensor_name min max); 5 means the old one
// without op_type.
419 string first_line, word;
420 getline(f, first_line);
422 istringstream ist(first_line);
423 int nwords_first_line = 0;
424 while (ist >> word) {
428 bool new_format = nwords_first_line == 6;
429 if (!new_format && nwords_first_line != 5) {
430 LOG(WARNING) <<
"min_max file " << min_max_file_name
431 <<
" has an invalid format";
// Optional side-output of the chosen quantization parameters.
436 if (!qparams_output_file_name.empty()) {
437 fout.open(qparams_output_file_name);
439 LOG(WARNING) <<
this <<
": can't open " << qparams_output_file_name;
// Consume exactly one file row per operator output, in net order.
445 for (
auto* op : subject->GetOperators()) {
446 for (
int i = 0; i < op->OutputSize(); ++i) {
448 string op_type, tensor_name;
452 f >> op_index2 >> op_type >> i2 >> tensor_name >> min >> max;
454 f >> op_index2 >> i2 >> tensor_name >> min >> max;
// Sanity-check that file rows line up with the net's operators/outputs.
456 assert(op_index2 == op_index);
458 assert(tensor_name == op->debug_def().output(i));
460 TensorQuantizationParams qparams;
// Normal path: let the factory pick qparams from the observed range.
462 unique_ptr<QuantizationFactory> qfactory(GetQuantizationFactoryOf(op));
463 qparams = qfactory->ChooseQuantizationParams(min, max, is_weight);
// Fallback parameters — presumably for a degenerate (max <= min) range;
// the guarding condition is not visible in this listing (TODO confirm).
465 qparams.scale = 0.1f;
466 qparams.zero_point = -min / qparams.scale;
467 qparams.precision = 8;
// Only DNNLOWP-engine operators get the qparams attached as static args.
470 if (HasDNNLowPEngine_(*op)) {
471 SetStaticQuantizationParams(op, i, qparams);
474 if (fout.is_open()) {
475 fout << op_index <<
" " << op_type <<
" " << i <<
" " << tensor_name
476 <<
" " << qparams.Min() <<
" " << qparams.Max() <<
" " 477 << qparams.scale <<
" " << qparams.zero_point <<
" " 478 << qparams.precision << endl;
484 if (fout.is_open()) {
// Reads per-tensor histograms from `histogram_file_name` (one row per
// operator output: op_index [op_type] output_index tensor_name min max
// nbins bin_0 ... bin_{n-1}), chooses quantization parameters from each
// histogram, registers them on DNNLOWP-engine operators, and optionally
// echoes the chosen parameters to `qparams_output_file_name`.
// NOTE(review): extraction artifact — missing lines include parts of the
// signature/initializer list, the re-parsing of `ist` between format
// attempts, declarations of min/max/op_index2/i2/fout, the bin-read loop
// body and Histogram construction (orig. 565-570), the condition guarding
// the fallback qparams (orig. 576-579), and closing braces. Restore from
// the upstream file.
489 RegisterQuantizationParamsWithHistogramNetObserver::
490 RegisterQuantizationParamsWithHistogramNetObserver(
492 const string& histogram_file_name,
494 const string& qparams_output_file_name)
496 ifstream f(histogram_file_name);
// Sniff the format from the first line by counting whitespace-separated
// words, then re-parsing it under each candidate column layout.
499 string first_line, word;
500 getline(f, first_line);
502 istringstream ist(first_line);
503 int nwords_first_line = 0;
504 while (ist >> word) {
// Try the new format first (with op_type): nbins + 7 fixed columns.
511 bool new_format =
true;
512 int op_index, i, nbins;
513 string op_type, tensor_name;
515 ist >> op_index >> op_type >> i >> tensor_name >> min >> max >> nbins;
516 if (nwords_first_line != nbins + 7) {
// Fall back to the old format (no op_type): nbins + 6 columns.
519 ist >> op_index >> i >> tensor_name >> min >> max >> nbins;
520 if (nwords_first_line == nbins + 6) {
523 LOG(WARNING) <<
"histogram file " << histogram_file_name
524 <<
" has an invalid format";
// Optional side-output of the chosen quantization parameters.
531 if (!qparams_output_file_name.empty()) {
532 fout.open(qparams_output_file_name);
534 LOG(WARNING) <<
this <<
": can't open " << qparams_output_file_name;
// Consume exactly one file row per operator output, in net order.
540 for (
auto* op : subject->GetOperators()) {
541 for (i = 0; i < op->OutputSize(); ++i) {
545 f >> op_index2 >> op_type >> i2 >> tensor_name >> min >> max >> nbins;
547 f >> op_index2 >> i2 >> tensor_name >> min >> max >> nbins;
// Mismatches are warnings (not asserts): the file may come from a
// different-but-similar net, so processing continues best-effort.
549 LOG_IF(WARNING, op_index2 != op_index)
550 <<
"op index " << op_index2 <<
" doesn't match with " << op_index;
551 LOG_IF(WARNING, tensor_name != op->debug_def().output(i))
552 << tensor_name <<
" in histogram file line " << op_index
553 <<
" doesn't match with operation def " << op->debug_def().output(i);
554 LOG_IF(WARNING, i2 != i)
555 <<
"output tensor index " << i2 <<
" doesn't match with " << i;
557 LOG_IF(WARNING, op_type != op->debug_def().type())
558 <<
"operator type " << op_type <<
" in histogram file line " 559 << op_index <<
" doesn't match with operation def " 560 << op->debug_def().type();
// Read the nbins bin counts for this output's histogram.
563 vector<uint64_t> bins;
564 for (
int j = 0; j < nbins; ++j) {
572 TensorQuantizationParams qparams;
// Normal path: let the factory pick qparams from the histogram shape.
574 unique_ptr<QuantizationFactory> qfactory(GetQuantizationFactoryOf(op));
575 qparams = qfactory->ChooseQuantizationParams(hist, is_weight);
// Fallback parameters — presumably for a degenerate (max <= min) range;
// the guarding condition is not visible in this listing (TODO confirm).
577 qparams.scale = 0.1f;
578 qparams.zero_point = -min / qparams.scale;
579 qparams.precision = 8;
// Only DNNLOWP-engine operators get the qparams attached as static args.
582 if (HasDNNLowPEngine_(*op)) {
583 SetStaticQuantizationParams(op, i, qparams);
586 if (fout.is_open()) {
587 fout << op_index <<
" " << op_type <<
" " << i <<
" " << tensor_name
588 <<
" " << qparams.Min() <<
" " << qparams.Max() <<
" " 589 << qparams.scale <<
" " << qparams.zero_point <<
" " 590 << qparams.precision << endl;
596 if (fout.is_open()) {
Given min/max, collect histogram.
OutputMinMaxNetObserver(NetBase *subject, const std::string &out_file_name, int dump_freq=-1)
dump_freq: if -1 (the default), results are printed only once, from the destructor.
An equi-width histogram whose bin range adapts over time as new min or max values are observed.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
bin_width = (max - min) / nbins. The i-th bin (zero-based indexing) covers [i*bin_width, (i+1)*bin_width), with the exception that the (nbins-1)-th bin covers [(nbins-1)*bin_width, nbins*bin_width] (closed at the top so the maximum value is included).