// Caffe2 - C++ API
// A deep learning, cross-platform ML framework
// File: activation_distribution_observer.cc
1 #include "caffe2/quantization/server/activation_distribution_observer.h"
2 #include "caffe2/quantization/server/caffe2_dnnlowp_utils.h"
3 #include "caffe2/quantization/server/dnnlowp.h"
4 
5 namespace caffe2 {
6 
7 using namespace std;
8 using namespace dnnlowp;
9 
10 OutputMinMaxObserver::OutputMinMaxObserver(OperatorBase* op)
11  : ObserverBase<OperatorBase>(op), info_(make_shared<OperatorInfo>()) {
12  for (int i = 0; i < op->OutputSize(); ++i) {
13  info_->tensor_infos.emplace_back(op->debug_def().output(i));
14  info_->type = op->debug_def().type();
15  }
16 }
17 
// A global table that collects min/max for each tensor name.
// Useful in case there are multiple copies of the same network.
// NOTE: updates are serialized with "#pragma omp critical" at the use sites;
// access from threads outside the OpenMP team would be unsynchronized.
static map<string, pair<float, float>> min_max_map_;
21 
OutputMinMaxObserver::~OutputMinMaxObserver() {
  // Intentionally empty: the collected statistics are dumped by
  // OutputMinMaxNetObserver, not by the per-operator observer.
  // The block below is debugging output kept for reference.
  /*#pragma omp critical
  {
    for (int i = 0; i < info_->tensor_infos.size(); ++i) {
      LOG(INFO) <<
        this << " " << info_->type << " " << i << " " <<
        info_->tensor_infos[i].name << " " <<
        info_->tensor_infos[i].min << " " <<
        info_->tensor_infos[i].max << " " <<
        min_max_map_[info_->tensor_infos[i].name].first << " " <<
        min_max_map_[info_->tensor_infos[i].name].second << " ";
    }
  }*/
}
36 
37 template <typename T>
38 void FindMinMax(const T* data, float* min, float* max, int len) {
39  vector<float> temp(len);
40  for (int i = 0; i < len; ++i) {
41  temp[i] = data[i];
42  }
43  fbgemm::FindMinMax(temp.data(), min, max, len);
44 }
45 
// Specialization for float: no widening copy needed, delegate directly.
template <>
void FindMinMax<float>(const float* data, float* min, float* max, int len) {
  fbgemm::FindMinMax(data, min, max, len);
}
50 
// Called after the observed operator runs: scans every CPU output tensor,
// computes its min/max, and folds the result into both the per-operator
// info_ and the file-global min_max_map_.
void OutputMinMaxObserver::Stop() {
  for (int i = 0; i < subject_->OutputSize(); ++i) {
    if (!subject_->OutputIsTensorType(i, CPU)) {
      continue;
    }
    Tensor* tensor = subject_->template Output<Tensor>(i, CPU);
    // numel() == -1 indicates an uninitialized tensor; 0 means empty.
    if (tensor->numel() == 0 || tensor->numel() == -1)
      continue;
    string out_name(subject_->debug_def().output(i));

    // Initial values are irrelevant: every FindMinMax overload overwrites
    // both outputs before they are read.
    float min = numeric_limits<float>::lowest(),
        max = numeric_limits<float>::max();

    // Only float, int, and long tensors are observed; any other element
    // type is skipped with a one-time warning.
    if (tensor->IsType<float>()) {
      if (!tensor->data<float>()) {
        continue;
      }
      FindMinMax(tensor->data<float>(), &min, &max, tensor->numel());
    } else if (tensor->IsType<int>()) {
      if (!tensor->data<int>()) {
        continue;
      }
      FindMinMax(tensor->data<int>(), &min, &max, tensor->numel());
    } else if (tensor->IsType<long>()) {
      if (!tensor->data<long>()) {
        continue;
      }
      FindMinMax(tensor->data<long>(), &min, &max, tensor->numel());
    } else {
      if (!warning_printed_) {
        LOG(INFO) << "Tensor " << out_name << " has unsupported type "
                  << tensor->meta().name() << " with size " << tensor->numel();
        warning_printed_ = true;
      }
      continue;
    }

    // min_max_map_ is shared across observers (and possibly across copies of
    // the same net), so the update must be serialized under OpenMP.
#ifdef _OPENMP
#pragma omp critical
#endif
    {
      if (min_max_map_.find(out_name) == min_max_map_.end()) {
        // Seed with an inverted range so the first min/max always wins.
        min_max_map_[out_name] = make_pair(
            numeric_limits<float>::max(), numeric_limits<float>::lowest());
      }

      info_->tensor_infos[i].Update(min, max);

      min_max_map_[out_name].first =
          std::min(min_max_map_[out_name].first, min);
      min_max_map_[out_name].second =
          std::max(min_max_map_[out_name].second, max);
      assert(min_max_map_[out_name].second >= min_max_map_[out_name].first);
      assert(min_max_map_[out_name].first < 1e38);

      VLOG(2) << this << " " << info_->type << " " << i << " " << out_name
              << " " << info_->tensor_infos[i].min << " "
              << info_->tensor_infos[i].max << " "
              << min_max_map_[out_name].first << " "
              << min_max_map_[out_name].second;
    }
  }

  return;
}
116 
118  NetBase* subject,
119  const string& out_file_name,
120  int dump_freq)
121  : NetObserver(subject),
122  dump_freq_(dump_freq),
123  cnt_(0),
124  out_file_name_(out_file_name) {
125  VLOG(2) << out_file_name;
126  min_max_infos_.resize(subject->GetOperators().size());
127  int i = 0;
128  for (auto* op : subject->GetOperators()) {
129  OutputMinMaxObserver* observer = new OutputMinMaxObserver(op);
130  op->AttachObserver(std::unique_ptr<OutputMinMaxObserver>(observer));
131  min_max_infos_[i] = observer->GetInfo();
132  ++i;
133  }
134 }
135 
136 void OutputMinMaxNetObserver::DumpAndReset_(
137  const std::string& out_file_name,
138  bool print_total_min_max) {
139  ofstream f(out_file_name);
140  if (!f) {
141  LOG(WARNING) << this << ": can't open " << out_file_name;
142  }
143 
144  for (int op_index = 0; op_index < min_max_infos_.size(); ++op_index) {
146  min_max_infos_[op_index].get();
147  if (op_info) {
148  for (int i = 0; i < op_info->tensor_infos.size(); ++i) {
149  const OutputMinMaxObserver::TensorInfo& tensor_info =
150  op_info->tensor_infos[i];
151 
152  ostringstream ost;
153  ost << op_index << " " << op_info->type << " " << i << " "
154  << tensor_info.name << " ";
155  if (print_total_min_max) {
156  ost << tensor_info.total_min << " " << tensor_info.total_max;
157  } else {
158  ost << tensor_info.min << " " << tensor_info.max;
159  }
160 
161  LOG(INFO) << this << " " << ost.str();
162  f << ost.str() << endl;
163 
164  op_info->tensor_infos[i].min = numeric_limits<float>::max();
165  op_info->tensor_infos[i].max = numeric_limits<float>::lowest();
166  }
167  }
168  }
169  f.close();
170 }
171 
172 OutputMinMaxNetObserver::~OutputMinMaxNetObserver() {
173  DumpAndReset_(out_file_name_, true);
174 
175 #ifdef _OPENMP
176 #pragma omp critical
177 #endif
178  {
179  ofstream f;
180  time_t rawtime;
181  time(&rawtime);
182  struct tm timeinfo;
183  localtime_r(&rawtime, &timeinfo);
184  char buffer[128] = {};
185  strftime(buffer, sizeof(buffer), "%Y-%m-%d-%H-%M-%S", &timeinfo);
186  char buffer2[256] = {};
187  snprintf(buffer2, sizeof(buffer2), "global_%s.minmax", buffer);
188 
189  f.open(buffer2);
190  int op_index = 0;
191  for (auto key_value : min_max_map_) {
192  ostringstream ost;
193  assert(key_value.second.first <= key_value.second.second);
194  assert(key_value.second.first < 1e38);
195  ost << op_index << " 0 " << key_value.first << " "
196  << key_value.second.first << " " << key_value.second.second;
197  f << ost.str() << endl;
198 
199  ++op_index;
200  }
201  f.close();
202  }
203 }
204 
205 void OutputMinMaxNetObserver::Stop() {
206  ++cnt_;
207  if (dump_freq_ == -1 || (cnt_ % dump_freq_) != 0) {
208  return;
209  }
210 
211  ostringstream ost;
212  size_t last_dot = out_file_name_.rfind('.');
213  size_t last_slash = out_file_name_.rfind('/');
214  if (last_dot != string::npos &&
215  (last_slash == string::npos || last_slash < last_dot)) {
216  ost << out_file_name_.substr(0, last_dot) << "_" << cnt_ / dump_freq_
217  << out_file_name_.substr(last_dot);
218  } else {
219  ost << out_file_name_ << "_" << cnt_ / dump_freq_;
220  }
221 
222  DumpAndReset_(ost.str());
223  return;
224 }
225 
226 HistogramObserver::HistogramObserver(OperatorBase* op, shared_ptr<Info> info)
227  : ObserverBase<OperatorBase>(op), info_(info) {}
228 
229 void HistogramObserver::Stop() {
230  for (int i = 0; i < subject_->OutputSize(); ++i) {
231  if (!subject_->OutputIsTensorType(i, CPU)) {
232  continue;
233  }
234  Tensor* tensor = subject_->template Output<Tensor>(i, CPU);
235  if (tensor->numel() == 0 || tensor->numel() == -1) {
236  continue;
237  }
238 
239  string out_name(subject_->debug_def().output(i));
240 
241  const float* data = nullptr;
242  vector<float> data_temp;
243 
244  if (tensor->IsType<float>()) {
245  if (!tensor->data<float>()) {
246  continue;
247  }
248  data = tensor->template data<float>();
249  } else if (tensor->IsType<int>()) {
250  if (!tensor->data<int>()) {
251  continue;
252  }
253  const int* data_orig = tensor->data<int>();
254  data_temp.resize(tensor->numel());
255  for (int j = 0; j < tensor->numel(); ++j) {
256  data_temp[j] = data_orig[j];
257  }
258  data = data_temp.data();
259  } else if (tensor->IsType<long>()) {
260  if (!tensor->data<long>()) {
261  continue;
262  }
263  const long* data_orig = tensor->data<long>();
264  data_temp.resize(tensor->numel());
265  for (int j = 0; j < tensor->numel(); ++j) {
266  data_temp[j] = data_orig[j];
267  }
268  data = data_temp.data();
269  } else {
270  if (!warning_printed_) {
271  LOG(INFO) << "Tensor " << out_name << " has unsupported type "
272  << tensor->meta().name() << " with size " << tensor->numel();
273  warning_printed_ = true;
274  }
275  continue;
276  }
277 
278  info_->histograms[i].Add(data, tensor->numel());
279  info_->total_histograms[i].Add(data, tensor->numel());
280  }
281  return;
282 }
283 
// Attaches a HistogramObserver (with nbins-bin histograms per output) to
// every operator of `subject`.
// dump_freq: dump every dump_freq net runs; -1 means only from the
//            destructor.
// mul_nets: if true, the dump file name is suffixed with `this` so multiple
//           observed copies of the same net do not clobber each other.
HistogramNetObserver::HistogramNetObserver(
    NetBase* subject,
    const string& out_file_name,
    int nbins,
    int dump_freq,
    bool mul_nets)
    : NetObserver(subject),
      dump_freq_(dump_freq),
      cnt_(0),
      mul_nets_(mul_nets),
      out_file_name_(out_file_name) {
  hist_infos_.resize(subject->GetOperators().size());

  int i = 0;
  for (auto* op : subject->GetOperators()) {
    shared_ptr<HistogramObserver::Info> info(new HistogramObserver::Info);
    info->min_max_info.type = op->debug_def().type();

    // One windowed and one cumulative histogram per operator output.
    for (int j = 0; j < op->OutputSize(); ++j) {
      info->histograms.emplace_back(nbins);
      info->total_histograms.emplace_back(nbins);
      info->min_max_info.tensor_infos.emplace_back(op->debug_def().output(j));
    }

    HistogramObserver* observer = new HistogramObserver(op, info);
    // The operator takes ownership of the observer; we retain only the
    // shared info it accumulates into.
    op->AttachObserver(unique_ptr<HistogramObserver>(observer));
    hist_infos_[i] = info;
    ++i;
  }
}
314 
// Writes one line per (operator, output) to out_file_name:
//   op_index op_type output_index tensor_name min max nbins bin0 bin1 ...
// print_total_min_max selects the cumulative histograms (used by the
// destructor) over the windowed ones; windowed histograms are reset after a
// non-total dump.
void HistogramNetObserver::DumpAndReset_(
    const string& out_file_name,
    bool print_total_min_max) {
  stringstream file_name;
  file_name << out_file_name;
  if (mul_nets_) {
    // Disambiguate dumps when several copies of the net are observed.
    file_name << ".";
    file_name << this;
  }
  ofstream f(file_name.str());
  if (!f) {
    LOG(WARNING) << this << ": can't open " << file_name.str();
  }

  for (int op_index = 0; op_index < hist_infos_.size(); ++op_index) {
    HistogramObserver::Info* info = hist_infos_[op_index].get();
    if (!info) {
      continue;
    }

    for (int i = 0; i < info->histograms.size(); ++i) {
      // Finalize snapshots the dynamic histogram into a fixed-bin one.
      const Histogram* hist =
          (print_total_min_max ? info->total_histograms : info->histograms)[i]
              .Finalize();
      if (hist->Min() >= hist->Max()) {
        LOG(WARNING) << "Histogram of "
                     << info->min_max_info.tensor_infos[i].name
                     << " has an empty range: min " << hist->Min()
                     << " and max " << hist->Max();
      }

      ostringstream ost;
      ost << op_index << " " << info->min_max_info.type << " " << i << " "
          << info->min_max_info.tensor_infos[i].name << " " << hist->Min()
          << " " << hist->Max() << " " << hist->GetHistogram()->size();

      for (uint64_t c : *hist->GetHistogram()) {
        ost << " " << c;
      }

      f << ost.str() << endl;
      if (print_total_min_max) {
        LOG(INFO) << this << " " << ost.str();
      }

      if (hist->GetHistogram()->empty()) {
        LOG(WARNING) << "Histogram of "
                     << info->min_max_info.tensor_infos[i].name << " is empty";
      }

      // Windowed histograms start fresh (same bin count) for the next
      // dump interval; totals are left to keep accumulating.
      if (!print_total_min_max) {
        info->histograms[i] = DynamicHistogram(hist->GetHistogram()->size());
      }
    }
  }
  f.close();
}
372 
HistogramNetObserver::~HistogramNetObserver() {
  // Final dump uses the cumulative (total) histograms.
  DumpAndReset_(out_file_name_, true);
}
376 
377 void HistogramNetObserver::Stop() {
378  ++cnt_;
379  if (dump_freq_ == -1 || (cnt_ % dump_freq_) != 0) {
380  return;
381  }
382 
383  ostringstream ost;
384  size_t last_dot = out_file_name_.rfind('.');
385  size_t last_slash = out_file_name_.rfind('/');
386  if (last_dot != string::npos &&
387  (last_slash == string::npos || last_slash < last_dot)) {
388  ost << out_file_name_.substr(0, last_dot) << "_" << cnt_ / dump_freq_
389  << out_file_name_.substr(last_dot);
390  } else {
391  ost << out_file_name_ << "_" << cnt_ / dump_freq_;
392  }
393 
394  DumpAndReset_(ost.str());
395  return;
396 }
397 
398 static bool HasDNNLowPEngine_(const OperatorDef& op_def) {
399  const string ENGINE_PREFIX = "DNNLOWP";
400  return strncmp(
401  op_def.engine().c_str(),
402  ENGINE_PREFIX.c_str(),
403  ENGINE_PREFIX.size()) == 0;
404 }
405 
// Convenience overload: checks the engine of an already-constructed operator.
static bool HasDNNLowPEngine_(const OperatorBase& op) {
  return HasDNNLowPEngine_(op.debug_def());
}
409 
// Reads a min/max file (as produced by OutputMinMaxNetObserver), chooses
// quantization parameters for every operator output, and registers them on
// operators that use a DNNLOWP engine.
// is_weight: forwarded to the quantization factory (weights may use a
//            different quantization scheme than activations).
// qparams_output_file_name: if non-empty, the chosen parameters are also
//            written to this file.
RegisterQuantizationParamsNetObserver::RegisterQuantizationParamsNetObserver(
    NetBase* subject,
    const string& min_max_file_name,
    bool is_weight,
    const string& qparams_output_file_name)
    : NetObserver(subject) {
  ifstream f(min_max_file_name);

  // Detect the format from the first line's word count:
  // new format has 6 columns (op_index op_type output_index name min max),
  // old format has 5 (no op_type).
  string first_line, word;
  getline(f, first_line);
  f.seekg(0, f.beg);
  istringstream ist(first_line);
  int nwords_first_line = 0;
  while (ist >> word) {
    ++nwords_first_line;
  }

  bool new_format = nwords_first_line == 6;
  if (!new_format && nwords_first_line != 5) {
    LOG(WARNING) << "min_max file " << min_max_file_name
                 << " has an invalid format";
  }

  // Optionally dump quantization params to file.
  ofstream fout;
  if (!qparams_output_file_name.empty()) {
    fout.open(qparams_output_file_name);
    if (!fout) {
      LOG(WARNING) << this << ": can't open " << qparams_output_file_name;
    }
  }

  // Parse the input file. Entries are expected in exactly the same operator
  // and output order as subject->GetOperators(); this is checked only by
  // asserts, which are no-ops in release builds.
  int op_index = 0;
  for (auto* op : subject->GetOperators()) {
    for (int i = 0; i < op->OutputSize(); ++i) {
      int op_index2, i2;
      string op_type, tensor_name;
      float min, max;

      if (new_format) {
        f >> op_index2 >> op_type >> i2 >> tensor_name >> min >> max;
      } else {
        f >> op_index2 >> i2 >> tensor_name >> min >> max;
      }
      assert(op_index2 == op_index);
      assert(i2 == i);
      assert(tensor_name == op->debug_def().output(i));

      TensorQuantizationParams qparams;
      if (max > min) {
        unique_ptr<QuantizationFactory> qfactory(GetQuantizationFactoryOf(op));
        qparams = qfactory->ChooseQuantizationParams(min, max, is_weight);
      } else {
        // Degenerate range: fall back to a fixed scale so zero_point is
        // still well-defined.
        qparams.scale = 0.1f;
        qparams.zero_point = -min / qparams.scale;
        qparams.precision = 8;
      }

      if (HasDNNLowPEngine_(*op)) {
        SetStaticQuantizationParams(op, i, qparams);
      }

      if (fout.is_open()) {
        fout << op_index << " " << op_type << " " << i << " " << tensor_name
             << " " << qparams.Min() << " " << qparams.Max() << " "
             << qparams.scale << " " << qparams.zero_point << " "
             << qparams.precision << endl;
      }
    }
    ++op_index;
  }

  if (fout.is_open()) {
    fout.close();
  }
}
488 
// Same as RegisterQuantizationParamsNetObserver, but chooses quantization
// parameters from full histograms (as produced by HistogramNetObserver)
// rather than plain min/max pairs.
RegisterQuantizationParamsWithHistogramNetObserver::
    RegisterQuantizationParamsWithHistogramNetObserver(
        NetBase* subject,
        const string& histogram_file_name,
        bool is_weight,
        const string& qparams_output_file_name)
    : NetObserver(subject) {
  ifstream f(histogram_file_name);

  // Detect the format from the first line:
  // new format: op_index op_type output_index name min max nbins bins...
  // old format: the same without the op_type column.
  string first_line, word;
  getline(f, first_line);
  f.seekg(0, f.beg);
  istringstream ist(first_line);
  int nwords_first_line = 0;
  while (ist >> word) {
    ++nwords_first_line;
  }

  // Rewind the string stream to re-parse the first line structurally.
  ist.str(first_line);
  ist.clear();

  bool new_format = true;
  int op_index, i, nbins;
  string op_type, tensor_name;
  float min, max;
  ist >> op_index >> op_type >> i >> tensor_name >> min >> max >> nbins;
  if (nwords_first_line != nbins + 7) {
    // Word count doesn't match the new format; retry as the old format.
    ist.str(first_line);
    ist.clear();
    ist >> op_index >> i >> tensor_name >> min >> max >> nbins;
    if (nwords_first_line == nbins + 6) {
      new_format = false;
    } else {
      LOG(WARNING) << "histogram file " << histogram_file_name
                   << " has an invalid format";
      return;
    }
  }

  // Optionally dump quantization params to file.
  ofstream fout;
  if (!qparams_output_file_name.empty()) {
    fout.open(qparams_output_file_name);
    if (!fout) {
      LOG(WARNING) << this << ": can't open " << qparams_output_file_name;
    }
  }

  // Parse the input file; entries must follow the operator/output order of
  // subject->GetOperators(). Mismatches are only warned about.
  op_index = 0;
  for (auto* op : subject->GetOperators()) {
    for (i = 0; i < op->OutputSize(); ++i) {
      int op_index2, i2;

      if (new_format) {
        f >> op_index2 >> op_type >> i2 >> tensor_name >> min >> max >> nbins;
      } else {
        f >> op_index2 >> i2 >> tensor_name >> min >> max >> nbins;
      }
      LOG_IF(WARNING, op_index2 != op_index)
          << "op index " << op_index2 << " doesn't match with " << op_index;
      LOG_IF(WARNING, tensor_name != op->debug_def().output(i))
          << tensor_name << " in histogram file line " << op_index
          << " doesn't match with operation def " << op->debug_def().output(i);
      LOG_IF(WARNING, i2 != i)
          << "output tensor index " << i2 << " doesn't match with " << i;
      if (new_format) {
        LOG_IF(WARNING, op_type != op->debug_def().type())
            << "operator type " << op_type << " in histogram file line "
            << op_index << " doesn't match with operation def "
            << op->debug_def().type();
      }

      // Read the nbins bin counts that follow on the same line.
      vector<uint64_t> bins;
      for (int j = 0; j < nbins; ++j) {
        uint64_t cnt;
        f >> cnt;
        bins.push_back(cnt);
      }

      Histogram hist = Histogram(min, max, bins);

      TensorQuantizationParams qparams;
      if (max > min) {
        unique_ptr<QuantizationFactory> qfactory(GetQuantizationFactoryOf(op));
        qparams = qfactory->ChooseQuantizationParams(hist, is_weight);
      } else {
        // Degenerate range: fall back to a fixed scale so zero_point is
        // still well-defined.
        qparams.scale = 0.1f;
        qparams.zero_point = -min / qparams.scale;
        qparams.precision = 8;
      }

      if (HasDNNLowPEngine_(*op)) {
        SetStaticQuantizationParams(op, i, qparams);
      }

      if (fout.is_open()) {
        fout << op_index << " " << op_type << " " << i << " " << tensor_name
             << " " << qparams.Min() << " " << qparams.Max() << " "
             << qparams.scale << " " << qparams.zero_point << " "
             << qparams.precision << endl;
      }
    }
    ++op_index;
  }

  if (fout.is_open()) {
    fout.close();
  }
}
600 
601 } // namespace caffe2
// Doxygen index fragments (repaired and kept as reference notes):
// - Given min/max, collect histogram.
// - OutputMinMaxNetObserver(NetBase* subject, const std::string& out_file_name,
//       int dump_freq = -1)
//   dump_freq: print out only once, in the destructor, if -1.
// - An equi-width histogram where the spread of bins changes over time when
//   we see new min or max values.
// - A global dictionary that holds information about what Caffe2 modules have
//   been loaded in the current runtime.
//   Definition: blob.h:13
// - bin_width = (max - min) / nbins. The i-th bin (zero-based indexing)
//   contains [i * bin_width, (i + 1) * bin_width), with the exception that
//   the (nbins - 1)-th bin contains
//   [(nbins - 1) * bin_width, nbins * bin_width].