Caffe2 - C++ API
A deep learning, cross platform ML framework
dynamic_histogram.cc
1 #include "dynamic_histogram.h"
2 #include "dnnlowp_op.h"
3 
4 #include <cassert>
5 #include <cmath>
6 #include <limits>
7 
8 #ifdef _OPENMP
9 #include <omp.h>
10 #endif
11 
12 namespace dnnlowp {
13 
14 using namespace std;
15 
16 void Histogram::Add(float f, uint64_t cnt) {
17  int nbins = histogram_.size();
18  float bin_width = (max_ - min_) / nbins;
19  int bin = bin_width == 0
20  ? 0
21  : std::min(static_cast<int>((f - min_) / bin_width), nbins - 1);
22  bin = std::max(0, bin);
23  assert(bin >= 0);
24  histogram_[bin] += cnt;
25 }
26 
27 void Histogram::Add(const float* f, int len) {
28  int nbins = histogram_.size();
29  float bin_width = (max_ - min_) / nbins;
30 
31  if (bin_width > 0.0) {
32  assert(per_thread_histogram_.size() % nbins == 0);
33 
34  // Check if dnnlowp_get_max_threads has been reduced, and if so reduce
35  // per-thread histogram and clear them.
36  int old_nthreads = per_thread_histogram_.size() / nbins + 1;
37  if (caffe2::dnnlowp_get_max_threads() < old_nthreads) {
38  Finalize();
39  }
40 
41  per_thread_histogram_.resize(
42  (caffe2::dnnlowp_get_max_threads() - 1) * nbins);
43 
44 #ifdef _OPENMP
45 #pragma omp parallel
46 #endif
47  {
48  int tid = caffe2::dnnlowp_get_thread_num();
49 
50  uint64_t* my_histogram = nullptr;
51  if (tid == 0) {
52  my_histogram = histogram_.data();
53  } else {
54  my_histogram = per_thread_histogram_.data() + (tid - 1) * nbins;
55  }
56 
57 #ifdef _OPENMP
58 #pragma omp for
59 #endif
60  for (auto i = 0; i < len; ++i) {
61  int bin =
62  std::min(static_cast<int>((f[i] - min_) / bin_width), nbins - 1);
63  bin = std::max(0, bin);
64  ++my_histogram[bin];
65  }
66  } // omp parallel
67  } else {
68  histogram_[0] += len;
69  }
70 }
71 
73  int nbins = histogram_.size();
74  assert(per_thread_histogram_.size() % nbins == 0);
75  int nthreads = per_thread_histogram_.size() / nbins + 1;
76 
77  if (nthreads > 1) {
78  for (int bin = 0; bin < nbins; ++bin) {
79  for (int i = 1; i < nthreads; ++i) {
80  histogram_[bin] += per_thread_histogram_[(i - 1) * nbins + bin];
81  }
82  }
83  }
84 
85  per_thread_histogram_.clear();
86 }
87 
88 static const int OVER_BINNING_FACTOR = 4;
89 
90 DynamicHistogram::DynamicHistogram(int nbins)
91  : nbins_(nbins),
92  min_(numeric_limits<float>::max()),
93  max_(numeric_limits<float>::lowest()) {
94  assert(nbins_ > 0);
95 }
96 
97 void DynamicHistogram::Add(float f) {
98  min_ = std::min(min_, f);
99  max_ = std::max(max_, f);
100 
101  if (histograms_.empty()) {
102  histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, f, f);
103  } else {
104  Histogram& curr_hist = histograms_.back();
105  if (f < curr_hist.Min() || f > curr_hist.Max()) {
106  float old_spread = curr_hist.Max() - curr_hist.Min();
107  if (f < curr_hist.Min()) {
108  float new_min;
109  if (old_spread == 0) {
110  new_min = f;
111  } else {
112  new_min = curr_hist.Min() -
113  ceil((curr_hist.Min() - f) / old_spread) * old_spread;
114  }
115  histograms_.emplace_back(
116  curr_hist.GetHistogram()->size(), new_min, curr_hist.Max());
117  } else {
118  float new_max;
119  if (old_spread == 0) {
120  new_max = f;
121  } else {
122  new_max = curr_hist.Max() +
123  ceil((f - curr_hist.Max()) / old_spread) * old_spread;
124  }
125  histograms_.emplace_back(
126  curr_hist.GetHistogram()->size(), curr_hist.Min(), new_max);
127  }
128  }
129  }
130 
131  Histogram& new_hist = histograms_.back();
132  new_hist.Add(f);
133 }
134 
135 void DynamicHistogram::Add(const float* f, int len) {
136  float minimum = min_, maximum = max_;
137 #ifdef _OPENMP
138 #pragma omp parallel for reduction(min : minimum) reduction(max : maximum)
139 #endif
140  for (int i = 0; i < len; ++i) {
141  minimum = std::min(f[i], minimum);
142  maximum = std::max(f[i], maximum);
143  }
144  min_ = minimum;
145  max_ = maximum;
146 
147  if (histograms_.empty()) {
148  histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, min_, max_);
149  } else {
150  Histogram& curr_hist = histograms_.back();
151  if (min_ < curr_hist.Min() || max_ > curr_hist.Max()) {
152  float old_spread = curr_hist.Max() - curr_hist.Min();
153  float new_min = curr_hist.Min(), new_max = curr_hist.Max();
154  if (min_ < curr_hist.Min()) {
155  if (old_spread == 0.0f) {
156  new_min = min_;
157  } else {
158  new_min = curr_hist.Min() -
159  ceil((curr_hist.Min() - min_) / old_spread) * old_spread;
160  }
161  }
162  if (max_ > curr_hist.Max()) {
163  old_spread = curr_hist.Max() - new_min;
164  if (old_spread == 0.0f) {
165  new_max = max_;
166  } else {
167  new_max = curr_hist.Max() +
168  ceil((max_ - curr_hist.Max()) / old_spread) * old_spread;
169  }
170  }
171  histograms_.emplace_back(
172  curr_hist.GetHistogram()->size(), new_min, new_max);
173  }
174  }
175 
176  Histogram& new_hist = histograms_.back();
177  new_hist.Add(f, len);
178 }
179 
181  if (final_histogram_.get()) {
182  return final_histogram_.get();
183  }
184 
185  final_histogram_.reset(new Histogram(nbins_, min_, max_));
186  float dst_bin_width = (max_ - min_) / nbins_;
187 
188  for (Histogram& hist : histograms_) {
189  hist.Finalize();
190 
191  const std::vector<uint64_t>& bins = *hist.GetHistogram();
192  float src_bin_width = (hist.Max() - hist.Min()) / bins.size();
193 
194  for (int i = 0; i < bins.size(); ++i) {
195  if (bins[i] == 0) {
196  continue;
197  }
198  float src_bin_begin = hist.Min() + src_bin_width * i;
199  float src_bin_end = src_bin_begin + src_bin_width;
200  // dst_bin corresponds to the beginning of the src_bin
201  // dst_bin2 corresponds to the end of the src_bin
202  int dst_bin =
203  dst_bin_width == 0 ? 0 : (src_bin_begin - min_) / dst_bin_width;
204  float dst_bin_begin = min_ + dst_bin_width * dst_bin;
205  float dst_bin_end = dst_bin_begin + dst_bin_width;
206  int dst_bin2 =
207  dst_bin_width == 0 ? 0 : (src_bin_end - min_) / dst_bin_width;
208  // 1 src_bin is mapped to at most 2 dst bin
209  assert(dst_bin2 <= dst_bin + 2);
210 
211  // dst_bin_cnt is the count from src_bin that should go to dst_bin
212  // The remainder should go to dst_bin2
213  // rint is the fastest way to round
214  // (https://stackoverflow.com/questions/485525/round-for-float-in-c/5849630)
215  uint64_t dst_bin_cnt = (src_bin_width == 0 || dst_bin_width == 0)
216  ? bins[i]
217  : std::min(
218  static_cast<uint64_t>(rint(
219  (dst_bin_end - src_bin_begin) / src_bin_width * bins[i])),
220  bins[i]);
221 
222  final_histogram_->Add(dst_bin_begin + dst_bin_width / 2, dst_bin_cnt);
223  if (dst_bin_cnt < bins[i]) {
224  final_histogram_->Add(
225  dst_bin_end + dst_bin_width / 2, bins[i] - dst_bin_cnt);
226  }
227  }
228  } // for each histogram with different scales
229 
230  return final_histogram_.get();
231 }
232 
233 } // namespace dnnlowp
const Histogram * Finalize()
Indicate we&#39;re not dynamically adjusting histogram bins any more and return the current static histog...
bin_width = (max - min)/nbins ith bin (zero-based indexing) contains [i*bin_width, (i+1)*bin_width) with an exception that (nbins - 1)th bin contains [(nbins-1)*bin_width, nbins*bin_width]
void Finalize()
Reduce per-thread histogram.