1 #include "dynamic_histogram.h" 2 #include "dnnlowp_op.h" 16 void Histogram::Add(
float f, uint64_t cnt) {
17 int nbins = histogram_.size();
18 float bin_width = (max_ - min_) / nbins;
19 int bin = bin_width == 0
21 : std::min(static_cast<int>((f - min_) / bin_width), nbins - 1);
22 bin = std::max(0, bin);
24 histogram_[bin] += cnt;
27 void Histogram::Add(
const float* f,
int len) {
28 int nbins = histogram_.size();
29 float bin_width = (max_ - min_) / nbins;
31 if (bin_width > 0.0) {
32 assert(per_thread_histogram_.size() % nbins == 0);
36 int old_nthreads = per_thread_histogram_.size() / nbins + 1;
37 if (caffe2::dnnlowp_get_max_threads() < old_nthreads) {
41 per_thread_histogram_.resize(
42 (caffe2::dnnlowp_get_max_threads() - 1) * nbins);
48 int tid = caffe2::dnnlowp_get_thread_num();
50 uint64_t* my_histogram =
nullptr;
52 my_histogram = histogram_.data();
54 my_histogram = per_thread_histogram_.data() + (tid - 1) * nbins;
60 for (
auto i = 0; i < len; ++i) {
62 std::min(static_cast<int>((f[i] - min_) / bin_width), nbins - 1);
63 bin = std::max(0, bin);
73 int nbins = histogram_.size();
74 assert(per_thread_histogram_.size() % nbins == 0);
75 int nthreads = per_thread_histogram_.size() / nbins + 1;
78 for (
int bin = 0; bin < nbins; ++bin) {
79 for (
int i = 1; i < nthreads; ++i) {
80 histogram_[bin] += per_thread_histogram_[(i - 1) * nbins + bin];
85 per_thread_histogram_.clear();
88 static const int OVER_BINNING_FACTOR = 4;
90 DynamicHistogram::DynamicHistogram(
int nbins)
92 min_(numeric_limits<float>::max()),
93 max_(numeric_limits<float>::lowest()) {
97 void DynamicHistogram::Add(
float f) {
98 min_ = std::min(min_, f);
99 max_ = std::max(max_, f);
101 if (histograms_.empty()) {
102 histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, f, f);
104 Histogram& curr_hist = histograms_.back();
105 if (f < curr_hist.Min() || f > curr_hist.Max()) {
106 float old_spread = curr_hist.Max() - curr_hist.Min();
107 if (f < curr_hist.Min()) {
109 if (old_spread == 0) {
112 new_min = curr_hist.Min() -
113 ceil((curr_hist.Min() - f) / old_spread) * old_spread;
115 histograms_.emplace_back(
116 curr_hist.GetHistogram()->size(), new_min, curr_hist.Max());
119 if (old_spread == 0) {
122 new_max = curr_hist.Max() +
123 ceil((f - curr_hist.Max()) / old_spread) * old_spread;
125 histograms_.emplace_back(
126 curr_hist.GetHistogram()->size(), curr_hist.Min(), new_max);
131 Histogram& new_hist = histograms_.back();
135 void DynamicHistogram::Add(
const float* f,
int len) {
136 float minimum = min_, maximum = max_;
138 #pragma omp parallel for reduction(min : minimum) reduction(max : maximum) 140 for (
int i = 0; i < len; ++i) {
141 minimum = std::min(f[i], minimum);
142 maximum = std::max(f[i], maximum);
147 if (histograms_.empty()) {
148 histograms_.emplace_back(nbins_ * OVER_BINNING_FACTOR, min_, max_);
150 Histogram& curr_hist = histograms_.back();
151 if (min_ < curr_hist.Min() || max_ > curr_hist.Max()) {
152 float old_spread = curr_hist.Max() - curr_hist.Min();
153 float new_min = curr_hist.Min(), new_max = curr_hist.Max();
154 if (min_ < curr_hist.Min()) {
155 if (old_spread == 0.0f) {
158 new_min = curr_hist.Min() -
159 ceil((curr_hist.Min() - min_) / old_spread) * old_spread;
162 if (max_ > curr_hist.Max()) {
163 old_spread = curr_hist.Max() - new_min;
164 if (old_spread == 0.0f) {
167 new_max = curr_hist.Max() +
168 ceil((max_ - curr_hist.Max()) / old_spread) * old_spread;
171 histograms_.emplace_back(
172 curr_hist.GetHistogram()->size(), new_min, new_max);
176 Histogram& new_hist = histograms_.back();
177 new_hist.Add(f, len);
181 if (final_histogram_.get()) {
182 return final_histogram_.get();
185 final_histogram_.reset(
new Histogram(nbins_, min_, max_));
186 float dst_bin_width = (max_ - min_) / nbins_;
191 const std::vector<uint64_t>& bins = *hist.GetHistogram();
192 float src_bin_width = (hist.Max() - hist.Min()) / bins.size();
194 for (
int i = 0; i < bins.size(); ++i) {
198 float src_bin_begin = hist.Min() + src_bin_width * i;
199 float src_bin_end = src_bin_begin + src_bin_width;
203 dst_bin_width == 0 ? 0 : (src_bin_begin - min_) / dst_bin_width;
204 float dst_bin_begin = min_ + dst_bin_width * dst_bin;
205 float dst_bin_end = dst_bin_begin + dst_bin_width;
207 dst_bin_width == 0 ? 0 : (src_bin_end - min_) / dst_bin_width;
209 assert(dst_bin2 <= dst_bin + 2);
215 uint64_t dst_bin_cnt = (src_bin_width == 0 || dst_bin_width == 0)
218 static_cast<uint64_t>(rint(
219 (dst_bin_end - src_bin_begin) / src_bin_width * bins[i])),
222 final_histogram_->Add(dst_bin_begin + dst_bin_width / 2, dst_bin_cnt);
223 if (dst_bin_cnt < bins[i]) {
224 final_histogram_->Add(
225 dst_bin_end + dst_bin_width / 2, bins[i] - dst_bin_cnt);
230 return final_histogram_.get();
const Histogram * Finalize()
Indicate we're not dynamically adjusting histogram bins any more and return the current static histog...
bin_width = (max - min)/nbins ith bin (zero-based indexing) contains [i*bin_width, (i+1)*bin_width) with an exception that (nbins - 1)th bin contains [(nbins-1)*bin_width, nbins*bin_width]
void Finalize()
Reduce per-thread histogram.