Caffe2 - C++ API
A deep learning, cross-platform ML framework
elementwise_sum_benchmark.cc
#include <cerrno>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>

#include "utility_dnnlowp_ops.h"

using namespace std;
9 
10 int main(int argc, const char* argv[]) {
11  int LEN = argc > 1 ? atoi(argv[1]) : 65536;
12 
13  vector<uint8_t> a(LEN), b(LEN), c_avx2(LEN), c_avx512(LEN);
14  for (int i = 0; i < LEN; ++i) {
15  a[i] = i % 256;
16  b[i] = (i * 2) % 256;
17  }
18 
19  chrono::time_point<chrono::system_clock> t = chrono::system_clock::now();
20  caffe2::internal::ElementWiseSumAVX2<uint8_t, false>(
21  a.data(),
22  b.data(),
23  c_avx2.data(),
24  a.size(),
25  1.0f,
26  11,
27  2.0f,
28  22,
29  3.0f,
30  33);
31  double dt = chrono::duration<double>(chrono::system_clock::now() - t).count();
32  double bytes = 3. * LEN * sizeof(a[0]);
33  cout << bytes / dt / 1e9 << " GB/s" << endl;
34 
35  return 0;
36 }