// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// common.h

// Common utilities for writing performance kernels and for easy dispatching
// to different backends.
/*
The general workflow is as follows. Say we want to implement a function
called void foo(int a, float b).

In foo.h, do:
  void foo(int a, float b);

In foo_avx2.cc, do:
  void foo__avx2(int a, float b) {
    [actual avx2 implementation]
  }

In foo_avx.cc, do:
  void foo__avx(int a, float b) {
    [actual avx implementation]
  }

In foo.cc, do:
  // The base implementation should *always* be provided.
  void foo__base(int a, float b) {
    [base, possibly slow implementation]
  }
  void foo(int a, float b) {
    // You should always order things by their preference, faster
    // implementations earlier in the function.
    AVX2_DO(foo, a, b);
    AVX_DO(foo, a, b);
    BASE_DO(foo, a, b);
  }

*/
// Details: this functionality covers both build-time and run-time
// architecture support.
//
// During build time:
//   The build system should provide the flags CAFFE2_PERF_WITH_AVX and
//   CAFFE2_PERF_WITH_AVX2, which correspond to the __AVX__ and __AVX2__
//   flags the compiler provides. Note that we do not use the compiler flags
//   but rely on the build system flags, because the common files (like
//   foo.cc above) will always be built without __AVX__ and __AVX2__.
// During run time:
//   We use cpuid to identify CPU support and run the proper functions.
61 
#pragma once

// DO macros: these should be used in your entry function, similar to foo()
// above, that routes implementations based on CPU capability.

// BASE_DO(funcname, args...) expands to
//   return funcname##__base(args...);
// i.e. it unconditionally returns the result of the base implementation,
// forwarding every argument after funcname. Because it always returns, it
// belongs after any other *_DO line in the entry function.
// The expansion already ends with ';', so writing `BASE_DO(foo, a, b);`
// merely adds an empty statement.
#define BASE_DO(funcname, ...) return funcname##__base(__VA_ARGS__);
68 
// AVX2 dispatch macros. They are active only when the build system defines
// CAFFE2_PERF_WITH_AVX2; otherwise they expand to nothing, so the entry
// function simply continues with its next statement.
//
// Each active macro first declares the specialized function with the same
// type as funcname##__base (so funcname##__base must already be declared, and
// decltype requires C++11 or later), then returns the specialized function's
// result if the runtime CPU check passes.
// NOTE(review): GetCpuId() is not declared in the visible part of this
// header; it has to be in scope wherever these macros are expanded.
#ifdef CAFFE2_PERF_WITH_AVX2
// Returns funcname##__avx2(args...) when GetCpuId().avx2() is true.
#define AVX2_DO(funcname, ...)                 \
  decltype(funcname##__base) funcname##__avx2; \
  if (GetCpuId().avx2()) {                     \
    return funcname##__avx2(__VA_ARGS__);      \
  }
// Returns funcname##__avx2_fma(args...) when both GetCpuId().avx2() and
// GetCpuId().fma() are true.
#define AVX2_FMA_DO(funcname, ...)                 \
  decltype(funcname##__base) funcname##__avx2_fma; \
  if (GetCpuId().avx2() && GetCpuId().fma()) {     \
    return funcname##__avx2_fma(__VA_ARGS__);      \
  }
#else // CAFFE2_PERF_WITH_AVX2
// Build without AVX2 kernels: both macros are no-ops.
#define AVX2_DO(funcname, ...)
#define AVX2_FMA_DO(funcname, ...)
#endif // CAFFE2_PERF_WITH_AVX2
84 
// AVX dispatch macros. They are active only when the build system defines
// CAFFE2_PERF_WITH_AVX; otherwise they expand to nothing, so the entry
// function simply continues with its next statement.
//
// Each active macro first declares the specialized function with the same
// type as funcname##__base (so funcname##__base must already be declared, and
// decltype requires C++11 or later), then returns the specialized function's
// result if the runtime CPU check passes.
// NOTE(review): GetCpuId() is not declared in the visible part of this
// header; it has to be in scope wherever these macros are expanded.
#ifdef CAFFE2_PERF_WITH_AVX
// Returns funcname##__avx(args...) when GetCpuId().avx() is true.
#define AVX_DO(funcname, ...)                 \
  decltype(funcname##__base) funcname##__avx; \
  if (GetCpuId().avx()) {                     \
    return funcname##__avx(__VA_ARGS__);      \
  }
// Returns funcname##__avx_f16c(args...) when both GetCpuId().avx() and
// GetCpuId().f16c() are true.
#define AVX_F16C_DO(funcname, ...)                 \
  decltype(funcname##__base) funcname##__avx_f16c; \
  if (GetCpuId().avx() && GetCpuId().f16c()) {     \
    return funcname##__avx_f16c(__VA_ARGS__);      \
  }
#else // CAFFE2_PERF_WITH_AVX
// Build without AVX kernels: both macros are no-ops.
#define AVX_DO(funcname, ...)
#define AVX_F16C_DO(funcname, ...)
#endif // CAFFE2_PERF_WITH_AVX