Caffe2 - C++ API
A deep learning, cross platform ML framework
simd.h
1 #ifndef TH_SIMD_INC
2 #define TH_SIMD_INC
3 
4 #include <stdint.h>
5 #include <stdlib.h>
6 #if defined(_MSC_VER)
7 #include <intrin.h>
8 #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
9 #include <cpuid.h>
10 #endif
11 
// CPUID feature-flag masks; bit positions are listed under CPUID in the
// Intel ISA reference.
#define CPUID_AVX2_BIT (1 << 5)  // EAX=0x7: bit 5 of EBX
#define CPUID_AVX_BIT  (1 << 28) // EAX=0x1: bit 28 of ECX
#define CPUID_SSE_BIT  (1 << 25) // EAX=0x1: bit 25 of EDX
16 
17 // Helper macros for initialization
// Expands to a brace initializer holding an untyped function pointer followed
// by a SIMD-extension mask (the field order of FunctionDescription).
//   NAME - implementation to register; converted to void *
//   EXT  - SIMD extension mask stored next to the pointer
// Both arguments are parenthesized so compound expressions expand as a unit.
#define FUNCTION_IMPL(NAME, EXT) \
  { (void *)(NAME), \
    (EXT) \
  }
22 
// Picks the implementation for OP. Walks THVector_(OP_DISPATCHTABLE) in order,
// storing each entry's function in THVector_(OP_DISPATCHPTR), and stops at the
// first entry whose supportedSimdExt shares a bit with hostSimdExts. When no
// entry matches, the pointer is left at the last table entry.
// `hostSimdExts` must be visible at the expansion site.
#define INIT_DISPATCH_PTR(OP) \
  do { \
    const size_t tableLen = \
        sizeof(THVector_(OP ## _DISPATCHTABLE)) / sizeof(FunctionDescription); \
    size_t slot = 0; \
    while (slot < tableLen) { \
      THVector_(OP ## _DISPATCHPTR) = \
          reinterpret_cast<decltype(THVector_(OP ## _DISPATCHPTR))>( \
              THVector_(OP ## _DISPATCHTABLE)[slot].function); \
      if (THVector_(OP ## _DISPATCHTABLE)[slot].supportedSimdExt & hostSimdExts) { \
        break; \
      } \
      ++slot; \
    } \
  } while (0)
33 
34 
// One dispatch-table entry: an implementation plus the SIMD extension mask
// that INIT_DISPATCH_PTR tests against the host's extensions.
typedef struct FunctionDescription
{
  void *function;            // implementation, stored untyped
  uint32_t supportedSimdExt; // SIMD extension mask for this implementation
} FunctionDescription;
40 
41 
// Bit flags naming the SIMD extensions for the architecture being compiled.
// Only one architecture's flags are declared in any given build.
enum SIMDExtensions
{
  SIMDExtension_DEFAULT = 0x0, // no SIMD extension
#if defined(__NEON__)
  SIMDExtension_NEON = 0x1
#elif defined(__PPC64__)
  SIMDExtension_VSX = 0x1
#else
  SIMDExtension_AVX2 = 0x1,
  SIMDExtension_AVX = 0x2,
  SIMDExtension_SSE = 0x4
#endif
};
55 
56 
57 #if defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
58 
59  #if defined(__NEON__)
60 
61 static inline uint32_t detectHostSIMDExtensions()
62 {
63  return SIMDExtension_NEON;
64 }
65 
66  #else //ARM without NEON
67 
68 static inline uint32_t detectHostSIMDExtensions()
69 {
70  return SIMDExtension_DEFAULT;
71 }
72 
73  #endif
74 
75 #elif defined(__PPC64__)
76 
77  #if defined(__VSX__)
78 
79 static inline uint32_t detectHostSIMDExtensions()
80 {
81  uint32_t hostSimdExts = SIMDExtension_DEFAULT;
82  char *evar;
83 
84  evar = getenv("TH_NO_VSX");
85  if (evar == NULL || strncmp(evar, "1", 1) != 0)
86  hostSimdExts = SIMDExtension_VSX;
87  return hostSimdExts;
88 }
89 
90  #else //PPC64 without VSX
91 
92 static inline uint32_t detectHostSIMDExtensions()
93 {
94  return SIMDExtension_DEFAULT;
95 }
96 
97  #endif
98 
99 #elif defined(__EMSCRIPTEN__)
100 
101 static inline uint32_t detectHostSIMDExtensions()
102 {
103  return SIMDExtension_DEFAULT;
104 }
105 
106 #else // x86
// Executes CPUID for the leaf in *eax and the sub-leaf in *ecx, then
// overwrites all four arguments with the resulting EAX/EBX/ECX/EDX values.
// Callers must initialize *eax and *ecx; *ebx and *edx are output-only.
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
#if defined(_MSC_VER)
  int cpuInfo[4];
  // __cpuidex forwards the requested sub-leaf; plain __cpuid cannot take one.
  __cpuidex(cpuInfo, (int)*eax, (int)*ecx);
  *eax = (uint32_t)cpuInfo[0];
  *ebx = (uint32_t)cpuInfo[1];
  *ecx = (uint32_t)cpuInfo[2];
  *edx = (uint32_t)cpuInfo[3];
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  const uint32_t level = *eax;
  const uint32_t subleaf = *ecx;
  unsigned int a = 0, b = 0, c = 0, d = 0;
  // Highest supported leaf in the same range (basic or extended) as `level`;
  // 0 means CPUID itself is unavailable.
  const unsigned int maxLevel = __get_cpuid_max(level & 0x80000000u, NULL);

  // When the leaf is unsupported the outputs are reported as zero instead of
  // being left at whatever the caller's variables happened to hold.
  if (maxLevel != 0 && maxLevel >= level) {
    __cpuid_count(level, subleaf, a, b, c, d);
  }
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#else
  uint32_t a = *eax, b, c = *ecx, d;
  // __asm__/__volatile__ spellings stay available under strict -std=c11,
  // where the plain `asm` keyword is disabled.
  __asm__ __volatile__ ( "cpuid\n\t"
                         : "+a"(a), "=b"(b), "+c"(c), "=d"(d) );
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
#endif
}
129 
130 static inline uint32_t detectHostSIMDExtensions()
131 {
132  uint32_t eax, ebx, ecx, edx;
133  uint32_t hostSimdExts = 0x0;
134  int TH_NO_AVX = 1, TH_NO_AVX2 = 1, TH_NO_SSE = 1;
135  char *evar;
136 
137  evar = getenv("TH_NO_AVX2");
138  if (evar == NULL || strncmp(evar, "1", 1) != 0)
139  TH_NO_AVX2 = 0;
140 
141  // Check for AVX2. Requires separate CPUID
142  eax = 0x7;
143  ecx = 0x0;
144  cpuid(&eax, &ebx, &ecx, &edx);
145  if ((ebx & CPUID_AVX2_BIT) && TH_NO_AVX2 == 0) {
146  hostSimdExts |= SIMDExtension_AVX2;
147  }
148 
149  // Detect and enable AVX and SSE
150  eax = 0x1;
151  cpuid(&eax, &ebx, &ecx, &edx);
152 
153  evar = getenv("TH_NO_AVX");
154  if (evar == NULL || strncmp(evar, "1", 1) != 0)
155  TH_NO_AVX = 0;
156  if (ecx & CPUID_AVX_BIT && TH_NO_AVX == 0) {
157  hostSimdExts |= SIMDExtension_AVX;
158  }
159 
160  evar = getenv("TH_NO_SSE");
161  if (evar == NULL || strncmp(evar, "1", 1) != 0)
162  TH_NO_SSE = 0;
163  if (edx & CPUID_SSE_BIT && TH_NO_SSE == 0) {
164  hostSimdExts |= SIMDExtension_SSE;
165  }
166 
167  return hostSimdExts;
168 }
169 
170 #endif // end SIMD extension detection code
171 
172 #endif