Caffe2 - C++ API
A deep learning, cross platform ML framework
Related Pages
Modules
Data Structures
Files
C++ API
Python API
GitHub
File List
Globals
caffe2
perfkernels
common.h
1
// !!!! PLEASE READ !!!!
2
// Minimize (transitively) included headers from _avx*.cc because some of the
3
// functions defined in the headers compiled with platform dependent compiler
4
// options can be reused by other translation units, generating illegal
5
// instruction run-time errors.
6
7
// Common utilities for writing performance kernels and easy dispatching of
8
// different backends.
9
/*
10
The general workflow is as follows. Say we want to
11
implement a function called void foo(int a, float b).
12
13
In foo.h, do:
14
void foo(int a, float b);
15
16
In foo_avx512.cc, do:
17
void foo__avx512(int a, float b) {
18
[actual avx512 implementation]
19
}
20
21
In foo_avx2.cc, do:
22
void foo__avx2(int a, float b) {
23
[actual avx2 implementation]
24
}
25
26
In foo_avx.cc, do:
27
void foo__avx(int a, float b) {
28
[actual avx implementation]
29
}
30
31
In foo.cc, do:
32
// The base implementation should *always* be provided.
33
void foo__base(int a, float b) {
34
[base, possibly slow implementation]
35
}
36
decltype(foo__base) foo__avx512;
37
decltype(foo__base) foo__avx2;
38
decltype(foo__base) foo__avx;
39
void foo(int a, float b) {
40
// You should always order things by their preference, faster
41
// implementations earlier in the function.
42
AVX512_DO(foo, a, b);
43
AVX2_DO(foo, a, b);
44
AVX_DO(foo, a, b);
45
BASE_DO(foo, a, b);
46
}
47
48
*/
49
// Details: this functionality basically covers the cases for both build time
50
// and run time architecture support.
51
//
52
// During build time:
53
// The build system should provide flags CAFFE2_PERF_WITH_AVX512,
54
// CAFFE2_PERF_WITH_AVX2, and CAFFE2_PERF_WITH_AVX that correspond to the
55
// __AVX512F__, __AVX512DQ__, __AVX512VL__, __AVX2__, and __AVX__ flags the
56
// compiler provides. Note that we do not use the compiler flags but rely on
57
// the build system flags, because the common files (like foo.cc above) will
58
// always be built without __AVX512F__, __AVX512DQ__, __AVX512VL__, __AVX2__
59
// and __AVX__.
60
// During run time:
61
// We use cpuid to identify CPU support and run the proper functions.
62
63
#pragma once
64
65
#include "caffe2/utils/cpuid.h"
66
67
// DO macros: these should be used in your entry function, similar to foo()
68
// above, that routes implementations based on CPU capability.
69
70
// BASE_DO(fn, args...): unconditional fallback. Expands to a complete
// statement (the trailing ';' is part of the macro) that returns the result
// of fn__base(args...) from the enclosing function.
#define BASE_DO(fn, ...) return fn##__base(__VA_ARGS__);
71
72
// AVX512_DO(funcname, args...)
//
// When the build defines CAFFE2_PERF_WITH_AVX512, this expands to a run-time
// check that GetCpuId() reports avx512f, avx512dq and avx512vl; if all three
// hold, it returns funcname__avx512(args...) from the enclosing function.
// When the flag is not defined it expands to nothing, so control simply
// continues to the next *_DO line of the dispatching function.
//
// Note: the enabled form is a bare `if (...) { return ...; }` statement, so
// use it only as a standalone statement (not as the unbraced body of another
// `if`/`else`).
#ifdef CAFFE2_PERF_WITH_AVX512
#define AVX512_DO(funcname, ...)                          \
  if (GetCpuId().avx512f() && GetCpuId().avx512dq() &&    \
      GetCpuId().avx512vl()) {                            \
    return funcname##__avx512(__VA_ARGS__);               \
  }
#else // CAFFE2_PERF_WITH_AVX512
#define AVX512_DO(funcname, ...)
#endif // CAFFE2_PERF_WITH_AVX512
81
82
// AVX2_DO(funcname, args...) and AVX2_FMA_DO(funcname, args...)
//
// When the build defines CAFFE2_PERF_WITH_AVX2:
//   - AVX2_DO returns funcname__avx2(args...) from the enclosing function if
//     GetCpuId() reports avx2 at run time.
//   - AVX2_FMA_DO returns funcname__avx2_fma(args...) if GetCpuId() reports
//     both avx2 and fma.
// When the flag is not defined both macros expand to nothing, so control
// continues to the next *_DO line of the dispatching function.
//
// Note: the enabled forms are bare `if (...) { return ...; }` statements, so
// use them only as standalone statements.
#ifdef CAFFE2_PERF_WITH_AVX2
#define AVX2_DO(funcname, ...)                 \
  if (GetCpuId().avx2()) {                     \
    return funcname##__avx2(__VA_ARGS__);      \
  }
#define AVX2_FMA_DO(funcname, ...)             \
  if (GetCpuId().avx2() && GetCpuId().fma()) { \
    return funcname##__avx2_fma(__VA_ARGS__);  \
  }
#else // CAFFE2_PERF_WITH_AVX2
#define AVX2_DO(funcname, ...)
#define AVX2_FMA_DO(funcname, ...)
#endif // CAFFE2_PERF_WITH_AVX2
95
96
// AVX_DO(funcname, args...) and AVX_F16C_DO(funcname, args...)
//
// When the build defines CAFFE2_PERF_WITH_AVX:
//   - AVX_DO returns funcname__avx(args...) from the enclosing function if
//     GetCpuId() reports avx at run time.
//   - AVX_F16C_DO returns funcname__avx_f16c(args...) if GetCpuId() reports
//     both avx and f16c.
// When the flag is not defined both macros expand to nothing, so control
// continues to the next *_DO line of the dispatching function.
//
// Note: the enabled forms are bare `if (...) { return ...; }` statements, so
// use them only as standalone statements.
#ifdef CAFFE2_PERF_WITH_AVX
#define AVX_DO(funcname, ...)                  \
  if (GetCpuId().avx()) {                      \
    return funcname##__avx(__VA_ARGS__);       \
  }
#define AVX_F16C_DO(funcname, ...)             \
  if (GetCpuId().avx() && GetCpuId().f16c()) { \
    return funcname##__avx_f16c(__VA_ARGS__);  \
  }
#else // CAFFE2_PERF_WITH_AVX
#define AVX_DO(funcname, ...)
#define AVX_F16C_DO(funcname, ...)
#endif // CAFFE2_PERF_WITH_AVX
Generated on Thu Mar 21 2019 13:06:10 for Caffe2 - C++ API by
1.8.11