// Header prologue: the C10_MACROS_MACROS_H_ include guard, the
// cmake_macros.h include (skipped when C10_USING_CUSTOM_GENERATED_MACROS is
// defined) and the Export.h include, C10_DISABLE_COPY_AND_ASSIGN (deletes the
// copy constructor and copy assignment of `classname`), the token-pasting
// helpers CONCAT_IMPL / MACRO_CONCAT / MACRO_EXPAND, the C10_NODISCARD and
// C10_UNUSED attribute macros, and the declaration of the empty c10::cuda
// namespace.
// NOTE(review): the leading integers look like line numbers left over from a
// flattened listing, and several directives (e.g. the #endif lines closing
// the C10_NODISCARD conditionals) are not visible here. Because everything
// sits on one physical line, the first `//` also comments out the rest of
// it. Confirm against the real header before relying on this text.
1 #ifndef C10_MACROS_MACROS_H_ 2 #define C10_MACROS_MACROS_H_ 20 #ifndef C10_USING_CUSTOM_GENERATED_MACROS 21 #include "c10/macros/cmake_macros.h" 22 #endif // C10_USING_CUSTOM_GENERATED_MACROS 24 #include "c10/macros/Export.h" 28 #define C10_DISABLE_COPY_AND_ASSIGN(classname) \ 29 classname(const classname&) = delete; \ 30 classname& operator=(const classname&) = delete 32 #define CONCAT_IMPL(x, y) x##y 33 #define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y) 35 #define MACRO_EXPAND(args) args 39 #if __cplusplus > 201402L && defined(__has_cpp_attribute) 40 #if __has_cpp_attribute(nodiscard) 42 #define C10_NODISCARD [[nodiscard]] 46 #elif __cplusplus && defined(__has_cpp_attribute) 47 #if __has_cpp_attribute(clang::warn_unused_result) 49 #define C10_NODISCARD [[clang::warn_unused_result]] 57 #define C10_UNUSED __attribute__((__unused__)) 63 namespace c10 {
namespace cuda {} }
// Declare the (empty) c10::hip namespace up front so that using-directives
// naming it elsewhere in this header are well-formed even when nothing has
// been added to it yet.
namespace c10 {
namespace hip {}
} // namespace c10
71 namespace at {
using namespace c10; }
72 namespace at {
namespace cuda {
using namespace c10::cuda; }}
82 namespace at {
namespace cuda {
using namespace c10::hip; }}
// Compiler/platform helper macros followed by the per-SM thread limit:
//  - C10_NORETURN: given both a __declspec(noreturn) and an
//    __attribute__((noreturn)) spelling.
//  - C10_LIKELY / C10_UNLIKELY: wrap __builtin_expect when __GNUC__, __ICL or
//    __clang__ is defined; a second pair of definitions expands to the bare
//    expression.
//  - C10_HOST_DEVICE / C10_DEVICE / C10_HOST: __host__ / __device__
//    qualifiers when __CUDACC__ or __HIPCC__ is defined.
//  - CUDA_MAX_THREADS_PER_SM: 1024 under `__CUDA_ARCH__ >= 750`, followed by
//    a second definition of 2048.
// NOTE(review): the leading integers look like listing line numbers, and the
// #if/#else/#endif directives that would make the duplicated definitions
// mutually exclusive are not visible here -- confirm against the real header.
86 #define C10_NORETURN __declspec(noreturn) 88 #define C10_NORETURN __attribute__((noreturn)) 103 #if defined(__GNUC__) || defined(__ICL) || defined(__clang__) 104 #define C10_LIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 1)) 105 #define C10_UNLIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 0)) 107 #define C10_LIKELY(expr) (expr) 108 #define C10_UNLIKELY(expr) (expr) 114 #if defined(__CUDACC__) || defined(__HIPCC__) 116 #define C10_HOST_DEVICE __host__ __device__ 117 #define C10_DEVICE __device__ 118 #define C10_HOST __host__ 123 #if __CUDA_ARCH__ >= 750 124 constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1024;
126 constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 2048;
// Largest threads-per-block value that C10_MAX_THREADS_PER_BLOCK passes
// through unchanged.
constexpr uint32_t CUDA_MAX_THREADS_PER_BLOCK = 1024;
// Value C10_MAX_THREADS_PER_BLOCK substitutes when the requested
// threads-per-block count exceeds CUDA_MAX_THREADS_PER_BLOCK.
constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
// Launch-bound helpers and remaining platform macros:
//  - C10_MAX_THREADS_PER_BLOCK(val): `val` when it is
//    <= CUDA_MAX_THREADS_PER_BLOCK, otherwise CUDA_THREADS_PER_BLOCK_FALLBACK.
//  - C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm): `blocks_per_sm`
//    when threads_per_block * blocks_per_sm <= CUDA_MAX_THREADS_PER_SM,
//    otherwise CUDA_MAX_THREADS_PER_SM divided by threads_per_block, rounded up.
//  - C10_LAUNCH_BOUNDS_0 / _1 / _2: __launch_bounds__ with fixed (256, 4),
//    with a clamped thread count, or with clamped thread and block counts.
//  - C10_HOST_DEVICE (empty definition), C10_HIP_HOST_DEVICE (keyed on
//    __HIP_PLATFORM_HCC__), C10_ANDROID and the Apple target checks.
//  - C10_IS_TRIVIALLY_COPYABLE(T): __has_trivial_copy(T) under
//    `__GNUG__ && __GNUC__ < 5`, with a second definition using
//    std::is_trivially_copyable<T>::value.
//  - The #endif closing the C10_MACROS_MACROS_H_ include guard.
// NOTE(review): the leading integers look like listing line numbers and the
// #else/#endif lines between alternative definitions are not visible; the
// `//` after C10_LAUNCH_BOUNDS_0 also comments out the rest of this physical
// line. No <type_traits> include is visible for std::is_trivially_copyable.
// Confirm against the real header.
143 #define C10_MAX_THREADS_PER_BLOCK(val) (((val) <= CUDA_MAX_THREADS_PER_BLOCK) ? (val) : CUDA_THREADS_PER_BLOCK_FALLBACK) 144 #define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) ((((threads_per_block)*(blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) ? (blocks_per_sm) : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / (threads_per_block)))) 146 #define C10_LAUNCH_BOUNDS_0 __launch_bounds__(256, 4) // default launch bounds that should give good occupancy and versatility across all architectures. 147 #define C10_LAUNCH_BOUNDS_1(max_threads_per_block) __launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block)))) 148 #define C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm) __launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))), (C10_MIN_BLOCKS_PER_SM((max_threads_per_block), (min_blocks_per_sm)))) 150 #define C10_HOST_DEVICE 155 #ifdef __HIP_PLATFORM_HCC__ 156 #define C10_HIP_HOST_DEVICE __host__ __device__ 158 #define C10_HIP_HOST_DEVICE 161 #if defined(__ANDROID__) 162 #define C10_ANDROID 1 165 defined(__APPLE__) && \ 166 (TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE)) 169 #elif (defined(__APPLE__) && TARGET_OS_MAC) 171 #endif // ANDROID / IOS / MACOS 174 #if __GNUG__ && __GNUC__ < 5 175 #define C10_IS_TRIVIALLY_COPYABLE(T) __has_trivial_copy(T) 177 #define C10_IS_TRIVIALLY_COPYABLE(T) std::is_trivially_copyable<T>::value 180 #endif // C10_MACROS_MACROS_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
To register your own kernel for an operator, do in one (!) cpp file: C10_REGISTER_KERNEL(OperatorHand...
Flush-To-Zero and Denormals-Are-Zero mode.