1 #ifndef CAFFE2_CUDA_RTC_COMMON_RTC_H_ 2 #define CAFFE2_CUDA_RTC_COMMON_RTC_H_ 10 #define NVRTC_CHECK(condition) \ 12 nvrtcResult result = condition; \ 13 if (result != NVRTC_SUCCESS) { \ 14 LOG(FATAL) << "Error at: " << __FILE__ << ":" << __LINE__ << ": " \ 15 << nvrtcGetErrorString(result); \ 21 template <
typename Derived>
27 CUDA_DRIVERAPI_ENFORCE(cuModuleUnload(module_));
// Compiles the source text supplied by Derived with NVRTC, then loads the
// resulting PTX as a driver-API module and looks up the kernel function.
//
// The source and the kernel name are obtained by casting this to Derived* and
// calling GetSource(args...) and KernelName(args...); args is taken by value.
// Failures in the NVRTC calls are reported through NVRTC_CHECK, and failures
// in the driver-API calls through CUDA_DRIVERAPI_ENFORCE.
// NOTE(review): this listing has elided lines, so the declarations of prog,
// log_size and ptx_size, the remaining compile options and the arguments of
// nvrtcCompileProgram are not visible here.
34 template <
typename... Args>
35 void Compile(Args... args) {
36 string src =
static_cast<Derived*
>(
this)->GetSource(args...);
37 string name =
static_cast<Derived*
>(
this)->KernelName(args...);
// Log the kernel name and the full source at verbosity level 1.
38 VLOG(1) <<
"function name: " << name;
39 VLOG(1) <<
"function src:\n" << src;
// Create the NVRTC program from the in-memory source string. The trailing
// nullptr, 0, nullptr, nullptr arguments mean no program name and no headers.
42 NVRTC_CHECK(nvrtcCreateProgram(
43 &prog, src.c_str(),
nullptr, 0,
nullptr,
nullptr));
// NOTE(review): the target architecture is hard-coded to compute_35 and may
// not match the device in use - confirm whether it should be queried instead.
47 const char *nvrtc_opts[] = {
"--gpu-architecture=compute_35",
49 nvrtcResult compile_result = nvrtcCompileProgram(
// The compile result is checked by hand rather than via NVRTC_CHECK so that
// the NVRTC program log can be fetched before LOG(FATAL) is raised.
51 if (compile_result != NVRTC_SUCCESS) {
53 NVRTC_CHECK(nvrtcGetProgramLogSize(prog, &log_size));
54 vector<char> nvrtc_log(log_size);
55 NVRTC_CHECK(nvrtcGetProgramLog(prog, nvrtc_log.data()));
56 LOG(FATAL) <<
"Compilation failure for nvrtc(" 57 << nvrtcGetErrorString(compile_result) <<
"): \n" 61 NVRTC_CHECK(nvrtcGetPTXSize(prog, &ptx_size));
// Copy the generated PTX into a host buffer, then destroy the NVRTC program.
62 vector<char> nvrtc_ptx(ptx_size);
63 NVRTC_CHECK(nvrtcGetPTX(prog, nvrtc_ptx.data()));
64 NVRTC_CHECK(nvrtcDestroyProgram(&prog));
// NOTE(review): presumably this unload is guarded by module_loaded_ on an
// elided line, so that a previously loaded module is released first - confirm.
67 CUDA_DRIVERAPI_ENFORCE(cuModuleUnload(module_));
// Load the PTX image as a module (no JIT options are passed), record that a
// module is now loaded, and resolve the kernel handle by name.
69 CUDA_DRIVERAPI_ENFORCE(
70 cuModuleLoadDataEx(&module_, nvrtc_ptx.data(), 0, 0, 0));
71 module_loaded_ =
true;
72 CUDA_DRIVERAPI_ENFORCE(
73 cuModuleGetFunction(&kernel_, module_, name.c_str()));
// Launches kernel_ through cuLaunchKernel with grid dimensions (gx, gy, gz),
// block dimensions (bx, by, bz), shared_mem as the shared-memory byte count
// and stream as the stream to launch on.
//
// The kernel arguments come from the args parameter pack: the address of each
// argument is stored in a void* array that is passed as the kernel parameter
// list. The final cuLaunchKernel argument (extra) is 0.
// Requires module_loaded_ to be true; otherwise the precondition check fails
// with the message shown below.
// NOTE(review): the declaration of args and the name of the precondition
// macro are on elided lines and are not visible in this listing.
76 template <
typename... Args>
77 void Launch(
unsigned int gx,
unsigned int gy,
unsigned int gz,
78 unsigned int bx,
unsigned int by,
unsigned int bz,
79 unsigned int shared_mem, cudaStream_t stream,
82 module_loaded_,
"Cannot call Launch before a module is loaded.");
// cuLaunchKernel takes an array of pointers to the argument values.
// NOTE(review): with an empty args pack this initializer list is empty, which
// is not valid for an array of unknown bound - confirm that every caller
// passes at least one kernel argument.
83 void * args_voidp[] = {&args...};
84 CUDA_DRIVERAPI_ENFORCE(cuLaunchKernel(
85 kernel_, gx, gy, gz, bx, by, bz, shared_mem, stream, args_voidp, 0));
// Variant of Launch that hands the kernel arguments to cuLaunchKernel through
// its last parameter (extra) and passes nullptr for the parameter-pointer
// array.
//
// Grid dimensions are (gx, gy, gz), block dimensions are (bx, by, bz),
// shared_mem is the shared-memory byte count and stream is the launch stream.
// Requires module_loaded_ to be true.
// NOTE(review): the declaration of extra and the name of the precondition
// macro are on elided lines. The precondition message says Launch rather than
// LaunchEx - confirm whether that wording is intentional.
88 void LaunchEx(
unsigned int gx,
unsigned int gy,
unsigned int gz,
89 unsigned int bx,
unsigned int by,
unsigned int bz,
90 unsigned int shared_mem, cudaStream_t stream,
93 module_loaded_,
"Cannot call Launch before a module is loaded.");
94 CUDA_DRIVERAPI_ENFORCE(cuLaunchKernel(
95 kernel_, gx, gy, gz, bx, by, bz, shared_mem, stream,
nullptr, extra));
// Builds a kernel name in a stringstream: the fixed prefix _cuda_kernel_
// followed by len (20) letters, each picked from alpha with rand().
// NOTE(review): the end of the loop and the return statement are on elided
// lines; presumably the accumulated string is returned - confirm.
// NOTE(review): rand() is not seeded in the visible code and the C standard
// does not guarantee it is thread-safe, so uniqueness is only probabilistic -
// confirm this is acceptable for the callers.
105 inline std::string GetUniqueName() {
106 static constexpr
int len = 20;
// Candidate characters: lower-case and upper-case ASCII letters only.
107 static const char alpha[] =
108 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
110 std::stringstream ss;
111 ss <<
"_cuda_kernel_";
112 for (
int i = 0; i < len; ++i) {
// sizeof(alpha) - 1 leaves out the terminating null character of the literal.
113 ss << alpha[rand() % (
sizeof(alpha) - 1)];
120 #endif // CAFFE2_CUDA_RTC_COMMON_RTC_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...