1 #include <torch/csrc/jit/fuser/cpu/fused_kernel.h> 2 #include <c10/util/Exception.h> 3 #include <torch/csrc/jit/code_template.h> 4 #include <torch/csrc/jit/fuser/compiler.h> 5 #include <torch/csrc/jit/fuser/cpu/dynamic_library.h> 6 #include <torch/csrc/jit/fuser/cpu/temp_file.h> 7 #include <torch/csrc/utils/memory.h> 20 static const std::string so_template =
"/tmp/pytorch_fuserXXXXXX.so";
// so_template (above) and cpp_template (below) are path patterns under /tmp
// that differ only in their extension (.so vs .cpp).
// NOTE(review): the "XXXXXX" run looks like a mkstemp-style placeholder that a
// temp-file helper replaces with a unique suffix; the code consuming these
// patterns is not visible here -- confirm against temp_file.h.
21 static const std::string cpp_template =
"/tmp/pytorch_fuserXXXXXX.cpp";
// Shell command pattern used by programExists(): `${program}` is substituted
// inside single quotes and the standard output of `which` is redirected to
// /dev/null, so only the exit status matters.
22 static const std::string check_exists_string =
"which '${program}' > /dev/null";
// Reports whether `program` can be located by the shell's `which` command.
//
// Binds `program` to the "program" key of the template environment, expands
// check_exists_string with it, runs the resulting command through system(),
// and returns true exactly when system() returns 0.
//
// NOTE(review): `program` is placed between single quotes in a shell command
// and no escaping is visible in this function; a value containing a single
// quote would break out of the quoting. Whether format() escapes anything is
// not visible here -- confirm, especially if the value can come from the
// environment.
24 static bool programExists(
const std::string& program) {
// The declaration of `env` is on a line not visible in this excerpt.
26 env.s(
"program", program);
27 std::string cmd = format(check_exists_string, env);
28 return (system(cmd.c_str()) == 0);
// Fragment of a definition whose header is not visible in this excerpt.
// Reads the CXX environment variable; a non-null value is handled by the
// branch opened below (the branch body is on lines not visible here).
36 const char* cxx_env = getenv(
"CXX");
37 if (cxx_env !=
nullptr) {
// Uses programExists() to test whether the compiler named by `cxx` can be
// found; what happens when it cannot is on lines not visible here.
41 if (!programExists(cxx)) {
// `cxx` is initialized to "g++".
// NOTE(review): presumably this is the default compiler used when CXX is not
// set -- confirm against the full definition.
48 std::string cxx =
"g++";
// Command-line pattern expanded by runCompiler(). Only the final piece of the
// literal is visible in this excerpt; it requests C++11, position-independent
// code and a shared object, reads the quoted `${cpp_file}`, writes the quoted
// `${so_file}`, and links libm. `${fopenmp}` expands to "-fopenmp" or to an
// empty string depending on the compiler configuration (see runCompiler).
67 static const std::string compile_string =
72 "-std=c++11 -fPIC ${fopenmp} -shared \"${cpp_file}\" -o \"${so_file}\" -lm";
// Compiles the source file `cpp_file` into the shared object `so_file` by
// expanding compile_string and running it through system().
//
// Parameters:
//   cpp_file - path of the C++ source to compile.
//   so_file  - path of the shared object to produce.
//
// If the command fails while config.openmp is set, a warning is emitted,
// config.openmp is cleared, and the compilation is retried once more through
// a recursive call. A non-zero result is otherwise reported via AT_CHECK.
74 static void runCompiler(
75 const std::string& cpp_file,
76 const std::string& so_file) {
// Obtained by reference: the assignment to config.openmp further down writes
// into the object returned by getConfig(), not into a local copy.
77 auto& config = getConfig();
// The declaration of `env` is on a line not visible in this excerpt.
79 env.s(
"cxx", config.cxx);
// OpenMP flag is included only while config.openmp is true.
80 env.s(
"fopenmp", config.openmp ?
"-fopenmp" :
"");
81 env.s(
"cpp_file", cpp_file);
82 env.s(
"so_file", so_file);
83 std::string result = format(compile_string, env);
84 int r = system(result.c_str());
// Failure with OpenMP enabled: warn, disable OpenMP, and retry. The retry
// runs with config.openmp == false, so this branch cannot be taken again.
// The stream the warning is written to is on a line not visible here.
85 if (config.openmp && r != 0) {
87 <<
"warning: pytorch jit fuser failed to compile with openmp, trying without it...\n";
88 config.openmp =
false;
89 return runCompiler(cpp_file, so_file);
// NOTE(review): AT_CHECK presumably comes from c10/util/Exception.h and
// raises an error carrying this message when the condition is false -- confirm.
91 AT_CHECK(r == 0,
"Failed to compile a fused CPU kernel");
// Command pattern expanded by disas(): runs objdump with Intel syntax
// (-M intel) and disassembly (-d) on the quoted `${so_file}` path.
94 static const std::string disas_string =
"objdump -M intel -d \"${so_file}\"";
// Disassembles the shared object at `so_file` by expanding disas_string and
// running the resulting objdump command through system(). objdump's output is
// not captured by this code; it goes wherever the child process writes it.
95 static void disas(
const std::string& so_file) {
// The declaration of `env` is on a line not visible in this excerpt.
97 env.s(
"so_file", so_file);
98 std::string cmd = format(disas_string, env);
// The result of system() is stored in `r`; how `r` is used afterwards is on
// lines not visible here.
99 int r = system(cmd.c_str());
// Constructor: writes the kernel source to a file, compiles it into a shared
// object, loads that object, and looks up the kernel entry point by name.
//
// Several parameters, the head of the initializer that the std::move calls
// belong to, and the declarations of `cpp_file` / `so_file` are on lines not
// visible in this excerpt. The visible parameters are descriptor vectors
// taken by value and moved onward.
103 FusedKernelCPU::FusedKernelCPU(
106 std::vector<TensorDesc> input_desc,
107 std::vector<TensorDesc> output_desc,
108 std::vector<PartitionDesc> chunk_desc,
109 std::vector<PartitionDesc> concat_desc,
114 std::move(input_desc),
115 std::move(output_desc),
116 std::move(chunk_desc),
117 std::move(concat_desc),
// Write the source held in code_ to the .cpp file.
121 cpp_file.write(code_);
// Compile the source file into the shared-object file.
123 runCompiler(cpp_file.name(), so_file.name());
// At debug level 2 or higher, also disassemble the compiled object.
124 if (debugFuser() >= 2)
125 disas(so_file.name());
// Load the compiled shared object.
126 so_lib = make_unique<DynamicLibrary>(so_file.name().c_str());
// Look up the symbol named name_ in the loaded library and cast the result to
// a function pointer of type void (*)(uint32_t, void**). -Wpedantic is
// silenced around the cast; the variable receiving the pointer is on a line
// not visible here.
127 #pragma GCC diagnostic ignored "-Wpedantic" 129 reinterpret_cast<void (*)(uint32_t,
void**)
>(so_lib->sym(name_.c_str()));
// The `#pragma GCC diagnostic pop` that begins the next line restores the
// diagnostic state; the matching push is not visible in this excerpt.
//
// createFusionKernel: file-local factory returning a
// std::shared_ptr<FusedKernel> that owns a newly constructed FusedKernelCPU.
// The descriptor vectors are taken by value and moved into the constructor
// call. The leading parameters and the remaining arguments are on lines not
// visible here.
130 #pragma GCC diagnostic pop 133 static std::shared_ptr<FusedKernel> createFusionKernel(
137 std::vector<TensorDesc> input_desc,
138 std::vector<TensorDesc> output_desc,
139 std::vector<PartitionDesc> chunk_desc,
140 std::vector<PartitionDesc> concat_desc,
142 return std::make_shared<FusedKernelCPU>(
145 std::move(input_desc),
146 std::move(output_desc),
147 std::move(chunk_desc),
148 std::move(concat_desc),