// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// fused_kernel.cpp
1 #include <torch/csrc/jit/fuser/cpu/fused_kernel.h>
2 #include <c10/util/Exception.h>
3 #include <torch/csrc/jit/code_template.h>
4 #include <torch/csrc/jit/fuser/compiler.h>
5 #include <torch/csrc/jit/fuser/cpu/dynamic_library.h>
6 #include <torch/csrc/jit/fuser/cpu/temp_file.h>
7 #include <torch/csrc/utils/memory.h>
8 
9 #include <cstdlib>
10 #include <iostream>
11 #include <sstream>
12 #include <stdexcept>
13 #include <string>
14 
15 namespace torch {
16 namespace jit {
17 namespace fuser {
18 namespace cpu {
19 
// mkstemps-style templates for the temporary files holding the generated
// kernel source and the compiled shared object (the trailing XXXXXX is
// replaced with a unique suffix; the fixed extension follows it).
static const std::string so_template = "/tmp/pytorch_fuserXXXXXX.so";
static const std::string cpp_template = "/tmp/pytorch_fuserXXXXXX.cpp";
// Shell command template used to probe whether an executable is on PATH;
// output is discarded, only the exit status matters.
static const std::string check_exists_string = "which '${program}' > /dev/null";
23 
24 static bool programExists(const std::string& program) {
25  TemplateEnv env;
26  env.s("program", program);
27  std::string cmd = format(check_exists_string, env);
28  return (system(cmd.c_str()) == 0);
29 }
30 
31 // A single compiler config is accessed through getConfig() (below)
32 // Controls compilation options and may be updated based on the result
33 // of compilation attempts.
35  CompilerConfig() {
36  const char* cxx_env = getenv("CXX");
37  if (cxx_env != nullptr) {
38  cxx = cxx_env;
39  }
40 
41  if (!programExists(cxx)) {
42  cxx = "";
43  }
44  }
45 
46  ~CompilerConfig() = default;
47 
48  std::string cxx = "g++"; // compiler location
49  bool openmp = true;
50 };
51 
52 static CompilerConfig& getConfig() {
53  static CompilerConfig config;
54  return config;
55 }
56 
// NB: -march=native not supported on PPC64 g++. It's a bit annoying
// to do a configure-style test to decide whether or not the g++
// actually supports it or not, so we heuristically use the host
// compiler to predict if the runtime compiler supports the option we
// want. This probably won't work if you're cross-compiling.
// NB: -march=native is disabled because it has caused problems where
// compiler and assembler do not agree on what native instruction they
// understand for AVX512. When we need better CPU performance this
// optimization can be re-enabled by tracking down the platforms where
// this error occurs and only selectively disabling it.
// Template for the compile command; ${cxx}/${fopenmp}/${cpp_file}/${so_file}
// are substituted by runCompiler() below.
static const std::string compile_string =
    "\"${cxx}\" -O3 -g "
#ifndef __PPC64__
//  "-march=native "
#endif
    "-std=c++11 -fPIC ${fopenmp} -shared \"${cpp_file}\" -o \"${so_file}\" -lm";
73 
74 static void runCompiler(
75  const std::string& cpp_file,
76  const std::string& so_file) {
77  auto& config = getConfig();
78  TemplateEnv env;
79  env.s("cxx", config.cxx);
80  env.s("fopenmp", config.openmp ? "-fopenmp" : "");
81  env.s("cpp_file", cpp_file);
82  env.s("so_file", so_file);
83  std::string result = format(compile_string, env);
84  int r = system(result.c_str());
85  if (config.openmp && r != 0) {
86  std::cerr
87  << "warning: pytorch jit fuser failed to compile with openmp, trying without it...\n";
88  config.openmp = false; // disable for future compiles
89  return runCompiler(cpp_file, so_file);
90  }
91  AT_CHECK(r == 0, "Failed to compile a fused CPU kernel");
92 }
93 
94 static const std::string disas_string = "objdump -M intel -d \"${so_file}\"";
95 static void disas(const std::string& so_file) {
96  TemplateEnv env;
97  env.s("so_file", so_file);
98  std::string cmd = format(disas_string, env);
99  int r = system(cmd.c_str());
100  AT_ASSERT(r == 0);
101 }
102 
103 FusedKernelCPU::FusedKernelCPU(
104  std::string name,
105  std::string code,
106  std::vector<TensorDesc> input_desc,
107  std::vector<TensorDesc> output_desc,
108  std::vector<PartitionDesc> chunk_desc,
109  std::vector<PartitionDesc> concat_desc,
110  bool has_random)
111  : FusedKernel(
112  std::move(name),
113  std::move(code),
114  std::move(input_desc),
115  std::move(output_desc),
116  std::move(chunk_desc),
117  std::move(concat_desc),
118  has_random) {
119  TempFile so_file(so_template, 3);
120  TempFile cpp_file(cpp_template, 4);
121  cpp_file.write(code_);
122  cpp_file.sync();
123  runCompiler(cpp_file.name(), so_file.name());
124  if (debugFuser() >= 2)
125  disas(so_file.name());
126  so_lib = make_unique<DynamicLibrary>(so_file.name().c_str());
127 #pragma GCC diagnostic ignored "-Wpedantic"
128  kernel =
129  reinterpret_cast<void (*)(uint32_t, void**)>(so_lib->sym(name_.c_str()));
130 #pragma GCC diagnostic pop
131 }
132 
133 static std::shared_ptr<FusedKernel> createFusionKernel(
134  int16_t device,
135  std::string name,
136  std::string code,
137  std::vector<TensorDesc> input_desc,
138  std::vector<TensorDesc> output_desc,
139  std::vector<PartitionDesc> chunk_desc,
140  std::vector<PartitionDesc> concat_desc,
141  bool has_random) {
142  return std::make_shared<FusedKernelCPU>(
143  std::move(name),
144  std::move(code),
145  std::move(input_desc),
146  std::move(output_desc),
147  std::move(chunk_desc),
148  std::move(concat_desc),
149  has_random);
150 }
151 
// Static self-registration: installs the CPU kernel factory into the fuser
// backend registry at library load time.
RegisterFusionBackend reg(at::DeviceType::CPU, createFusionKernel);
153 } // namespace cpu
154 } // namespace fuser
155 } // namespace jit
156 } // namespace torch
// Definition: jit_type.h:17