Caffe2 - C++ API
A deep learning, cross platform ML framework
signal_handler.cc
1 
17 #include "caffe2/utils/signal_handler.h"
18 #include "caffe2/core/logging.h"
19 
20 #if defined(CAFFE2_SUPPORTS_SIGNAL_HANDLER)
21 
22 // Normal signal handler implementation.
23 #include <cxxabi.h>
24 #include <dirent.h>
25 #include <dlfcn.h>
26 #include <pthread.h>
27 #include <sys/syscall.h>
28 #include <sys/types.h>
29 #include <unistd.h>
30 #include <unwind.h>
31 
32 #include <atomic>
33 #include <csignal>
34 #include <cstdio>
35 #include <cstdlib>
36 #include <mutex>
37 #include <unordered_set>
38 
39 #include "caffe2/core/init.h"
40 
41 #if CAFFE2_ANDROID
42 #ifndef SYS_gettid
43 #define SYS_gettid __NR_gettid
44 #endif
45 #ifndef SYS_tgkill
46 #define SYS_tgkill __NR_tgkill
47 #endif
48 #endif
49 
50 namespace {
51 
// Dispositions that were in effect before hookupHandler() replaced them.
// handleSignal() chains to them and unhookHandler() puts them back.
struct sigaction previousSighup;
struct sigaction previousSigint;
// Running totals of SIGINT / SIGHUP deliveries observed by handleSignal().
std::atomic<int> sigintCount{0};
std::atomic<int> sighupCount{0};
// Count of hookupHandler() calls not yet balanced by unhookHandler().
std::atomic<int> hookedUpCount{0};
57 
58 void handleSignal(int signal) {
59  switch (signal) {
60  // TODO: what if the previous handler uses sa_sigaction?
61  case SIGHUP:
62  sighupCount += 1;
63  if (previousSighup.sa_handler) {
64  previousSighup.sa_handler(signal);
65  }
66  break;
67  case SIGINT:
68  sigintCount += 1;
69  if (previousSigint.sa_handler) {
70  previousSigint.sa_handler(signal);
71  }
72  break;
73  }
74 }
75 
76 void hookupHandler() {
77  if (hookedUpCount++) {
78  return;
79  }
80  struct sigaction sa;
81  // Setup the handler
82  sa.sa_handler = &handleSignal;
83  // Restart the system call, if at all possible
84  sa.sa_flags = SA_RESTART;
85  // Block every signal during the handler
86  sigfillset(&sa.sa_mask);
87  // Intercept SIGHUP and SIGINT
88  if (sigaction(SIGHUP, &sa, &previousSighup) == -1) {
89  LOG(FATAL) << "Cannot install SIGHUP handler.";
90  }
91  if (sigaction(SIGINT, &sa, &previousSigint) == -1) {
92  LOG(FATAL) << "Cannot install SIGINT handler.";
93  }
94 }
95 
96 // Set the signal handlers to the default.
97 void unhookHandler() {
98  if (--hookedUpCount > 0) {
99  return;
100  }
101  struct sigaction sa;
102  // Setup the sighub handler
103  sa.sa_handler = SIG_DFL;
104  // Restart the system call, if at all possible
105  sa.sa_flags = SA_RESTART;
106  // Block every signal during the handler
107  sigfillset(&sa.sa_mask);
108  // Intercept SIGHUP and SIGINT
109  if (sigaction(SIGHUP, &previousSighup, nullptr) == -1) {
110  LOG(FATAL) << "Cannot uninstall SIGHUP handler.";
111  }
112  if (sigaction(SIGINT, &previousSigint, nullptr) == -1) {
113  LOG(FATAL) << "Cannot uninstall SIGINT handler.";
114  }
115 }
116 
117 #if defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
// Guards fatalSignalHandlersInstalled.
std::mutex fatalSignalHandlersInstallationMutex;
bool fatalSignalHandlersInstalled = false;
// SIGUSR2 disposition that was active before ours; kept so we can forward
// SIGUSR2 deliveries that we did not trigger ourselves.
struct sigaction previousSigusr2;
// Selects what the SIGUSR2 handler does: forward to the previous handler
// (false) or print a stack trace (true).
std::atomic<bool> fatalSignalReceived{false};
// Set when a fatal signal arrives so that the threads asked to print a
// backtrace can report which signal caused it.
const char* fatalSignalName = "<UNKNOWN>";
int fatalSignum = -1;
// Used by the fatal signal handler to wait until another thread has finished
// writing its stack trace (pthread_join is not an option because a tid cannot
// be converted to a pthread_t).
pthread_cond_t writingCond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t writingMutex = PTHREAD_MUTEX_INITIALIZER;
136 
// Table of the signals treated as fatal. Each row pairs the signal with its
// printable name and a slot for the disposition that was active before ours.
// The table ends with a sentinel row whose name is nullptr.
struct {
  const char* name; // printable name; nullptr marks the end of the table
  int signum; // signal number
  struct sigaction previous; // disposition saved at install time
} kSignalHandlers[] = {
    {"SIGABRT", SIGABRT, {}},
    {"SIGINT", SIGINT, {}},
    {"SIGILL", SIGILL, {}},
    {"SIGFPE", SIGFPE, {}},
    {"SIGBUS", SIGBUS, {}},
    {"SIGSEGV", SIGSEGV, {}},
    {nullptr, 0, {}},
};
150 
151 struct sigaction* getPreviousSigaction(int signum) {
152  for (auto handler = kSignalHandlers; handler->name != nullptr; handler++) {
153  if (handler->signum == signum) {
154  return &handler->previous;
155  }
156  }
157  return nullptr;
158 }
159 
160 const char* getSignalName(int signum) {
161  for (auto handler = kSignalHandlers; handler->name != nullptr; handler++) {
162  if (handler->signum == signum) {
163  return handler->name;
164  }
165  }
166  return nullptr;
167 }
168 
// _Unwind_Backtrace callback: records the instruction pointer of the frame
// described by context. userInfo must point at a std::vector<uintptr_t>.
// Always returns _URC_NO_REASON.
_Unwind_Reason_Code unwinder(struct _Unwind_Context* context, void* userInfo) {
  auto* frames = static_cast<std::vector<uintptr_t>*>(userInfo);
  const uintptr_t ip = _Unwind_GetIP(context);
  frames->push_back(ip);
  return _URC_NO_REASON;
}
174 
175 std::vector<uintptr_t> getBacktrace() {
176  std::vector<uintptr_t> pcs;
177  _Unwind_Backtrace(unwinder, &pcs);
178  return pcs;
179 }
180 
181 void printStacktrace() {
182  std::vector<uintptr_t> pcs = getBacktrace();
183  Dl_info info;
184  size_t i = 0;
185  for (uintptr_t pcAddr : pcs) {
186  const void* pc = reinterpret_cast<const void*>(pcAddr);
187  const char* path = nullptr;
188  const char* name = "???";
189  char* demangled = nullptr;
190  int offset = -1;
191 
192  std::cerr << "[" << i << "] ";
193  if (dladdr(pc, &info)) {
194  path = info.dli_fname;
195  name = info.dli_sname ?: "???";
196  offset = reinterpret_cast<uintptr_t>(pc) -
197  reinterpret_cast<uintptr_t>(info.dli_saddr);
198 
199  int status;
200  demangled = abi::__cxa_demangle(name, nullptr, nullptr, &status);
201  if (status == 0) {
202  name = demangled;
203  }
204  }
205  std::cerr << name;
206  if (offset >= 0) {
207  std::cerr << "+" << reinterpret_cast<void*>(offset);
208  }
209  std::cerr << "(" << pc << ")";
210  if (path) {
211  std::cerr << " in " << path;
212  }
213  std::cerr << std::endl;
214  if (demangled) {
215  free(demangled);
216  }
217  i += 1;
218  }
219 }
220 
// Forwards a signal to a previously installed disposition.
//
// action  saved disposition to forward to; nullptr is tolerated (no-op)
// signum  signal number being delivered
// info    siginfo passed to SA_SIGINFO-style handlers
// ctx     context pointer passed to SA_SIGINFO-style handlers
//
// SIG_DFL and SIG_IGN are sentinel values, not functions, so nothing is
// called for them.
void callPreviousSignalHandler(
    struct sigaction* action,
    int signum,
    siginfo_t* info,
    void* ctx) {
  if (action == nullptr) {
    return;
  }
  if ((action->sa_flags & SA_SIGINFO) == SA_SIGINFO) {
    if (action->sa_sigaction != nullptr) {
      action->sa_sigaction(signum, info, ctx);
    }
    return;
  }
  const auto handler = action->sa_handler;
  if (handler == SIG_DFL || handler == SIG_IGN) {
    return;
  }
  handler(signum);
}
235 
236 // needsLock signals whether we need to lock our writing mutex.
237 void stacktraceSignalHandler(bool needsLock) {
238  if (needsLock) {
239  pthread_mutex_lock(&writingMutex);
240  }
241  pid_t tid = syscall(SYS_gettid);
242  std::cerr << fatalSignalName << "(" << fatalSignum << "), Thread " << tid
243  << ": " << std::endl;
244  printStacktrace();
245  std::cerr << std::endl;
246  if (needsLock) {
247  pthread_mutex_unlock(&writingMutex);
248  pthread_cond_signal(&writingCond);
249  }
250 }
251 
252 // Our fatal signal entry point
253 void fatalSignalHandler(int signum) {
254  // Check if this is a proper signal that we declared above.
255  const char* name = getSignalName(signum);
256  if (!name) {
257  return;
258  }
259  if (fatalSignalReceived) {
260  return;
261  }
262  // Set the flag so that our SIGUSR2 handler knows that we're aborting and
263  // that it should intercept any SIGUSR2 signal.
264  fatalSignalReceived = true;
265  // Set state for other threads.
266  fatalSignum = signum;
267  fatalSignalName = name;
268  // Linux doesn't have a nice userland API for enumerating threads so we
269  // need to use the proc pseudo-filesystem.
270  DIR* procDir = opendir("/proc/self/task");
271  if (procDir) {
272  pid_t pid = getpid();
273  pid_t currentTid = syscall(SYS_gettid);
274  struct dirent* entry;
275  pthread_mutex_lock(&writingMutex);
276  while ((entry = readdir(procDir)) != nullptr) {
277  if (entry->d_name[0] == '.') {
278  continue;
279  }
280  pid_t tid = atoi(entry->d_name);
281  // If we've found the current thread then we'll jump into the SIGUSR2
282  // handler before calling pthread_cond_wait thus deadlocking, so branch
283  // our directly to the backtrace handler instead of signaling it.
284  if (tid != currentTid) {
285  syscall(SYS_tgkill, pid, tid, SIGUSR2);
286  pthread_cond_wait(&writingCond, &writingMutex);
287  } else {
288  stacktraceSignalHandler(false);
289  }
290  }
291  pthread_mutex_unlock(&writingMutex);
292  } else {
293  perror("Failed to open /proc/self/task");
294  }
295  sigaction(signum, getPreviousSigaction(signum), nullptr);
296  raise(signum);
297 }
298 
299 // Our SIGUSR2 entry point
300 void stacktraceSignalHandler(int signum, siginfo_t* info, void* ctx) {
301  if (fatalSignalReceived) {
302  stacktraceSignalHandler(true);
303  } else {
304  // We don't want to actually change the signal handler as we want to
305  // remain the signal handler so that we may get the usr2 signal later.
306  callPreviousSignalHandler(&previousSigusr2, signum, info, ctx);
307  }
308 }
309 
310 // Installs SIGABRT signal handler so that we get stack traces
311 // from every thread on SIGABRT caused exit. Also installs SIGUSR2 handler
312 // so that threads can communicate with each other (be sure if you use SIGUSR2)
313 // to install your handler before initing caffe2 (we properly fall back to
314 // the previous handler if we didn't initiate the SIGUSR2).
315 void installFatalSignalHandlers() {
316  std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex);
317  if (fatalSignalHandlersInstalled) {
318  return;
319  }
320  fatalSignalHandlersInstalled = true;
321  struct sigaction sa;
322  sigemptyset(&sa.sa_mask);
323  // Since we'll be in an exiting situation it's possible there's memory
324  // corruption, so make our own stack just in case.
325  sa.sa_flags = SA_ONSTACK | SA_SIGINFO;
326  sa.sa_handler = ::fatalSignalHandler;
327  for (auto* handler = kSignalHandlers; handler->name != nullptr; handler++) {
328  if (sigaction(handler->signum, &sa, &handler->previous)) {
329  std::string str("Failed to add ");
330  str += handler->name;
331  str += " handler!";
332  perror(str.c_str());
333  }
334  }
335  sa.sa_sigaction = ::stacktraceSignalHandler;
336  if (sigaction(SIGUSR2, &sa, &::previousSigusr2)) {
337  perror("Failed to add SIGUSR2 handler!");
338  }
339 }
340 
341 void uninstallFatalSignalHandlers() {
342  std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex);
343  if (!fatalSignalHandlersInstalled) {
344  return;
345  }
346  fatalSignalHandlersInstalled = false;
347  for (auto* handler = kSignalHandlers; handler->name != nullptr; handler++) {
348  if (sigaction(handler->signum, &handler->previous, nullptr)) {
349  std::string str("Failed to remove ");
350  str += handler->name;
351  str += " handler!";
352  perror(str.c_str());
353  } else {
354  handler->previous = {};
355  }
356  }
357  if (sigaction(SIGUSR2, &::previousSigusr2, nullptr)) {
358  perror("Failed to add SIGUSR2 handler!");
359  } else {
360  ::previousSigusr2 = {};
361  }
362 }
363 #endif // defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
364 
365 } // namespace
366 
367 #if defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
368 CAFFE2_DEFINE_bool(
369  caffe2_print_stacktraces,
370  false,
371  "If set, prints stacktraces when a fatal signal is raised.");
372 #endif
373 
374 namespace caffe2 {
375 
376 SignalHandler::SignalHandler(
377  SignalHandler::Action SIGINT_action,
378  SignalHandler::Action SIGHUP_action)
379  : SIGINT_action_(SIGINT_action),
380  SIGHUP_action_(SIGHUP_action),
381  my_sigint_count_(sigintCount),
382  my_sighup_count_(sighupCount) {
383  hookupHandler();
384 }
385 
386 SignalHandler::~SignalHandler() {
387  unhookHandler();
388 }
389 
390 // Return true iff a SIGINT has been received since the last time this
391 // function was called.
392 bool SignalHandler::GotSIGINT() {
393  uint64_t count = sigintCount;
394  bool result = (count != my_sigint_count_);
395  my_sigint_count_ = count;
396  return result;
397 }
398 
399 // Return true iff a SIGHUP has been received since the last time this
400 // function was called.
401 bool SignalHandler::GotSIGHUP() {
402  uint64_t count = sighupCount;
403  bool result = (count != my_sighup_count_);
404  my_sighup_count_ = count;
405  return result;
406 }
407 
408 SignalHandler::Action SignalHandler::CheckForSignals() {
409  if (GotSIGHUP()) {
410  return SIGHUP_action_;
411  }
412  if (GotSIGINT()) {
413  return SIGINT_action_;
414  }
415  return SignalHandler::Action::NONE;
416 }
417 
418 #if defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
419 void setPrintStackTracesOnFatalSignal(bool print) {
420  if (print) {
421  installFatalSignalHandlers();
422  } else {
423  uninstallFatalSignalHandlers();
424  }
425 }
426 bool printStackTracesOnFatalSignal() {
427  std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex);
428  return fatalSignalHandlersInstalled;
429 }
430 
431 namespace internal {
432 bool Caffe2InitFatalSignalHandler(int*, char***) {
433  if (caffe2::FLAGS_caffe2_print_stacktraces) {
434  setPrintStackTracesOnFatalSignal(true);
435  }
436  return true;
437 }
438 
439 REGISTER_CAFFE2_INIT_FUNCTION(
440  Caffe2InitFatalSignalHandler,
441  &Caffe2InitFatalSignalHandler,
442  "Inits signal handlers for fatal signals so we can see what if"
443  " caffe2_print_stacktraces is set.");
444 
445 } // namepsace internal
446 #endif // defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
447 } // namespace caffe2
448 
449 #else // defined(CAFFE2_SUPPORTS_SIGNAL_HANDLER)
450 
451 // TODO: Currently we do not support signal handling in non-Linux yet - below is
452 // a minimal implementation that makes things compile.
453 namespace caffe2 {
454 SignalHandler::SignalHandler(
455  SignalHandler::Action SIGINT_action,
456  SignalHandler::Action SIGHUP_action) {}
457 SignalHandler::~SignalHandler() {}
458 bool SignalHandler::GotSIGINT() {
459  return false;
460 }
461 bool SignalHandler::GotSIGHUP() {
462  return false;
463 }
464 SignalHandler::Action SignalHandler::CheckForSignals() {
465  return SignalHandler::Action::NONE;
466 }
467 } // namespace caffe2
468 
469 #endif // defined(CAFFE2_SUPPORTS_SIGNAL_HANDLER)
Copyright (c) 2016-present, Facebook, Inc.