1

我正在尝试使用信号处理程序捕获故障,然后打印堆栈跟踪信息以添加到日志文件(或控制台)中以获取崩溃报告并在非开发机器上调试我的应用程序。我的问题是有时我没有得到完整的堆栈帧回溯。在许多情况下,它似乎挂起并且没有完成或退出。只有有时它会成功退出。

这是我的代码:

#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <execinfo.h>

typedef struct { char name[10]; int id; char description[40]; } signal_def;

signal_def signal_data[] =
{
    { "SIGHUP", SIGHUP, "Hangup (POSIX)" },
    { "SIGINT", SIGINT, "Interrupt (ANSI)" },
    { "SIGQUIT", SIGQUIT, "Quit (POSIX)" },
    { "SIGILL", SIGILL, "Illegal instruction (ANSI)" },
    { "SIGTRAP", SIGTRAP, "Trace trap (POSIX)" },
    { "SIGABRT", SIGABRT, "Abort (ANSI)" },
    { "SIGIOT", SIGIOT, "IOT trap (4.2 BSD)" },
    { "SIGBUS", SIGBUS, "BUS error (4.2 BSD)" },
    { "SIGFPE", SIGFPE, "Floating-point exception (ANSI)" },
    { "SIGKILL", SIGKILL, "Kill, unblockable (POSIX)" },
    { "SIGUSR1", SIGUSR1, "User-defined signal 1 (POSIX)" },
    { "SIGSEGV", SIGSEGV, "Segmentation violation (ANSI)" },
    { "SIGUSR2", SIGUSR2, "User-defined signal 2 (POSIX)" },
    { "SIGPIPE", SIGPIPE, "Broken pipe (POSIX)" },
    { "SIGALRM", SIGALRM, "Alarm clock (POSIX)" },
    { "SIGTERM", SIGTERM, "Termination (ANSI)" },
    //{ "SIGSTKFLT", SIGSTKFLT, "Stack fault" },
    { "SIGCHLD", SIGCHLD, "Child status has changed (POSIX)" },
    //{ "SIGCLD", SIGCLD, "Same as SIGCHLD (System V)" },
    { "SIGCONT", SIGCONT, "Continue (POSIX)" },
    { "SIGSTOP", SIGSTOP, "Stop, unblockable (POSIX)" },
    { "SIGTSTP", SIGTSTP, "Keyboard stop (POSIX)" },
    { "SIGTTIN", SIGTTIN, "Background read from tty (POSIX)" },
    { "SIGTTOU", SIGTTOU, "Background write to tty (POSIX)" },
    { "SIGURG", SIGURG, "Urgent condition on socket (4.2 BSD)" },
    { "SIGXCPU", SIGXCPU, "CPU limit exceeded (4.2 BSD)" },
    { "SIGXFSZ", SIGXFSZ, "File size limit exceeded (4.2 BSD)" },
    { "SIGVTALRM", SIGVTALRM, "Virtual alarm clock (4.2 BSD)" },
    { "SIGPROF", SIGPROF, "Profiling alarm clock (4.2 BSD)" },
    { "SIGWINCH", SIGWINCH, "Window size change (4.3 BSD, Sun)" },
    { "SIGIO", SIGIO, "I/O now possible (4.2 BSD)" },
    //{ "SIGPOLL", SIGPOLL, "Pollable event occurred (System V)" },
    //{ "SIGPWR", SIGPWR, "Power failure restart (System V)" },
    { "SIGSYS", SIGSYS, "Bad system call" },
};

void bt_sighandler(int sig, siginfo_t *info, void *secret) {
   signal_def *sigd = NULL;
       for (int i = 0; i < sizeof(signal_data) / sizeof(signal_def); ++i) {
          if (sig == signal_data[i].id) {
             sigd = &signal_data[i];
             break;
          }
       }
   //ucontext_t* uc = (ucontext_t*) secret;
   //void *pnt = (void*) uc->uc_mcontext.gregs[REG_RIP] ;

   void *trace[16];
   int trace_size = backtrace(trace, 16);
   /* overwrite sigaction with caller's address */
   //trace[1] = pnt;

   if (sigd) {
       fprintf(stderr, "SigHandler(0x%02X)[%d]:%s[%s]", sig, trace_size,
          sigd->name, sigd->description);
       } else {
       fprintf(stderr, "SigHandler(0x%02X)[%d]", sig, trace_size);
       }

   backtrace_symbols_fd(trace, trace_size, fileno(stderr));

   exit(1);
}

#endif

int main(int argc, char* argv[]) {
  struct sigaction sa;

  sa.sa_sigaction = bt_sighandler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = 0;

  sigaction(SIGINT, &sa, NULL);
  sigaction(SIGSEGV, &sa, NULL);
  sigaction(SIGBUS, &sa, NULL);
  sigaction(SIGILL, &sa, NULL);
  sigaction(SIGFPE, &sa, NULL);
  sigaction(SIGUSR1, &sa, NULL);
  sigaction(SIGUSR2, &sa, NULL);

  signal(SIGPIPE, SIG_IGN);

  //Produce a fault

  return 0;
}

您会在我的示例代码中注意到,负责用调用者地址覆盖 sigaction 的部分已被注释掉。这是因为我不确定如何让它为 Mac 编译。

这是一个示例控制台输出: 控制台输出 http://www.minesclubtennis.com/images/stackoverflow/fatalconsoleoutputhang.png

您会注意到它只打印了前 3 帧,然后挂起而不退出,即使找到并应该打印 9 帧。

所以我从 Activity Monitor 应用程序中做了一个“示例过程”,发现执行 backtrace_symbols_fd 函数的线程卡在了 strlen 上。截图: 示例过程输出 http://www.minesclubtennis.com/images/stackoverflow/sampleprocessoutputhang.png

为什么挂了?这是我自己代码中的错误还是 Apple回溯中的错误?有人告诉我,信号处理程序可以做的事情有限,但我在sigaction 手册页上看不到任何表明我做错了什么的东西。

4

1 回答 1

3

您需要更仔细地阅读 sigaction 手册页!信号安全函数列表中未列出的任何内容都在信号处理程序中被禁止。backtrace_symbols_fd() 不在该列表中。您不能在信号处理程序中使用它。

如果您想了解具体原因,请访问 Apple 的开源站点并下载Libc代码。您的捕获说明了问题所在。如果您查看“stdio/vprintf-fbsd.c”,您会看到 __vfprintf() 有以下注释:

/*
 * Non-MT-safe version
 */

很多 printf 风格的函数都在这里结束(snprintf 就是我们到达这里的方式)。如果您的应用程序在 printf 样式函数中崩溃并且信号处理程序尝试重新输入,那么您看到的意外行为是......预期的。

或者,即使您的应用程序没有在 printf 样式函数中崩溃,但其他线程在崩溃时恰好在 printf 样式函数中,您也可以看到这种行为。

于 2012-05-19T04:23:56.697 回答