HOWTO get backtraces for all threads

Let’s say you have a daemon process that is crashing on takeoff, early during system boot, before you even have a chance to log in, much less start a debugger. Furthermore, it is a multithreaded application, and you want to get backtraces for all threads to help diagnose the problem. What do you do?

Glibc has limited support for building backtraces. If you #include <execinfo.h>, then you can use the backtrace, backtrace_symbols, and backtrace_symbols_fd functions. But these functions have some serious drawbacks in the scenario described above.

  • They can only find symbols for non-static functions.
  • The program must be linked with -rdynamic.
  • A backtrace is built for only the calling thread.
  • You get function names and offsets into those functions in the binary, not source code locations.

Here’s an alternative approach: spawn a gdb subprocess, and ask it to produce a backtrace for all threads, as in this code.

/**
 * traceme - Demonstration of attaching gdb to oneself in order to get high
 *           quality stack traces
 *
 * Written in 2013 by Jerry James <loganjerry@gmail.com>.
 *
 * To the extent possible under law, the author has dedicated all copyright
 * and related and neighboring rights to this software to the public domain
 * worldwide. This software is distributed without any warranty.
 *
 * You should have received a copy of the CC0 Public Domain Dedication along
 * with this software.  If not, see
 * <http://creativecommons.org/publicdomain/zero/1.0/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>

/* Annotation for functions that never return to their caller.  Compilers
   that do not define __GNUC__ get an empty expansion; compilers that do are
   assumed to implement the noreturn attribute (gcc 2.5 or newer). */
#ifndef __GNUC__
# define NORETURN
#else
# define NORETURN __attribute__((noreturn))
#endif

/**
 * Capacity, in bytes, of the buffer that receives gdb output (16 KiB).
 */
#define BACKTRACE_SZ (16U * 1024U)

/**
 * Scratch buffer that gdb output is read into, one chunk at a time.
 */
static char backtracebuf[BACKTRACE_SZ];

/**
 * Report a fatal error and terminate the process.
 *
 * Hands msg to perror(), which writes it to standard error together with
 * the description of the current errno value, and then calls exit() with
 * EXIT_FAILURE.  This function never returns.
 *
 * @param[in] msg the message to print; callers in this file pass the name
 *                of the operation that failed
 */
static NORETURN void
error (const char *msg)
{
  perror (msg);
  exit (EXIT_FAILURE);
}

/**
 * Print stack backtraces for all threads.
 *
 * Forks a child that execs gdb in batch mode, attached to this process, with
 * its standard output redirected into a pipe.  The parent copies everything
 * gdb writes into the pipe onto its own stdout and then reaps the child.
 * Failure to create the pipe or to fork is fatal (see error()).
 *
 * NOTE(review): gdb stops this process while it is attached, so the parent
 * presumably cannot drain the pipe until gdb detaches.  Output larger than
 * the pipe's capacity could therefore block gdb -- confirm with a heavily
 * threaded process before relying on this at boot.
 *
 * @return the raw wait status stored by waitpid() for the gdb subprocess
 *         (decode it with WIFEXITED/WEXITSTATUS), or -1 if waitpid() failed
 */
static int
traceme (void)
{
  char pidstr[16];
  ssize_t size;
  pid_t child;
  int status, pipes[2];

  /* Get our process ID as a string; pid_t has no printf conversion of its
     own, so widen it to long */
  snprintf (pidstr, sizeof pidstr, "%ld", (long) getpid ());

  /* Create the pipe used to get gdb output */
  if (pipe (pipes) != 0)
    error ("pipe");

  /* Spawn the gdb subprocess */
  child = fork ();
  if (child < 0)
    error ("fork");
  if (child == 0)
    {
      /* Make the write end of the pipe gdb's standard output.  dup2 takes
         (oldfd, newfd), so the pipe end comes first. */
      if (dup2 (pipes[1], STDOUT_FILENO) < 0)
        {
          perror ("dup2");
          _exit (EXIT_FAILURE);
        }
      close (pipes[0]);
      /* If stdout was closed when the pipe was created, the write end may
         already be descriptor 1; do not close it in that case */
      if (pipes[1] != STDOUT_FILENO)
        close (pipes[1]);
      execl ("/usr/bin/gdb", "gdb", "-q", "-nw", "-batch", "-p", pidstr,
             "-ex", "thread apply all bt full", (char *) NULL);
      /* Only reached if execl failed.  Leave with _exit so that stdio
         buffers inherited from the parent are not flushed a second time. */
      perror ("Could not run gdb");
      _exit (EXIT_FAILURE);
    }

  /* Close the child's end of the pipe so that read sees end-of-file once
     gdb exits */
  close (pipes[1]);

  /* Get output from gdb until done.  read does not NUL-terminate the
     buffer, so write out exactly the number of bytes received. */
  while ((size = read (pipes[0], backtracebuf, BACKTRACE_SZ)) > 0)
    {
      if (fwrite (backtracebuf, 1, (size_t) size, stdout) != (size_t) size)
        break;
    }
  if (size < 0)
    perror ("read");
  fflush (stdout);

  /* Clean up and exit */
  close (pipes[0]);
  if (waitpid (child, &status, 0) < 0)
    {
      perror ("waitpid");
      return -1;
    }
  return status;
}

/**
 * Program entry point: demonstrate traceme() by printing backtraces of this
 * process.
 *
 * @return EXIT_SUCCESS unconditionally; the status returned by traceme() is
 *         deliberately discarded in this demonstration
 */
int
main (void)
{
  (void) traceme ();
  return EXIT_SUCCESS;
}