我写了第二个测试用例。我不得不添加一个单独的答案,因为包含示例输出的第一个答案太长了。
首先,这里是tracer.c
:
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/user.h>
#include <dirent.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <stdio.h>
#ifndef SINGLESTEPS
#define SINGLESTEPS 10
#endif
/* Similar to getline(), except gets process pid task IDs.
* Returns positive (number of TIDs in list) if success,
* otherwise 0 with errno set. */
size_t get_tids(pid_t **const listptr, size_t *const sizeptr, const pid_t pid)
{
char dirname[64];
DIR *dir;
pid_t *list;
size_t size, used = 0;
if (!listptr || !sizeptr || pid < (pid_t)1) {
errno = EINVAL;
return (size_t)0;
}
if (*sizeptr > 0) {
list = *listptr;
size = *sizeptr;
} else {
list = *listptr = NULL;
size = *sizeptr = 0;
}
if (snprintf(dirname, sizeof dirname, "/proc/%d/task/", (int)pid) >= (int)sizeof dirname) {
errno = ENOTSUP;
return (size_t)0;
}
dir = opendir(dirname);
if (!dir) {
errno = ESRCH;
return (size_t)0;
}
while (1) {
struct dirent *ent;
int value;
char dummy;
errno = 0;
ent = readdir(dir);
if (!ent)
break;
/* Parse TIDs. Ignore non-numeric entries. */
if (sscanf(ent->d_name, "%d%c", &value, &dummy) != 1)
continue;
/* Ignore obviously invalid entries. */
if (value < 1)
continue;
/* Make sure there is room for another TID. */
if (used >= size) {
size = (used | 127) + 128;
list = realloc(list, size * sizeof list[0]);
if (!list) {
closedir(dir);
errno = ENOMEM;
return (size_t)0;
}
*listptr = list;
*sizeptr = size;
}
/* Add to list. */
list[used++] = (pid_t)value;
}
if (errno) {
const int saved_errno = errno;
closedir(dir);
errno = saved_errno;
return (size_t)0;
}
if (closedir(dir)) {
errno = EIO;
return (size_t)0;
}
/* None? */
if (used < 1) {
errno = ESRCH;
return (size_t)0;
}
/* Make sure there is room for a terminating (pid_t)0. */
if (used >= size) {
size = used + 1;
list = realloc(list, size * sizeof list[0]);
if (!list) {
errno = ENOMEM;
return (size_t)0;
}
*listptr = list;
*sizeptr = size;
}
/* Terminate list; done. */
list[used] = (pid_t)0;
errno = 0;
return used;
}
static int wait_process(const pid_t pid, int *const statusptr)
{
int status;
pid_t p;
do {
status = 0;
p = waitpid(pid, &status, WUNTRACED | WCONTINUED);
} while (p == (pid_t)-1 && errno == EINTR);
if (p != pid)
return errno = ESRCH;
if (statusptr)
*statusptr = status;
return errno = 0;
}
static int continue_process(const pid_t pid, int *const statusptr)
{
int status;
pid_t p;
do {
if (kill(pid, SIGCONT) == -1)
return errno = ESRCH;
do {
status = 0;
p = waitpid(pid, &status, WUNTRACED | WCONTINUED);
} while (p == (pid_t)-1 && errno == EINTR);
if (p != pid)
return errno = ESRCH;
} while (WIFSTOPPED(status));
if (statusptr)
*statusptr = status;
return errno = 0;
}
void show_registers(FILE *const out, pid_t tid, const char *const note)
{
struct user_regs_struct regs;
long r;
do {
r = ptrace(PTRACE_GETREGS, tid, ®s, ®s);
} while (r == -1L && errno == ESRCH);
if (r == -1L)
return;
#if (defined(__x86_64__) || defined(__i386__)) && __WORDSIZE == 64
if (note && *note)
fprintf(out, "Task %d: RIP=0x%016lx, RSP=0x%016lx. %s\n", (int)tid, regs.rip, regs.rsp, note);
else
fprintf(out, "Task %d: RIP=0x%016lx, RSP=0x%016lx.\n", (int)tid, regs.rip, regs.rsp);
#elif (defined(__x86_64__) || defined(__i386__)) && __WORDSIZE == 32
if (note && *note)
fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx. %s\n", (int)tid, regs.eip, regs.esp, note);
else
fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx.\n", (int)tid, regs.eip, regs.esp);
#endif
}
int main(int argc, char *argv[])
{
pid_t *tid = 0;
size_t tids = 0;
size_t tids_max = 0;
size_t t, s;
long r;
pid_t child;
int status;
if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
fprintf(stderr, " %s COMMAND [ ARGS ... ]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "This program executes COMMAND in a child process,\n");
fprintf(stderr, "and waits for it to stop (via a SIGSTOP signal).\n");
fprintf(stderr, "When that occurs, the register state of each thread\n");
fprintf(stderr, "is dumped to standard output, then the child process\n");
fprintf(stderr, "is sent a SIGCONT signal.\n");
fprintf(stderr, "\n");
return 1;
}
child = fork();
if (child == (pid_t)-1) {
fprintf(stderr, "fork() failed: %s.\n", strerror(errno));
return 1;
}
if (!child) {
prctl(PR_SET_DUMPABLE, (long)1);
prctl(PR_SET_PTRACER, (long)getppid());
fflush(stdout);
fflush(stderr);
execvp(argv[1], argv + 1);
fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
return 127;
}
fprintf(stderr, "Tracer: Waiting for child (pid %d) events.\n\n", (int)child);
fflush(stderr);
while (1) {
/* Wait for a child event. */
if (wait_process(child, &status))
break;
/* Exited? */
if (WIFEXITED(status) || WIFSIGNALED(status)) {
errno = 0;
break;
}
/* At this point, only stopped events are interesting. */
if (!WIFSTOPPED(status))
continue;
/* Obtain task IDs. */
tids = get_tids(&tid, &tids_max, child);
if (!tids)
break;
printf("Process %d has %d tasks,", (int)child, (int)tids);
fflush(stdout);
/* Attach to all tasks. */
for (t = 0; t < tids; t++) {
do {
r = ptrace(PTRACE_ATTACH, tid[t], (void *)0, (void *)0);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
if (r == -1L) {
const int saved_errno = errno;
while (t-->0)
do {
r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
tids = 0;
errno = saved_errno;
break;
}
}
if (!tids) {
const int saved_errno = errno;
if (continue_process(child, &status))
break;
printf(" failed to attach (%s).\n", strerror(saved_errno));
fflush(stdout);
if (WIFCONTINUED(status))
continue;
errno = 0;
break;
}
printf(" attached to all.\n\n");
fflush(stdout);
/* Dump the registers of each task. */
for (t = 0; t < tids; t++)
show_registers(stdout, tid[t], "");
printf("\n");
fflush(stdout);
for (s = 0; s < SINGLESTEPS; s++) {
do {
r = ptrace(PTRACE_SINGLESTEP, tid[tids-1], (void *)0, (void *)0);
} while (r == -1L && errno == ESRCH);
if (!r) {
for (t = 0; t < tids - 1; t++)
show_registers(stdout, tid[t], "");
show_registers(stdout, tid[tids-1], "Advanced by one step.");
printf("\n");
fflush(stdout);
} else {
fprintf(stderr, "Single-step failed: %s.\n", strerror(errno));
fflush(stderr);
}
}
/* Detach from all tasks. */
for (t = 0; t < tids; t++)
do {
r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0);
} while (r == -1 && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
tids = 0;
if (continue_process(child, &status))
break;
if (WIFCONTINUED(status)) {
printf("Detached. Waiting for new stop events.\n\n");
fflush(stdout);
continue;
}
errno = 0;
break;
}
if (errno)
fprintf(stderr, "Tracer: Child lost (%s)\n", strerror(errno));
else
if (WIFEXITED(status))
fprintf(stderr, "Tracer: Child exited (%d)\n", WEXITSTATUS(status));
else
if (WIFSIGNALED(status))
fprintf(stderr, "Tracer: Child died from signal %d\n", WTERMSIG(status));
else
fprintf(stderr, "Tracer: Child vanished\n");
fflush(stderr);
return status;
}
tracer.c
执行指定的命令,等待命令接收到SIGSTOP
信号。(tracer.c
不自己发送;您可以让被跟踪者自行停止,或从外部发送信号。)
当命令停止时,tracer.c
将 ptrace 附加到每个线程,并将其中一个线程单步执行固定数量的步骤(SINGLESTEPS
编译时常量),显示每个线程的相关寄存器状态。
之后,它从命令中分离出来,并向它发送一个SIGCONT
信号,让它继续正常运行。
这是一个简单的测试程序worker.c
,我用于测试:
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#ifndef THREADS
#define THREADS 2
#endif
volatile sig_atomic_t done = 0;
void catch_done(int signum)
{
done = signum;
}
int install_done(const int signum)
{
struct sigaction act;
sigemptyset(&act.sa_mask);
act.sa_handler = catch_done;
act.sa_flags = 0;
if (sigaction(signum, &act, NULL))
return errno;
else
return 0;
}
void *worker(void *data)
{
volatile unsigned long *const counter = data;
while (!done)
__sync_add_and_fetch(counter, 1UL);
return (void *)(unsigned long)__sync_or_and_fetch(counter, 0UL);
}
int main(void)
{
unsigned long counter = 0UL;
pthread_t thread[THREADS];
pthread_attr_t attrs;
size_t i;
if (install_done(SIGHUP) ||
install_done(SIGTERM) ||
install_done(SIGUSR1)) {
fprintf(stderr, "Worker: Cannot install signal handlers: %s.\n", strerror(errno));
return 1;
}
pthread_attr_init(&attrs);
pthread_attr_setstacksize(&attrs, 65536);
for (i = 0; i < THREADS; i++)
if (pthread_create(&thread[i], &attrs, worker, &counter)) {
done = 1;
fprintf(stderr, "Worker: Cannot create thread: %s.\n", strerror(errno));
return 1;
}
pthread_attr_destroy(&attrs);
/* Let the original thread also do the worker dance. */
worker(&counter);
for (i = 0; i < THREADS; i++)
pthread_join(thread[i], NULL);
return 0;
}
使用例如编译两者
gcc -W -Wall -O3 -fomit-frame-pointer worker.c -pthread -o worker
gcc -W -Wall -O3 -fomit-frame-pointer tracer.c -o tracer
并在单独的终端或后台运行,使用例如
./tracer ./worker &
跟踪器显示工作人员的 PID:
Tracer: Waiting for child (pid 24275) events.
此时,孩子正在正常跑步。当您向SIGSTOP
孩子发送 a 时,该操作开始。跟踪器检测到它,进行所需的跟踪,然后分离并让子进程正常继续:
kill -STOP 24275
Process 24275 has 3 tasks, attached to all.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.
Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.
Detached. Waiting for new stop events.
您可以根据需要多次重复上述操作。请注意,我选择了SIGSTOP
信号作为触发器,因为这种方式tracer.c
也可用作生成每个请求的复杂多线程核心转储的基础(因为多线程进程可以简单地通过向自身发送 a 来触发它SIGSTOP
)。
worker()
在上面的例子中,线程都在旋转的函数的反汇编:
0x400a50: eb 0b jmp 0x400a5d
0x400a52: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0x400a58: f0 48 83 07 01 lock addq $0x1,(%rdi) = fourth step
0x400a5d: 8b 05 00 00 00 00 mov 0x0(%rip),%eax = first step
0x400a63: 85 c0 test %eax,%eax = second step
0x400a65: 74 f1 je 0x400a58 = third step
0x400a67: 48 8b 07 mov (%rdi),%rax
0x400a6a: 48 89 c2 mov %rax,%rdx
0x400a6d: f0 48 0f b1 07 lock cmpxchg %rax,(%rdi)
0x400a72: 75 f6 jne 0x400a6a
0x400a74: 48 89 d0 mov %rdx,%rax
0x400a77: c3 retq
现在,这个测试程序只展示了如何停止一个进程,附加到它的所有线程,单步执行一个线程所需数量的指令,然后让所有线程正常继续;它还没有证明这同样适用于让特定线程正常继续(通过PTRACE_CONT
)。但是,我在下面描述的细节表明,对我来说,同样的方法应该适用于PTRACE_CONT
.
我在编写上述测试程序时遇到的主要问题或惊喜是
long r;
do {
r = ptrace(PTRACE_cmd, tid, ...);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
循环,尤其是对于这种ESRCH
情况(由于ptrace 手册页描述,我只添加了其他情况)。
您会看到,大多数 ptrace 命令仅在任务停止时才允许使用。然而,当任务仍在完成例如单步命令时,任务不会停止。因此,使用上述循环——可能添加毫秒 nanosleep 或类似的以避免浪费 CPU——确保在我们尝试提供新命令之前,前一个 ptrace 命令已经完成(因此任务停止)。
Kerrek SB,我相信您在测试程序中遇到的至少一些问题是由于这个问题造成的?对我个人而言,这是一种D'oh!时刻意识到这当然是必要的,因为 ptracing 本质上是异步的,而不是同步的。
SIGCONT
(这种异步性也是我上面提到的交互的原因PTRACE_CONT
。我相信通过使用上面显示的循环进行正确处理,交互不再是问题——实际上是可以理解的。)
添加对此答案的评论:
Linux 内核使用 task_struct 结构中的一组任务状态标志(请参阅include/linux/sched.h
定义)来跟踪每个任务的状态。面向用户空间的一面ptrace()
在kernel/ptrace.c
.
当PTRACE_SINGLESTEP
orPTRACE_CONT
被调用时,kernel/ptrace.c
:ptrace_continue()
处理大部分细节。它通过调用wake_up_state(child, __TASK_TRACED)
( kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0)
) 结束。
当一个进程通过SIGSTOP
信号停止时,所有的任务都将停止,并最终处于“停止,未跟踪”状态。
附加到每个任务(通过 PTRACE_ATTACH 或 PTRACE_SEIZE,请参阅:)kernel/ptrace.c
会ptrace_attach()
修改任务状态。但是,ptrace 状态位(请参阅常量)与任务可运行状态位(请参阅常量include/linux/ptrace.h:PT_
)是分开的。include/linux/sched.h:TASK_
在附加到任务并向进程发送SIGCONT
信号后,停止状态不会立即修改(我相信),因为任务也在被跟踪。执行 PTRACE_SINGLESTEP 或 PTRACE_CONT 以 结束kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0)
,这会更新任务状态,并将任务移动到运行队列。
现在,我还没有找到代码路径的复杂部分是,当下一次计划任务时,任务状态如何在内核中更新。我的测试表明,使用单步(这是另一个任务状态标志)时,只有任务状态会被更新,而单步标志会被清除。似乎 PTRACE_CONT 不那么可靠;我相信这是因为单步标志“强制”了任务状态的改变。也许有一个“竞争条件”。继续信号传递和状态变化?
(进一步编辑:内核开发人员肯定希望wait()
被调用,例如看这个线程。)
换句话说,在注意到进程已经停止之后(注意你可以使用/proc/PID/stat
或者/proc/PID/status
如果进程不是子进程,并且还没有附加),我相信下面的过程是最健壮的一个:
pid_t pid, p; /* Process owning the tasks */
tid_t *tid; /* Task ID array */
size_t tids; /* Tasks */
long result;
int status;
size_t i;
for (i = 0; i < tids; i++) {
while (1) {
result = ptrace(PTRACE_ATTACH, tid[i], (void *)0, (void *)0);
if (result == -1L && (errno == ESRCH || errno == EBUSY || errno == EFAULT || errno == EIO)) {
/* To avoid burning up CPU for nothing: */
sched_yield(); /* or nanosleep(), or usleep() */
continue;
}
break;
}
if (result == -1L) {
/*
* Fatal error. First detach from tid[0..i-1], then exit.
*/
}
}
/* Send SIGCONT to the process. */
if (kill(pid, SIGCONT)) {
/*
* Fatal error, see errno. Exit.
*/
}
/* Since we are attached to the process,
* we can wait() on it. */
while (1) {
errno = 0;
status = 0;
p = waitpid(pid, &status, WCONTINUED);
if (p == (pid_t)-1) {
if (errno == EINTR)
continue;
else
break;
} else
if (p != pid) {
errno = ESRCH;
break;
} else
if (WIFCONTINUED(status)) {
errno = 0;
break;
}
}
if (errno) {
/*
* Fatal error. First detach from tid[0..tids-1], then exit.
*/
}
/* Single-step each task to update the task states. */
for (i = 0; i < tids; i++) {
while (1) {
result = ptrace(PTRACE_SINGLESTEP, tid[i], (void *)0, (void *)0);
if (result == -1L && errno == ESRCH) {
/* To avoid burning up CPU for nothing: */
sched_yield(); /* or nanosleep(), or usleep() */
continue;
}
break;
}
if (result == -1L) {
/*
* Fatal error. First detach from tid[0..i-1], then exit.
*/
}
}
/* Obtain task register structures, to make sure the single-steps
* have completed and their states have stabilized. */
for (i = 0; i < tids; i++) {
struct user_regs_struct regs;
while (1) {
result = ptrace(PTRACE_GETREGS, tid[i], ®s, ®s);
if (result == -1L && (errno == ESRCH || errno == EBUSY || errno == EFAULT || errno == EIO)) {
/* To avoid burning up CPU for nothing: */
sched_yield(); /* or nanosleep(), or usleep() */
continue;
}
break;
}
if (result == -1L) {
/*
* Fatal error. First detach from tid[0..i-1], then exit.
*/
}
}
完成上述操作后,所有任务都应附加并处于预期状态,以便例如 PTRACE_CONT 无需其他技巧即可工作。
如果未来内核的行为发生变化——我确实相信 STOP/CONT 信号和 ptracing 之间的交互可能会发生变化;至少有必要向 LKML 开发人员提出有关此行为的问题!——,上述程序仍然可以正常工作。(谨慎起见,通过使用 PTRACE_SINGLESTEP 循环几次,也可能是一个好主意。)
与 PTRACE_CONT 的不同之处在于,如果将来行为发生变化,最初的 PTRACE_CONT 可能实际上会继续该过程,从而导致ptrace()
随后的过程失败。使用 PTRACE_SINGLESTEP,进程将停止,允许进一步ptrace()
调用成功。
问题?