由于我不会深入讨论的原因,我需要在 FreeBSD 8.1 上的 Python 线程的子进程中运行“top -m io -d 2 10”的变体。问题是,似乎有时会产生 SIGTTOU(在我尚未破译的某些与代码相关的条件下),完全停止 top 和线程。其他时候,似乎没有产生 SIGTTOU,但是 top 或线程无论如何都会卡住。
top 的输出应该为系统上的前 10 个进程生成两组 IO 统计信息,其中第一组是“绝对”数字,第二组是自上一组(一秒前)以来统计信息的增量差异。在终端上或在 shell 脚本中运行此命令,无论是否重定向输出,都可以正常工作。
当问题发生时,'top' 似乎写入了第一组输出,但随后挂起/接收 SIGTTOU,然后才能输出第二组。在下面的示例代码中,只有一组进程统计信息被写入输出文件。
我是在“truss”下运行 Python 脚本时发现 SIGTTOU 信号的,但“truss”与“top”本身之间的交互似乎可能是一个干扰因素,因为仅仅运行 truss top -d 2
会产生信号并挂起,如下所示:
...
ioctl(1,TIOCGETA,0xffffe460) = 0 (0x0)
ioctl(1,TIOCGETA,0xc6b138) = 0 (0x0)
ioctl(1,TIOCGETA,0xffffe410) = 0 (0x0)
ioctl(1,TIOCGWINSZ,0xffffe460) = 0 (0x0)
ioctl(1,TIOCGWINSZ,0xffffe930) = 0 (0x0)
ioctl(1,TIOCGETA,0x50e560) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGINT|SIGQUIT|SIGTSTP,0x0) = 0 (0x0)
ioctl(1,TIOCGETA,0x50e560) = 0 (0x0)
SIGNAL 22 (SIGTTOU)
这是一个重现挂起和/或 SIGTTOU 的示例 Python 脚本:
import subprocess
from threading import Thread
def run():
    """Run ``top`` in a child process and capture its output in ``top.log``.

    The command is ``/usr/bin/top -m io -d 2 10``. Both stdout and stderr
    of the child are redirected to the same log file, which is opened in
    binary mode and truncated first. The child's stdin is a pipe that is
    never written to.
    """
    top_cmd = ("/usr/bin/top", "-m", "io", "-d", "2", "10")
    with open("top.log", "wb") as log_file:
        child = subprocess.Popen(
            top_cmd,
            stdout=log_file,
            stderr=log_file,
            stdin=subprocess.PIPE
        )
        # No input is supplied; this call blocks until top has exited.
        child.communicate()
if __name__ == "__main__":
    # Launch run() from a worker thread rather than the main thread, then
    # block until that thread has finished.
    worker = Thread(target=run)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("Starting")
    worker.start()
    worker.join()
在我上次运行时,这个示例程序没有产生 SIGTTOU,但 top 确实挂起了。truss 的输出显示:
....
open("/usr/local/lib/python2.7/lib-tk/_heapq.pyc",O_RDONLY,0666) ERR#2 'No such file or directory'
stat("/usr/local/lib/python2.7/lib-dynload/_heapq",0x7fffffffa500) ERR#2 'No such file or directory'
open("/usr/local/lib/python2.7/lib-dynload/_heapq.so",O_RDONLY,0666) = 5 (0x5)
fstat(5,{ mode=-rwxr-xr-x ,inode=238187,size=22293,blksize=16384 }) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
open("/usr/local/lib/python2.7/lib-dynload/_heapq.so",O_RDONLY,057) = 6 (0x6)
fstat(6,{ mode=-rwxr-xr-x ,inode=238187,size=22293,blksize=16384 }) = 0 (0x0)
pread(0x6,0x80074c2e0,0x1000,0x0,0xffff800800653120,0x8080808080808080) = 4096 (0x1000)
mmap(0x0,1069056,PROT_NONE,MAP_PRIVATE|MAP_ANON|MAP_NOCORE,-1,0x0) = 34389442560 (0x801c54000)
mmap(0x801c54000,12288,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE,6,0x0) = 34389442560 (0x801c54000)
mmap(0x801d56000,12288,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED,6,0x2000) = 34390499328 (0x801d56000)
mmap(0x0,36864,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 34366377984 (0x800655000)
close(6) = 0 (0x0)
mmap(0x0,832,PROT_READ|PROT_WRITE,MAP_ANON,-1,0x0) = 34366414848 (0x80065e000)
munmap(0x80065e000,832) = 0 (0x0)
sigprocmask(SIG_SETMASK,0x0,0x0) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGKILL|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
sigprocmask(SIG_SETMASK,0x0,0x0) = 0 (0x0)
close(5) = 0 (0x0)
close(4) = 0 (0x0)
close(3) = 0 (0x0)
close(2) = 0 (0x0)
fstat(1,{ mode=crw------- ,inode=102,size=0,blksize=4096 }) = 0 (0x0)
ioctl(1,TIOCGETA,0xffffe400) = 0 (0x0)
Starting
write(1,"Starting\n",9) = 9 (0x9)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGKILL|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
_umtx_op(0x7fffffffe1d8,0x3,0x1,0x0,0x0,0x0) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGABRT|SIGEMT|SIGKILL|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2) = 0 (0x0)
sigprocmask(SIG_SETMASK,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGABRT|SIGEMT|SIGKILL|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2) = 0 (0x0)
sigprocmask(SIG_SETMASK,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
mmap(0x7fffffbde000,135168,PROT_READ|PROT_WRITE,MAP_STACK,-1,0x0) = 140737484021760 (0x7fffffbde000)
mprotect(0x7fffffbde000,4096,PROT_NONE) = 0 (0x0)
thr_new(0x7fffffffe220,0x68,0x800a9f4c0,0x186fc,0xffffffff,0x0) = 0 (0x0)
sigprocmask(SIG_SETMASK,0x0,0x0) = 0 (0x0)
mmap(0x0,2097152,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 34390511616 (0x801d59000)
mmap(0x801f59000,684032,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) = 34392608768 (0x801f59000)
munmap(0x801d59000,684032) = 0 (0x0)
_umtx_op(0x8010127f8,0x10,0x1,0x0,0x0,0x0) = 0 (0x0)
_umtx_op(0x800e0b438,0xf,0x0,0x0,0x0,0x0) = 0 (0x0)
_umtx_op(0x800e0b438,0x10,0x1,0x0,0x0,0x0) = 0 (0x0)
_umtx_op(0x800e0b438,0x10,0x1,0x0,0x0,0x0) = 0 (0x0)
_umtx_op(0x800e0b438,0x10,0x1,0x0,0x0,0x8080808080808080) = 0 (0x0)
open("top.log",O_WRONLY|O_CREAT|O_TRUNC,0666) = 2 (0x2)
fstat(2,{ mode=-rw-r--r-- ,inode=70860,size=0,blksize=16384 }) = 0 (0x0)
pipe(0x7fffffbfd910) = 0 (0x0)
pipe(0x7fffffbfd870) = 0 (0x0)
fcntl(6,F_GETFD,) = 0 (0x0)
fcntl(6,F_SETFD,FD_CLOEXEC) = 0 (0x0)
sigprocmask(SIG_BLOCK,SIGHUP|SIGINT|SIGQUIT|SIGABRT|SIGEMT|SIGKILL|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2) = 0 (0x0)
fork() = 21503 (0x53ff)
sigprocmask(SIG_SETMASK,SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2,0x0) = 0 (0x0)
close(6) = 0 (0x0)
close(3) = 0 (0x0)
read(5,0x801e31024,1048576) = 0 (0x0)
close(5) = 0 (0x0)
fcntl(4,F_GETFL,) = 2 (0x2)
fstat(4,{ mode=p--------- ,inode=0,size=0,blksize=4096 }) = 0 (0x0)
close(4) = 0 (0x0)
我查阅了 SIGTTOU 的资料,找到了关于 TOSTOP termios 标志的说明,并且在主线程、子线程以及调用 Python 的环境中都尝试调整过它,但都无济于事。这是一个学习的过程,但我还没有找到答案。
我已经运行测试,以确认 top 进程是在 Python 进程的进程组中创建的,并且似乎一直保留在该进程组中(根据 SIGTTOU 的文档,如果不是这样,就会成为产生 SIGTTOU 的原因),而这一点看起来没有问题:top 的 PGRP 最终与 Python 的 PID/PGRP 相同。
我试过用 subprocess.check_output 和 .Popen() 运行“top”,分别使用 shell=True 和 shell=False,并以各种方式重定向 std{out,err,in},但似乎都没有改变结果。我还尝试通过 subprocess 执行“/bin/sh -c”命令来运行“top”,但同样无济于事。
如果不做一些半奇怪的事情,比如在我的 Python 线程调用的 shell 脚本中运行“top”,或者使用 os.fork() 而不是使用线程,我该如何解决这个问题,根本原因是什么?