1

当通过 fork() 创建子进程后,父进程可以调用 wait() 等待子进程结束。假设只是为了做实验,我们不调用 wait(),而是让父进程 sleep(),那为什么这样行不通?

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

/*
 * Fork once; the child prints two messages and falls off the end of main,
 * while the parent prints, sleeps for 10 seconds, and prints again.
 * The fork() result is not checked for failure, so a return of -1 also
 * takes the parent path.
 */
int main()
{
    pid_t fork_result = fork();

    if (fork_result != 0)
    {
        /* Parent side: sleeps instead of calling wait() on the child. */
        printf("\nParent process");
        sleep(10);
        printf("\nParent process exiting");
    }
    else
    {
        /* Child side: fork() returned 0. */
        printf("\nChild process");
        printf("\nChild process exiting");
    }
}

我猜是 SIGCHLD 信号使父进程从 sleep() 中被唤醒。但这是为什么?它只是一个子进程,两者拥有不同的地址空间和资源,它怎么能干扰父进程呢?

4

2 回答 2

3

请注意不同系统之间的差异。在 Mac OS X 10.9 上运行对您的代码稍作修改后的版本(见下文)时,子进程的终止不会影响父进程中的 sleep(10):

Parent process

Child process
Child process exiting 1384590368

Parent process exiting 1384590378

如您所见,父进程比子进程晚约 10 秒退出。

#include <stdio.h>
#include <unistd.h>
#include <time.h>

/*
 * Fork once and timestamp both exits so the gap between the child's exit and
 * the parent's exit is visible.
 *
 * The child prints its messages and returns immediately; the parent sleeps
 * for 10 seconds (without calling wait()) before printing its exit time.
 *
 * Returns 0 on success, 1 if fork() fails.
 */
int main(void)
{
    pid_t child_id = fork();

    if (child_id < 0)
    {
        /* fork() failed: no child exists, so do not pretend to be a parent
         * and sleep for 10 seconds; report the error and stop. */
        perror("fork");
        return 1;
    }

    if (child_id == 0)
    {
        /* Child process. */
        printf("\nChild process");
        printf("\nChild process exiting %ld\n", (long)time(0));
    }
    else
    {
        /* Parent process: sleeps instead of waiting for the child. */
        printf("\nParent process\n");
        sleep(10);
        printf("\nParent process exiting %ld\n", (long)time(0));
    }

    return 0;
}

我在运行旧版本 Linux(2008 年的 2.6.16.60 内核)的虚拟机上得到了相同的行为:子进程终止 10 秒后,父进程才退出。

因此,如果您所问的“为什么它不起作用?”指的是“子进程一终止,父进程就立即退出”,那么在这两个系统上,您的代码都无法证明会发生这种情况。我不能断言在您的系统上父进程一定不会立即退出,但如果真是那样,会出乎意料。

您可能会发现此程序对研究 SIGCHLD 信号的行为很有用:

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

/* Copy of the siginfo_t handed to the most recent catcher() invocation. */
static siginfo_t sig_info;
/* Number of the last signal caught by catcher(); prt_interrupt() resets it
 * to 0 once the signal has been reported. */
static volatile sig_atomic_t sig_num;
/* Third argument of the most recent catcher() invocation; it is stored but
 * not read anywhere in the code shown here. */
static void *sig_ctxt;

/*
 * Three-argument signal handler (installed by set_handler() with SA_SIGINFO).
 * It only records its arguments in the file-scope variables sig_ctxt,
 * sig_info and sig_num so that prt_interrupt() can report them later.
 */
static void catcher(int signum, siginfo_t *info, void *vp)
{
    sig_ctxt = vp;
    sig_info = *info;   /* struct copy of the delivered signal information */
    sig_num = signum;
}

static void set_handler(int signum)
{
    struct sigaction sa;
    sa.sa_flags = SA_SIGINFO;
    sa.sa_sigaction = catcher;
    sigemptyset(&sa.sa_mask);

    if (sigaction(signum, &sa, 0) != 0)
    {
        int errnum = errno;
        fprintf(stderr, "Failed to set signal handler (%d: %s)\n", errnum, strerror(errnum));
        exit(1);
    }
}

/*
 * If catcher() has recorded a signal since the last call (sig_num != 0),
 * print the recorded signal number and sending PID to fp, then clear
 * sig_num. Does nothing when no signal is pending.
 */
static void prt_interrupt(FILE *fp)
{
    if (sig_num == 0)
        return;

    fprintf(fp, "Signal %d from PID %d\n", sig_info.si_signo, (int)sig_info.si_pid);
    sig_num = 0;
}

/*
 * Fork five children in sequence. Child i prints its PID and exits with
 * status i. After each fork the parent calls wait() once, prints the PID
 * returned by fork(), the PID returned by wait() and the low 16 bits of the
 * status, and then reports any signal recorded by catcher().
 *
 * A failing wait() (return value -1) is reported as-is rather than retried,
 * so the effect of the current SIGCHLD disposition stays visible in the
 * output. The loop stops early, with a message on stderr, if fork() fails.
 */
static void five_kids(void)
{
    for (int i = 0; i < 5; i++)
    {
        /* Flush all stdio streams before forking. Otherwise, when stdout is
         * fully buffered (pipe or file), the child inherits the parent's
         * pending output and writes it a second time when it calls exit(). */
        fflush(0);

        pid_t pid = fork();
        if (pid < 0)
        {
            int errnum = errno;
            fprintf(stderr, "Failed to fork child %d (%d: %s)\n", i, errnum, strerror(errnum));
            break;
        }
        else if (pid == 0)
        {
            printf("PID %d - exiting with status %d\n", (int)getpid(), i);
            exit(i);
        }
        else
        {
            int status = 0;
            pid_t corpse = wait(&status);
            /* pid_t is cast to int to match %d, as done for the other PIDs
             * printed in this file; the masked status is cast for %X. */
            printf("Child: %d; Corpse: %d; Status = 0x%.4X\n",
                   (int)pid, (int)corpse, (unsigned)(status & 0xFFFF));
            prt_interrupt(stdout);
        }
    }
}

/*
 * Run five_kids() three times, each time under a different SIGCHLD
 * disposition: the default action, ignored, and caught by catcher().
 * A header line announces each phase. Always returns 0.
 */
int main(void)
{
    /* Phase 1: default disposition. */
    fputs("SIGCHLD set to SIG_DFL\n", stdout);
    signal(SIGCHLD, SIG_DFL);
    five_kids();

    /* Phase 2: signal explicitly ignored. */
    fputs("SIGCHLD set to SIG_IGN\n", stdout);
    signal(SIGCHLD, SIG_IGN);
    five_kids();

    /* Phase 3: signal caught by catcher() via set_handler(). */
    fputs("SIGCHLD set to catcher()\n", stdout);
    set_handler(SIGCHLD);
    five_kids();

    return 0;
}

再次在 Mac OS X 10.9 上,它产生:

SIGCHLD set to SIG_DFL
PID 52345 - exiting with status 0
Child: 52345; Corpse: 52345; Status = 0x0000
PID 52346 - exiting with status 1
Child: 52346; Corpse: 52346; Status = 0x0100
PID 52347 - exiting with status 2
Child: 52347; Corpse: 52347; Status = 0x0200
PID 52348 - exiting with status 3
Child: 52348; Corpse: 52348; Status = 0x0300
PID 52349 - exiting with status 4
Child: 52349; Corpse: 52349; Status = 0x0400
SIGCHLD set to SIG_IGN
PID 52350 - exiting with status 0
Child: 52350; Corpse: -1; Status = 0x0000
PID 52351 - exiting with status 1
Child: 52351; Corpse: -1; Status = 0x0000
PID 52352 - exiting with status 2
Child: 52352; Corpse: -1; Status = 0x0000
PID 52353 - exiting with status 3
Child: 52353; Corpse: -1; Status = 0x0000
PID 52354 - exiting with status 4
Child: 52354; Corpse: -1; Status = 0x0000
SIGCHLD set to catcher()
PID 52355 - exiting with status 0
Child: 52355; Corpse: -1; Status = 0x0000
Signal 20 from PID 52355
Child: 52356; Corpse: 52355; Status = 0x0000
PID 52356 - exiting with status 1
Child: 52357; Corpse: -1; Status = 0x0000
PID 52357 - exiting with status 2
Signal 20 from PID 52356
Child: 52358; Corpse: 52357; Status = 0x0200
Signal 20 from PID 52357
PID 52358 - exiting with status 3
Child: 52359; Corpse: 52356; Status = 0x0100
PID 52359 - exiting with status 4

Linux 上的行为类似——并不完全相同:

SIGCHLD set to SIG_DFL
PID 14645 - exiting with status 0
Child: 14645; Corpse: 14645; Status = 0x0000
PID 14646 - exiting with status 1
Child: 14646; Corpse: 14646; Status = 0x0100
PID 14647 - exiting with status 2
Child: 14647; Corpse: 14647; Status = 0x0200
PID 14648 - exiting with status 3
Child: 14648; Corpse: 14648; Status = 0x0300
PID 14649 - exiting with status 4
Child: 14649; Corpse: 14649; Status = 0x0400
SIGCHLD set to SIG_IGN
PID 14650 - exiting with status 0
Child: 14650; Corpse: -1; Status = 0x0000
PID 14651 - exiting with status 1
Child: 14651; Corpse: -1; Status = 0x0000
PID 14652 - exiting with status 2
Child: 14652; Corpse: -1; Status = 0x0000
PID 14653 - exiting with status 3
Child: 14653; Corpse: -1; Status = 0x0000
PID 14654 - exiting with status 4
Child: 14654; Corpse: -1; Status = 0x0000
SIGCHLD set to catcher()
PID 14655 - exiting with status 0
Child: 14655; Corpse: 14655; Status = 0x0000
Signal 17 from PID 14655
PID 14656 - exiting with status 1
Child: 14656; Corpse: 14656; Status = 0x0100
Signal 17 from PID 14656
PID 14657 - exiting with status 2
Child: 14657; Corpse: 14657; Status = 0x0200
Signal 17 from PID 14657
PID 14658 - exiting with status 3
Child: 14658; Corpse: 14658; Status = 0x0300
Signal 17 from PID 14658
PID 14659 - exiting with status 4
Child: 14659; Corpse: 14659; Status = 0x0400
Signal 17 from PID 14659
于 2013-11-16T08:41:41.967 回答
0

请仔细阅读 fork(2)、execve(2)、wait(2) 等的手册页。

wait 系统调用不仅仅是被动地等待子进程,它还会清理内核的内部状态,以避免产生僵尸进程。

您也可以对您的程序使用 strace(1),例如 strace -f。

并花几个小时阅读一本好书,例如 Advanced Linux Programming。理解进程需要很多时间,而我们没有那么多时间教你。请阅读书籍,并像您现在这样继续尝试!另外,花一些时间阅读自由软件的源代码(比如某些 shell 的源代码,例如 bash 或 sash)。

顺便说一句,你的程序在另一点上也是错误的:你应该始终检查 fork 是否失败(也就是始终处理 fork 的三种可能返回值:在子进程中 ==0,在父进程中 >0,失败时 <0)。可以考虑使用 shell 中的 ulimit(它会调用 setrlimit(2))来触发此类错误条件以便测试。

于 2013-11-16T08:04:58.470 回答