c - 为什么这个 pclose() 实现会随着 ECHILD 提前返回，除非在 popen() 之后延迟调用？

Question

我最近想弄明白如何 fork/exec 子进程并重定向 stdin、stdout 和 stderr，通过这些方法，我编写了自己的popen()类似pclose()函数my_popen()and my_pclose()，灵感来自 Apple 的开源实现popen() 和 pclose()。

通过人工检查——例如ps在不同的终端中运行以寻找预期的子进程——popen()似乎在预期的子进程出现时起作用。

问题：如果我在之后立即调用它，为什么会my_pclose()立即返回？我的期望是等到子进程结束。errno == 10 (ECHILD)my_popen()my_pclose()

问题：鉴于上述情况，如果我在andmy_pclose()之间插入延迟，为什么会在子进程正常结束后按预期返回？my_popen()my_pclose()

问题：my_pclose()只有在子进程结束后才需要进行哪些更正才能可靠地返回，而不需要任何延迟或其他设计？

MCVE 下面。

一些上下文：我想让my_popen()用户 1）写入子进程' stdin，2）读取子进程' stdout，3）读取子进程' stderr，4）知道子进程' pid_t，5）在 fork 的环境中运行/exec 的进程可能是子进程或孙子进程，并且能够在后者的情况下杀死孙子进程（因此是setpgid()）。

// main.c

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

typedef int Pipe[2];

typedef enum PipeEnd {
  READ_END  = 0,
  WRITE_END = 1
} PipeEnd;

#define INVALID_FD (-1)
#define INVALID_PID (0)

typedef struct my_popen_t {
  bool  success;  ///< true if the child process was spawned.
  Pipe  stdin;    ///< parent -> stdin[WRITE_END] -> child's stdin
  Pipe  stdout;   ///< child -> stdout[WRITE_END] -> parent reads stdout[READ_END]
  Pipe  stderr;   ///< child -> stderr[WRITE_END] -> parent reads stderr[READ_END]
  pid_t pid;      ///< child process' pid
} my_popen_t;

/** dup2( p[pe] ) then close and invalidate both ends of p */
static void dupFd( Pipe p, const PipeEnd pe, const int fd ) {
  dup2( p[pe], fd);
  close( p[READ_END] );
  close( p[WRITE_END] );
  p[READ_END] = INVALID_FD;
  p[WRITE_END] = INVALID_FD;
}

/**
 * Redirect a parent-accessible pipe to the child's stdin, and redirect the
 * child's stdout and stderr to parent-accesible pipes.
 */
my_popen_t my_popen( const char* cmd ) {
  my_popen_t r = { false,
    { INVALID_FD, INVALID_FD },
    { INVALID_FD, INVALID_FD },
    { INVALID_FD, INVALID_FD },
    INVALID_PID };

  if ( -1 == pipe( r.stdin ) ) { goto end; }
  if ( -1 == pipe( r.stdout ) ) { goto end; }
  if ( -1 == pipe( r.stderr ) ) { goto end; }

  switch ( (r.pid = fork()) ) {
    case -1: // Error
      goto end;

    case 0: // Child process
      dupFd( r.stdin, READ_END, STDIN_FILENO );
      dupFd( r.stdout, WRITE_END, STDOUT_FILENO );
      dupFd( r.stderr, WRITE_END, STDERR_FILENO );
      setpgid( getpid(), getpid() );

      {
        char* argv[] = { (char*)"sh", (char*)"-c", (char*)cmd, NULL };

        // @todo Research why - as has been pointed out - _exit() should be
        // used here, not exit().
        if ( -1 == execvp( argv[0], argv ) ) { exit(0); }
      }
  }

  // Parent process
  close( r.stdin[READ_END] );
  r.stdin[READ_END] = INVALID_FD;
  close( r.stdout[WRITE_END] );
  r.stdout[WRITE_END] = INVALID_FD;
  close( r.stderr[WRITE_END] );
  r.stderr[WRITE_END] = INVALID_FD;
  r.success = true;

end:
  if ( ! r.success ) {
    if ( INVALID_FD != r.stdin[READ_END] ) { close( r.stdin[READ_END] ); }
    if ( INVALID_FD != r.stdin[WRITE_END] ) { close( r.stdin[WRITE_END] ); }
    if ( INVALID_FD != r.stdout[READ_END] ) { close( r.stdout[READ_END] ); }
    if ( INVALID_FD != r.stdout[WRITE_END] ) { close( r.stdout[WRITE_END] ); }
    if ( INVALID_FD != r.stderr[READ_END] ) { close( r.stderr[READ_END] ); }
    if ( INVALID_FD != r.stderr[WRITE_END] ) { close( r.stderr[WRITE_END] ); }

    r.stdin[READ_END] = r.stdin[WRITE_END] =
      r.stdout[READ_END] = r.stdout[WRITE_END] =
      r.stderr[READ_END] = r.stderr[WRITE_END] = INVALID_FD;
  }

  return r;
}

int my_pclose( my_popen_t* p ) {
  if ( ! p )                    { return -1; }
  if ( ! p->success )           { return -1; }
  if ( INVALID_PID == p->pid )  { return -1; }

  {
    pid_t pid = INVALID_PID;
    int wstatus;

    do {
      pid = waitpid( -1 * (p->pid), &wstatus, 0 );
    } while ( -1 == pid && EINTR == errno );

    return ( -1 == pid ? pid : wstatus );
  }
}

int main( int argc, char* argv[] ) {
  my_popen_t p = my_popen( "sleep 3" );
  //sleep( 1 ); // Uncomment this line for my_pclose() success.
  int res = my_pclose( &p );

  printf( "res: %d, errno: %d (%s)\n", res, errno, strerror( errno ) );

  return 0;
}

意外失败的执行：

$ gcc --version && gcc -g ./main.c && ./a.out
gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516
Copyright (C) 2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

res: -1, errno: 10 (No child processes)

参考文献：1 , 2 , 3

更新：
这个链接让我想知道在ingsetpgid( pid, 0 )之后添加父进程是否相关。fork()它似乎确实有效，因为在添加之后，my_pclose()立即调用my_popen()似乎会等到该过程完成。

老实说，我不太明白为什么这会有所作为。如果知识渊博的社区成员能提供见解，我将不胜感激。

my_popen_t my_popen( const char* cmd ) {
  my_popen_t r = { false,
    { INVALID_FD, INVALID_FD },
    { INVALID_FD, INVALID_FD },
    { INVALID_FD, INVALID_FD },
    INVALID_PID };

  if ( -1 == pipe( r.stdin ) ) { goto end; }
  if ( -1 == pipe( r.stdout ) ) { goto end; }
  if ( -1 == pipe( r.stderr ) ) { goto end; }

  switch ( (r.pid = fork()) ) {
    case -1: // Error
      goto end;

    case 0: // Child process
      dupFd( r.stdin, READ_END, STDIN_FILENO );
      dupFd( r.stdout, WRITE_END, STDOUT_FILENO );
      dupFd( r.stderr, WRITE_END, STDERR_FILENO );
      //setpgid( getpid(), getpid() ); // This looks unnecessary

      {
        char* argv[] = { (char*)"sh", (char*)"-c", (char*)cmd, NULL };

        // @todo Research why - as has been pointed out - _exit() should be
        // used here, not exit().
        if ( -1 == execvp( argv[0], argv ) ) { exit(0); }
      }
  }

  // Parent process
  setpgid( r.pid, 0 ); // This is the relevant change
  close( r.stdin[READ_END] );
  r.stdin[READ_END] = INVALID_FD;
  close( r.stdout[WRITE_END] );
  r.stdout[WRITE_END] = INVALID_FD;
  close( r.stderr[WRITE_END] );
  r.stderr[WRITE_END] = INVALID_FD;
  r.success = true;

end:
  if ( ! r.success ) {
    if ( INVALID_FD != r.stdin[READ_END] ) { close( r.stdin[READ_END] ); }
    if ( INVALID_FD != r.stdin[WRITE_END] ) { close( r.stdin[WRITE_END] ); }
    if ( INVALID_FD != r.stdout[READ_END] ) { close( r.stdout[READ_END] ); }
    if ( INVALID_FD != r.stdout[WRITE_END] ) { close( r.stdout[WRITE_END] ); }
    if ( INVALID_FD != r.stderr[READ_END] ) { close( r.stderr[READ_END] ); }
    if ( INVALID_FD != r.stderr[WRITE_END] ) { close( r.stderr[WRITE_END] ); }

    r.stdin[READ_END] = r.stdin[WRITE_END] =
      r.stdout[READ_END] = r.stdout[WRITE_END] =
      r.stderr[READ_END] = r.stderr[WRITE_END] = INVALID_FD;
  }

  return r;
}

score 2 · Accepted Answer

您的问题my_pclose()是您正在尝试执行进程组等待，而不是等待特定的子进程。这个：

      pid = waitpid( -1 * (p->pid), &wstatus, 0 );

尝试等待属于 process group 的孩子，但如果没有您后来添加p->pid的调用，这极不可能工作。setpgid()分叉的子进程最初将与其父进程位于同一进程组中，并且该组的进程组号几乎肯定会与子进程号不同。

此外，尚不清楚您为什么首先尝试等待进程组。你知道你要等待的具体进程my_pclose()，而不管它是否属于同一个进程组，收集不同的进程是不正确的。您应该等待该特定过程：

      pid = waitpid(p->pid, &wstatus, 0 );

无论有没有setpgid()调用都可以，但几乎可以肯定的是，您应该在诸如此类的通用函数中省略该调用。

c - 为什么这个 pclose() 实现会随着 ECHILD 提前返回，除非在 popen() 之后延迟调用？

1 回答 1

Related

Reference