有带超时功能的 waitpid 等价方法吗？-IT科技

摘要：问题描述：假设我有一个进程启动了几个子进程。父进程需要知道子进程何时退出。我可以使用waitpid，但如果/当父级需要退出时，我无法告诉被阻塞的线程waitpid正常退出并加入它。让事情自行清理是件好事，但这可能不是什么大问题。我可以使用waitpidwith WNOHANG，然后休眠一段时间以避免繁忙等待。...

问题描述：

假设我有一个进程启动了几个子进程。父进程需要知道子进程何时退出。

我可以使用waitpid，但如果/当父级需要退出时，我无法告诉被阻塞的线程waitpid正常退出并加入它。让事情自行清理是件好事，但这可能不是什么大问题。

我可以使用带 WNOHANG 的 waitpid，然后休眠一段时间以避免忙等待。但这样我只能隔一段时间才知道子进程是否已退出。就我而言，立即知道子进程何时退出可能不是非常重要，但我希望尽快知道……

我可以使用信号处理程序SIGCHLD，并在信号处理程序中执行我在子进程退出时要执行的任何操作，或者向其他线程发送消息以执行某些操作。但使用信号处理程序会使代码流程稍微混乱。

我真正想做的是让 waitpid 带一个超时，比如 5 秒。由于退出进程不是时间关键操作，我可以从容地通知线程退出，而在其余时间里它仍阻塞在 waitpid 中，随时准备做出反应。Linux 中有这样的调用吗？在上述替代方案中，哪一个最好？

编辑：

基于回复的另一种方法是使用 pthread_sigmask() 在所有线程中阻塞 SIGCHLD。然后在一个线程中，持续调用 sigtimedwait() 来等待 SIGCHLD。这意味着我可以让该调用超时并检查线程是否应该退出，如果不需要退出，则继续阻塞以等待信号。一旦 SIGCHLD 被传递给此线程，我们就可以立即对其做出反应，并且直接在等待线程中处理，而无需使用信号处理程序。

解决方案 1：

不要将 alarm() 与 wait() 混用。这样可能会丢失错误信息。

使用自管道技巧。这会将任何信号转换为select()可用事件：

/* Self-pipe descriptors: the handler writes to [1]; [0] is the read end. */
int selfpipe[2];

/*
 * Signal handler: turn a delivered signal into one readable byte on the
 * self-pipe.
 *
 * n is the signal number (unused). errno is saved and restored so the
 * interrupted code never observes a value changed by write().
 */
void selfpipe_sigh(int n)
{
    int save_errno = errno;

    (void)n;
    /* Writes a single NUL byte; a failed write is deliberately ignored. */
    (void)write(selfpipe[1], "", 1);
    errno = save_errno;
}
/*
 * Create the self-pipe, switch both ends to non-blocking mode and install
 * selfpipe_sigh() as the SIGCHLD handler.
 *
 * Aborts the process if any of these steps fails.
 */
void selfpipe_setup(void)
{
    static struct sigaction act;
    int i;

    if (pipe(selfpipe) == -1) { abort(); }

    for (i = 0; i < 2; i++) {
        /* Check F_GETFL first: OR-ing O_NONBLOCK into a -1 error return
         * would hand F_SETFL a garbage flag word. */
        int flags = fcntl(selfpipe[i], F_GETFL);
        if (flags == -1 ||
            fcntl(selfpipe[i], F_SETFL, flags | O_NONBLOCK) == -1) {
            abort();
        }
    }

    memset(&act, 0, sizeof(act));
    act.sa_handler = selfpipe_sigh;
    if (sigaction(SIGCHLD, &act, NULL) == -1) { abort(); }
}

然后，类似 waitpid 的函数如下所示：

/*
 * Wait up to 5 seconds for the self-pipe to become readable, then reap every
 * child that has already terminated.
 *
 * Returns the number of children reaped; 0 if select() timed out or failed.
 */
int selfpipe_waitpid(void)
{
    static char dummy[4096];
    fd_set rfds;
    struct timeval tv;
    int died = 0, st;

    tv.tv_sec = 5;
    tv.tv_usec = 0;
    FD_ZERO(&rfds);
    FD_SET(selfpipe[0], &rfds);
    if (select(selfpipe[0]+1, &rfds, NULL, NULL, &tv) > 0) {
       /* Drain every pending notification byte from the pipe. */
       while (read(selfpipe[0],dummy,sizeof(dummy)) > 0);
       /* With WNOHANG, waitpid() returns 0 while children exist but none
        * has exited yet; only a positive return is a reaped child. Testing
        * for != -1 would spin forever in that case. */
       while (waitpid(-1, &st, WNOHANG) > 0) died++;
    }
    return died;
}

您可以看到selfpipe_waitpid()如何控制超时甚至与其他select()基于的 IO 混合。

解决方案 2：

分叉一个中间子进程，该子进程分叉真正的子进程和一个超时进程，并等待其所有（两个）子进程。当一个子进程退出时，它将杀死另一个子进程并退出。

/* Intermediate child: supervises the real worker and a timer process. */
pid_t intermediate_pid = fork();
if (intermediate_pid == 0) {
    pid_t worker_pid = fork();
    if (worker_pid == -1) {
        _exit(127); /* could not start the worker */
    }
    if (worker_pid == 0) {
        do_work();
        _exit(0);
    }

    pid_t timeout_pid = fork();
    if (timeout_pid == -1) {
        /* No timer process. Never let -1 reach kill(): kill(-1, sig)
         * signals every process the caller is allowed to signal. */
        kill(worker_pid, SIGKILL);
        waitpid(worker_pid, NULL, 0);
        _exit(127);
    }
    if (timeout_pid == 0) {
        sleep(timeout_time);
        _exit(0);
    }

    /* Whichever child finishes first decides the outcome. */
    pid_t exited_pid = wait(NULL);
    if (exited_pid == worker_pid) {
        kill(timeout_pid, SIGKILL);
    } else {
        kill(worker_pid, SIGKILL); // Or something less violent if you prefer
    }
    wait(NULL); // Collect the other process
    _exit(0); // Or some more informative status
}
/* Only wait when the fork succeeded: waitpid(-1, ...) would wait for any
 * child of this process instead. */
if (intermediate_pid > 0) {
    waitpid(intermediate_pid, 0, 0);
}

出奇的简单:)

如果您确定程序中没有其他模块产生自己的子进程，您甚至可以省略中间子进程。

解决方案 3：

这是一个有趣的问题。我发现sigtimedwait可以做到这一点。

编辑 2016/08/29：感谢 Mark Edington 的建议。我在 Ubuntu 16.04 上测试了您的示例，它按预期运行。

注意：这只适用于子进程。遗憾的是，Linux/Unix 中似乎没有与 Windows 的 WaitForSingleObject(unrelated_process_handle, timeout) 等效的方法来在超时内收到无关进程终止的通知。

好的，Mark Edington 的示例代码在这里：

/* The program creates a child process and waits for it to finish. If a timeout
 * elapses the child is killed. Waiting is done using sigtimedwait(). Race
 * condition is avoided by blocking the SIGCHLD signal before fork().
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>

/*
 * Fork a child that prints a message, sleeps for 10 seconds, prints a second
 * message and exits with status 0.
 *
 * Returns the child's PID in the parent. If fork() fails, the error is
 * reported with perror() and the process exits with status 1.
 */
static pid_t fork_child (void)
{
    pid_t p = fork ();

    if (p == -1) {
        perror ("fork");
        exit (1);
    }

    if (p == 0) {
        puts ("child: sleeping...");
        sleep (10);
        puts ("child: exiting");
        exit (0);
    }

    return p;
}

/*
 * Block SIGCHLD, start the child, then wait up to 5 seconds for SIGCHLD with
 * sigtimedwait(). On timeout the child is killed with SIGKILL. In every
 * non-error case the child is finally reaped with waitpid().
 *
 * Returns 0 on success, 1 if sigprocmask(), sigtimedwait() or waitpid() fails.
 */
int main (int argc, char *argv[])
{
    sigset_t mask;
    struct timespec timeout;
    pid_t pid;

    (void) argc;
    (void) argv;

    sigemptyset (&mask);
    sigaddset (&mask, SIGCHLD);

    /* SIGCHLD must be blocked before fork() so that it stays pending until
     * sigtimedwait() collects it, even if the child exits immediately. */
    if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0) {
        perror ("sigprocmask");
        return 1;
    }

    pid = fork_child ();

    timeout.tv_sec = 5;
    timeout.tv_nsec = 0;

    do {
        if (sigtimedwait(&mask, NULL, &timeout) < 0) {
            if (errno == EINTR) {
                /* Interrupted by a signal other than SIGCHLD. */
                continue;
            }
            else if (errno == EAGAIN) {
                printf ("Timeout, killing child\n");
                kill (pid, SIGKILL);
            }
            else {
                perror ("sigtimedwait");
                return 1;
            }
        }

        break;
    } while (1);

    if (waitpid(pid, NULL, 0) < 0) {
        perror ("waitpid");
        return 1;
    }

    return 0;
}

解决方案 4：

如果您的程序仅在当代 Linux 内核（5.3 或更高版本）上运行，则首选方法是使用pidfd_open（https://lwn.net/Articles/789023/ https://man7.org/linux/man-pages/man2/pidfd_open.2.html）。

此系统调用返回一个代表进程的文件描述符，然后您可以像等待其他类型的文件描述符一样，对其使用 select、poll 或 epoll。

例如，

/* Obtain a file descriptor that refers to process `pid`. */
int fd = pidfd_open(pid, 0);
struct pollfd pfd = {fd, POLLIN, 0};
/* Wait at most 1000 ms; the expression is true when poll() reports the
 * descriptor as ready, false on timeout (0) or error (-1). */
poll(&pfd, 1, 1000) == 1;

解决方案 5：

该函数可以通过信号中断，因此您可以在调用 waitpid() 之前设置一个计时器，当计时器信号发出时，它将以 EINTR 退出。编辑：它应该像在调用 waitpid() 之前调用 alarm(5) 一样简单。

解决方案 6：

我认为当子进程发出 SIGCHLD 信号时，select 会以 EINTR 返回。我相信这应该有效：

while(1)
{
  /* select() takes exactly five arguments; only pselect() accepts a
   * signal mask (and it takes a struct timespec timeout instead). */
  int retval = select(0, NULL, NULL, NULL, &tv);
  if (retval == -1 && errno == EINTR) // some signal
  {
      /* Keep the PID itself: > 0 is a reaped child, 0 means no child has
       * exited yet, -1 is an error. */
      pid_t pid = waitpid(-1, &st, WNOHANG);
      if (pid > 0) // some child signaled
      {
          // handle the exit of `pid`; its status is in st
      }
  }
  else if (retval == 0)
  {
      // timeout
      break;
  }
  else // error
  {
      break;
  }
}

注意：您可以使用 pselect 来覆盖当前的 sigmask，并避免被不需要的信号中断。

解决方案 7：

除了直接调用 waitpid()，您还可以使用 SIGCHLD（子进程退出后发送给父进程）调用 sigtimedwait() 并等待它被传递到当前线程，正如函数名称所暗示的那样，支持超时参数。

请查看以下代码片段了解详细信息


// Waits up to timeout_ms milliseconds for SIGCHLD, then collects the exit
// status of `pid` without blocking.
//
// pid:        child process to collect.
// timeout_ms: maximum time to wait for SIGCHLD, in milliseconds.
// status:     passed straight to waitpid() to receive the exit status.
//
// Returns true only when waitpid() returned `pid`. Returns false on timeout
// (errno is set to ETIMEDOUT) and on any failure, which is also printed.
static bool waitpid_with_timeout(pid_t pid, int timeout_ms, int* status) {
    sigset_t child_mask, old_mask;
    sigemptyset(&child_mask);
    sigaddset(&child_mask, SIGCHLD);

    // SIGCHLD has to be blocked for sigtimedwait() to be able to collect it.
    if (sigprocmask(SIG_BLOCK, &child_mask, &old_mask) == -1) {
        printf("*** sigprocmask failed: %s\n", strerror(errno));
        return false;
    }

    timespec ts;
    ts.tv_sec = MSEC_TO_SEC(timeout_ms);
    ts.tv_nsec = (timeout_ms % 1000) * 1000000;
    int ret = TEMP_FAILURE_RETRY(sigtimedwait(&child_mask, NULL, &ts));
    // Keep sigtimedwait()'s errno; the sigprocmask() call below may change it.
    int saved_errno = errno;

    // Set the signals back the way they were.
    if (sigprocmask(SIG_SETMASK, &old_mask, NULL) == -1) {
        printf("*** sigprocmask failed: %s\n", strerror(errno));
        if (ret == 0) {
            return false;
        }
    }
    if (ret == -1) {
        errno = saved_errno;
        if (errno == EAGAIN) {
            // sigtimedwait() reports an expired timeout as EAGAIN.
            errno = ETIMEDOUT;
        } else {
            printf("*** sigtimedwait failed: %s\n", strerror(errno));
        }
        return false;
    }

    // A SIGCHLD was received; WNOHANG keeps this call from blocking if it
    // was not caused by `pid`.
    pid_t child_pid = waitpid(pid, status, WNOHANG);
    if (child_pid != pid) {
        if (child_pid != -1) {
            printf("*** Waiting for pid %d, got pid %d instead\n", pid, child_pid);
        } else {
            printf("*** waitpid failed: %s\n", strerror(errno));
        }
        return false;
    }
    return true;
}

参考：https://android.googlesource.com/platform/frameworks/native/+/master/cmds/dumpstate/DumpstateUtil.cpp#46

解决方案 8：

如果您无论如何都要使用信号（按照 Steve 的建议），那么您可以在想要退出时手动发送信号。这将导致 waitpid 返回 EINTR，然后线程可以退出。无需定期警报/重启。

解决方案 9：

由于某些情况，我绝对需要在主线程中运行这个，而且使用自管道技巧或 eventfd 并不是一件简单的事情，因为我的 epoll 循环正在另一个线程中运行。所以我把 Stack Overflow 上其他回答中的处理方式拼凑起来，想出了下面这个方案。请注意，一般来说，用其他方式执行此操作要安全得多，但这个方法很简单。如果有人想评论它有多糟糕，我洗耳恭听。

注意：除了要运行该信号的线程之外，绝对有必要阻止任何线程中的信号处理。我默认这样做，因为我认为在随机线程中处理信号很麻烦。

static void ctlAlarmSignalHandler(int s);

/**
 * Wait for `child` with waitpid(), giving up when a SIGALRM timer expires.
 *
 * child:    PID to wait for.
 * usec:     timeout in microseconds; 0 is replaced by 1 because ualarm(0, 0)
 *           arms no alarm.
 * timedOut: set to non-zero when waitpid() failed with EINTR, otherwise 0.
 *
 * The SIGALRM disposition is replaced for the duration of the call and
 * restored afterwards; a static mutex serialises concurrent callers.
 */
static void ctlWaitPidTimeout(pid_t child, useconds_t usec, int *timedOut) {
    int rc = -1;
    int waitErrno = 0;

    static pthread_mutex_t alarmMutex = PTHREAD_MUTEX_INITIALIZER;

    TRACE("ctlWaitPidTimeout: waiting on %lu\n", (unsigned long) child);

    /**
     * paranoid, in case this was called twice in a row by different
     * threads, which could quickly turn very messy.
     */
    pthread_mutex_lock(&alarmMutex);

    /* set the alarm handler */
    struct sigaction alarmSigaction;
    struct sigaction oldSigaction;

    sigemptyset(&alarmSigaction.sa_mask);
    /* sa_flags carries no SA_RESTART, so SIGALRM makes waitpid() fail with
     * EINTR instead of being restarted */
    alarmSigaction.sa_flags   = 0;
    alarmSigaction.sa_handler = ctlAlarmSignalHandler;
    sigaction(SIGALRM, &alarmSigaction, &oldSigaction);

    /* set alarm, because no alarm is fired when the first argument is 0, 1 is used instead */
    ualarm((usec == 0) ? 1 : usec, 0);

    /* wait for the child we just killed */
    rc = waitpid(child, NULL, 0);

    /* capture errno right away: the cleanup calls below may overwrite it */
    waitErrno = errno;

    /* if errno == EINTR, the alarm went off, set timedOut to true */
    *timedOut = (rc == -1 && waitErrno == EINTR);

    /* in case we did not time out, unset the current alarm so it doesn't bother us later */
    ualarm(0, 0);

    /* restore old signal action */
    sigaction(SIGALRM, &oldSigaction, NULL);

    pthread_mutex_unlock(&alarmMutex);

    TRACE("ctlWaitPidTimeout: timeout wait done, rc = %d, error = '%s'\n", rc, (rc == -1) ? strerror(waitErrno) : "none");
}

/*
 * SIGALRM handler: only traces the signal number `s`.
 *
 * NOTE(review): TRACE is defined outside this snippet; if it expands to a
 * stdio call it is not async-signal-safe inside a handler. Confirm.
 */
static void ctlAlarmSignalHandler(int s) {
    TRACE("ctlAlarmSignalHandler: alarm occured, %d\n", s);
}

编辑：从那时起，我已过渡到使用与我现有的基于 epoll() 的事件循环很好地集成的解决方案，即使用 timerfd。由于我一直使用 epoll，因此我实际上并没有失去任何平台独立性，而且我获得了额外的睡眠，因为我知道多线程和 UNIX 信号的不正当组合不会再损害我的程序。

解决方案 10：

我可以使用 SIGCHLD 信号处理程序，并在信号处理程序中执行子进程退出时要执行的任何操作，或者向其他线程发送消息以执行某些操作。但使用信号处理程序会使代码流程稍微混乱。

为了避免竞争条件，您应该避免做比更改信号处理程序中的易失性标志更复杂的事情。

我认为，对于你的情况，最好的选择是向父进程发送一个信号。waitpid() 随后会将 errno 设置为 EINTR 并返回。此时，你检查 waitpid 返回值和 errno，注意你已收到一个信号并采取适当的措施。

解决方案 11：

如果可以接受第三方库，那么 libkqueue 项目可以模拟 kqueue（*BSD 的事件系统），并通过 EVFILT_PROC + NOTE_EXIT 提供基本的进程监控。

使用 kqueue 或 libkqueue 的主要优势在于它是跨平台的，并且没有信号处理的复杂性。如果您的程序使用异步 I/O，您还会发现它比使用 epoll 以及各种 *fd 函数（signalfd、eventfd、pidfd 等）的摩擦更小。

#include <errno.h>     /* errno */
#include <stdio.h>
#include <stdint.h>
#include <string.h>    /* strerror */
#include <unistd.h>    /* close */
#include <sys/event.h> /* kqueue header */
#include <sys/types.h> /* for pid_t */

/* Link with -lkqueue */

int waitpid_timeout(pid_t pid, struct timespec *timeout)
{
    struct kevent changelist, eventlist;
    int kq, ret;

    /* Populate a changelist entry (an event we want to be notified of) */
    EV_SET(&amp;changelist, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);

    kq = kqueue();

    /* Call kevent with a timeout */
    ret = kevent(kq, &amp;changelist, 1, &amp;eventlist, 1, timeout);

    /* Kevent returns 0 on timeout, the number of events that occurred, or -1 on error */
    switch (ret) {
    case -1:
        printf(&quot;Error %s
&quot;, strerror(errno));
        break;

    case 0:
        printf(&quot;Timeout
&quot;);
        break;

    case 1:
        printf(&quot;PID %u exited, status %u
&quot;, (unsigned int)eventlist.ident, (unsigned int)eventlist.data);
        break;
    }
    close(kq);

    return ret;
}

在 Linux 上，libkqueue 的后端要么使用 pidfd（Linux 内核 >= 5.3），要么使用一个等待线程，该线程监听 SIGCHLD，并在进程退出时通知一个或多个 kqueue 实例。第二种方法效率不高（它会用 waitid 扫描所有已注册关注的 PID），但除非您正在等待大量 PID，否则这并不重要。

kqueue 自诞生之初就支持 EVFILT_PROC，而 libkqueue 自 v2.5.0 起支持它。