[glib/wip/pwithnall/2216-pidfd-sigchld] gmain: Use waitid() on pidfds rather than a global SIGCHLD handler




commit 288fae314be8698e6d13a13751590bf14d01455a
Author: Philip Withnall <pwithnall endlessos org>
Date:   Thu Dec 23 17:45:51 2021 +0000

    gmain: Use waitid() on pidfds rather than a global SIGCHLD handler
    
    When the system supports it (as all Linux kernels ≥ 5.3 should), it’s
    preferable to use `pidfd_open()` and `waitid()` to be notified of
    child processes exiting or being signalled, rather than installing a
    default `SIGCHLD` handler.
    
    A default `SIGCHLD` handler is global, and can never interact well with
    other code (from the application or other libraries) which also wants to
    install a `SIGCHLD` handler.
    
    This use of `pidfd_open()` is racy (the PID may be reused between
    `g_child_watch_source_new()` being called and `pidfd_open()` being
    called), so it doesn’t improve behaviour there. For that, we’d need
    continuous use of pidfds throughout GLib, from fork/spawn time until
    here. See #1866 for that.
    
    The use of `waitid()` to get the process exit status could be expanded
    in future to also work for stopped or continued processes (as per #175)
    by adding `WSTOPPED | WCONTINUED` into the flags. That’s a behaviour
    change which is outside the strict scope of adding pidfd support,
    though.
    
    Signed-off-by: Philip Withnall <pwithnall endlessos org>
    
    Helps: #1866
    Fixes: #2216

 glib/gmain.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 meson.build  |  14 ++++++++
 2 files changed, 121 insertions(+), 6 deletions(-)
---
diff --git a/glib/gmain.c b/glib/gmain.c
index e2b091ffd2..aab6c48d02 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -69,6 +69,12 @@
 #include <errno.h>
 #include <string.h>
 
+#ifdef HAVE_PIDFD
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <linux/wait.h>  /* P_PIDFD */
+#endif  /* HAVE_PIDFD */
+
 #ifdef G_OS_WIN32
 #define STRICT
 #include <windows.h>
@@ -355,10 +361,11 @@ struct _GChildWatchSource
   /* On Unix this is a wait status, which is the thing you pass to WEXITSTATUS()
    * to get the status returned from the process’ main() or passed to exit(): */
   gint        child_status;
-#ifdef G_OS_WIN32
+  /* @poll is always used on Windows, and used on Unix iff @using_pidfd is set: */
   GPollFD     poll;
-#else /* G_OS_WIN32 */
-  gboolean    child_exited; /* (atomic) */
+#ifndef G_OS_WIN32
+  gboolean    child_exited; /* (atomic); not used iff @using_pidfd is set */
+  gboolean    using_pidfd;
 #endif /* G_OS_WIN32 */
 };
 
@@ -5474,7 +5481,8 @@ dispatch_unix_signals_unlocked (void)
         {
           GChildWatchSource *source = node->data;
 
-          if (!g_atomic_int_get (&source->child_exited))
+          if (!source->using_pidfd &&
+              !g_atomic_int_get (&source->child_exited))
             {
               pid_t pid;
               do
@@ -5533,6 +5541,34 @@ g_child_watch_prepare (GSource *source,
   return g_atomic_int_get (&child_watch_source->child_exited);
 }
 
+#ifdef HAVE_PIDFD
+static int
+siginfo_t_to_wait_status (const siginfo_t *info)
+{
+  /* Each of these returns is essentially the inverse of WIFEXITED(),
+   * WIFSIGNALED(), etc. */
+  switch (info->si_code)
+    {
+    case CLD_EXITED:
+      return W_EXITCODE (info->si_status, 0);
+    case CLD_KILLED:
+      return W_EXITCODE (0, info->si_status);
+    case CLD_DUMPED:
+      return W_EXITCODE (0, info->si_status | WCOREFLAG);
+    case CLD_CONTINUED:
+#ifndef __W_CONTINUED
+      return __W_CONTINUED;
+#else
+      return 0xffff;
+#endif
+    case CLD_STOPPED:
+    case CLD_TRAPPED:
+    default:
+      return W_STOPCODE (info->si_status);
+    }
+}
+#endif  /* HAVE_PIDFD */
+
 static gboolean
 g_child_watch_check (GSource *source)
 {
@@ -5540,6 +5576,34 @@ g_child_watch_check (GSource *source)
 
   child_watch_source = (GChildWatchSource *) source;
 
+#ifdef HAVE_PIDFD
+  if (child_watch_source->using_pidfd)
+    {
+      gboolean child_exited = child_watch_source->poll.revents & G_IO_IN;
+
+      if (child_exited)
+        {
+          siginfo_t child_info = { 0, };
+
+          /* Get the exit status */
+          if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 &&
+              child_info.si_pid != 0)
+            {
+              /* waitid() helpfully provides the wait status in a decomposed
+               * form which is quite useful. Unfortunately we have to report it
+               * to the #GChildWatchFunc as a waitpid()-style platform-specific
+               * wait status, so that the user code in #GChildWatchFunc can then
+               * call WIFEXITED() (etc.) on it. That means re-composing the
+               * status information. */
+              child_watch_source->child_status = siginfo_t_to_wait_status (&child_info);
+              child_watch_source->child_exited = TRUE;
+            }
+        }
+
+      return child_exited;
+    }
+#endif  /* HAVE_PIDFD */
+
   return g_atomic_int_get (&child_watch_source->child_exited);
 }
 
@@ -5724,6 +5788,11 @@ g_unix_signal_watch_finalize (GSource    *source)
 static void
 g_child_watch_finalize (GSource *source)
 {
+  GChildWatchSource *child_watch_source = (GChildWatchSource *) source;
+
+  if (child_watch_source->using_pidfd)
+    return;
+
   G_LOCK (unix_signal_lock);
   unix_child_watches = g_slist_remove (unix_child_watches, source);
   unref_unix_signal_handler_unlocked (SIGCHLD);
@@ -5825,6 +5894,9 @@ g_child_watch_source_new (GPid pid)
 {
   GSource *source;
   GChildWatchSource *child_watch_source;
+#ifdef HAVE_PIDFD
+  int errsv;
+#endif
 
 #ifndef G_OS_WIN32
   g_return_val_if_fail (pid > 0, NULL);
@@ -5843,14 +5915,43 @@ g_child_watch_source_new (GPid pid)
   child_watch_source->poll.events = G_IO_IN;
 
   g_source_add_poll (source, &child_watch_source->poll);
-#else /* G_OS_WIN32 */
+#else /* !G_OS_WIN32 */
+
+#ifdef HAVE_PIDFD
+  /* Use a pidfd, if possible, to avoid having to install a global SIGCHLD
+   * handler and potentially competing with any other library/code which wants
+   * to install one.
+   *
+   * Unfortunately this use of pidfd isn’t race-free (the PID could be recycled
+   * between the caller calling g_child_watch_source_new() and here), but it’s
+   * better than SIGCHLD.
+   */
+  child_watch_source->poll.fd = (int) syscall (SYS_pidfd_open, pid, 0);
+  errsv = errno;
+
+  if (child_watch_source->poll.fd >= 0)
+    {
+      child_watch_source->using_pidfd = TRUE;
+      child_watch_source->poll.events = G_IO_IN;
+      g_source_add_poll (source, &child_watch_source->poll);
+
+      return source;
+    }
+  else
+    {
+      g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s",
+               pid, g_strerror (errsv));
+      /* Fall through; likely the kernel isn’t new enough to support pidfd_open() */
+    }
+#endif  /* HAVE_PIDFD */
+
   G_LOCK (unix_signal_lock);
   ref_unix_signal_handler_unlocked (SIGCHLD);
   unix_child_watches = g_slist_prepend (unix_child_watches, child_watch_source);
   if (waitpid (pid, &child_watch_source->child_status, WNOHANG) > 0)
     child_watch_source->child_exited = TRUE;
   G_UNLOCK (unix_signal_lock);
-#endif /* G_OS_WIN32 */
+#endif /* !G_OS_WIN32 */
 
   return source;
 }
diff --git a/meson.build b/meson.build
index 27600672b6..0edbc2bf00 100644
--- a/meson.build
+++ b/meson.build
@@ -873,6 +873,20 @@ if cc.links('''#include <sys/eventfd.h>
   glib_conf.set('HAVE_EVENTFD', 1)
 endif
 
+# Check for pidfd_open(2)
+if cc.links('''#include <sys/syscall.h>
+               #include <sys/wait.h>
+               #include <linux/wait.h>
+               #include <unistd.h>
+               int main (int argc, char ** argv) {
+                 siginfo_t child_info = { 0, };
+                 syscall (SYS_pidfd_open, 0, 0);
+                 waitid (P_PIDFD, 0, &child_info, WEXITED | WNOHANG);
+                 return 0;
+               }''', name : 'pidfd_open(2) system call')
+  glib_conf.set('HAVE_PIDFD', 1)
+endif
+
 # Check for __uint128_t (gcc) by checking for 128-bit division
 uint128_t_src = '''int main() {
 static __uint128_t v1 = 100;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]