Send SIGTERM to the fork server on timeout and wait 60s for it to respond

PiperOrigin-RevId: 775821600
diff --git a/centipede/command.cc b/centipede/command.cc
index d533bde..ab63e5d 100644
--- a/centipede/command.cc
+++ b/centipede/command.cc
@@ -333,9 +333,10 @@
     struct pollfd poll_fd = {};
     int poll_ret = -1;
     auto poll_deadline = absl::Now() + options_.timeout;
-    // The `poll()` syscall can get interrupted: it sets errno==EINTR in that
-    // case. We should tolerate that.
+    bool sigterm_sent = false;
+    bool try_again = false;
     do {
+      try_again = false;
       // NOTE: `poll_fd` has to be reset every time.
       poll_fd = {
           /*fd=*/fork_server_->pipe_[1],  // The file descriptor to wait for.
@@ -344,15 +345,36 @@
       const int poll_timeout_ms = static_cast<int>(absl::ToInt64Milliseconds(
           std::max(poll_deadline - absl::Now(), absl::Milliseconds(1))));
       poll_ret = poll(&poll_fd, 1, poll_timeout_ms);
-    } while (poll_ret < 0 && errno == EINTR);
+      // The `poll()` syscall can get interrupted: it sets errno==EINTR in that
+      // case. We should tolerate that.
+      if (poll_ret < 0 && errno == EINTR) {
+        try_again = true;
+        continue;
+      }
+      if (poll_ret == 0 && !sigterm_sent) {
+        LogProblemInfo(
+            absl::StrCat("Timeout while waiting for fork server: timeout is ",
+                         absl::FormatDuration(options_.timeout)));
+        CHECK_NE(fork_server_->pid_, -1);
+        LOG(INFO) << "Sending SIGTERM to the fork server PID "
+                  << fork_server_->pid_ << " and waiting for 60s";
+        kill(fork_server_->pid_, SIGTERM);
+        sigterm_sent = true;
+        poll_deadline += absl::Seconds(60);
+        try_again = true;
+        continue;
+      }
+    } while (try_again);
 
     if (poll_ret != 1 || (poll_fd.revents & POLLIN) == 0) {
       // The fork server errored out or timed out, or some other error occurred,
       // e.g. the syscall was interrupted.
       if (poll_ret == 0) {
+        CHECK(sigterm_sent);
         LogProblemInfo(
-            absl::StrCat("Timeout while waiting for fork server: timeout is ",
-                         absl::FormatDuration(options_.timeout)));
+            "Fork server did not respond within 60s after SIGTERM was sent");
+        // TODO: xinhaoyuan - the right thing to do is to either properly
+        // recover or request early exit.
       } else {
         LogProblemInfo(absl::StrCat(
             "Error while waiting for fork server: poll() returned ", poll_ret));
diff --git a/centipede/command_test.cc b/centipede/command_test.cc
index 3129a41..6f26f43 100644
--- a/centipede/command_test.cc
+++ b/centipede/command_test.cc
@@ -190,7 +190,7 @@
     cmd_options.timeout = absl::Seconds(2);
     Command cmd{helper, std::move(cmd_options)};
     ASSERT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer"));
-    EXPECT_EQ(cmd.Execute(), EXIT_FAILURE);
+    EXPECT_EQ(cmd.Execute(), SIGTERM);
     std::string log_contents;
     ReadFromLocalFile(log, log_contents);
     EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input));
diff --git a/centipede/runner_fork_server.cc b/centipede/runner_fork_server.cc
index 7467f94..28ece5a 100644
--- a/centipede/runner_fork_server.cc
+++ b/centipede/runner_fork_server.cc
@@ -57,6 +57,7 @@
 #else                      // __APPLE__
 #include <linux/limits.h>  // ARG_MAX
 #endif                     // __APPLE__
+#include <signal.h>
 #include <sys/wait.h>
 #include <unistd.h>
 
@@ -205,6 +206,46 @@
   if (pipe1 < 0) Exit("###open pipe1 failed\n");
   Log("###Centipede fork server ready\n");
 
+  struct sigaction old_sigterm_act{};
+  struct sigaction sigterm_act{};
+  sigterm_act.sa_handler = [](int) {};  // No-op; consumed via sigwait().
+  if (sigaction(SIGTERM, &sigterm_act, &old_sigterm_act) != 0) {
+    Exit("###sigaction failed on SIGTERM for the fork server\n");
+  }
+
+  struct sigaction old_sigchld_act{};
+  struct sigaction sigchld_act{};
+  sigchld_act.sa_handler = [](int) {};  // No-op; consumed via sigwait().
+  if (sigaction(SIGCHLD, &sigchld_act, &old_sigchld_act) != 0) {
+    Exit("###sigaction failed on SIGCHLD for the fork server\n");
+  }
+
+  sigset_t old_sigset;
+  sigset_t server_sigset;
+  if (sigprocmask(SIG_SETMASK, nullptr, &server_sigset) != 0) {
+    Exit("###sigprocmask() failed to get the existing sigset\n");
+  }
+  if (sigaddset(&server_sigset, SIGTERM) != 0) {
+    Exit("###sigaddset() failed to add SIGTERM\n");
+  }
+  if (sigaddset(&server_sigset, SIGCHLD) != 0) {
+    Exit("###sigaddset() failed to add SIGCHLD\n");
+  }
+  if (sigprocmask(SIG_SETMASK, &server_sigset, &old_sigset) != 0) {
+    Exit("###sigprocmask() failed to set the fork server sigset\n");
+  }
+
+  sigset_t wait_sigset;
+  if (sigemptyset(&wait_sigset) != 0) {
+    Exit("###sigemptyset() failed\n");
+  }
+  if (sigaddset(&wait_sigset, SIGTERM) != 0) {
+    Exit("###sigaddset() failed to add SIGTERM to the wait sigset\n");
+  }
+  if (sigaddset(&wait_sigset, SIGCHLD) != 0) {
+    Exit("###sigaddset() failed to add SIGCHLD to the wait sigset\n");
+  }
+
   // Loop.
   while (true) {
     Log("###Centipede fork server blocking on pipe0\n");
@@ -216,6 +257,15 @@
     if (pid < 0) {
       Exit("###fork failed\n");
     } else if (pid == 0) {
+      if (sigaction(SIGTERM, &old_sigterm_act, nullptr) != 0) {
+        Exit("###sigaction failed on SIGTERM for the child\n");
+      }
+      if (sigaction(SIGCHLD, &old_sigchld_act, nullptr) != 0) {
+        Exit("###sigaction failed on SIGCHLD for the child\n");
+      }
+      if (sigprocmask(SIG_SETMASK, &old_sigset, nullptr) != 0) {
+        Exit("###sigprocmask() failed to restore the previous sigset\n");
+      }
       // Child process. Reset stdout/stderr and let it run normally.
       for (int fd = 1; fd <= 2; fd++) {
         lseek(fd, 0, SEEK_SET);
@@ -227,7 +277,28 @@
     } else {
       // Parent process.
       int status = -1;
-      if (waitpid(pid, &status, 0) < 0) Exit("###waitpid failed\n");
+      while (true) {
+        int sig = -1;
+        if (sigwait(&wait_sigset, &sig) != 0) {
+          Exit("###sigwait() failed\n");
+        }
+        if (sig == SIGCHLD) {
+          Log("###Got SIGCHLD\n");
+          const pid_t ret = waitpid(pid, &status, WNOHANG);
+          if (ret < 0) {
+            Exit("###waitpid failed\n");
+          }
+          if (ret == pid && (WIFEXITED(status) || WIFSIGNALED(status))) {
+            Log("###Got exit status\n");
+            break;
+          }
+        } else if (sig == SIGTERM) {
+          Log("###Got SIGTERM\n");
+          kill(pid, SIGTERM);
+        } else {
+          Exit("###Unknown signal from sigwait\n");
+        }
+      }
       if (WIFEXITED(status)) {
         if (WEXITSTATUS(status) == EXIT_SUCCESS)
           Log("###Centipede fork returned EXIT_SUCCESS\n");
@@ -239,8 +310,28 @@
         Log("###Centipede fork crashed\n");
       }
       Log("###Centipede fork writing status to pipe1\n");
-      if (write(pipe1, &status, sizeof(status)) == -1)
+      if (write(pipe1, &status, sizeof(status)) == -1) {
         Exit("###write to pipe1 failed\n");
+      }
+      // Drain any remaining signals before the next execution. The controller
+      // won't send more signals after the write has succeeded.
+      {
+        sigset_t pending;
+        while (true) {
+          if (sigpending(&pending) != 0) {
+            Exit("###sigpending() failed\n");
+          }
+          if (sigismember(&pending, SIGTERM) ||
+              sigismember(&pending, SIGCHLD)) {
+            int unused_sig;
+            if (sigwait(&wait_sigset, &unused_sig) != 0) {
+              Exit("###sigwait() failed\n");
+            }
+          } else {
+            break;
+          }
+        }
+      }
     }
   }
   // The only way out of the loop is via Exit() or return.