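Send SIGTERM to the fork server when an execution times out.
On timeout the command now signals the fork server and waits up to 60s more
for its response; the fork server forwards SIGTERM to the running child and
still reports the child's exit status.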
PiperOrigin-RevId: 775821600
diff --git a/centipede/command.cc b/centipede/command.cc
index d533bde..ab63e5d 100644
--- a/centipede/command.cc
+++ b/centipede/command.cc
@@ -333,9 +333,10 @@
struct pollfd poll_fd = {};
int poll_ret = -1;
auto poll_deadline = absl::Now() + options_.timeout;
- // The `poll()` syscall can get interrupted: it sets errno==EINTR in that
- // case. We should tolerate that.
+ bool sigterm_sent = false;
+ bool try_again = false;
do {
+ try_again = false;
// NOTE: `poll_fd` has to be reset every time.
poll_fd = {
/*fd=*/fork_server_->pipe_[1], // The file descriptor to wait for.
@@ -344,15 +345,36 @@
const int poll_timeout_ms = static_cast<int>(absl::ToInt64Milliseconds(
std::max(poll_deadline - absl::Now(), absl::Milliseconds(1))));
poll_ret = poll(&poll_fd, 1, poll_timeout_ms);
- } while (poll_ret < 0 && errno == EINTR);
+ // The `poll()` syscall can get interrupted: it sets errno==EINTR in that
+ // case. We should tolerate that.
+ if (poll_ret < 0 && errno == EINTR) {
+ try_again = true;
+ continue;
+ }
+ if (poll_ret == 0 && !sigterm_sent) {
+ LogProblemInfo(
+ absl::StrCat("Timeout while waiting for fork server: timeout is ",
+ absl::FormatDuration(options_.timeout)));
+ CHECK_NE(fork_server_->pid_, -1);
+ LOG(INFO) << "Sending SIGTERM to the fork server PID "
+ << fork_server_->pid_ << " and waiting for 60s";
+ kill(fork_server_->pid_, SIGTERM);
+ sigterm_sent = true;
+ poll_deadline += absl::Seconds(60);
+ try_again = true;
+ continue;
+ }
+ } while (try_again);
if (poll_ret != 1 || (poll_fd.revents & POLLIN) == 0) {
// The fork server errored out or timed out, or some other error occurred,
// e.g. the syscall was interrupted.
if (poll_ret == 0) {
+ CHECK(sigterm_sent);
LogProblemInfo(
- absl::StrCat("Timeout while waiting for fork server: timeout is ",
- absl::FormatDuration(options_.timeout)));
+ "Fork server did not respond within 60s after SIGTERM was sent");
+ // TODO: xinhaoyuan - the right thing to do is to either properly
+ // recover or request early exit.
} else {
LogProblemInfo(absl::StrCat(
"Error while waiting for fork server: poll() returned ", poll_ret));
diff --git a/centipede/command_test.cc b/centipede/command_test.cc
index 3129a41..6f26f43 100644
--- a/centipede/command_test.cc
+++ b/centipede/command_test.cc
@@ -190,7 +190,7 @@
cmd_options.timeout = absl::Seconds(2);
Command cmd{helper, std::move(cmd_options)};
ASSERT_TRUE(cmd.StartForkServer(test_tmpdir, "ForkServer"));
- EXPECT_EQ(cmd.Execute(), EXIT_FAILURE);
+ EXPECT_EQ(cmd.Execute(), SIGTERM);
std::string log_contents;
ReadFromLocalFile(log, log_contents);
EXPECT_EQ(log_contents, absl::Substitute("Got input: $0", input));
diff --git a/centipede/runner_fork_server.cc b/centipede/runner_fork_server.cc
index 7467f94..28ece5a 100644
--- a/centipede/runner_fork_server.cc
+++ b/centipede/runner_fork_server.cc
@@ -57,6 +57,7 @@
#else // __APPLE__
#include <linux/limits.h> // ARG_MAX
#endif // __APPLE__
+#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>
@@ -205,6 +206,46 @@
if (pipe1 < 0) Exit("###open pipe1 failed\n");
Log("###Centipede fork server ready\n");
+ struct sigaction old_sigterm_act{};
+ struct sigaction sigterm_act{};
+ sigterm_act.sa_handler = [](int) {};
+ if (sigaction(SIGTERM, &sigterm_act, &old_sigterm_act) != 0) {
+ Exit("###sigaction failed on SIGTERM for the fork server");
+ }
+
+ struct sigaction old_sigchld_act{};
+ struct sigaction sigchld_act{};
+ sigchld_act.sa_handler = [](int) {};
+ if (sigaction(SIGCHLD, &sigchld_act, &old_sigchld_act) != 0) {
+ Exit("###sigaction failed on SIGCHLD for the fork server");
+ }
+
+ sigset_t old_sigset;
+ sigset_t server_sigset;
+ if (sigprocmask(SIG_SETMASK, nullptr, &server_sigset) != 0) {
+ Exit("###sigprocmask() failed to get the existing sigset\n");
+ }
+ if (sigaddset(&server_sigset, SIGTERM) != 0) {
+ Exit("###sigaddset() failed to add SIGTERM\n");
+ }
+ if (sigaddset(&server_sigset, SIGCHLD) != 0) {
+ Exit("###sigaddset() failed to add SIGCHLD\n");
+ }
+ if (sigprocmask(SIG_SETMASK, &server_sigset, &old_sigset) != 0) {
+ Exit("###sigprocmask() failed to set the fork server sigset\n");
+ }
+
+ sigset_t wait_sigset;
+ if (sigemptyset(&wait_sigset) != 0) {
+ Exit("###sigemptyset() failed\n");
+ }
+ if (sigaddset(&wait_sigset, SIGTERM) != 0) {
+ Exit("###sigaddset() failed to add SIGTERM to the wait sigset\n");
+ }
+ if (sigaddset(&wait_sigset, SIGCHLD) != 0) {
+ Exit("###sigaddset() failed to add SIGCHLD to the wait sigset\n");
+ }
+
// Loop.
while (true) {
Log("###Centipede fork server blocking on pipe0\n");
@@ -216,6 +257,15 @@
if (pid < 0) {
Exit("###fork failed\n");
} else if (pid == 0) {
+ if (sigaction(SIGTERM, &old_sigterm_act, nullptr) != 0) {
+ Exit("###sigaction failed on SIGTERM for the child");
+ }
+ if (sigaction(SIGCHLD, &old_sigchld_act, nullptr) != 0) {
+ Exit("###sigaction failed on SIGCHLD for the child");
+ }
+ if (sigprocmask(SIG_SETMASK, &old_sigset, nullptr) != 0) {
+ Exit("###sigprocmask() failed to restore the previous sigset\n");
+ }
// Child process. Reset stdout/stderr and let it run normally.
for (int fd = 1; fd <= 2; fd++) {
lseek(fd, 0, SEEK_SET);
@@ -227,7 +277,28 @@
} else {
// Parent process.
int status = -1;
- if (waitpid(pid, &status, 0) < 0) Exit("###waitpid failed\n");
+ while (true) {
+ int sig = -1;
+ if (sigwait(&wait_sigset, &sig) != 0) {
+ Exit("###sigwait() failed\n");
+ }
+ if (sig == SIGCHLD) {
+ Log("###Got SIGCHLD\n");
+ const pid_t ret = waitpid(pid, &status, WNOHANG);
+ if (ret < 0) {
+ Exit("###waitpid failed\n");
+ }
+ if (ret == pid && (WIFEXITED(status) || WIFSIGNALED(status))) {
+ Log("###Got exit status\n");
+ break;
+ }
+ } else if (sig == SIGTERM) {
+ Log("###Got SIGTERM\n");
+ kill(pid, SIGTERM);
+ } else {
+ Exit("###Unknown signal from sigwait\n");
+ }
+ }
if (WIFEXITED(status)) {
if (WEXITSTATUS(status) == EXIT_SUCCESS)
Log("###Centipede fork returned EXIT_SUCCESS\n");
@@ -239,8 +310,28 @@
Log("###Centipede fork crashed\n");
}
Log("###Centipede fork writing status to pipe1\n");
- if (write(pipe1, &status, sizeof(status)) == -1)
+ if (write(pipe1, &status, sizeof(status)) == -1) {
Exit("###write to pipe1 failed\n");
+ }
+ // Drain any pending signals before the next execution. The controller
+ // won't send more signals once the write to pipe1 has succeeded.
+ {
+ sigset_t pending;
+ while (true) {
+ if (sigpending(&pending) != 0) {
+ Exit("###sigpending() failed\n");
+ }
+ if (sigismember(&pending, SIGTERM) ||
+ sigismember(&pending, SIGCHLD)) {
+ int unused_sig;
+ if (sigwait(&wait_sigset, &unused_sig) != 0) {
+ Exit("###sigwait() failed\n");
+ }
+ } else {
+ break;
+ }
+ }
+ }
}
}
// The only way out of the loop is via Exit() or return.