[4/4] restore: Fix hang if root task is waiting on zombie

Submitted by Andrey Vagin on Dec. 11, 2018, 6:06 p.m.

Details

Message ID 20181211180609.GB23910@gmail.com
State New
Series "restore: Fix potential hung on restore"
Headers show

Commit Message

Andrey Vagin Dec. 11, 2018, 6:06 p.m.
On Tue, Dec 11, 2018 at 12:36:29PM +0300, Cyrill Gorcunov wrote:
> On Mon, Dec 10, 2018 at 11:07:09PM -0800, Andrey Vagin wrote:
> > > A zombie is not reparented to us yet.
> > 
> > This means that we are waiting when a process (helper) will die and its
> > children (zombies) will be reparanted to the init process.
> > 
> > If nr_in_progress is 1, this means that there is no processes which are
> > going to die, doesn't it? It the answer is yes, what are we waiting
> > here?
> 
> If nr_in_progress is 1 it means the zombie processes already decremented
> nr_in_progress but may not be yet reparented to us, because we do decrement
> nr_in_progress first and only then call kill(self) in zombie restore code.
> 
> 	for (i = 0; i < task_args->zombies_n; i++) {
> 		int ret, nr_in_progress;
> 
> 		nr_in_progress = futex_get(&task_entries_local->nr_in_progress);
> 
> 		ret = sys_waitid(P_PID, task_args->zombies[i], NULL, WNOWAIT | WEXITED, NULL);
> 		if (ret == -ECHILD) {
> 			/* A process isn't reparented to this task yet.
> 			 * Let's wait when someone complete this stage
> 			 * and try again.
> 			 */
> -->			futex_wait_while_eq(&task_entries_local->nr_in_progress,
> 								nr_in_progress);
> 			i--;
> 			continue;
> 		}
> 
> The nr_in_progress is fetched earlier and equal to 1. Then we call for waitid
> and got -ECHILD which resulted that we're sitting in futex wait forever. That
> is what I've been notified about when investigating a bug. Unfortunately I
> lost the container failing and was unable to recreate a local test case.
> I suspect there is a race window between signal delivery and nr_in_progress
> updating.

This hack isn't about zombies of this process, it is used to wait when
other tasks complete waiting their helpers and zombies. nr_in_progress
is descrimented after wait_helpers and wait_zombies.

        if (wait_helpers(args) < 0)
                goto core_restore_end;
        if (wait_zombies(args) < 0)
                goto core_restore_end;
...

        restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD);

The problem here is that we don't take into account that zombies can be
reparented from other zombies...

> 
> Still since I've no local reproduction maybe it worth simply let this patch
> flying around and if it triggers someday we will back to the question.

Take a look at the attached test
From c2e9864c1236aaa35266e7596f57ada9d2cb8421 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@gmail.com>
Date: Tue, 11 Dec 2018 20:45:01 +0300
Subject: [PATCH] test

Signed-off-by: Andrei Vagin <avagin@gmail.com>
---
 test/zdtm/lib/test.c           |  8 +++++
 test/zdtm/static/Makefile      |  1 +
 test/zdtm/static/zombie02.c    | 69 ++++++++++++++++++++++++++++++++++++++++++
 test/zdtm/static/zombie02.desc |  1 +
 4 files changed, 79 insertions(+)
 create mode 100644 test/zdtm/static/zombie02.c
 create mode 100644 test/zdtm/static/zombie02.desc

Patch hide | download patch | download mbox

diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c
index a1bdfc1b4..839db8092 100644
--- a/test/zdtm/lib/test.c
+++ b/test/zdtm/lib/test.c
@@ -295,6 +295,14 @@  void test_init(int argc, char **argv)
 	futex_init(&test_shared_state->stage);
 	futex_set(&test_shared_state->stage, TEST_INIT_STAGE);
 
+
+	if (getenv("ZDTM_PIDNS"))
+		pr_err("newpid\n");
+	if (getenv("ZDTM_PIDNS") && unshare(CLONE_NEWPID)) {
+		pr_perror("Unable to create pidns");
+		exit(1);
+	}
+
 	pid = fork();
 	if (pid < 0) {
 		pr_perror("Daemonizing failed");
diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile
index 7b8d66377..c5d934e5a 100644
--- a/test/zdtm/static/Makefile
+++ b/test/zdtm/static/Makefile
@@ -11,6 +11,7 @@  TST_NOFILE	:=				\
 		wait00				\
 		zombie00			\
 		zombie01			\
+		zombie02			\
 		fpu00				\
 		fpu01				\
 		fpu02				\
diff --git a/test/zdtm/static/zombie02.c b/test/zdtm/static/zombie02.c
new file mode 100644
index 000000000..5f53b3d98
--- /dev/null
+++ b/test/zdtm/static/zombie02.c
@@ -0,0 +1,69 @@ 
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <string.h>
+
+#include "zdtmtst.h"
+
+const char *test_doc	= "See if we can wait() for a zombified child after migration";
+const char *test_author	= "Roman Kagan <rkagan@parallels.com>";
+
+int main(int argc, char ** argv)
+{
+	int status;
+	int pid_pipe[2];
+	pid_t pid, pid_c;
+	siginfo_t siginfo;
+
+	setenv("ZDTM_PIDNS", "1", 1);
+	test_init(argc, argv);
+
+	if (pipe(pid_pipe))
+		return 1;
+
+	pid = fork();
+	if (pid == 0) {
+		setsid();
+		pid_c = fork();
+		if (pid_c == 0)
+			return 0;
+		if (write(pid_pipe[1], &pid_c, sizeof(pid_c)) != sizeof(pid_c))
+			return 1;
+		if (waitid(P_PID, pid_c, &siginfo, WNOWAIT | WEXITED) == -1) {
+			pr_perror("Unable to wait %d", pid_c);
+			exit(1);
+		}
+		return 0;
+	}
+	close(pid_pipe[1]);
+
+	if (read(pid_pipe[0], &pid_c, sizeof(pid_c)) != sizeof(pid_c))
+		return 1;
+
+	if (waitid(P_PID, pid, &siginfo, WNOWAIT | WEXITED) == -1) {
+		pr_perror("Unable to wait %d", pid);
+		exit(1);
+	}
+	if (waitid(P_PID, pid_c, &siginfo, WNOWAIT | WEXITED) == -1) {
+		pr_perror("Unable to wait %d", pid_c);
+		exit(1);
+	}
+
+	test_daemon();
+	test_waitsig();
+
+	if (waitpid(pid, &status, 0) == -1) {
+		pr_perror("Unable to wait %d", pid);
+		exit(1);
+	}
+	if (waitpid(pid_c, &status, 0) == -1) {
+		pr_perror("Unable to wait %d", pid_c);
+		exit(1);
+	}
+
+	pass();
+	return 0;
+}
diff --git a/test/zdtm/static/zombie02.desc b/test/zdtm/static/zombie02.desc
new file mode 100644
index 000000000..fa2c82d08
--- /dev/null
+++ b/test/zdtm/static/zombie02.desc
@@ -0,0 +1 @@ 
+{'flavor': 'h', 'flags': 'suid'}