[1/3,v2] restore: wait restored tasks in the check-only case

Submitted by Andrei Vagin on Sept. 8, 2017, 6:19 p.m.

Details

Message ID 20170908181928.27578-1-avagin@openvz.org
State Accepted
Series "A few fixes for the check-only mode"
Commit 8cfc61ff556250a335bbdf9127af21157716ac16
Headers show

Commit Message

Andrei Vagin Sept. 8, 2017, 6:19 p.m.
From: Andrei Vagin <avagin@virtuozzo.com>

If the restore was executed with the check-only option, then
after restoring all resources each task waits for its children
and exits with code 0.

v2: don't try to access thread_args after its end of life

Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/cr-restore.c       | 24 ++++++++++++++++++++----
 criu/include/restorer.h |  1 +
 criu/pie/restorer.c     | 32 +++++++++++++++++++++++++-------
 test/zdtm.py            |  7 -------
 4 files changed, 46 insertions(+), 18 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 991162ad4..4d6f6e447 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2353,14 +2353,16 @@  skip_ns_bouncing:
 	 * -------------------------------------------------------------
 	 * Below this line nothing should fail, because network is unlocked
 	 */
-	attach_to_tasks(root_seized);
+	if (!opts.check_only)
+		attach_to_tasks(root_seized);
 
 	ret = restore_switch_stage(CR_STATE_RESTORE_CREDS);
 	BUG_ON(ret);
 
 	timing_stop(TIME_RESTORE);
 
-	ret = catch_tasks(root_seized, &flag);
+	if (!opts.check_only)
+		ret = catch_tasks(root_seized, &flag);
 
 	pr_info("Restore finished successfully. Resuming tasks.\n");
 	__restore_switch_stage(CR_STATE_COMPLETE);
@@ -2388,9 +2390,22 @@  skip_for_check:
 
 	fini_cgroup();
 
-	if (!opts.check_only)
-		/* Detaches from processes and they continue run through sigreturn. */
+	/* Detaches from processes and they continue run through sigreturn. */
+	if (!opts.check_only) {
 		finalize_restore_detach(ret);
+	} else {
+		int status;
+
+		if (waitpid(root_item->pid->real, &status, 0) < 0) {
+			pr_perror("Unable to wait %d", root_item->pid->real);
+			goto out_kill;
+		}
+		if (status) {
+			pr_err("The root task exited with %d\n", status);
+			return 1;
+		}
+		return 0;
+	}
 
 	write_stats(RESTORE_STATS);
 
@@ -3645,6 +3660,7 @@  static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 		thread_args[i].futex_rla	= tcore->thread_core->futex_rla;
 		thread_args[i].futex_rla_len	= tcore->thread_core->futex_rla_len;
 		thread_args[i].pdeath_sig	= tcore->thread_core->pdeath_sig;
+		thread_args[i].check_only	= opts.check_only;
 		if (tcore->thread_core->pdeath_sig > _KNSIG) {
 			pr_err("Pdeath signal is too big\n");
 			goto err;
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index 91bdc17be..ba6b59c7f 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -98,6 +98,7 @@  struct thread_restore_args {
 	int				pdeath_sig;
 	int				pfc_ns_fd;
 
+	bool				check_only;
 	struct thread_creds_args	*creds_args;
 } __aligned(64);
 
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 739c87347..091026103 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -502,6 +502,7 @@  static void noinline rst_sigreturn(unsigned long new_sp,
  */
 long __export_restore_thread(struct thread_restore_args *args)
 {
+	bool check_only = args->check_only;
 	struct rt_sigframe *rt_sigframe;
 	k_rtsigset_t to_block;
 	unsigned long new_sp;
@@ -565,6 +566,9 @@  long __export_restore_thread(struct thread_restore_args *args)
 
 	futex_dec_and_wake(&thread_inprogress);
 
+	if (check_only)
+		restore_finish_stage(task_entries_local, CR_STATE_COMPLETE);
+
 	new_sp = (long)rt_sigframe + RT_SIGFRAME_OFFSET(rt_sigframe);
 	rst_sigreturn(new_sp, rt_sigframe);
 
@@ -1693,19 +1697,33 @@  long __export_restore_task(struct task_restore_args *args)
 
 	restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
 
-	if (args->check_only) {
-		pr_info("Restore check was successful.\n");
-		futex_abort_and_wake(&task_entries_local->nr_in_progress);
-		return 0;
-	}
-
-
 	if (ret)
 		BUG();
 
 	/* Wait until children stop to use args->task_entries */
 	futex_wait_while_gt(&thread_inprogress, 1);
 
+	if (args->check_only) {
+		pr_info("Restore check was successful.\n");
+		while (1) {
+			pid_t pid;
+			int status;
+
+			pid = sys_wait4(-1, &status, 0, NULL);
+			if (pid < 0) {
+				if (pid == -ECHILD)
+					break;
+				pr_err("Unable to wait a child: %d\n", pid);
+				goto core_restore_end;
+			}
+			if (status) {
+				pr_err("The %d process exited with %d\n", pid, status);
+				goto core_restore_end;
+			}
+		}
+		sys_exit_group(0);
+	}
+
 	sys_close(args->proc_fd);
 	sys_close(args->transport_fd);
 	std_log_set_fd(-1);
diff --git a/test/zdtm.py b/test/zdtm.py
index 85d60bad2..225892bba 100755
--- a/test/zdtm.py
+++ b/test/zdtm.py
@@ -941,13 +941,6 @@  class criu:
 			os.close(status_fds[0])
 			return ret
 
-		if '--check-only' in opts and action == "restore":
-			# Although the restored process never starts
-			# running in check-only mode, it sometimes takes
-			# some time for the process to disappear.
-			# Wait until it is gone.
-			self.__test.gone()
-
 		grep_errors(os.path.join(__ddir, log))
 		if ret != 0:
 			if self.__fault and int(self.__fault) < 128: