[1/3] restore: wait restored tasks in the check-only case

Submitted by Andrei Vagin on June 21, 2017, 5:16 a.m.

Details

Message ID 20170621051647.28276-2-avagin@openvz.org
State New
Series "A few fixes for the check-only mode"
Headers show

Commit Message

Andrei Vagin June 21, 2017, 5:16 a.m.
From: Andrei Vagin <avagin@virtuozzo.com>

If the restore was executed with the check-only option,
then after restoring all resources each task waits for its
children and exits with code 0.

Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/cr-restore.c       | 24 ++++++++++++++++++++----
 criu/include/restorer.h |  1 +
 criu/pie/restorer.c     | 31 ++++++++++++++++++++++++-------
 test/zdtm.py            |  7 -------
 4 files changed, 45 insertions(+), 18 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 41b5db3..f492fa2 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2276,14 +2276,16 @@  skip_ns_bouncing:
 	 * -------------------------------------------------------------
 	 * Below this line nothing should fail, because network is unlocked
 	 */
-	attach_to_tasks(root_seized);
+	if (!opts.check_only)
+		attach_to_tasks(root_seized);
 
 	ret = restore_switch_stage(CR_STATE_RESTORE_CREDS);
 	BUG_ON(ret);
 
 	timing_stop(TIME_RESTORE);
 
-	ret = catch_tasks(root_seized, &flag);
+	if (!opts.check_only)
+		ret = catch_tasks(root_seized, &flag);
 
 	pr_info("Restore finished successfully. Resuming tasks.\n");
 	__restore_switch_stage(CR_STATE_COMPLETE);
@@ -2311,9 +2313,22 @@  skip_for_check:
 
 	fini_cgroup();
 
-	if (!opts.check_only)
-		/* Detaches from processes and they continue run through sigreturn. */
+	/* Detaches from processes and they continue run through sigreturn. */
+	if (!opts.check_only) {
 		finalize_restore_detach(ret);
+	} else {
+		int status;
+
+		if (waitpid(root_item->pid->real, &status, 0) < 0) {
+			pr_perror("Unable to wait %d", root_item->pid->real);
+			goto out_kill;
+		}
+		if (status) {
+			pr_err("The root task exited with %d\n", status);
+			return 1;
+		}
+		return 0;
+	}
 
 	write_stats(RESTORE_STATS);
 
@@ -3564,6 +3579,7 @@  static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 		thread_args[i].futex_rla	= tcore->thread_core->futex_rla;
 		thread_args[i].futex_rla_len	= tcore->thread_core->futex_rla_len;
 		thread_args[i].pdeath_sig	= tcore->thread_core->pdeath_sig;
+		thread_args[i].check_only	= opts.check_only;
 		if (tcore->thread_core->pdeath_sig > _KNSIG) {
 			pr_err("Pdeath signal is too big\n");
 			goto err;
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index 736ba96..bef9577 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -98,6 +98,7 @@  struct thread_restore_args {
 	int				pdeath_sig;
 	int				pfc_ns_fd;
 
+	bool				check_only;
 	struct thread_creds_args	*creds_args;
 } __aligned(64);
 
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 7b9c052..0b7c8d1 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -565,6 +565,9 @@  long __export_restore_thread(struct thread_restore_args *args)
 
 	futex_dec_and_wake(&thread_inprogress);
 
+	if (args->check_only)
+		restore_finish_stage(task_entries_local, CR_STATE_COMPLETE);
+
 	new_sp = (long)rt_sigframe + RT_SIGFRAME_OFFSET(rt_sigframe);
 	rst_sigreturn(new_sp, rt_sigframe);
 
@@ -1657,19 +1660,33 @@  long __export_restore_task(struct task_restore_args *args)
 
 	restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
 
-	if (args->check_only) {
-		pr_info("Restore check was successful.\n");
-		futex_abort_and_wake(&task_entries_local->nr_in_progress);
-		return 0;
-	}
-
-
 	if (ret)
 		BUG();
 
 	/* Wait until children stop to use args->task_entries */
 	futex_wait_while_gt(&thread_inprogress, 1);
 
+	if (args->check_only) {
+		pr_info("Restore check was successful.\n");
+		while (1) {
+			pid_t pid;
+			int status;
+
+			pid = sys_wait4(-1, &status, 0, NULL);
+			if (pid < 0) {
+				if (pid == -ECHILD)
+					break;
+				pr_err("Unable to wait a child: %d\n", pid);
+				goto core_restore_end;
+			}
+			if (status) {
+				pr_err("The %d process exited with %d\n", pid, status);
+				goto core_restore_end;
+			}
+		}
+		sys_exit_group(0);
+	}
+
 	sys_close(args->proc_fd);
 	sys_close(args->transport_fd);
 	std_log_set_fd(-1);
diff --git a/test/zdtm.py b/test/zdtm.py
index 088572d..4cc73aa 100755
--- a/test/zdtm.py
+++ b/test/zdtm.py
@@ -937,13 +937,6 @@  class criu:
 			os.close(status_fds[0])
 			return ret
 
-		if '--check-only' in opts and action == "restore":
-			# Although the restored process never starts
-			# running in check-only mode, it sometimes takes
-			# some time for the process to disappear.
-			# Wait until it is gone.
-			self.__test.gone()
-
 		grep_errors(os.path.join(__ddir, log))
 		if ret != 0:
 			if self.__fault and int(self.__fault) < 128: