[v8,09/10] restore: print error messages on all error-paths in restore_root_task

Submitted by Pavel Tikhomirov on Feb. 22, 2018, 11:49 a.m.

Details

Message ID 20180222114928.24343-10-ptikhomirov@virtuozzo.com
State Accepted
Series "don't use wrong pagemap (from other task) on pid reuse"
Commit c3abb6999d0748229bfc97898b4bdc7cb657fbd5
Headers show

Commit Message

Pavel Tikhomirov Feb. 22, 2018, 11:49 a.m.
On restore we once had:

Run criu restore
Fail RESTORE failed: Error(0): Unknown
=[log]=> dump/zdtm/static/env00/85/1/restore.log
------------------------ grep Error ------------------------
(00.122659) Running setup-namespaces scripts
(00.122748)      1: cg: Cgroups 1 inherited from parent
(00.122765)      1: uns: calling usernsd_recv_transport (0, 0)
(00.123140) uns: daemon calls 0x48d8b0 (106, 1, 0)
(00.145095) Error (criu/mount.c:3175): mnt: Can't remove the directory
	/tmp/.criu.mntns.SIOGLA: No such file or directory
(00.145124) uns: calling exit_usernsd (-1, 1)
(00.145174) uns: daemon calls 0x48d880 (104, -1, 1)
(00.145181) uns: `- daemon exits w/ 0
(00.149591) uns: daemon stopped
(00.149607) Error (criu/cr-restore.c:2498): Restoring FAILED.
------------------------ ERROR OVER ------------------------

Only "Restoring FAILED." error, so no clue to what's really going on.

Printing errors for these "silent" errorpaths can help resolve issues in
future:

restore_root_task
	-> fork_with_pid -> open_core -> pb_read_one -> do_pb_read_one
		-> xmalloc
	-> prepare_userns -> write_id_map -> get_service_fd
	-> restore_wait_inprogress_tasks
		->__restore_wait_inprogress_tasks -> futex_get
	-> restore_switch_stage -> restore_wait_other_tasks
		-> __restore_wait_inprogress_tasks -> futex_get
	-> move_veth_to_bridge -> external_for_each_type -> move_to_bridge
		-> external_val
	-> fault_injected -> __fault_injected
	-> depopulate_roots_yard -> try_clean_remaps -> clean_one_remap
		-> rst_get_mnt_root -> lookup_mnt_id -> __lookup_mnt_id

Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 criu/cr-restore.c | 42 ++++++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 10 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index a7316f79c..8c5d067f0 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2244,14 +2244,18 @@  static int restore_root_task(struct pstree_item *init)
 	if (prepare_userns_hook())
 		return -1;
 
-	if (prepare_namespace_before_tasks())
+	if (prepare_namespace_before_tasks()) {
+		pr_err("Failed to prepare namespace before tasks\n");
 		return -1;
+	}
 
 	__restore_switch_stage_nw(CR_STATE_ROOT_TASK);
 
 	ret = fork_with_pid(init);
-	if (ret < 0)
+	if (ret < 0) {
+		pr_err("Failed to fork init task\n");
 		goto out;
+	}
 
 	restore_origin_ns_hook();
 
@@ -2285,21 +2289,28 @@  static int restore_root_task(struct pstree_item *init)
 	 * uid_map and gid_map must be filled from a parent user namespace.
 	 * prepare_userns_creds() must be called after filling mappings.
 	 */
-	if ((root_ns_mask & CLONE_NEWUSER) && prepare_userns(init->pid->real, userns_entry))
+	if ((root_ns_mask & CLONE_NEWUSER) && prepare_userns(init->pid->real, userns_entry)) {
+		pr_err("Failed to prepare userns\n");
 		goto out_destroy;
+	}
 
 	pr_info("Wait until namespaces are created\n");
 	ret = restore_wait_inprogress_tasks();
-	if (ret)
+	if (ret) {
+		pr_err("Failed to wait inprogress tasks\n");
 		goto out_destroy;
+	}
 
 	ret = run_scripts(ACT_SETUP_NS);
 	if (ret)
 		goto out_destroy;
 
 	ret = restore_switch_stage(CR_STATE_PREPARE_NAMESPACES);
-	if (ret)
+	if (ret) {
+		pr_err("Failed to switch restore stage to " \
+		       "CR_STATE_PREPARE_NAMESPACES\n");
 		goto out_destroy;
+	}
 
 	if (root_ns_mask & CLONE_NEWNS) {
 		mnt_ns_fd = open_proc(init->pid->real, "ns/mnt");
@@ -2329,8 +2340,10 @@  static int restore_root_task(struct pstree_item *init)
 skip_ns_bouncing:
 
 	ret = restore_wait_inprogress_tasks();
-	if (ret < 0)
+	if (ret < 0) {
+		pr_err("Failed to wait inprogress tasks\n");
 		goto out_destroy;
+	}
 
 	/*
 	 * Zombies die after CR_STATE_RESTORE which is switched
@@ -2347,23 +2360,30 @@  static int restore_root_task(struct pstree_item *init)
 		goto out_kill;
 
 	ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
-	if (ret < 0)
+	if (ret < 0) {
+		pr_err("Failed to switch restore stage to " \
+		       "CR_STATE_RESTORE_SIGCHLD\n");
 		goto out_kill;
+	}
 
 	ret = stop_usernsd();
 	if (ret < 0)
 		goto out_kill;
 
 	ret = move_veth_to_bridge();
-	if (ret < 0)
+	if (ret < 0) {
+		pr_err("Failed to move veth to bridge\n");
 		goto out_kill;
+	}
 
 	ret = prepare_cgroup_properties();
 	if (ret < 0)
 		goto out_kill;
 
-	if (fault_injected(FI_POST_RESTORE))
+	if (fault_injected(FI_POST_RESTORE)) {
+		pr_err("Post restore fault injected\n");
 		goto out_kill;
+	}
 
 	ret = run_scripts(ACT_POST_RESTORE);
 	if (ret != 0) {
@@ -2377,8 +2397,10 @@  static int restore_root_task(struct pstree_item *init)
 	 * There is no need to call try_clean_remaps() after this point,
 	 * as restore went OK and all ghosts were removed by the openers.
 	 */
-	if (depopulate_roots_yard(mnt_ns_fd, false))
+	if (depopulate_roots_yard(mnt_ns_fd, false)) {
+		pr_err("Failed to depopulate roots yard\n");
 		goto out_kill;
+	}
 
 	close_safe(&mnt_ns_fd);