[12/14] forking: Use last_pid_mutex for synchronization during clone()

Submitted by Kirill Tkhai on May 15, 2017, 1:06 p.m.

Details

Message ID 149485356456.681.18391316674652413105.stgit@localhost.localdomain
State New
Series "Refactor pid_ns helpers creation"
Headers show

Commit Message

Kirill Tkhai May 15, 2017, 1:06 p.m.
Before this patch we used flock to order task creation,
but this way is not good. It took 5 syscalls to synchronize
a creation of a single child:

1)open()
2)flock(LOCK_EX)
3)flock(LOCK_UN)
4)close() in parent
5)close() in child

The patch introduces more effective way for synchronization,
which executes 2 syscalls only. We use last_pid_mutex,
and the syscalls number sounds definitely better.

In order to synchronize with parallel CRIU, we still
take ns_last_pid flock. After talking to @xemul,
we remain the ns_last_pid file locked all the restore time,
which is done once by criu task. In further, if we need
to relax this, we may take the flock() globally on
CR_STATE_FORKING stage only, and synchronize thread
creation with flock() locally, but now we don't need that.

v2: Lock ns_last_pid only in case of !CLONE_NEWPID

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 criu/cr-restore.c   |   42 +++++++++++++++++++++++-------------------
 criu/namespaces.c   |   33 +++++----------------------------
 criu/pie/restorer.c |   34 ++++++++++++++--------------------
 3 files changed, 42 insertions(+), 67 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 0f6dc23bc..3917150c6 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -1172,7 +1172,6 @@  static int restore_one_task(int pid, CoreEntry *core)
 struct cr_clone_arg {
 	struct pstree_item *item;
 	unsigned long clone_flags;
-	int fd;
 
 	CoreEntry *core;
 };
@@ -1373,19 +1372,12 @@  static inline int fork_with_pid(struct pstree_item *item)
 
 	pr_info("Forking task with %d pid (flags 0x%lx)\n", pid, ca.clone_flags);
 
-	ca.fd = open_proc_rw(PROC_GEN, LAST_PID_PATH);
-	if (ca.fd < 0)
-		goto err;
-
 	wait_pid_ns_helper_prepared(pid_ns, item->pid);
 
 	if (set_pid_ns_for_children(pid_ns, item->pid) < 0)
-		goto err_close;
+		goto err;
 
-	if (flock(ca.fd, LOCK_EX)) {
-		pr_perror("%d: Can't lock %s", pid, LAST_PID_PATH);
-		goto err_close;
-	}
+	lock_last_pid();
 
 	ret = do_fork_with_pid(item, pid_ns, &ca);
 	if (ret < 0) {
@@ -1394,10 +1386,7 @@  static inline int fork_with_pid(struct pstree_item *item)
 	}
 
 err_unlock:
-	if (flock(ca.fd, LOCK_UN))
-		pr_perror("%d: Can't unlock %s", pid, LAST_PID_PATH);
-err_close:
-	close(ca.fd);
+	unlock_last_pid();
 err:
 	if (ca.core)
 		core_entry__free_unpacked(ca.core, NULL);
@@ -1694,9 +1683,6 @@  static int restore_task_with_children(void *_arg)
 	if (current->pid->real < 0)
 		goto err;
 
-	if ( !(ca->clone_flags & CLONE_FILES))
-		close_safe(&ca->fd);
-
 	ret = clone_service_fd(rsti(current)->service_fd_id);
 	if (ret)
 		goto err;
@@ -2072,7 +2058,7 @@  static int write_restored_pid(void)
 static int restore_root_task(struct pstree_item *init)
 {
 	enum trace_flags flag = TRACE_ALL;
-	int ret, fd, mnt_ns_fd = -1;
+	int ret, fd, mnt_ns_fd = -1, lock_fd = -1;
 	int root_seized = 0;
 	struct pstree_item *item;
 
@@ -2122,11 +2108,21 @@  static int restore_root_task(struct pstree_item *init)
 	if (prepare_namespace_before_tasks())
 		return -1;
 
+	if (!(root_ns_mask & CLONE_NEWPID)) {
+		lock_fd = open_proc_rw(PROC_GEN, LAST_PID_PATH);
+		if (lock_fd < 0)
+			goto out;
+		if (flock(lock_fd, LOCK_EX)) {
+			pr_perror("Can't flock ns_last_pid");
+			goto out_close;
+		}
+	}
+
 	__restore_switch_stage_nw(CR_STATE_ROOT_TASK);
 
 	ret = fork_with_pid(init);
 	if (ret < 0)
-		goto out;
+		goto out_unlock;
 
 	restore_origin_ns_hook();
 
@@ -2278,6 +2274,9 @@  static int restore_root_task(struct pstree_item *init)
 	ret = restore_switch_stage(CR_STATE_RESTORE_CREDS);
 	BUG_ON(ret);
 
+	if (lock_fd >= 0 && (flock(lock_fd, LOCK_UN) || close_safe(&lock_fd)))
+		pr_perror("Can't unlock or close ns_last_pid");
+
 	timing_stop(TIME_RESTORE);
 
 	ret = catch_tasks(root_seized, &flag);
@@ -2347,6 +2346,11 @@  static int restore_root_task(struct pstree_item *init)
 				kill(vpid(pi), SIGKILL);
 	}
 
+out_unlock:
+	if (lock_fd >= 0 && flock(lock_fd, LOCK_UN))
+		pr_perror("Can't unlock ns_last_pid");
+out_close:
+	close_safe(&lock_fd);
 out:
 	fini_cgroup();
 	depopulate_roots_yard(mnt_ns_fd, true);
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 74c0ad7f4..75465e157 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -2621,7 +2621,7 @@  static int pid_ns_helper(struct ns_id *ns, int sk)
 
 static int do_create_pid_ns_helper(void *arg, int sk, pid_t unused_pid)
 {
-	int pid_ns_fd, mnt_ns_fd, i, lock_fd, transport_fd, saved_errno;
+	int pid_ns_fd, i, transport_fd, saved_errno;
 	struct pstree_item *ns_reaper;
 	struct ns_id *ns, *tmp;
 	struct pid *pid;
@@ -2639,26 +2639,7 @@  static int do_create_pid_ns_helper(void *arg, int sk, pid_t unused_pid)
 		goto err;
 	}
 
-	if (switch_ns(root_item->pid->real, &mnt_ns_desc, &mnt_ns_fd) < 0) {
-		pr_err("Can't set mnt_ns\n");
-		goto err;
-	}
-
-	lock_fd = open("/proc/" LAST_PID_PATH, O_RDONLY);
-	if (lock_fd < 0) {
-		pr_perror("Unable to open /proc/" LAST_PID_PATH);
-		goto err;
-	}
-
-	if (restore_ns(mnt_ns_fd, &mnt_ns_desc) < 0) {
-		pr_err("Can't restore ns\n");
-		goto err;
-	}
-
-	if (flock(lock_fd, LOCK_EX)) {
-		close(lock_fd);
-		pr_perror("Can't lock %s", LAST_PID_PATH);
-	}
+	lock_last_pid();
 
 	transport_fd = get_service_fd(TRANSPORT_FD_OFF);
 	BUG_ON(transport_fd < 0);
@@ -2670,18 +2651,14 @@  static int do_create_pid_ns_helper(void *arg, int sk, pid_t unused_pid)
 	for (i = pid->level - 2, tmp = ns->parent; i >= 0; i--, tmp = tmp->parent)
 		if (request_set_next_pid(tmp->id, pid->ns[i].virt, transport_fd)) {
 			pr_err("Can't set next pid using helper\n");
-			flock(lock_fd, LOCK_UN);
-			close(lock_fd);
+			unlock_last_pid();
 			goto err;
 		}
 	child = fork();
-	if (!child) {
-		close(lock_fd);
+	if (!child)
 		exit(pid_ns_helper(ns, sk));
-	}
 	saved_errno = errno;
-	flock(lock_fd, LOCK_UN);
-	close(lock_fd);
+	unlock_last_pid();
 	if (child < 0) {
 		errno = saved_errno;
 		pr_perror("Can't fork");
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 98c81f378..d369f6575 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1488,20 +1488,18 @@  long __export_restore_task(struct task_restore_args *args)
 				   CLONE_THREAD | CLONE_SYSVSEM | CLONE_FS;
 		long last_pid_len;
 		long parent_tid;
-		int i, fd;
+		int i, fd = -1;
 
-		fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0);
-		if (fd < 0) {
-			pr_err("can't open last pid fd %d\n", fd);
-			goto core_restore_end;
+		if (thread_args[0].pid[1] == 0) {
+			/* One level pid ns hierarhy */
+			fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0);
+			if (fd < 0) {
+				pr_err("can't open last pid fd %d\n", fd);
+				goto core_restore_end;
+			}
 		}
 
-		ret = sys_flock(fd, LOCK_EX);
-		if (ret) {
-			pr_err("Can't lock last_pid %d\n", fd);
-			sys_close(fd);
-			goto core_restore_end;
-		}
+		mutex_lock(&task_entries_local->last_pid_mutex);
 
 		for (i = 0; i < args->nr_threads; i++) {
 			char last_pid_buf[16], *s;
@@ -1509,13 +1507,14 @@  long __export_restore_task(struct task_restore_args *args)
 			if (thread_args[i].pid[0] == args->t->pid[0])
 				continue;
 
-			if (thread_args[i].pid[1] == 0) {
+			if (fd >= 0) {
 				/* One level pid ns hierarhy */
 				last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid[0] - 1, &s);
 				sys_lseek(fd, 0, SEEK_SET);
 				ret = sys_write(fd, s, last_pid_len);
 				if (ret < 0) {
 					pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf);
+					mutex_unlock(&task_entries_local->last_pid_mutex);
 					sys_close(fd);
 					goto core_restore_end;
 				}
@@ -1525,7 +1524,7 @@  long __export_restore_task(struct task_restore_args *args)
 						break;
 					if (request_set_next_pid(args->pid_ns_id[k], thread_args[i].pid[k], args->transport_fd) < 0) {
 						pr_err("Can't request to set pid\n");
-						sys_close(fd);
+						mutex_unlock(&task_entries_local->last_pid_mutex);
 						goto core_restore_end;
 					}
 				}
@@ -1542,14 +1541,9 @@  long __export_restore_task(struct task_restore_args *args)
 			RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn);
 		}
 
-		ret = sys_flock(fd, LOCK_UN);
-		if (ret) {
-			pr_err("Can't unlock last_pid %ld\n", ret);
+		mutex_unlock(&task_entries_local->last_pid_mutex);
+		if (fd >= 0)
 			sys_close(fd);
-			goto core_restore_end;
-		}
-
-		sys_close(fd);
 	}
 
 	restore_rlims(args);