[v3,24/33] ns: Generate user_ns tree

Submitted by Kirill Tkhai on Feb. 16, 2017, 12:10 p.m.

Details

Message ID 148724700714.22444.598450343202362874.stgit@localhost.localdomain
State New
Series "Nested user namespaces support"
Headers show

Commit Message

Kirill Tkhai Feb. 16, 2017, 12:10 p.m.
Create the user namespace hierarchy from the criu main task.
Open the namespaces' fds and put them in fdstore, so that
they are visible to everybody.

Why we do it this way.
1) User namespaces are not correlated with the task
hierarchy. A parent task may be in a user namespace
of a deeper level than a child task. So, we
can't restore the user namespaces just by
passing CLONE_NEWUSER in fork_with_pid().

2) CLONE_FS tasks will require that user_ns is set at the
moment of clone(), so we have to restore the target user_ns
in the vicinity of create_children_and_session() in this case.

v3: Check for WIFEXITED(). Aligned stack.
    Use fdstore to keep ns fd.
    Create tree from root_item.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 criu/include/namespaces.h |    1 
 criu/namespaces.c         |  102 +++++++++++++++++++++++++++++++++++++++++++++
 criu/pstree.c             |    6 ++-
 3 files changed, 107 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index f70d7ffd3..77ba31fb3 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -117,6 +117,7 @@  struct ns_id {
 		} net;
 		struct {
 			UsernsEntry *e;
+			int nsfd_id;
 		} user;
 	};
 };
diff --git a/criu/namespaces.c b/criu/namespaces.c
index add23d0cc..518b0af4e 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -30,6 +30,8 @@ 
 #include "protobuf.h"
 #include "util.h"
 #include "images/ns.pb-c.h"
+#include "common/scm.h"
+#include "fdstore.h"
 
 static struct ns_desc *ns_desc_array[] = {
 	&net_ns_desc,
@@ -2121,6 +2123,103 @@  int join_namespaces(void)
 	return ret;
 }
 
+enum {	/* handshake states; passed to futex_wait_while_lt(), so the order presumably matters */
+	NS__CREATED = 1,	/* set by the child once it has stored its pid in ns_arg */
+	NS__MAPS_POPULATED,	/* set by the parent after prepare_userns() succeeded for the child */
+	NS__RESTORED,		/* set on the parent's futex when the helper finishes successfully */
+	NS__EXIT_HELPER,	/* not referenced in the code visible here */
+	NS__ERROR,		/* set on the parent's futex when the helper fails */
+};
+
+struct ns_arg {	/* argument block handed to create_user_ns_hierarhy_fn() */
+	struct ns_id *me;	/* namespace the callee has to handle */
+	futex_t futex;		/* parent <-> child handshake, carries the NS__* states */
+	pid_t pid;		/* filled by the child with get_self_real_pid(), read by the parent */
+};
+
+/*
+ * Create the user_ns subtree below in_arg->me: a non-root instance first stores
+ * its own userns fd in fdstore, then every instance clones one CLONE_NEWUSER
+ * child per entry of me->children. Returns 0 on success, 1 on failure.
+ */
+static int create_user_ns_hierarhy_fn(void *in_arg)
+{
+	char stack[128] __stack_aligned__;
+	struct ns_arg *arg = NULL, *p_arg = in_arg;
+	struct ns_id *me = p_arg->me, *child;
+	futex_t *p_futex = (me != root_user_ns) ? &p_arg->futex : NULL, *futex = NULL;
+	int status, fd, ret = -1;
+	pid_t pid = -1;
+
+	if (p_futex) {
+		/* Set self pid to allow parent restore user_ns maps */
+		p_arg->pid = get_self_real_pid();
+		futex_set_and_wake(p_futex, NS__CREATED);
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			pr_perror("Can't get self user ns");
+			goto out;
+		}
+		me->user.nsfd_id = fdstore_add(fd);
+		close(fd);
+		if (me->user.nsfd_id < 0) {
+			pr_err("Can't add fd to fdstore\n");
+			goto out;
+		}
+
+		futex_wait_while_lt(p_futex, NS__MAPS_POPULATED);
+		if (prepare_userns_creds()) {
+			pr_err("Can't prepare creds\n");
+			goto out;
+		}
+	}
+
+	arg = mmap(NULL, sizeof(*arg), PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (arg == MAP_FAILED) {
+		pr_perror("Failed to mmap arg");
+		goto out;
+	}
+	futex = &arg->futex;
+
+	list_for_each_entry(child, &me->children, siblings) {
+		arg->me = child;
+		futex_init(futex);
+		pid = clone(create_user_ns_hierarhy_fn, stack + 128, CLONE_NEWUSER | CLONE_FILES | SIGCHLD, arg);
+		if (pid < 0) {
+			pr_perror("Can't clone");
+			goto out;
+		}
+		futex_wait_while_lt(futex, NS__CREATED);
+		/* Get child real pid */
+		pid = arg->pid;
+		if (prepare_userns(pid, child->user.e) < 0) {
+			/* NOTE(review): child may still wait for NS__MAPS_POPULATED and is not reaped here */
+			pr_err("Can't prepare child user_ns\n");
+			goto out;
+		}
+		futex_set_and_wake(futex, NS__MAPS_POPULATED);
+		errno = 0;
+		if (wait(&status) < 0 || !WIFEXITED(status) || WEXITSTATUS(status)) {
+			pr_perror("Child process waiting: %d", status);
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	if (p_futex)
+		futex_set_and_wake(p_futex, ret ? NS__ERROR : NS__RESTORED);
+	if (arg && arg != MAP_FAILED)	/* a failed mmap() leaves MAP_FAILED, not NULL */
+		munmap(arg, sizeof(*arg));
+	return ret ? 1 : 0;
+}
+
+static int create_user_ns_hierarhy(void)
+{
+	struct ns_arg root = { .me = root_user_ns };	/* start the walk at the root user_ns */
+	return create_user_ns_hierarhy_fn(&root);
+}
+
 int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
 {
 	pid_t pid = item->pid->ns[0].virt;
@@ -2129,7 +2228,8 @@  int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
 	pr_info("Restoring namespaces %d flags 0x%lx\n",
 			item->pid->ns[0].virt, clone_flags);
 
-	if ((clone_flags & CLONE_NEWUSER) && prepare_userns_creds())
+	if ((clone_flags & CLONE_NEWUSER) && (prepare_userns_creds() ||
+					      create_user_ns_hierarhy()))
 		return -1;
 
 	/*
diff --git a/criu/pstree.c b/criu/pstree.c
index 4e609b7ab..2c44642a0 100644
--- a/criu/pstree.c
+++ b/criu/pstree.c
@@ -876,8 +876,12 @@  static int prepare_pstree_kobj_ids(void)
 			 * be born in a fresh new mount namespace
 			 * which will be populated with all other
 			 * namespaces' entries.
+			 *
+			 * User namespaces are created in create_user_ns_hierarhy()
+			 * before the tasks, as their hierarchy does not correlate
+			 * with the task hierarchy in any way.
 			 */
-			rsti(item)->clone_flags &= ~CLONE_NEWNS;
+			rsti(item)->clone_flags &= ~(CLONE_NEWNS | CLONE_NEWUSER);
 
 		cflags &= CLONE_ALLNS;