@@ -117,6 +117,7 @@ struct ns_id {
} net;
struct {
UsernsEntry *e;
+ int nsfd_id;
} user;
};
};
@@ -30,6 +30,8 @@
#include "protobuf.h"
#include "util.h"
#include "images/ns.pb-c.h"
+#include "common/scm.h"
+#include "fdstore.h"
static struct ns_desc *ns_desc_array[] = {
&net_ns_desc,
@@ -2112,6 +2114,103 @@ int join_namespaces(void)
return ret;
}
+enum { /* handshake states kept in struct ns_arg's futex; order matters, waiters compare with futex_wait_while_lt() */
+ NS__CREATED = 1, /* set by the cloned child right after it published its pid */
+ NS__MAPS_POPULATED, /* set by the parent once prepare_userns() succeeded for the child */
+ NS__RESTORED, /* reported to the parent when the helper finished without error */
+ NS__EXIT_HELPER, /* not referenced by the code visible in this patch */
+ NS__ERROR, /* reported to the parent when the helper failed */
+};
+
+struct ns_arg { /* argument block passed to create_user_ns_hierarhy_fn() */
+ struct ns_id *me; /* user namespace the receiving process stands for */
+ futex_t futex; /* parent<->child handshake, holds one of the NS__* states */
+ pid_t pid; /* written by the child with get_self_real_pid(), read by the parent */
+};
+
+/*
+ * Create the user namespaces nested below p_arg->me, one clone() per child.
+ *
+ * Called directly for root_user_ns and as the clone() entry point for every
+ * other namespace. A cloned instance publishes its pid, stores an fd of its
+ * own user namespace in fdstore (the id goes to me->user.nsfd_id), waits for
+ * the parent to populate its maps and only then recurses into its children.
+ *
+ * @in_arg: struct ns_arg describing the namespace this process stands for.
+ *
+ * Returns 0 on success and 1 on failure; for cloned instances this value is
+ * the exit status the parent checks with WEXITSTATUS().
+ */
+static int create_user_ns_hierarhy_fn(void *in_arg)
+{
+	char stack[128] __stack_aligned__;
+	struct ns_arg *arg = NULL, *p_arg = in_arg;
+	futex_t *p_futex = NULL, *futex = NULL;
+	int status = 0, fd, ret = -1;
+	struct ns_id *me, *child;
+	pid_t pid = -1;
+
+	/* The root call has no parent to report to, so it keeps p_futex NULL */
+	if (p_arg->me != root_user_ns)
+		p_futex = &p_arg->futex;
+	me = p_arg->me;
+
+	if (p_futex) {
+		/* Set self pid to allow parent restore user_ns maps */
+		p_arg->pid = get_self_real_pid();
+		futex_set_and_wake(p_futex, NS__CREATED);
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			pr_perror("Can't get self user ns");
+			goto out;
+		}
+		/* The local descriptor is closed right away; only the fdstore id is kept */
+		me->user.nsfd_id = fdstore_add(fd);
+		close(fd);
+		if (me->user.nsfd_id < 0) {
+			pr_err("Can't add fd to fdstore\n");
+			goto out;
+		}
+
+		futex_wait_while_lt(p_futex, NS__MAPS_POPULATED);
+		if (prepare_userns_creds()) {
+			pr_err("Can't prepare creds\n");
+			goto out;
+		}
+	}
+
+	/* Shared mapping: the cloned child does not share our memory otherwise */
+	arg = mmap(NULL, sizeof(*arg), PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (arg == MAP_FAILED) {
+		pr_perror("Failed to mmap arg");
+		/* MAP_FAILED is not NULL; keep the cleanup path from unmapping it */
+		arg = NULL;
+		goto out;
+	}
+	futex = &arg->futex;
+
+	/* Children are created strictly one at a time, so a single arg is reused */
+	list_for_each_entry(child, &me->children, siblings) {
+		arg->me = child;
+		futex_init(futex);
+
+		pid = clone(create_user_ns_hierarhy_fn, stack + sizeof(stack), CLONE_NEWUSER | CLONE_FILES | SIGCHLD, arg);
+		if (pid < 0) {
+			pr_perror("Can't clone");
+			goto out;
+		}
+		futex_wait_while_lt(futex, NS__CREATED);
+		/* Get child real pid */
+		pid = arg->pid;
+		if (prepare_userns(pid, child->user.e) < 0) {
+			pr_err("Can't prepare child user_ns\n");
+			/*
+			 * The child is blocked until the futex reaches
+			 * NS__MAPS_POPULATED; release and reap it so it is
+			 * not left behind.
+			 * NOTE(review): the child does not inspect the value
+			 * it was woken with, so it still goes on to
+			 * prepare_userns_creds(); presumably that fails
+			 * without maps - confirm.
+			 */
+			futex_set_and_wake(futex, NS__ERROR);
+			wait(&status);
+			goto out;
+		}
+		futex_set_and_wake(futex, NS__MAPS_POPULATED);
+
+		/* Clear errno so a bad exit status is not reported with a stale error */
+		errno = 0;
+		if (wait(&status) < 0 || !WIFEXITED(status) || WEXITSTATUS(status)) {
+			pr_perror("Child process waiting: %d", status);
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	if (p_futex)
+		futex_set_and_wake(p_futex, ret ? NS__ERROR : NS__RESTORED);
+	if (arg)
+		munmap(arg, sizeof(*arg));
+	return ret ? 1 : 0;
+}
+
+/*
+ * Build the whole user namespace tree, starting from root_user_ns.
+ * Returns whatever create_user_ns_hierarhy_fn() returns for the root.
+ */
+static int create_user_ns_hierarhy(void)
+{
+	struct ns_arg root_arg = {
+		.me = root_user_ns,
+	};
+
+	return create_user_ns_hierarhy_fn(&root_arg);
+}
+
int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
{
pid_t pid = vpid(item);
@@ -2120,7 +2219,8 @@ int prepare_namespace(struct pstree_item *item, unsigned long clone_flags)
pr_info("Restoring namespaces %d flags 0x%lx\n",
vpid(item), clone_flags);
- if ((clone_flags & CLONE_NEWUSER) && prepare_userns_creds())
+ if ((clone_flags & CLONE_NEWUSER) && (prepare_userns_creds() ||
+ create_user_ns_hierarhy()))
return -1;
/*
@@ -876,8 +876,12 @@ static int prepare_pstree_kobj_ids(void)
* be born in a fresh new mount namespace
* which will be populated with all other
* namespaces' entries.
+ *
+ * User namespaces are created in create_user_ns_hierarhy()
+ * before the tasks, as their hierarchy does not correlate
+ * with the task hierarchy in any way.
*/
- rsti(item)->clone_flags &= ~CLONE_NEWNS;
+ rsti(item)->clone_flags &= ~(CLONE_NEWNS | CLONE_NEWUSER);
cflags &= CLONE_ALLNS;
Create the user namespace hierarchy from the criu main task. Open the namespaces' fds and keep them in fdstore, so they are visible to everybody. Why we do it this way: 1) User namespaces are not correlated with the task hierarchy. A parent task may live in a user namespace of a deeper level than its child task does. So, we can't restore the user namespaces just by passing CLONE_NEWUSER in fork_with_pid(). 2) CLONE_FS tasks require user_ns to be already set at the moment of clone(), so in this case we have to restore the target user_ns in the vicinity of create_children_and_session(). v3: Check for WIFEXITED(). Aligned stack. Use fdstore to keep ns fd. Create tree from root_item. Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> --- criu/include/namespaces.h | 1 criu/namespaces.c | 102 +++++++++++++++++++++++++++++++++++++++++++++ criu/pstree.c | 6 ++- 3 files changed, 107 insertions(+), 2 deletions(-)