[1/2] mount: create a mount point for the root mount namespace in the roots yard

Submitted by Andrei Vagin on Oct. 20, 2016, 7:06 p.m.

Details

Message ID 1476990420-16256-2-git-send-email-avagin@openvz.org
State Superseded
Series "mount: create a mount point for the root mount namespace in the roots yard"
Headers show

Commit Message

Andrei Vagin Oct. 20, 2016, 7:06 p.m.
From: Andrei Vagin <avagin@virtuozzo.com>

These changes allow us to:
* avoid difference between the root mount namespace and other mount namespaces
* support a read-only root mount
* avoid creating temporary directories in the root mount

Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
---
 criu/include/path.h |   5 +-
 criu/mount.c        | 205 ++++++++++------------------------------------------
 2 files changed, 42 insertions(+), 168 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/include/path.h b/criu/include/path.h
index 5fec3a9..b00c64a 100644
--- a/criu/include/path.h
+++ b/criu/include/path.h
@@ -1,6 +1,9 @@ 
 #ifndef __CR_PATH_H__
 #define __CR_PATH_H__
 
+#include "namespaces.h"
+#include "pstree.h"
+
 /* Asolute paths are used on dump and relative paths are used on restore */
 static inline int is_root(char *p)
 {
@@ -10,7 +13,7 @@  static inline int is_root(char *p)
 /* True for the root mount (the topmost one) */
 static inline int is_root_mount(struct mount_info *mi)
 {
-	return is_root(mi->mountpoint + 1);
+	return mi->parent == NULL && mi->nsid->id == root_item->ids->mnt_ns_id;
 }
 
 /*
diff --git a/criu/mount.c b/criu/mount.c
index bb1378d..ba177f3 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -361,9 +361,10 @@  static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou
 
 		if (!parent) {
 			/* This should be / */
-			if (root == NULL && is_root_mount(m)) {
+			if (root == NULL && (!tmp_root_mount || is_root_mount(m))) {
 				root = m;
-				continue;
+				if (!tmp_root_mount)
+					continue;
 			}
 
 			pr_debug("Mountpoint %d (@%s) w/o parent %d\n",
@@ -395,7 +396,7 @@  static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou
 				pr_debug("Mountpoint %d (@%s) get parent %d (@%s)\n",
 					 m->mnt_id, m->mountpoint,
 					 parent->mnt_id, parent->mountpoint);
-			} else {
+			} else if (root != m) {
 				pr_err("No root found for mountpoint %d (@%s)\n",
 					m->mnt_id, m->mountpoint);
 				return NULL;
@@ -411,10 +412,8 @@  static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou
 		return NULL;
 	}
 
-	if (tmp_root_mount) {
-		tmp_root_mount->parent = root;
-		list_add_tail(&tmp_root_mount->siblings, &root->children);
-	}
+	if (tmp_root_mount)
+		return tmp_root_mount;
 
 	return root;
 }
@@ -634,7 +633,7 @@  static struct mount_info *find_fsroot_mount_for(struct mount_info *bm)
 
 	list_for_each_entry(sm, &bm->mnt_bind, mnt_bind)
 		if (fsroot_mounted(sm) ||
-				(sm->parent == NULL &&
+				(sm->parent == root_yard_mp &&
 				 strstartswith(bm->root, sm->root)))
 			return sm;
 
@@ -1642,7 +1641,7 @@  skip_parent:
 	 * FIXME Currently non-root mounts can be restored
 	 * only if a proper root mount exists
 	 */
-	if (fsroot_mounted(mi) || mi->parent == NULL) {
+	if (fsroot_mounted(mi) || mi->parent == root_yard_mp) {
 		list_for_each_entry(t, &mi->mnt_bind, mnt_bind) {
 			if (t->mounted)
 				continue;
@@ -1984,10 +1983,14 @@  err:
 	return exit_code;
 }
 
+static bool rst_mnt_is_root(struct mount_info *m)
+{
+	return (m->is_ns_root && m->nsid->id == root_item->ids->mnt_ns_id);
+}
+
 static bool can_mount_now(struct mount_info *mi)
 {
-	/* The root mount */
-	if (!mi->parent)
+	if (rst_mnt_is_root(mi))
 		return true;
 
 	if (mi->external)
@@ -2060,7 +2063,7 @@  static int do_mount_one(struct mount_info *mi)
 		return 1;
 	}
 
-	if (mi->parent && !strcmp(mi->parent->mountpoint, mi->mountpoint)) {
+	if (!strcmp(mi->parent->mountpoint, mi->mountpoint)) {
 		mi->parent->fd = open(mi->parent->mountpoint, O_PATH);
 		if (mi->parent->fd < 0) {
 			pr_perror("Unable to open %s", mi->mountpoint);
@@ -2070,8 +2073,12 @@  static int do_mount_one(struct mount_info *mi)
 
 	pr_debug("\tMounting %s @%s (%d)\n", mi->fstype->name, mi->mountpoint, mi->need_plugin);
 
-	if (!mi->parent) {
+	if (rst_mnt_is_root(mi)) {
 		/* do_mount_root() is called from populate_mnt_ns() */
+		if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL))
+			return -1;
+		if (do_mount_root(mi))
+			return -1;
 		mi->mounted = true;
 		ret = 0;
 	} else if (!mi->bind && !mi->need_plugin && !mi->external)
@@ -2144,33 +2151,10 @@  static int do_mnt_remap(struct mount_info *m)
 {
 	int len;
 
-	if (m->nsid->type == NS_OTHER) {
-		/*
-		 * m->mountpoint already contains a roots_yard prefix and
-		 * it has a fixed size, so it can be just replaced.
-		 */
-		len = print_ns_root(m->nsid, remap_id, m->mountpoint, PATH_MAX);
-		m->mountpoint[len] = '/';
-	} else if (m->nsid->type == NS_ROOT) {
-		char root[PATH_MAX], *mp, *ns_mp;
-		int len, ret;
+	/* A path in root_yard has a fixed size, so it can be replaced. */
+	len = print_ns_root(m->nsid, remap_id, m->mountpoint, PATH_MAX);
+	m->mountpoint[len] = '/';
 
-		/*
-		 * Allocate a new path in the roots yard. m->mountpoint in the
-		 * root namespace doesn't have a roots_yard prefix, so its
-		 * size has to be changed and a new storage has to be
-		 * allocated.
-		 */
-		mp = m->mountpoint; ns_mp = m->ns_mountpoint;
-
-		len = print_ns_root(m->nsid, remap_id, root, PATH_MAX);
-
-		ret = get_mp_mountpoint(ns_mp, m, root, len);
-		if (ret < 0)
-			return ret;
-		xfree(mp);
-	} else
-		BUG();
 	return 0;
 }
 
@@ -2231,14 +2215,9 @@  static int move_back_mnt_remaps()
 		char path[PATH_MAX];
 		int len;
 
-		if (m->nsid->type == NS_ROOT) {
-			path[0] = '.';
-			strncpy(path + 1, m->ns_mountpoint, PATH_MAX - 1);
-		} else {
-			strncpy(path, m->mountpoint, PATH_MAX);
-			len = print_ns_root(m->nsid, 0, path, PATH_MAX);
-			path[len] = '/';
-		}
+		strncpy(path, m->mountpoint, PATH_MAX);
+		len = print_ns_root(m->nsid, 0, path, PATH_MAX);
+		path[len] = '/';
 
 		pr_debug("Move mount %s -> %s\n", m->mountpoint, path);
 		if (mount(m->mountpoint, path, NULL, MS_MOVE, NULL)) {
@@ -2368,23 +2347,12 @@  static inline int print_ns_root(struct ns_id *ns, int remap_id, char *buf, int b
 
 static int create_mnt_roots(void)
 {
-	int exit_code = -1, cwd_fd;
+	int exit_code = -1;
 
 	if (mnt_roots)
 		return 0;
 
-	cwd_fd = open(".", O_DIRECTORY);
-	if (cwd_fd < 0) {
-		pr_perror("Unable to open cwd");
-		return -1;
-	}
-
-	if (chdir(opts.root ? : "/")) {
-		pr_perror("Unable to change working directory on %s", opts.root);
-		goto out;
-	}
-
-	mnt_roots = strdup(".criu.mntns.XXXXXX");
+	mnt_roots = strdup("/tmp/.criu.mntns.XXXXXX");
 	if (mnt_roots == NULL) {
 		pr_perror("Can't allocate memory");
 		goto out;
@@ -2395,15 +2363,10 @@  static int create_mnt_roots(void)
 		mnt_roots = NULL;
 		goto out;
 	}
+	chmod(mnt_roots, 0777);
 
 	exit_code = 0;
 out:
-	if (fchdir(cwd_fd)) {
-		pr_perror("Unable to restore cwd");
-		exit_code = -1;
-	}
-	close(cwd_fd);
-
 	return exit_code;
 }
 
@@ -2503,8 +2466,7 @@  static int collect_mnt_from_image(struct mount_info **pms, struct ns_id *nsid)
 	if (!img)
 		return -1;
 
-	if (nsid->type == NS_OTHER)
-		root_len = print_ns_root(nsid, 0, root, sizeof(root));
+	root_len = print_ns_root(nsid, 0, root, sizeof(root));
 
 	pr_debug("Reading mountpoint images (id %d pid %d)\n",
 		 nsid->id, (int)nsid->ns_pid);
@@ -2624,8 +2586,7 @@  int rst_get_mnt_root(int mnt_id, char *path, int plen)
 	if (m == NULL)
 		return -1;
 
-	if (m->nsid->type == NS_OTHER)
-		return print_ns_root(m->nsid, 0, path, plen);
+	return print_ns_root(m->nsid, 0, path, plen);
 
 rroot:
 	path[0] = '/';
@@ -2635,28 +2596,10 @@  rroot:
 
 int mntns_maybe_create_roots(void)
 {
-	struct ns_id *ns;
-
 	if (!(root_ns_mask & CLONE_NEWNS))
 		return 0;
 
-	for (ns = ns_ids; ns != NULL; ns = ns->next) {
-		if (ns->nd != &mnt_ns_desc)
-			continue;
-
-		if (ns->type != NS_ROOT) {
-			BUG_ON(ns->type == NS_CRIU);
-
-			/*
-			 * If we have more than one (root) namespace,
-			 * then we'll need the roots yard.
-			 */
-			return create_mnt_roots();
-		}
-	}
-
-	/* No "other" mntns found, just go ahead, we don't need roots yard. */
-	return 0;
+	return create_mnt_roots();
 }
 
 static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *current)
@@ -2679,6 +2622,9 @@  static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *curren
 
 int restore_task_mnt_ns(struct pstree_item *current)
 {
+	if ((root_ns_mask & CLONE_NEWNS) == 0)
+		return 0;
+
 	if (current->ids && current->ids->has_mnt_ns_id) {
 		unsigned int id = current->ids->mnt_ns_id;
 		struct ns_id *nsid;
@@ -2692,7 +2638,7 @@  int restore_task_mnt_ns(struct pstree_item *current)
 		 * already there, otherwise it will have to do
 		 * setns().
 		 */
-		if (!current->parent || id == current->parent->ids->mnt_ns_id)
+		if (current->parent && id == current->parent->ids->mnt_ns_id)
 			return 0;
 
 		nsid = lookup_ns_by_id(id, &mnt_ns_desc);
@@ -2721,8 +2667,7 @@  void fini_restore_mntns(void)
 		if (nsid->nd != &mnt_ns_desc)
 			continue;
 		close_safe(&nsid->mnt.ns_fd);
-		if (nsid->type != NS_ROOT)
-			close_safe(&nsid->mnt.root_fd);
+		close_safe(&nsid->mnt.root_fd);
 		nsid->ns_populated = true;
 	}
 }
@@ -2736,9 +2681,6 @@  static int populate_roots_yard(void)
 	char path[PATH_MAX];
 	struct ns_id *nsid;
 
-	if (mnt_roots == NULL)
-		return 0;
-
 	if (make_yard(mnt_roots))
 		return -1;
 
@@ -2811,14 +2753,6 @@  static int populate_mnt_ns(void)
 	if (handle_overmounts(pms))
 		return -1;
 
-	/*
-	 * Set properties for the root before mounting a root yard,
-	 * otherwise the root yard can be propagated into the host
-	 * mntns and remain there.
-	 */
-	if (do_mount_root(pms))
-		return -1;
-
 	if (populate_roots_yard())
 		return -1;
 
@@ -2836,7 +2770,7 @@  static int populate_mnt_ns(void)
 	return ret;
 }
 
-static int __depopulate_roots_yard(void)
+int __depopulate_roots_yard(void)
 {
 	int ret = 0;
 
@@ -2961,64 +2895,11 @@  int prepare_mnt_ns(void)
 		pr_info("Cleaning mount namespace\n");
 		if (mnt_tree_for_each_reverse(ns.mnt.mntinfo_tree, do_umount_one))
 			return -1;
-	} else {
-		struct mount_info *mi;
-		char *ret;
-		char path[PATH_MAX];
-
-		/*
-		 * The whole tree of mountpoints is to be moved into one
-		 * place with the pivot_root() call. Don't do manual
-		 * umount (as we do above), all this stuff will go away
-		 * with a single umount call later.
-		 */
-
-		ret = realpath(opts.root, path);
-		if (!ret) {
-			pr_err("Unable to find real path for %s\n", opts.root);
-			return -1;
-		}
-
-		/* moving a mount residing under a shared mount is invalid. */
-		mi = mount_resolve_path(ns.mnt.mntinfo_tree, path);
-		if (mi == NULL) {
-			pr_err("Unable to find mount point for %s\n", opts.root);
-			return -1;
-		}
-		if (mi->parent == NULL) {
-			pr_err("New root and old root are the same\n");
-			return -1;
-		}
-
-		/* Our root is mounted over the parent (in the same directory) */
-		if (!strcmp(mi->parent->mountpoint, mi->mountpoint)) {
-			pr_err("The parent of the new root is unreachable\n");
-			return -1;
-		}
-
-		if (mount("none", mi->parent->mountpoint + 1, "none", MS_SLAVE, NULL)) {
-			pr_perror("Can't remount the parent of the new root with MS_SLAVE");
-			return -1;
-		}
-
-		/* Unprivileged users can't reveal what is under a mount */
-		if (root_ns_mask & CLONE_NEWUSER) {
-			if (mount(opts.root, opts.root, NULL, MS_BIND | MS_REC, NULL)) {
-				pr_perror("Can't remount bind-mount %s into itself", opts.root);
-				return -1;
-			}
-		}
-		if (chdir(opts.root)) {
-			pr_perror("chdir(%s) failed", opts.root ? : "/");
-			return -1;
-		}
 	}
 
 	free_mntinfo(old);
 
 	ret = populate_mnt_ns();
-	if (!ret && opts.root)
-		ret = cr_pivot_root(NULL);
 	if (ret)
 		return -1;
 
@@ -3032,16 +2913,6 @@  int prepare_mnt_ns(void)
 
 		if (nsid->nd != &mnt_ns_desc)
 			continue;
-		if (nsid->type == NS_ROOT) {
-			/* Pin one with a file descriptor */
-			nsid->mnt.ns_fd = open_proc(PROC_SELF, "ns/mnt");
-			if (nsid->mnt.ns_fd < 0)
-				goto err;
-			/* we set ns_populated so we don't need to open root_fd */
-			nsid->ns_populated = true;
-			continue;
-		}
-
 		/* Create the new mount namespace */
 		if (unshare(CLONE_NEWNS)) {
 			pr_perror("Unable to create a new mntns");

Comments

Pavel Emelianov Jan. 31, 2017, 11:41 a.m.
Applied, thanks.
Dmitry Safonov Jan. 31, 2017, 1:55 p.m.
2017-01-31 14:41 GMT+03:00 Pavel Emelyanov <xemul@virtuozzo.com>:
> Applied, thanks.

criu-dev failure bisected to this one:
https://travis-ci.org/xemul/criu/builds/196924134

[criu]# git bisect log
git bisect start
# bad: [6bfb1b226ce0c3f34e2790873a40d84f8545acdb] zdtm: Add checkskip
scripts for OFD locks
git bisect bad 6bfb1b226ce0c3f34e2790873a40d84f8545acdb
# good: [1004e9123f76ce7ffd7a3a5a86cef3a620eac77c] zdtm/socket_aio:
add a synchonization between processes
git bisect good 1004e9123f76ce7ffd7a3a5a86cef3a620eac77c
# bad: [e8971568a2d6ea409d726d3d3d07afc52b75aa70] scm: Allow to pass
flags argument to recv_fds()
git bisect bad e8971568a2d6ea409d726d3d3d07afc52b75aa70
# bad: [3e65c451b485eed2b9974980c230bdd9d2cb118f] test: check exit
codes for criu page-server and criu lazy-pages
git bisect bad 3e65c451b485eed2b9974980c230bdd9d2cb118f
# good: [320ccee2fe4cf1fb8272f6312f26de61c70ba7d6] tty: Unify id printing
git bisect good 320ccee2fe4cf1fb8272f6312f26de61c70ba7d6
# bad: [3f67731b8ef4ed46eb9d935683c0fc3ccee1101e] mount: create a
mount point for the root mount namespace in the roots yard
git bisect bad 3f67731b8ef4ed46eb9d935683c0fc3ccee1101e
# good: [607087074ec1a2391c11a1bc3a387a2fa5b4263a] tty: Print rdev and
dev pair in case of error
git bisect good 607087074ec1a2391c11a1bc3a387a2fa5b4263a
# first bad commit: [3f67731b8ef4ed46eb9d935683c0fc3ccee1101e] mount:
create a mount point for the root mount namespace in the roots yard
Andrey Vagin Jan. 31, 2017, 9:49 p.m.
On Tue, Jan 31, 2017 at 04:55:03PM +0300, Dmitry Safonov wrote:
> 2017-01-31 14:41 GMT+03:00 Pavel Emelyanov <xemul@virtuozzo.com>:
> > Applied, thanks.
> 
> criu-dev failure bisected to this one:
> https://travis-ci.org/xemul/criu/builds/196924134

It is because the second patch has not been committed.
> 
> [criu]# git bisect log
> git bisect start
> # bad: [6bfb1b226ce0c3f34e2790873a40d84f8545acdb] zdtm: Add checkskip
> scripts for OFD locks
> git bisect bad 6bfb1b226ce0c3f34e2790873a40d84f8545acdb
> # good: [1004e9123f76ce7ffd7a3a5a86cef3a620eac77c] zdtm/socket_aio:
> add a synchonization between processes
> git bisect good 1004e9123f76ce7ffd7a3a5a86cef3a620eac77c
> # bad: [e8971568a2d6ea409d726d3d3d07afc52b75aa70] scm: Allow to pass
> flags argument to recv_fds()
> git bisect bad e8971568a2d6ea409d726d3d3d07afc52b75aa70
> # bad: [3e65c451b485eed2b9974980c230bdd9d2cb118f] test: check exit
> codes for criu page-server and criu lazy-pages
> git bisect bad 3e65c451b485eed2b9974980c230bdd9d2cb118f
> # good: [320ccee2fe4cf1fb8272f6312f26de61c70ba7d6] tty: Unify id printing
> git bisect good 320ccee2fe4cf1fb8272f6312f26de61c70ba7d6
> # bad: [3f67731b8ef4ed46eb9d935683c0fc3ccee1101e] mount: create a
> mount point for the root mount namespace in the roots yard
> git bisect bad 3f67731b8ef4ed46eb9d935683c0fc3ccee1101e
> # good: [607087074ec1a2391c11a1bc3a387a2fa5b4263a] tty: Print rdev and
> dev pair in case of error
> git bisect good 607087074ec1a2391c11a1bc3a387a2fa5b4263a
> # first bad commit: [3f67731b8ef4ed46eb9d935683c0fc3ccee1101e] mount:
> create a mount point for the root mount namespace in the roots yard
> 
> -- 
>              Dmitry