[RH8,4/4] ms/teach move_mount(2) to work with OPEN_TREE_CLONE

Submitted by Pavel Tikhomirov on Sept. 2, 2020, 12:51 p.m.

Details

Message ID 20200902125121.676874-5-ptikhomirov@virtuozzo.com
State New
Series "Port open_tree and move_mount syscalls"
Headers show

Commit Message

Pavel Tikhomirov Sept. 2, 2020, 12:51 p.m.
Patchset description:
These syscalls were added as preparation step for new mount api (fsopen,
fsconfig, fsmount and fspick will be ported separately).

We can use them to implement "cross-namespace bind-mounting" like this:

fd = open_tree(AT_FDCWD, "/mnt", OPEN_TREE_CLONE);
setns(nsfd, CLONE_NEWNS);
move_mount(fd, "", AT_FDCWD, "/mnt2", MOVE_MOUNT_F_EMPTY_PATH);

This will allow us implementing feature of adding bindmounts to runing
container instead of having unreliable external propagations.

Version for VZ8 is slightly different from VZ7 version.

https://jira.sw.ru/browse/PSBM-107263

Current patch description:
From: David Howells <dhowells@redhat.com>

Allow a detached tree created by open_tree(..., OPEN_TREE_CLONE) to be
attached by move_mount(2).

If by the time of final fput() of OPEN_TREE_CLONE-opened file its tree is
not detached anymore, it won't be dissolved.  move_mount(2) is adjusted
to handle detached source.

That gives us equivalents of mount --bind and mount --rbind.

Thanks also to Alan Jenkins <alan.christopher.jenkins@gmail.com> for
providing a whole bunch of ways to break things using this interface.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

teach move_mount(2) to work with OPEN_TREE_CLONE
(cherry-picked from commit 44dfd84a6d54a675e35ab618d9fab47b36cb78cd)
do_move_mount(): fix an unsafe use of is_anon_ns()
(cherry-picked from commit 05883eee857eab4693e7d13ebab06716475c5754)
vfs: move_mount: reject moving kernel internal mounts
(cherry-picked from commit 570d7a98e7d6d5d8706d94ffd2d40adeaa318332)

https://jira.sw.ru/browse/PSBM-107263
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 fs/namespace.c | 63 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 56 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/namespace.c b/fs/namespace.c
index 700c73f0bb94..5942ffe3802c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1815,10 +1815,16 @@  void dissolve_on_fput(struct vfsmount *mnt)
 	namespace_lock();
 	lock_mount_hash();
 	ns = real_mount(mnt)->mnt_ns;
-	umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+	if (ns) {
+		if (is_anon_ns(ns))
+			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+		else
+			ns = NULL;
+	}
 	unlock_mount_hash();
 	namespace_unlock();
-	free_mnt_ns(ns);
+	if (ns)
+		free_mnt_ns(ns);
 }
 
 void drop_collected_mounts(struct vfsmount *mnt)
@@ -2026,6 +2032,10 @@  static int attach_recursive_mnt(struct mount *source_mnt,
 		attach_mnt(source_mnt, dest_mnt, dest_mp);
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
+		if (source_mnt->mnt_ns) {
+			/* move from anon - the caller will destroy */
+			list_del_init(&source_mnt->mnt_ns->list);
+		}
 		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
 		commit_tree(source_mnt);
 	}
@@ -2493,13 +2503,37 @@  static int do_set_group(struct path *path, const char *sibling_name)
 	return err;
 }
 
+/*
+ * Check that there aren't references to earlier/same mount namespaces in the
+ * specified subtree.  Such references can act as pins for mount namespaces
+ * that aren't checked by the mount-cycle checking code, thereby allowing
+ * cycles to be made.
+ */
+static bool check_for_nsfs_mounts(struct mount *subtree)
+{
+	struct mount *p;
+	bool ret = false;
+
+	lock_mount_hash();
+	for (p = subtree; p; p = next_mnt(p, subtree))
+		if (mnt_ns_loop(p->mnt.mnt_root))
+			goto out;
+
+	ret = true;
+out:
+	unlock_mount_hash();
+	return ret;
+}
+
 static int do_move_mount(struct path *old_path, struct path *new_path)
 {
 	struct path parent_path = {.mnt = NULL, .dentry = NULL};
+	struct mnt_namespace *ns;
 	struct mount *p;
 	struct mount *old;
 	struct mountpoint *mp;
 	int err;
+	bool attached;
 
 	mp = lock_mount(new_path);
 	if (IS_ERR(mp))
@@ -2507,12 +2541,20 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 
 	old = real_mount(old_path->mnt);
 	p = real_mount(new_path->mnt);
+	attached = mnt_has_parent(old);
+	ns = old->mnt_ns;
 
 	err = -EINVAL;
-	if (!check_mnt(p) || !check_mnt(old))
+	/* The mountpoint must be in our namespace. */
+	if (!check_mnt(p))
 		goto out;
 
-	if (!mnt_has_parent(old))
+	/* The thing moved must be mounted... */
+	if (!is_mounted(&old->mnt))
+		goto out;
+
+	/* ... and either ours or the root of anon namespace */
+	if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
 		goto out;
 
 	if (old->mnt.mnt_flags & MNT_LOCKED)
@@ -2527,7 +2569,7 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	/*
 	 * Don't move a mount residing in a shared parent.
 	 */
-	if (IS_MNT_SHARED(old->mnt_parent))
+	if (attached && IS_MNT_SHARED(old->mnt_parent))
 		goto out;
 	/*
 	 * Don't move a mount tree containing unbindable mounts to a destination
@@ -2536,12 +2578,14 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
 		goto out;
 	err = -ELOOP;
+	if (!check_for_nsfs_mounts(old))
+		goto out;
 	for (; mnt_has_parent(p); p = p->mnt_parent)
 		if (p == old)
 			goto out;
 
 	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
-				   &parent_path);
+				   attached ? &parent_path : NULL);
 	if (err)
 		goto out;
 
@@ -2550,8 +2594,11 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	list_del_init(&old->mnt_expire);
 out:
 	unlock_mount(mp);
-	if (!err)
+	if (!err) {
 		path_put(&parent_path);
+		if (!attached)
+			free_mnt_ns(ns);
+	}
 	return err;
 }
 
@@ -3214,6 +3261,8 @@  SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 
 /*
  * Move a mount from one place to another.
+ * In combination with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be
+ * used to copy a mount subtree.
  *
  * Note the flags value is a combination of MOVE_MOUNT_* flags.
  */