[RHEL7,COMMIT] ms/teach move_mount(2) to work with OPEN_TREE_CLONE

Submitted by Vasily Averin on Aug. 26, 2020, 6:47 a.m.

Details

Message ID 202008260647.07Q6lpB7002797@vz7build.vvs.sw.ru
State New
Series "Port open_tree and move_mount syscalls"
Headers show

Commit Message

Vasily Averin Aug. 26, 2020, 6:47 a.m.
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.11
------>
commit 310dc74d15bc4ea210c57aa70e6ff5b29a8a1be5
Author: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Date:   Wed Aug 26 09:47:51 2020 +0300

    ms/teach move_mount(2) to work with OPEN_TREE_CLONE
    
    Patchset description:
    These syscalls were added as a preparation step for the new mount API
    (fsopen, fsconfig, fsmount and fspick will be ported separately).
    
    We can use them to implement "cross-namespace bind-mounting" like this:
    
    fd = open_tree(AT_FDCWD, "/mnt", OPEN_TREE_CLONE);
    setns(nsfd, CLONE_NEWNS);
    move_mount(fd, "", AT_FDCWD, "/mnt2", MOVE_MOUNT_F_EMPTY_PATH);
    
    This will allow us to implement the feature of adding bind mounts to a
    running container instead of relying on unreliable external propagations.
    
    It is needed for VZ8, but does not apply cleanly, so I will send it
    separately.
    
    https://jira.sw.ru/browse/PSBM-107263
    
    Current patch description:
    From: David Howells <dhowells@redhat.com>
    
    Allow a detached tree created by open_tree(..., OPEN_TREE_CLONE) to be
    attached by move_mount(2).
    
    If by the time of final fput() of OPEN_TREE_CLONE-opened file its tree is
    not detached anymore, it won't be dissolved.  move_mount(2) is adjusted
    to handle detached source.
    
    That gives us equivalents of mount --bind and mount --rbind.
    
    Thanks also to Alan Jenkins <alan.christopher.jenkins@gmail.com> for
    providing a whole bunch of ways to break things using this interface.
    
    Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
    Signed-off-by: David Howells <dhowells@redhat.com>
    Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
    
    teach move_mount(2) to work with OPEN_TREE_CLONE
    (cherry-picked from commit 44dfd84a6d54a675e35ab618d9fab47b36cb78cd)
    do_move_mount(): fix an unsafe use of is_anon_ns()
    (cherry-picked from commit 05883eee857eab4693e7d13ebab06716475c5754)
    vfs: move_mount: reject moving kernel internal mounts
    (cherry-picked from commit 570d7a98e7d6d5d8706d94ffd2d40adeaa318332)
    
    https://jira.sw.ru/browse/PSBM-107263
    Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 fs/namespace.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/namespace.c b/fs/namespace.c
index 0820db1..ef8d8c0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1985,10 +1985,16 @@  void dissolve_on_fput(struct vfsmount *mnt)
 	namespace_lock();
 	lock_mount_hash();
 	ns = real_mount(mnt)->mnt_ns;
-	umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+	if (ns) {
+		if (is_anon_ns(ns))
+			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
+		else
+			ns = NULL;
+	}
 	unlock_mount_hash();
 	namespace_unlock();
-	free_mnt_ns(ns);
+	if (ns)
+		free_mnt_ns(ns);
 }
 
 void drop_collected_mounts(struct vfsmount *mnt)
@@ -2197,6 +2203,10 @@  static int attach_recursive_mnt(struct mount *source_mnt,
 		attach_mnt(source_mnt, dest_mnt, dest_mp);
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
+		if (source_mnt->mnt_ns) {
+			/* move from anon - the caller will destroy */
+			list_del_init(&source_mnt->mnt_ns->list);
+		}
 		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
 		commit_tree(source_mnt);
 	}
@@ -2852,13 +2862,37 @@  out_unlock:
 	return err;
 }
 
+/*
+ * Check the subtree for references to earlier/same mount namespaces; these
+ * can pin namespaces unseen by the mount-cycle checks and so allow cycles.
+ * Returns false once mnt_ns_loop() flags a mount root in the subtree, true
+ * otherwise.  The walk is done under lock_mount_hash().
+ */
+static bool check_for_nsfs_mounts(struct mount *subtree)
+{
+	struct mount *p;
+	bool ret = false;	/* stays false if the walk bails out early */
+
+	lock_mount_hash();
+	for (p = subtree; p; p = next_mnt(p, subtree))
+		if (mnt_ns_loop(p->mnt.mnt_root))
+			goto out;
+
+	ret = true;	/* every mount visited, none flagged */
+out:
+	unlock_mount_hash();
+	return ret;
+}
+
 static int do_move_mount(struct path *old_path, struct path *new_path)
 {
 	struct path parent_path = {.mnt = NULL, .dentry = NULL};
+	struct mnt_namespace *ns;
 	struct mount *p;
 	struct mount *old;
 	struct mountpoint *mp;
 	int err;
+	bool attached;
 
 	mp = lock_mount(new_path);
 	if (IS_ERR(mp))
@@ -2866,12 +2900,20 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 
 	old = real_mount(old_path->mnt);
 	p = real_mount(new_path->mnt);
+	attached = mnt_has_parent(old);
+	ns = old->mnt_ns;
 
 	err = -EINVAL;
-	if (!check_mnt(p) || !check_mnt(old))
+	/* The mountpoint must be in our namespace. */
+	if (!check_mnt(p))
 		goto out;
 
-	if (!mnt_has_parent(old))
+	/* The thing moved must be mounted... */
+	if (!is_mounted(&old->mnt))
+		goto out;
+
+	/* ... and either ours or the root of anon namespace */
+	if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
 		goto out;
 
 	if (old->mnt.mnt_flags & MNT_LOCKED)
@@ -2886,7 +2928,7 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	/*
 	 * Don't move a mount residing in a shared parent.
 	 */
-	if (IS_MNT_SHARED(old->mnt_parent))
+	if (attached && IS_MNT_SHARED(old->mnt_parent))
 		goto out;
 	/*
 	 * Don't move a mount tree containing unbindable mounts to a destination
@@ -2895,12 +2937,14 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
 		goto out;
 	err = -ELOOP;
+	if (!check_for_nsfs_mounts(old))
+		goto out;
 	for (; mnt_has_parent(p); p = p->mnt_parent)
 		if (p == old)
 			goto out;
 
 	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
-				   &parent_path);
+				   attached ? &parent_path : NULL);
 	if (err)
 		goto out;
 
@@ -2909,8 +2953,11 @@  static int do_move_mount(struct path *old_path, struct path *new_path)
 	list_del_init(&old->mnt_expire);
 out:
 	unlock_mount(mp);
-	if (!err)
+	if (!err) {
 		path_put(&parent_path);
+		if (!attached)
+			free_mnt_ns(ns);
+	}
 	return err;
 }
 
@@ -3578,6 +3625,8 @@  out_type:
 
 /*
  * Move a mount from one place to another.
+ * In combination with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be
+ * used to copy a mount subtree.
  *
  * Note the flags value is a combination of MOVE_MOUNT_* flags.
  */