[RHEL7,COMMIT] ms/saner handling of temporary namespaces

Submitted by Vasily Averin on Aug. 26, 2020, 6:47 a.m.

Details

Message ID 202008260647.07Q6lTsZ002518@vz7build.vvs.sw.ru
State New
Series "Port open_tree and move_mount syscalls"
Headers show

Commit Message

Vasily Averin Aug. 26, 2020, 6:47 a.m.
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.11
------>
commit bc2a579077308dd56461dd81679876fa867bb25f
Author: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Date:   Wed Aug 26 09:47:29 2020 +0300

    ms/saner handling of temporary namespaces
    
    Patchset description:
    These syscalls were added as a preparation step for the new mount API
    (fsopen, fsconfig, fsmount and fspick will be ported separately).
    
    We can use them to implement "cross-namespace bind-mounting" like this:
    
    fd = open_tree(AT_FDCWD, "/mnt", OPEN_TREE_CLONE);
    setns(nsfd, CLONE_NEWNS);
    move_mount(fd, "", AT_FDCWD, "/mnt2", MOVE_MOUNT_F_EMPTY_PATH);
    
    This will allow us to implement adding bind mounts to a running
    container instead of relying on unreliable external propagation.
    
    It is needed for VZ8, but does not apply cleanly, so I will send it
    separately.
    
    https://jira.sw.ru/browse/PSBM-107263
    
    Current patch description:
    From: Al Viro <viro@zeniv.linux.org.uk>
    
    mount_subtree() creates (and soon destroys) a temporary namespace,
    so that automounts could function normally.  These beasts should
    never become anyone's current namespaces; they don't, but it would
    be better to make prevention of that more straightforward.  And
    since they don't become anyone's current namespace, we don't need
    to bother with reserving procfs inums for those.
    
    Teach alloc_mnt_ns() to skip inum allocation if told so, adjust
    put_mnt_ns() accordingly, make mount_subtree() use temporary
    (anon) namespace.  is_anon_ns() checks if a namespace is such.
    
    Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
    
    (cherry-picked from commit 74e831221cfd79460ec11c1b641093863f0ef3ce)
    https://jira.sw.ru/browse/PSBM-107263
    Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 fs/mount.h     |  5 ++++
 fs/namespace.c | 83 +++++++++++++++++++++++++++++-----------------------------
 2 files changed, 46 insertions(+), 42 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/mount.h b/fs/mount.h
index 3e0e6a2..eab4a7a 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -161,3 +161,8 @@  static inline bool is_local_mountpoint(struct dentry *dentry)
 
 	return __is_local_mountpoint(dentry);
 }
+
+static inline bool is_anon_ns(struct mnt_namespace *ns)
+{
+	return ns->seq == 0;
+}
diff --git a/fs/namespace.c b/fs/namespace.c
index dcc1050..59ec3235 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3244,7 +3244,8 @@  static void dec_mnt_namespaces(struct ucounts *ucounts)
 
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
-	ns_free_inum(&ns->ns);
+	if (!is_anon_ns(ns))
+		ns_free_inum(&ns->ns);
 	dec_mnt_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
 
@@ -3264,7 +3265,7 @@  static void free_mnt_ns(struct mnt_namespace *ns)
 static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
 static LIST_HEAD(all_mntns_list); /* protected by namespace_sem */
 
-static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
 {
 	struct mnt_namespace *new_ns;
 	struct ucounts *ucounts;
@@ -3274,29 +3275,28 @@  static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	if (!ucounts)
 		return ERR_PTR(-ENOSPC);
 
-	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+	new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns) {
 		dec_mnt_namespaces(ucounts);
 		return ERR_PTR(-ENOMEM);
 	}
-	ret = ns_alloc_inum(&new_ns->ns);
-	if (ret) {
-		kfree(new_ns);
-		dec_mnt_namespaces(ucounts);
-		return ERR_PTR(ret);
+	if (!anon) {
+		ret = ns_alloc_inum(&new_ns->ns);
+		if (ret) {
+			kfree(new_ns);
+			dec_mnt_namespaces(ucounts);
+			return ERR_PTR(ret);
+		}
 	}
 	new_ns->ns.ops = &mntns_operations;
-	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
+	if (!anon)
+		new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
 	atomic_set(&new_ns->count, 1);
-	new_ns->root = NULL;
 	INIT_LIST_HEAD(&new_ns->list);
 	INIT_LIST_HEAD(&new_ns->mntns_list);
 	init_waitqueue_head(&new_ns->poll);
-	new_ns->event = 0;
 	new_ns->user_ns = get_user_ns(user_ns);
 	new_ns->ucounts = ucounts;
-	new_ns->mounts = 0;
-	new_ns->pending_mounts = 0;
 	return new_ns;
 }
 
@@ -3323,7 +3323,7 @@  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 
 	old = ns->root;
 
-	new_ns = alloc_mnt_ns(user_ns);
+	new_ns = alloc_mnt_ns(user_ns, false);
 	if (IS_ERR(new_ns))
 		return new_ns;
 
@@ -3380,40 +3380,28 @@  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	return new_ns;
 }
 
-/**
- * create_mnt_ns - creates a private namespace and adds a root filesystem
- * @mnt: pointer to the new root filesystem mountpoint
- */
-static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
-{
-	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
-	if (!IS_ERR(new_ns)) {
-		struct mount *mnt = real_mount(m);
-		mnt->mnt_ns = new_ns;
-		new_ns->root = mnt;
-		new_ns->mounts++;
-		list_add(&mnt->mnt_list, &new_ns->list);
-		namespace_lock();
-		list_add_tail(&new_ns->mntns_list, &all_mntns_list);
-		namespace_unlock();
-	} else {
-		mntput(m);
-	}
-	return new_ns;
-}
-
-struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
+struct dentry *mount_subtree(struct vfsmount *m, const char *name)
 {
+	struct mount *mnt = real_mount(m);
 	struct mnt_namespace *ns;
 	struct super_block *s;
 	struct path path;
 	int err;
 
-	ns = create_mnt_ns(mnt);
-	if (IS_ERR(ns))
+	ns = alloc_mnt_ns(&init_user_ns, true);
+	if (IS_ERR(ns)) {
+		mntput(m);
 		return ERR_CAST(ns);
+	}
+	mnt->mnt_ns = ns;
+	ns->root = mnt;
+	ns->mounts++;
+	list_add(&mnt->mnt_list, &ns->list);
+	namespace_lock();
+	list_add_tail(&ns->mntns_list, &all_mntns_list);
+	namespace_unlock();
 
-	err = vfs_path_lookup(mnt->mnt_root, mnt,
+	err = vfs_path_lookup(m->mnt_root, m,
 			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
 
 	put_mnt_ns(ns);
@@ -3617,6 +3605,7 @@  out0:
 static void __init init_mount_tree(void)
 {
 	struct vfsmount *mnt;
+	struct mount *m;
 	struct mnt_namespace *ns;
 	struct path root;
 	struct file_system_type *type;
@@ -3629,10 +3618,17 @@  static void __init init_mount_tree(void)
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
 
-	ns = create_mnt_ns(mnt);
+	ns = alloc_mnt_ns(&init_user_ns, false);
 	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
-
+	m = real_mount(mnt);
+	m->mnt_ns = ns;
+	ns->root = m;
+	ns->mounts = 1;
+	list_add(&m->mnt_list, &ns->list);
+	namespace_lock();
+	list_add_tail(&ns->mntns_list, &all_mntns_list);
+	namespace_unlock();
 	init_task.nsproxy->mnt_ns = ns;
 	get_mnt_ns(ns);
 
@@ -3881,6 +3877,9 @@  static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
+	if (is_anon_ns(mnt_ns))
+		return -EINVAL;
+
 	if (fs->users != 1)
 		return -EINVAL;