[RHEL8,COMMIT] ms/saner handling of temporary namespaces

Submitted by Konstantin Khorenko on Sept. 22, 2020, 12:37 p.m.

Details

Message ID 202009221237.08MCbwF3008072@finist-co8.sw.ru
State New
Series "Port open_tree and move_mount syscalls"
Headers show

Commit Message

Konstantin Khorenko Sept. 22, 2020, 12:37 p.m.
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.6
------>
commit 51865b4696675b8be4bdefd61e7b2a3310b6a45d
Author: Al Viro <viro@zeniv.linux.org.uk>
Date:   Tue Sep 22 15:37:58 2020 +0300

    ms/saner handling of temporary namespaces
    
    Patchset description:
    These syscalls were added as preparation step for new mount api (fsopen,
    fsconfig, fsmount and fspick will be ported separately).
    
    We can use them to implement "cross-namespace bind-mounting" like this:
    
    fd = open_tree(AT_FDCWD, "/mnt", OPEN_TREE_CLONE);
    setns(nsfd, CLONE_NEWNS);
    move_mount(fd, "", AT_FDCWD, "/mnt2", MOVE_MOUNT_F_EMPTY_PATH);
    
    This will allow us implementing feature of adding bindmounts to runing
    container instead of having unreliable external propagations.
    
    Version for VZ8 is slightly different from VZ7 version.
    
    https://jira.sw.ru/browse/PSBM-107263
    
    Current patch description:
    From: Al Viro <viro@zeniv.linux.org.uk>
    
    mount_subtree() creates (and soon destroys) a temporary namespace,
    so that automounts could function normally.  These beasts should
    never become anyone's current namespaces; they don't, but it would
    be better to make prevention of that more straightforward.  And
    since they don't become anyone's current namespace, we don't need
    to bother with reserving procfs inums for those.
    
    Teach alloc_mnt_ns() to skip inum allocation if told so, adjust
    put_mnt_ns() accordingly, make mount_subtree() use temporary
    (anon) namespace.  is_anon_ns() checks if a namespace is such.
    
    Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
    
    (cherry-picked from commit 74e831221cfd79460ec11c1b641093863f0ef3ce)
    https://jira.sw.ru/browse/PSBM-107263
    Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 fs/mount.h     |  5 ++++
 fs/namespace.c | 74 +++++++++++++++++++++++++++-------------------------------
 2 files changed, 40 insertions(+), 39 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/mount.h b/fs/mount.h
index f39bc9da4d73..6250de544760 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -146,3 +146,8 @@  static inline bool is_local_mountpoint(struct dentry *dentry)
 
 	return __is_local_mountpoint(dentry);
 }
+
+static inline bool is_anon_ns(struct mnt_namespace *ns)
+{
+	return ns->seq == 0;
+}
diff --git a/fs/namespace.c b/fs/namespace.c
index 1018ae0efa06..22589d59f476 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2862,7 +2862,8 @@  static void dec_mnt_namespaces(struct ucounts *ucounts)
 
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
-	ns_free_inum(&ns->ns);
+	if (!is_anon_ns(ns))
+		ns_free_inum(&ns->ns);
 	dec_mnt_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
 	kfree(ns);
@@ -2877,7 +2878,7 @@  static void free_mnt_ns(struct mnt_namespace *ns)
  */
 static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
 
-static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
 {
 	struct mnt_namespace *new_ns;
 	struct ucounts *ucounts;
@@ -2887,28 +2888,27 @@  static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 	if (!ucounts)
 		return ERR_PTR(-ENOSPC);
 
-	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+	new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns) {
 		dec_mnt_namespaces(ucounts);
 		return ERR_PTR(-ENOMEM);
 	}
-	ret = ns_alloc_inum(&new_ns->ns);
-	if (ret) {
-		kfree(new_ns);
-		dec_mnt_namespaces(ucounts);
-		return ERR_PTR(ret);
+	if (!anon) {
+		ret = ns_alloc_inum(&new_ns->ns);
+		if (ret) {
+			kfree(new_ns);
+			dec_mnt_namespaces(ucounts);
+			return ERR_PTR(ret);
+		}
 	}
 	new_ns->ns.ops = &mntns_operations;
-	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
+	if (!anon)
+		new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
 	atomic_set(&new_ns->count, 1);
-	new_ns->root = NULL;
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
-	new_ns->event = 0;
 	new_ns->user_ns = get_user_ns(user_ns);
 	new_ns->ucounts = ucounts;
-	new_ns->mounts = 0;
-	new_ns->pending_mounts = 0;
 	return new_ns;
 }
 
@@ -2932,7 +2932,7 @@  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 
 	old = ns->root;
 
-	new_ns = alloc_mnt_ns(user_ns);
+	new_ns = alloc_mnt_ns(user_ns, false);
 	if (IS_ERR(new_ns))
 		return new_ns;
 
@@ -2987,37 +2987,25 @@  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	return new_ns;
 }
 
-/**
- * create_mnt_ns - creates a private namespace and adds a root filesystem
- * @mnt: pointer to the new root filesystem mountpoint
- */
-static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
-{
-	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
-	if (!IS_ERR(new_ns)) {
-		struct mount *mnt = real_mount(m);
-		mnt->mnt_ns = new_ns;
-		new_ns->root = mnt;
-		new_ns->mounts++;
-		list_add(&mnt->mnt_list, &new_ns->list);
-	} else {
-		mntput(m);
-	}
-	return new_ns;
-}
-
-struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
+struct dentry *mount_subtree(struct vfsmount *m, const char *name)
 {
+	struct mount *mnt = real_mount(m);
 	struct mnt_namespace *ns;
 	struct super_block *s;
 	struct path path;
 	int err;
 
-	ns = create_mnt_ns(mnt);
-	if (IS_ERR(ns))
+	ns = alloc_mnt_ns(&init_user_ns, true);
+	if (IS_ERR(ns)) {
+		mntput(m);
 		return ERR_CAST(ns);
+	}
+	mnt->mnt_ns = ns;
+	ns->root = mnt;
+	ns->mounts++;
+	list_add(&mnt->mnt_list, &ns->list);
 
-	err = vfs_path_lookup(mnt->mnt_root, mnt,
+	err = vfs_path_lookup(m->mnt_root, m,
 			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
 
 	put_mnt_ns(ns);
@@ -3229,6 +3217,7 @@  SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 static void __init init_mount_tree(void)
 {
 	struct vfsmount *mnt;
+	struct mount *m;
 	struct mnt_namespace *ns;
 	struct path root;
 	struct file_system_type *type;
@@ -3241,10 +3230,14 @@  static void __init init_mount_tree(void)
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
 
-	ns = create_mnt_ns(mnt);
+	ns = alloc_mnt_ns(&init_user_ns, false);
 	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
-
+	m = real_mount(mnt);
+	m->mnt_ns = ns;
+	ns->root = m;
+	ns->mounts = 1;
+	list_add(&m->mnt_list, &ns->list);
 	init_task.nsproxy->mnt_ns = ns;
 	get_mnt_ns(ns);
 
@@ -3485,6 +3478,9 @@  static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
+	if (is_anon_ns(mnt_ns))
+		return -EINVAL;
+
 	if (fs->users != 1)
 		return -EINVAL;