[RHEL7,COMMIT] nsfs: add ioctl to get an owning user namespace for ns file descriptor

Submitted by Konstantin Khorenko on Dec. 7, 2017, 10:40 a.m.

Details

Message ID 201712071040.vB7AeEAD031044@finist_ce7.work
State New
Series "ioctl: port NS_GET_USERNS and NS_GET_PARENT"
Headers show

Commit Message

Konstantin Khorenko Dec. 7, 2017, 10:40 a.m.
The commit is pushed to "branch-rh7-3.10.0-693.11.1.vz7.39.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.11.1.vz7.39.1
------>
commit faec14cbc8d7681e6a6bb8f4785693a195ad0388
Author: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Date:   Thu Dec 7 13:40:13 2017 +0300

    nsfs: add ioctl to get an owning user namespace for ns file descriptor
    
    Patchset description:
    ioctl: port NS_GET_USERNS and NS_GET_PARENT
    
    We need these to be able to enter the user namespace that owns a mount
    namespace, so that we have the privileges to unmount everything and
    uncover overmounted mounts so that they can be dumped. NS_GET_PARENT is
    not a must here, but it is nice to have for criu too.
    
    https://jira.sw.ru/browse/PSBM-57362
    
    Rework mainstream patches as we don't have nsfs and ns_common.
    
    Other option can be porting 10 more patches:
    
    5d826c847b34 ("new helper: readlink_copy()")
    435d5f4bb2cc ("common object embedded into various struct ....ns")
    58be28256d98 ("make mntns ->get()/->put()/->install()/->inum() work with
    &mnt_ns->ns")
    ff24870f46d5 ("netns: switch ->get()/->put()/->install()/->inum() to
    working with &net->ns")
    3c0411846118 ("switch the rest of proc_ns_operations to working with
    &...->ns")
    64964528b24e ("make proc_ns_operations work with struct ns_common *
    instead of void *")
    6344c433a452 ("new helpers: ns_alloc_inum/ns_free_inum")
    33c429405a2c ("copy address of proc_ns_ops into ns_common")
    f77c80142e1a ("bury struct proc_ns in fs/proc")
    e149ed2b805f ("take the targets of /proc/*/ns/* symlinks to separate fs")
    
    And re-applying on top of them:
    25b14e92af1a ("ns: allow ns_entries to have custom symlink content")
    
    And porting a fix, as the new version uses RCU:
    073c516ff735 ("nsfs: mark dentry with DCACHE_RCUACCESS")
    
    But the ioctl patches still won't apply cleanly after that, so I think
    a complete rework here is the better option.
    
    Pavel Tikhomirov (3):
      kernel: add a helper to get an owning user namespace for a namespace
      nsfs: add ioctl to get an owning user namespace for ns file descriptor
      nsfs: add ioctl to get a parent namespace
    
    ============================
    This patch description:
    
    Changes:
    We have no nsfs.c, so move the code to fs/proc/namespaces.c.
    __ns_get_path() is replaced with its analog __proc_ns_get_dentry(),
    so pass the additional arguments everywhere and skip the EAGAIN part,
    as we don't have it in proc_ns_get_dentry().
    I add the proc_ns_operations expected for the resulting ns as an
    argument to open_related_ns(), because get_ns gives us an untyped
    void * and we don't know whether it is a pid_namespace or a
    user_namespace, while __proc_ns_get_dentry() needs to know which.
    
    https://jira.sw.ru/browse/PSBM-57362
    
    ms commit 6786741dbf99 ("nsfs: add ioctl to get an owning user namespace
    for ns file descriptor")
    
    Each namespace has an owning user namespace, but there is currently no
    way to discover these relationships.
    
    Understanding namespace relationships allows us to answer the question:
    what capability does process X have to perform operations on a resource
    governed by namespace Y?
    
    After a long discussion, Eric W. Biederman proposed to use ioctl-s for
    this purpose.
    
    The NS_GET_USERNS ioctl returns a file descriptor to an owning user
    namespace.
    It returns EPERM if a target namespace is outside of a current user
    namespace.
    
    v2: rename parent to relative
    
    v3: Add a missing mntput when returning -EAGAIN --EWB
    
    Acked-by: Serge Hallyn <serge@hallyn.com>
    Link: https://lkml.org/lkml/2016/7/6/158
    Signed-off-by: Andrei Vagin <avagin@openvz.org>
    
    Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
    Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 fs/proc/namespaces.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 74 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b0c6d0417433..121b76d3fcd0 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -13,7 +13,12 @@ 
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include "internal.h"
+#include <linux/ioctl.h>
 
+#define NSIO   0xb7	/* first argument to _IO() for the ns ioctl below */
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS  _IO(NSIO, 0x1)
 
 static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_NET_NS
@@ -35,8 +40,11 @@  static const struct proc_ns_operations *ns_entries[] = {
 	&mntns_operations,
 };
 
+static long ns_ioctl(struct file *filp, unsigned int ioctl,
+		unsigned long arg);	/* forward declaration for the table below */
 static const struct file_operations ns_file_operations = {
 	.llseek		= no_llseek,
+	.unlocked_ioctl = ns_ioctl,	/* handles NS_GET_USERNS */
 };
 
 static const struct inode_operations ns_inode_operations = {
@@ -64,18 +72,13 @@  const struct dentry_operations ns_dentry_operations =
 	.d_dname	= ns_dname,
 };
 
-static struct dentry *proc_ns_get_dentry(struct super_block *sb,
-	struct task_struct *task, const struct proc_ns_operations *ns_ops)
+static struct dentry *__proc_ns_get_dentry(struct super_block *sb,
+	void *ns, const struct proc_ns_operations *ns_ops)
 {
 	struct dentry *dentry, *result;
 	struct inode *inode;
 	struct proc_inode *ei;
 	struct qstr qname = { .name = "", };
-	void *ns;
-
-	ns = ns_ops->get(task);
-	if (!ns)
-		return ERR_PTR(-ENOENT);
 
 	dentry = d_alloc_pseudo(sb, &qname);
 	if (!dentry) {
@@ -113,6 +116,70 @@  static struct dentry *proc_ns_get_dentry(struct super_block *sb,
 	return dentry;
 }
 
+/* Fetch @task's namespace through @ns_ops->get() and get its dentry on @sb. */
+static struct dentry *proc_ns_get_dentry(struct super_block *sb,
+	struct task_struct *task, const struct proc_ns_operations *ns_ops)
+{
+	void *task_ns = ns_ops->get(task);
+
+	/* ->get() produced no namespace for this task: report -ENOENT. */
+	if (task_ns == NULL)
+		return ERR_PTR(-ENOENT);
+	return __proc_ns_get_dentry(sb, task_ns, ns_ops);
+}
+
+static int open_related_ns(struct vfsmount *mnt, struct proc_ns *ns,
+		const struct proc_ns_operations *relative_ns_ops,
+		void *(*get_ns)(void *ns,
+			const struct proc_ns_operations *ns_ops)) {
+	struct path path = {};
+	struct file *f;
+	int fd;
+	void *relative;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	relative = get_ns(ns->ns, ns->ns_ops);
+	if (IS_ERR(relative)) {
+		put_unused_fd(fd);
+		return PTR_ERR(relative);
+	}
+
+	path.mnt = mntget(mnt);
+	path.dentry = __proc_ns_get_dentry(mnt->mnt_sb, relative, relative_ns_ops);
+	if (IS_ERR(path.dentry)) {
+		mntput(mnt);
+		put_unused_fd(fd);
+		return PTR_ERR(path.dentry);
+	}
+
+	f = dentry_open(&path, O_RDONLY, current_cred());
+	path_put(&path);
+	if (IS_ERR(f)) {
+		put_unused_fd(fd);
+		fd = PTR_ERR(f);
+	} else
+		fd_install(fd, f);
+
+	return fd;
+}
+
+/* ioctl entry point for ns files; only NS_GET_USERNS is recognised. */
+static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+	struct proc_ns *ns = get_proc_ns(file_inode(filp));
+	struct vfsmount *mnt = filp->f_path.mnt;
+
+	switch (ioctl) {
+	case NS_GET_USERNS:	/* fd for the owning user namespace */
+		return open_related_ns(mnt, ns, &userns_operations, ns_get_owner);
+	default:		/* unknown request */
+		return -ENOTTY;
+	}
+}
+
 static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;