[RHEL7,COMMIT] ms/net: add an ioctl to get a socket network namespace

Submitted by Konstantin Khorenko on April 2, 2018, 2:14 p.m.

Details

Message ID 201804021414.w32EEXkA023535@finist_ce7.work
State New
Series "Port SIOCGSKNS support for sockets and tun"
Headers show

Commit Message

Konstantin Khorenko April 2, 2018, 2:14 p.m.
The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.46.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.46.2
------>
commit 8abd1d014cab71da36cc62e2deb3aa00a676c971
Author: Andrey Vagin <avagin@openvz.org>
Date:   Mon Apr 2 17:14:33 2018 +0300

    ms/net: add an ioctl to get a socket network namespace
    
    ms commit: c62cce2caee5
    
    Each socket operates in a network namespace where it has been created,
    so if we want to dump and restore a socket, we have to know its network
    namespace.
    
    We have socket_diag to get information about sockets, but it doesn't
    report sockets which are not bound or connected.
    
    This patch introduces a new socket ioctl, which is called SIOCGSKNS
    and used to get a file descriptor for a socket network namespace.
    
    A task must have CAP_NET_ADMIN in a target network namespace to
    use this ioctl.
    
    Cc: "David S. Miller" <davem@davemloft.net>
    Cc: Eric W. Biederman <ebiederm@xmission.com>
    Signed-off-by: Andrei Vagin <avagin@openvz.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>
    
    https://jira.sw.ru/browse/PSBM-79229
    
    Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
    Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
    
    ============================================================
    Patchset description:
    Port SIOCGSKNS support for sockets and tun
    
    This patchset ports get-socket-and-tun-net-namespace-fd functionality
    from mainline kernel to vz7.
    
    Note that in the case of sockets and tun we don't have a proc vfsmnt
    like we have for NS_GET_USERNS and NS_GET_PARENT. The workaround
    is to use current's pid_ns->proc_mnt. It has to work even in the case
    of a nested pid ns, since namespace inode numbers are unique
    in the system.
    
    Kirill Tkhai (2):
          ms/net: add an ioctl to get a socket network namespace
          tun: Add ioctl() SIOCGSKNS cmd to allow obtaining net ns of tun device
---
 fs/proc/namespaces.c         |  2 +-
 include/linux/proc_ns.h      |  8 ++++++++
 include/uapi/linux/sockios.h |  1 +
 net/socket.c                 | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 626d943b23f8..d03078162944 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -130,7 +130,7 @@  static struct dentry *proc_ns_get_dentry(struct super_block *sb,
 	return __proc_ns_get_dentry(sb, ns, ns_ops);
 }
 
-static int open_related_ns(struct vfsmount *mnt, struct proc_ns *ns,
+int open_related_ns(struct vfsmount *mnt, struct proc_ns *ns,
 		const struct proc_ns_operations *relative_ns_ops,
 		void *(*get_ns)(void *ns,
 			const struct proc_ns_operations *ns_ops)) {
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index af898ed6fbad..53d7aca52478 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -78,4 +78,12 @@  static inline bool proc_ns_inode(struct inode *inode) { return false; }
 
 #endif /* CONFIG_PROC_FS */
 
+struct vfsmount;
+int open_related_ns(struct vfsmount *mnt, struct proc_ns *ns,
+		    const struct proc_ns_operations *relative_ns_ops,
+		    void *(*get_ns)(void *ns,
+		    const struct proc_ns_operations *ns_ops));
+
+int open_net_ns_fd(struct net *net);
+
 #endif /* _LINUX_PROC_NS_H */
diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h
index e888b1aed69f..cf0a25ebc74c 100644
--- a/include/uapi/linux/sockios.h
+++ b/include/uapi/linux/sockios.h
@@ -84,6 +84,7 @@ 
 #define SIOCWANDEV	0x894A		/* get/set netdev parameters	*/
 
 #define SIOCOUTQNSD	0x894B		/* output queue size (not sent only) */
+#define SIOCGSKNS	0x894C		/* get socket network namespace */
 
 /* ARP cache control calls. */
 		    /*  0x8950 - 0x8952  * obsolete calls, don't re-use */
diff --git a/net/socket.c b/net/socket.c
index b76a5da91122..b79c1d5efee5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -67,6 +67,7 @@ 
 #include <linux/rcupdate.h>
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
 #include <linux/if_bridge.h>
@@ -1026,6 +1027,29 @@  static long sock_do_ioctl(struct net *net, struct socket *sock,
  *	what to do with it - that's up to the protocol still.
  */
 
+static void *get_net_ns(void *ns, const struct proc_ns_operations *ns_ops)
+{
+	struct net *net = ns;
+	return get_net(net);
+}
+
+int open_net_ns_fd(struct net *net)
+{
+	struct proc_ns ns = { .ns = net, .ns_ops = &netns_operations, };
+	struct vfsmount *proc_mnt;
+	int ret;
+
+	proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt);
+	if (IS_ERR(proc_mnt))
+		return PTR_ERR(proc_mnt);
+
+	ret = open_related_ns(proc_mnt, &ns, ns.ns_ops, get_net_ns);
+	mntput(proc_mnt);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(open_net_ns_fd);
+
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
 	struct socket *sock;
@@ -1093,6 +1117,13 @@  static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 				err = dlci_ioctl_hook(cmd, argp);
 			mutex_unlock(&dlci_ioctl_mutex);
 			break;
+		case SIOCGSKNS:
+			err = -EPERM;
+			if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+				break;
+
+			err = open_net_ns_fd(net);
+			break;
 		default:
 			err = sock_do_ioctl(net, sock, cmd, arg);
 			break;
@@ -3246,6 +3277,7 @@  static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 	case SIOCSIFVLAN:
 	case SIOCADDDLCI:
 	case SIOCDELDLCI:
+	case SIOCGSKNS:
 		return sock_ioctl(file, cmd, arg);
 
 	case SIOCGIFFLAGS: