[Devel,RHEL7,COMMIT] ms/pidns: expose task pid_ns_for_children to userspace

Submitted by Konstantin Khorenko on May 12, 2017, 11:46 a.m.

Details

Message ID 201705121146.v4CBkbwx022417@finist_cl7.x64_64.work.ct
State New
Series "Series without cover letter"
Headers show

Commit Message

Konstantin Khorenko May 12, 2017, 11:46 a.m.
The commit is pushed to "branch-rh7-3.10.0-514.16.1.vz7.32.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.16.1.vz7.32.1
------>
commit accbb8b5adaa51e92d645ae1a70598fbd1fbafdc
Author: Kirill Tkhai <ktkhai@virtuozzo.com>
Date:   Fri May 12 15:46:37 2017 +0400

    ms/pidns: expose task pid_ns_for_children to userspace
    
    ms commit: eaa0d190bfe1ed891b814a52712dcd852554cb08
    
    pid_ns_for_children set by a task is known only to the task itself, and
    it's impossible to identify it from outside.
    
    It's a big problem for checkpoint/restore software like CRIU, because it
    can't correctly handle tasks, that do setns(CLONE_NEWPID) in proccess of
    their work.
    
    This patch solves the problem, and it exposes pid_ns_for_children to ns
    directory in standard way with the name "pid_for_children":
    
    ~# ls /proc/5531/ns -l | grep pid
    lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836]
    lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286]
    
    Link: http://lkml.kernel.org/r/149201123914.6007.2187327078064239572.stgit@localhost.localdomain
    Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
    
    Cc: Andrei Vagin <avagin@virtuozzo.com>
    Cc: Andreas Gruenbacher <agruenba@redhat.com>
    Cc: Kees Cook <keescook@chromium.org>
    Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
    Cc: Al Viro <viro@zeniv.linux.org.uk>
    Cc: Oleg Nesterov <oleg@redhat.com>
    Cc: Paul Moore <paul@paul-moore.com>
    Cc: Eric Biederman <ebiederm@xmission.com>
    Cc: Andy Lutomirski <luto@amacapital.net>
    Cc: Ingo Molnar <mingo@kernel.org>
    Cc: Serge Hallyn <serge@hallyn.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
    
    https://jira.sw.ru/browse/PSBM-58669
    
    Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 fs/proc/namespaces.c    |  1 +
 include/linux/proc_ns.h |  1 +
 kernel/pid_namespace.c  | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)

Patch hide | download patch | download mbox

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index e70c2d3..e317033 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -27,6 +27,7 @@  static const struct proc_ns_operations *ns_entries[] = {
 #endif
 #ifdef CONFIG_PID_NS
 	&pidns_operations,
+	&pidns_for_children_operations,
 #endif
 #ifdef CONFIG_USER_NS
 	&userns_operations,
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index b8dc2bc..8deba57 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -27,6 +27,7 @@  extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
 extern const struct proc_ns_operations userns_operations;
 extern const struct proc_ns_operations mntns_operations;
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index b7cf629..594167d 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -336,6 +336,29 @@  static void *pidns_get(struct task_struct *task)
 	return ns;
 }
 
+static void *pidns_for_children_get(struct task_struct *task)
+{
+	struct pid_namespace *ns = NULL;
+
+	task_lock(task);
+	if (task->nsproxy) {
+		ns = task->nsproxy->pid_ns;
+		get_pid_ns(ns);
+	}
+	task_unlock(task);
+
+	if (ns) {
+		read_lock(&tasklist_lock);
+		if (!ns->child_reaper) {
+			put_pid_ns(ns);
+			ns = NULL;
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	return ns;
+}
+
 static void pidns_put(void *ns)
 {
 	put_pid_ns(ns);
@@ -387,6 +410,16 @@  const struct proc_ns_operations pidns_operations = {
 	.inum		= pidns_inum,
 };
 
+const struct proc_ns_operations pidns_for_children_operations = {
+	.name		= "pid_for_children",
+	.real_ns_name	= "pid",
+	.type		= CLONE_NEWPID,
+	.get		= pidns_for_children_get,
+	.put		= pidns_put,
+	.install	= pidns_install,
+	.inum		= pidns_inum,
+};
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);