[v4,0/3] Make core_pattern support namespace

Submitted by Cao Shufeng on Nov. 16, 2016, 3:10 a.m.

Details

Message ID 1477380536-3307-1-git-send-email-caosf.fnst@cn.fujitsu.com
State New
Headers show

Patch hide | download patch | download mbox

diff --git a/fs/coredump.c b/fs/coredump.c
index aa2ef6c..f97a987 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -49,7 +49,6 @@ 
 
 int core_uses_pid;
 unsigned int core_pipe_limit;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
 static int core_name_size = CORENAME_MAX_SIZE;
 
 struct core_name {
@@ -57,8 +56,6 @@  struct core_name {
 	int used, size;
 };
 
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
 static int expand_corename(struct core_name *cn, int size)
 {
 	char *corename = krealloc(cn->corename, size, GFP_KERNEL);
@@ -183,10 +180,10 @@  static int cn_print_exe_file(struct core_name *cn)
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(struct core_name *cn, struct coredump_params *cprm)
+static int format_corename(struct core_name *cn, const char *pat_ptr,
+			   struct coredump_params *cprm)
 {
 	const struct cred *cred = current_cred();
-	const char *pat_ptr = core_pattern;
 	int ispipe = (*pat_ptr == '|');
 	int pid_in_pattern = 0;
 	int err = 0;
@@ -663,6 +660,8 @@  void do_coredump(const siginfo_t *siginfo)
 		 */
 		.mm_flags = mm->flags,
 	};
+	struct pid_namespace *pid_ns;
+	char core_pattern[CORENAME_MAX_SIZE];
 
 	audit_core_dumps(siginfo->si_signo);
 
@@ -672,6 +671,18 @@  void do_coredump(const siginfo_t *siginfo)
 	if (!__get_dumpable(cprm.mm_flags))
 		goto fail;
 
+	pid_ns = task_active_pid_ns(current);
+	spin_lock(&pid_ns->core_pattern_lock);
+	while (pid_ns != &init_pid_ns) {
+		if (pid_ns->core_pattern[0])
+			break;
+		spin_unlock(&pid_ns->core_pattern_lock);
+		pid_ns = pid_ns->parent,
+		spin_lock(&pid_ns->core_pattern_lock);
+	}
+	strcpy(core_pattern, pid_ns->core_pattern);
+	spin_unlock(&pid_ns->core_pattern_lock);
+
 	cred = prepare_creds();
 	if (!cred)
 		goto fail;
@@ -693,7 +704,7 @@  void do_coredump(const siginfo_t *siginfo)
 
 	old_cred = override_creds(cred);
 
-	ispipe = format_corename(&cn, &cprm);
+	ispipe = format_corename(&cn, core_pattern, &cprm);
 
 	if (ispipe) {
 		int dump_count;
@@ -740,7 +751,7 @@  void do_coredump(const siginfo_t *siginfo)
 		}
 
 		rcu_read_lock();
-		vinit_task = find_task_by_vpid(1);
+		vinit_task = find_task_by_pid_ns(1, pid_ns);
 		rcu_read_unlock();
 		if (!vinit_task) {
 			printk(KERN_WARNING "failed getting init task info, skipping core dump\n");
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 34cce96..f0b0c21 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -9,6 +9,7 @@ 
 #include <linux/nsproxy.h>
 #include <linux/kref.h>
 #include <linux/ns_common.h>
+#include <linux/binfmts.h>
 
 struct pidmap {
        atomic_t nr_free;
@@ -46,6 +47,8 @@  struct pid_namespace {
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
 	struct ns_common ns;
+	spinlock_t core_pattern_lock;
+	char core_pattern[CORENAME_MAX_SIZE];
 };
 
 extern struct pid_namespace init_pid_ns;
diff --git a/kernel/pid.c b/kernel/pid.c
index f66162f..e7ee122 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -83,6 +83,8 @@  struct pid_namespace init_pid_ns = {
 #ifdef CONFIG_PID_NS
 	.ns.ops = &pidns_operations,
 #endif
+	.core_pattern_lock = __SPIN_LOCK_UNLOCKED(init_pid_ns.core_pattern_lock),
+	.core_pattern = "core",
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index df9e8e9..c70ca5d 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -137,6 +137,8 @@  static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	for (i = 1; i < PIDMAP_ENTRIES; i++)
 		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
 
+	spin_lock_init(&ns->core_pattern_lock);
+
 	return ns;
 
 out_free_map:
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 706309f..c73ced5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -484,7 +484,7 @@  static struct ctl_table kern_table[] = {
 	},
 	{
 		.procname	= "core_pattern",
-		.data		= core_pattern,
+		.data		= NULL,
 		.maxlen		= CORENAME_MAX_SIZE,
 		.mode		= 0644,
 		.proc_handler	= proc_dostring_coredump,
@@ -2401,12 +2401,20 @@  int proc_dointvec_minmax(struct ctl_table *table, int write,
 static void validate_coredump_safety(void)
 {
 #ifdef CONFIG_COREDUMP
+	struct pid_namespace *pid_ns = task_active_pid_ns(current);
+	const char *core_pattern;
+
+	spin_lock(&pid_ns->core_pattern_lock);
+	core_pattern = pid_ns->core_pattern;
+
 	if (suid_dumpable == SUID_DUMP_ROOT &&
 	    core_pattern[0] != '/' && core_pattern[0] != '|') {
 		printk(KERN_WARNING "Unsafe core_pattern used with "\
 			"suid_dumpable=2. Pipe handler or fully qualified "\
 			"core dump path required.\n");
 	}
+
+	spin_unlock(&pid_ns->core_pattern_lock);
 #endif
 }
 
@@ -2423,10 +2431,42 @@  static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
 static int proc_dostring_coredump(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int error = proc_dostring(table, write, buffer, lenp, ppos);
-	if (!error)
-		validate_coredump_safety();
-	return error;
+	int ret;
+	char core_pattern[CORENAME_MAX_SIZE];
+	struct pid_namespace *pid_ns = task_active_pid_ns(current);
+
+	if (write) {
+		if (*ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
+			warn_sysctl_write(table);
+
+		ret = _proc_do_string(core_pattern, table->maxlen, write,
+				      (char __user *)buffer, lenp, ppos);
+		if (ret)
+			return ret;
+
+		spin_lock(&pid_ns->core_pattern_lock);
+		strcpy(pid_ns->core_pattern, core_pattern);
+		spin_unlock(&pid_ns->core_pattern_lock);
+	} else {
+		spin_lock(&pid_ns->core_pattern_lock);
+		while (pid_ns != &init_pid_ns) {
+			if (pid_ns->core_pattern[0])
+				break;
+			spin_unlock(&pid_ns->core_pattern_lock);
+			pid_ns = pid_ns->parent,
+			spin_lock(&pid_ns->core_pattern_lock);
+		}
+		strcpy(core_pattern, pid_ns->core_pattern);
+		spin_unlock(&pid_ns->core_pattern_lock);
+
+		ret = _proc_do_string(core_pattern, table->maxlen, write,
+				      (char __user *)buffer, lenp, ppos);
+		if (ret)
+			return ret;
+	}
+
+	validate_coredump_safety();
+	return 0;
 }
 #endif
 
-- 
2.7.4


From caosf.fnst@cn.fujitsu.com Tue Oct 25 15:28:50 2016
Received: from localhost.localdomain (10.167.226.94) by
 G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server
 (TLS) id 14.3.279.2; Tue, 25 Oct 2016 15:28:50 +0800
From: Cao Shufeng <caosf.fnst@cn.fujitsu.com>
To: <linux-kernel@vger.kernel.org>
CC: <containers@lists.linux-foundation.org>, <ebiederm@xmission.com>,
 <mguzik@redhat.com>, <kamezawa.hiroyu@jp.fujitsu.com>,
 <stgraber@ubuntu.com>, <avagin@gmail.com>, <zhaolei@cn.fujitsu.com>,
 <mashimiao.fnst@cn.fujitsu.com>, <caosf.fnst@cn.fujitsu.com>
Subject: [PATCH v4 2/3] Limit dump_pipe program's permission to init for
 container
Date: Tue, 25 Oct 2016 15:28:55 +0800
Message-ID: <1477380536-3307-3-git-send-email-caosf.fnst@cn.fujitsu.com>
X-Mailer: git-send-email 2.1.0
In-Reply-To: <1477380536-3307-1-git-send-email-caosf.fnst@cn.fujitsu.com>
References: <1477380536-3307-1-git-send-email-caosf.fnst@cn.fujitsu.com>
Content-Type: text/plain
Return-Path: caosf.fnst@cn.fujitsu.com
X-MS-Exchange-Organization-AuthSource: G08CNEXCHPEKD01.g08.fujitsu.local
X-MS-Exchange-Organization-AuthAs: Internal
X-MS-Exchange-Organization-AuthMechanism: 06
X-Originating-IP: [10.167.226.94]
X-MS-Exchange-Organization-AVStamp-Mailbox: SMEXw]nP;1285660;0;This mail
 has been scanned by Trend Micro ScanMail for Microsoft Exchange;
X-MS-Exchange-Organization-SCL: 0
MIME-Version: 1.0
X-Evolution-POP3-UID: 24015
X-Evolution-Source: 1406508640.5943.5@localhost.localdomain
Content-Transfer-Encoding: 8bit

Currently, when core_pattern is set to a pipe, the pipe program is
forked by a kthread running with root's permissions, and it writes the
dump file into the host's filesystem.
The same thing happens for a container: the dumper and the dump file
are both on the host (not in the container).

This has the following problems:
1: Not consistent with a file-type core_pattern
   When core_pattern is set to a file, the container writes the dump
   into the container's filesystem instead of the host's.
2: Not safe for privileged containers
   In a privileged container, a user can destroy the host system with
   the following commands:
   # # In a container
   # echo "|/bin/dd of=/boot/vmlinuz" >/proc/sys/kernel/core_pattern
   # make_dump

This patch switches the dumper program's environment to that of the
init task. For a container, the dumper program then has the same
environment as the container's init task, so the dumper program
resides in the container's filesystem and writes the coredump into
the container's filesystem.
The dumper's permissions are also limited to a subset of those of the
container's init process.

Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
Suggested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

Signed-off-by: Cao ShuFeng<caosf.fnst@cn.fujitsu.com>
---
 fs/coredump.c           | 126 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/binfmts.h |   2 +
 2 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 52f2ed6..aa2ef6c 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -502,6 +502,45 @@  static void wait_for_dump_helpers(struct file *file)
 }
 
 /*
+ * umh_ns_setup
+ * set the namespaces to those of the base task of a container.
+ * the original nsproxy is saved (with a reference held) so that
+ * umh_ns_cleanup can switch the workqueue thread back to it.
+ * NOTE(review): base_task->nsproxy is read unlocked - confirm safe.
+ * this method runs in workqueue kernel thread.
+ */
+static void umh_ns_setup(struct subprocess_info *info)
+{
+	struct coredump_params *cp = (struct coredump_params *)info->data;
+	struct task_struct *base_task = cp->base_task;
+
+	if (base_task) {
+		cp->current_task_nsproxy = current->nsproxy;
+		/* take a reference so the current nsproxy is not freed */
+		get_nsproxy(current->nsproxy);
+		/* Set namespaces to base_task */
+		get_nsproxy(base_task->nsproxy);
+		switch_task_namespaces(current, base_task->nsproxy);
+	}
+}
+
+/*
+ * umh_ns_cleanup
+ * undo umh_ns_setup: switch current back to the nsproxy that was
+ * saved in the coredump_params; does nothing if none was saved.
+ * this method runs in workqueue kernel thread.
+ */
+static void umh_ns_cleanup(struct subprocess_info *info)
+{
+	struct coredump_params *cp = (struct coredump_params *)info->data;
+	struct nsproxy *current_task_nsproxy = cp->current_task_nsproxy;
+	if (current_task_nsproxy) {
+		/* switch the workqueue thread back to its original namespaces */
+		switch_task_namespaces(current, current_task_nsproxy);
+	}
+}
+
+/*
  * umh_pipe_setup
  * helper function to customize the process used
  * to collect the core in userspace.  Specifically
@@ -516,6 +555,8 @@  static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 {
 	struct file *files[2];
 	struct coredump_params *cp = (struct coredump_params *)info->data;
+	struct task_struct *base_task;
+
 	int err = create_pipe_files(files, 0);
 	if (err)
 		return err;
@@ -524,10 +565,76 @@  static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 
 	err = replace_fd(0, files[0], 0);
 	fput(files[0]);
+	if (err)
+		return err;
+
 	/* and disallow core files too */
 	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
 
-	return err;
+	base_task = cp->base_task;
+	if (base_task) {
+		const struct cred *base_cred;
+
+		/* Set fs_root to base_task */
+		spin_lock(&base_task->fs->lock);
+		set_fs_root(current->fs, &base_task->fs->root);
+		set_fs_pwd(current->fs, &base_task->fs->pwd);
+		spin_unlock(&base_task->fs->lock);
+
+		/* Set cgroup to base_task */
+		current->flags &= ~PF_NO_SETAFFINITY;
+		err = cgroup_attach_task_all(base_task, current);
+		if (err < 0)
+			return err;
+
+		/* Set cred to base_task */
+		base_cred = get_task_cred(base_task);
+
+		new->uid   = base_cred->uid;
+		new->gid   = base_cred->gid;
+		new->suid  = base_cred->suid;
+		new->sgid  = base_cred->sgid;
+		new->euid  = base_cred->euid;
+		new->egid  = base_cred->egid;
+		new->fsuid = base_cred->fsuid;
+		new->fsgid = base_cred->fsgid;
+
+		new->securebits = base_cred->securebits;
+
+		new->cap_inheritable = base_cred->cap_inheritable;
+		new->cap_permitted   = base_cred->cap_permitted;
+		new->cap_effective   = base_cred->cap_effective;
+		new->cap_bset        = base_cred->cap_bset;
+		new->cap_ambient     = base_cred->cap_ambient;
+
+		security_cred_free(new);
+#ifdef CONFIG_SECURITY
+		new->security = NULL;
+#endif
+		err = security_prepare_creds(new, base_cred, GFP_KERNEL);
+		if (err < 0) {
+			put_cred(base_cred);
+			return err;
+		}
+
+		free_uid(new->user);
+		new->user = base_cred->user;
+		get_uid(new->user);
+
+		put_user_ns(new->user_ns);
+		new->user_ns = base_cred->user_ns;
+		get_user_ns(new->user_ns);
+
+		put_group_info(new->group_info);
+		new->group_info = base_cred->group_info;
+		get_group_info(new->group_info);
+
+		put_cred(base_cred);
+
+		validate_creds(new);
+	}
+
+	return 0;
 }
 
 void do_coredump(const siginfo_t *siginfo)
@@ -590,6 +697,7 @@  void do_coredump(const siginfo_t *siginfo)
 
 	if (ispipe) {
 		int dump_count;
+                struct task_struct *vinit_task;
 		char **helper_argv;
 		struct subprocess_info *sub_info;
 
@@ -631,6 +739,15 @@  void do_coredump(const siginfo_t *siginfo)
 			goto fail_dropcount;
 		}
 
+		rcu_read_lock();
+		vinit_task = find_task_by_vpid(1);
+		rcu_read_unlock();
+		if (!vinit_task) {
+			printk(KERN_WARNING "failed getting init task info, skipping core dump\n");
+			goto fail_dropcount;
+		}
+
+
 		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
 		if (!helper_argv) {
 			printk(KERN_WARNING "%s failed to allocate memory\n",
@@ -638,15 +755,20 @@  void do_coredump(const siginfo_t *siginfo)
 			goto fail_dropcount;
 		}
 
+		get_task_struct(vinit_task);
+
+		cprm.base_task = vinit_task;
+
 		retval = -ENOMEM;
 		sub_info = call_usermodehelper_setup(helper_argv[0],
 						helper_argv, NULL, GFP_KERNEL,
-						NULL, NULL, umh_pipe_setup,
+						umh_ns_setup, umh_ns_cleanup, umh_pipe_setup,
 						NULL, &cprm);
 		if (sub_info)
 			retval = call_usermodehelper_exec(sub_info,
 							  UMH_WAIT_EXEC);
 
+		put_task_struct(vinit_task);
 		argv_free(helper_argv);
 		if (retval) {
 			printk(KERN_INFO "Core dump to |%s pipe failed\n",
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 1303b57..7ba4271 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -59,6 +59,8 @@  struct linux_binprm {
 
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
+        struct task_struct *base_task;
+        struct nsproxy *current_task_nsproxy;
 	const siginfo_t *siginfo;
 	struct pt_regs *regs;
 	struct file *file;
-- 
2.7.4


From caosf.fnst@cn.fujitsu.com Tue Oct 25 15:28:46 2016
Received: from localhost.localdomain (10.167.226.94) by
 G08CNEXCHPEKD01.g08.fujitsu.local (10.167.33.89) with Microsoft SMTP Server
 (TLS) id 14.3.279.2; Tue, 25 Oct 2016 15:28:46 +0800
From: Cao Shufeng <caosf.fnst@cn.fujitsu.com>
To: <linux-kernel@vger.kernel.org>
CC: <containers@lists.linux-foundation.org>, <ebiederm@xmission.com>,
 <mguzik@redhat.com>, <kamezawa.hiroyu@jp.fujitsu.com>,
 <stgraber@ubuntu.com>, <avagin@gmail.com>, <zhaolei@cn.fujitsu.com>,
 <mashimiao.fnst@cn.fujitsu.com>, <caosf.fnst@cn.fujitsu.com>
Subject: [PATCH v4 1/3] Make call_usermodehelper_exec possible to set
 namespaces
Date: Tue, 25 Oct 2016 15:28:54 +0800
Message-ID: <1477380536-3307-2-git-send-email-caosf.fnst@cn.fujitsu.com>
X-Mailer: git-send-email 2.1.0
In-Reply-To: <1477380536-3307-1-git-send-email-caosf.fnst@cn.fujitsu.com>
References: <1477380536-3307-1-git-send-email-caosf.fnst@cn.fujitsu.com>
Content-Type: text/plain
Return-Path: caosf.fnst@cn.fujitsu.com
X-MS-Exchange-Organization-AuthSource: G08CNEXCHPEKD01.g08.fujitsu.local
X-MS-Exchange-Organization-AuthAs: Internal
X-MS-Exchange-Organization-AuthMechanism: 06
X-Originating-IP: [10.167.226.94]
X-MS-Exchange-Organization-AVStamp-Mailbox: SMEXw]nP;1285660;0;This mail
 has been scanned by Trend Micro ScanMail for Microsoft Exchange;
X-MS-Exchange-Organization-SCL: 0
MIME-Version: 1.0
X-Evolution-POP3-UID: 24014
X-Evolution-Source: 1406508640.5943.5@localhost.localdomain
Content-Transfer-Encoding: 8bit

Currently, call_usermodehelper_work() cannot set namespaces for
the executed program.

This patch adds that capability to call_usermodehelper_work().
init_intermediate is introduced for initialization work which should
be done before fork(), which gives us a way to set namespaces
for children. cleanup_intermediate is introduced for cleaning
up what was done in init_intermediate, such as switching the
namespace back.

This is helpful for coredump to run the pipe program in a
specific container environment.

Signed-off-by: Cao Shufeng <caosf.fnst@cn.fujitsu.com>
Co-author-by: Zhao Lei <zhaolei@cn.fujitsu.com>
---
 fs/coredump.c               |  3 ++-
 include/linux/kmod.h        |  4 ++++
 init/do_mounts_initrd.c     |  3 ++-
 kernel/kmod.c               | 43 +++++++++++++++++++++++++++++++++++--------
 lib/kobject_uevent.c        |  3 ++-
 security/keys/request_key.c |  4 ++--
 6 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 281b768..52f2ed6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -641,7 +641,8 @@  void do_coredump(const siginfo_t *siginfo)
 		retval = -ENOMEM;
 		sub_info = call_usermodehelper_setup(helper_argv[0],
 						helper_argv, NULL, GFP_KERNEL,
-						umh_pipe_setup, NULL, &cprm);
+						NULL, NULL, umh_pipe_setup,
+						NULL, &cprm);
 		if (sub_info)
 			retval = call_usermodehelper_exec(sub_info,
 							  UMH_WAIT_EXEC);
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index fcfd2bf..994e429 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -61,6 +61,8 @@  struct subprocess_info {
 	char **envp;
 	int wait;
 	int retval;
+	void (*init_intermediate)(struct subprocess_info *info);
+	void (*cleanup_intermediate)(struct subprocess_info *info);
 	int (*init)(struct subprocess_info *info, struct cred *new);
 	void (*cleanup)(struct subprocess_info *info);
 	void *data;
@@ -71,6 +73,8 @@  call_usermodehelper(char *path, char **argv, char **envp, int wait);
 
 extern struct subprocess_info *
 call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask,
+			  void (*init_intermediate)(struct subprocess_info *info),
+			  void (*cleanup_intermediate)(struct subprocess_info *info),
 			  int (*init)(struct subprocess_info *info, struct cred *new),
 			  void (*cleanup)(struct subprocess_info *), void *data);
 
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index a1000ca..59d11c9 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -72,7 +72,8 @@  static void __init handle_initrd(void)
 	current->flags |= PF_FREEZER_SKIP;
 
 	info = call_usermodehelper_setup("/linuxrc", argv, envp_init,
-					 GFP_KERNEL, init_linuxrc, NULL, NULL);
+					 GFP_KERNEL, NULL, NULL, init_linuxrc,
+					 NULL, NULL);
 	if (!info)
 		return;
 	call_usermodehelper_exec(info, UMH_WAIT_PROC);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 0277d12..42f5a74 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -91,7 +91,8 @@  static int call_modprobe(char *module_name, int wait)
 	argv[4] = NULL;
 
 	info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
-					 NULL, free_modprobe_argv, NULL);
+					 NULL, NULL, NULL, free_modprobe_argv,
+                                         NULL);
 	if (!info)
 		goto free_module_name;
 
@@ -301,6 +302,9 @@  static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
 	/* Restore default kernel sig handler */
 	kernel_sigaction(SIGCHLD, SIG_IGN);
 
+	if(sub_info->cleanup_intermediate) {
+		sub_info->cleanup_intermediate(sub_info);
+	}
 	umh_complete(sub_info);
 }
 
@@ -322,6 +326,9 @@  static void call_usermodehelper_exec_work(struct work_struct *work)
 {
 	struct subprocess_info *sub_info =
 		container_of(work, struct subprocess_info, work);
+	if(sub_info->init_intermediate) {
+		sub_info->init_intermediate(sub_info);
+	}
 
 	if (sub_info->wait & UMH_WAIT_PROC) {
 		call_usermodehelper_exec_sync(sub_info);
@@ -334,6 +341,11 @@  static void call_usermodehelper_exec_work(struct work_struct *work)
 		 */
 		pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
 				    CLONE_PARENT | SIGCHLD);
+
+		if(sub_info->cleanup_intermediate) {
+			sub_info->cleanup_intermediate(sub_info);
+		}
+
 		if (pid < 0) {
 			sub_info->retval = pid;
 			umh_complete(sub_info);
@@ -499,25 +511,38 @@  static void helper_unlock(void)
  * @argv: arg vector for process
  * @envp: environment for process
  * @gfp_mask: gfp mask for memory allocation
- * @cleanup: a cleanup function
+ * @init_intermediate: init function which is called in parent task
+ * @cleanup_intermediate: clean function which is called in parent task
  * @init: an init function
+ * @cleanup: a cleanup function
  * @data: arbitrary context sensitive data
  *
  * Returns either %NULL on allocation failure, or a subprocess_info
  * structure.  This should be passed to call_usermodehelper_exec to
  * exec the process and free the structure.
  *
- * The init function is used to customize the helper process prior to
- * exec.  A non-zero return code causes the process to error out, exit,
- * and return the failure to the calling process
+ * The init_intermediate function is called in the parent task of the
+ * user mode helper. It is designed for initialization work which must
+ * be done in the parent task, like switching the pid_ns_for_children.
+ *
+ * The cleanup_intermediate function is used to clean up what was
+ * done in init_intermediate; it is also called in the parent
+ * task.
+ *
+ * The init function is called after fork. It is used to customize the
+ * helper process prior to exec.  A non-zero return code causes the
+ * process to error out, exit, and return the failure to the
+ * calling process.
  *
- * The cleanup function is just before ethe subprocess_info is about to
+ * The cleanup function is just before the subprocess_info is about to
  * be freed.  This can be used for freeing the argv and envp.  The
  * Function must be runnable in either a process context or the
  * context in which call_usermodehelper_exec is called.
  */
 struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 		char **envp, gfp_t gfp_mask,
+		void (*init_intermediate)(struct subprocess_info *info),
+		void (*cleanup_intermediate)(struct subprocess_info *info),
 		int (*init)(struct subprocess_info *info, struct cred *new),
 		void (*cleanup)(struct subprocess_info *info),
 		void *data)
@@ -532,8 +557,10 @@  struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 	sub_info->argv = argv;
 	sub_info->envp = envp;
 
-	sub_info->cleanup = cleanup;
+	sub_info->init_intermediate = init_intermediate;
+	sub_info->cleanup_intermediate = cleanup_intermediate;
 	sub_info->init = init;
+	sub_info->cleanup = cleanup;
 	sub_info->data = data;
   out:
 	return sub_info;
@@ -619,7 +646,7 @@  int call_usermodehelper(char *path, char **argv, char **envp, int wait)
 	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
 
 	info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
-					 NULL, NULL, NULL);
+					 NULL, NULL, NULL, NULL, NULL);
 	if (info == NULL)
 		return -ENOMEM;
 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index f6c2c1e..7a7c57a 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -345,7 +345,8 @@  int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 		retval = -ENOMEM;
 		info = call_usermodehelper_setup(env->argv[0], env->argv,
 						 env->envp, GFP_KERNEL,
-						 NULL, cleanup_uevent_env, env);
+						 NULL, NULL, NULL,
+						 cleanup_uevent_env, env);
 		if (info) {
 			retval = call_usermodehelper_exec(info, UMH_NO_WAIT);
 			env = NULL;	/* freed by cleanup_uevent_env */
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 43affcf..51dfb38 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -78,8 +78,8 @@  static int call_usermodehelper_keys(char *path, char **argv, char **envp,
 	struct subprocess_info *info;
 
 	info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL,
-					  umh_keys_init, umh_keys_cleanup,
-					  session_keyring);
+					 NULL, NULL, umh_keys_init,
+					 umh_keys_cleanup, session_keyring);
 	if (!info)
 		return -ENOMEM;