[RH7,v2,2/2] ve/kthread: fix race when work can be added to stopped kthread worker

Submitted by Pavel Tikhomirov on March 21, 2018, 12:38 p.m.

Details

Message ID 20180321123813.5071-2-ptikhomirov@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Pavel Tikhomirov March 21, 2018, 12:38 p.m.
Race can be reproduced with steps below:

1) Add these test patch to increase the race probability:

kernel/kmod.c
@@ -977,6 +977,8 @@ int call_usermodehelper_fns_ve(struct ve_struct *ve,
        khelper = ve_is_super(ve) ? &khelper_worker : &ve->ve_umh_worker;

        if (ve_is_super(ve) || (get_exec_env() == ve)) {
+               while(ve->is_running)
+                       cond_resched();
                err = call_usermodehelper_by(khelper, path, argv, envp, wait, init,
                                             cleanup, data);
                goto out_put;

2) Set corepattern to type pipe in CT:

echo "|/bin/dd of=/vz/coredumps/core-%e-%t.%p" > /proc/sys/kernel/core_pattern

3) Produce a segfault inside a container and next try to stop the
container killing init.

Coredump creates "dd" work and ads it to ve_umh_worker, which is already
stopped and will never handle these work, and our work will hang
forever, and container will never stop:

[root@kuchy ~]# cat /proc/6064/stack
[<ffffffff810a86f9>] __call_usermodehelper_exec+0x179/0x1a0
[<ffffffff810a8eae>] call_usermodehelper_by+0x5e/0xa0
[<ffffffff810a901c>] call_usermodehelper_fns_ve+0xec/0x170
[<ffffffff8128600c>] do_coredump+0x4dc/0xaf0
[<ffffffff810a1cc5>] get_signal_to_deliver+0x1c5/0x5e0
[<ffffffff8102a387>] do_signal+0x57/0x6b0
[<ffffffff8102aa3f>] do_notify_resume+0x5f/0xb0
[<ffffffff816df6ec>] retint_signal+0x48/0x8c
[<ffffffffffffffff>] 0xffffffffffffffff

Fix these by putting is_running check and usermodhelper queueing in one
op_sem lock critical section. It works as we already have these lock
taken in ve_stop_ns.

commit a08505af8c51 ("ve/kthread: khelper kthread in a container
introduced") says that we can't hold lock when waiting for userspace so
drop the lock when waiting.

v2: split function arguments change into separate preparatory patch

https://jira.sw.ru/browse/PSBM-82490
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 kernel/kmod.c | 50 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 17 deletions(-)

Patch hide | download patch | download mbox

diff --git a/kernel/kmod.c b/kernel/kmod.c
index 9c37e8b3ab01..2b45f06cbe51 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -862,6 +862,19 @@  struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 }
 EXPORT_SYMBOL(call_usermodehelper_setup);
 
+static void ve_op_sem_down_read(struct ve_struct *ve)
+{
+	if (ve)
+		down_read(&ve->op_sem);
+}
+
+static void ve_op_sem_up_read(struct ve_struct *ve)
+{
+	if (ve)
+		up_read(&ve->op_sem);
+}
+
+
 /**
  * call_usermodehelper_exec - start a usermode application
  * @sub_info: information about the subprocessa
@@ -914,7 +927,9 @@  static int __call_usermodehelper_exec(struct subprocess_info *sub_info,
 		goto unlock;
 
 	if (wait & UMH_KILLABLE) {
+		ve_op_sem_up_read(ve);
 		retval = wait_for_completion_killable(&done);
+		ve_op_sem_down_read(ve);
 		if (!retval)
 			goto wait_done;
 
@@ -924,7 +939,9 @@  static int __call_usermodehelper_exec(struct subprocess_info *sub_info,
 		/* fallthrough, umh_complete() was already called */
 	}
 
+	ve_op_sem_up_read(ve);
 	wait_for_completion(&done);
+	ve_op_sem_down_read(ve);
 wait_done:
 	retval = sub_info->retval;
 out:
@@ -975,29 +992,28 @@  int call_usermodehelper_fns_ve(struct ve_struct *ve,
 	if (!ve)
 		return -EFAULT;
 
-	if (ve_is_super(ve) || (get_exec_env() == ve)) {
-		err = call_usermodehelper_by(path, argv, envp, wait, init, cleanup,
-					     data, ve_is_super(ve) ? NULL : ve);
-		goto out_put;
-	}
-
-	if (wait > UMH_WAIT_EXEC) {
-		printk(KERN_ERR "VE#%s: Sleeping call for containers UMH is "
-				"not supported\n", ve->ve_name);
-		err = -EINVAL;
+	if (ve_is_super(ve)) {
+		err = call_usermodehelper_by(path, argv, envp,
+				wait, init, cleanup, data, NULL);
 		goto out_put;
 	}
 
+	/*
+	 * These lock is needed to remove race with ve_stop_ns for containers,
+	 * we should not be able to add work to stopped khelper, what can lead
+	 * to infinite wait for work's completion.
+	 *
+	 * Waiting for user-space with semaphore taken is wrong, so
+	 * __call_usermodehelper_exec will release it as soon as the work is
+	 * queued and wait will be done unlocked.
+	 */
 	down_read(&ve->op_sem);
 	err = -EPIPE;
-	if (!ve->is_running)
-		goto out;
-
-	err = call_usermodehelper_by(path, argv, envp, wait, init,
-				     cleanup, data, ve);
-
-out:
+	if (ve->is_running)
+		err = call_usermodehelper_by(path, argv, envp,
+				wait, init, cleanup, data, ve);
 	up_read(&ve->op_sem);
+
 out_put:
 	put_ve(ve);
 	return err;