[RHEL7,COMMIT] sched/rt: Stop for_each_process_thread() iterations in tg_has_rt_tasks()

Submitted by Konstantin Khorenko on April 27, 2018, 10:44 a.m.

Details

Message ID 201804271044.w3RAibW4000913@finist_ce7.work
State New
Series "sched/rt: Stop for_each_process_thread() iterations in tg_has_rt_tasks()"
Headers show

Commit Message

Konstantin Khorenko April 27, 2018, 10:44 a.m.
The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.47.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.47.2
------>
commit 686f2db03b54b781ac72d93f74a49180d01cce86
Author: Kirill Tkhai <ktkhai@virtuozzo.com>
Date:   Fri Apr 27 13:44:36 2018 +0300

    sched/rt: Stop for_each_process_thread() iterations in tg_has_rt_tasks()
    
    tg_rt_schedulable() iterates over all child task groups,
    while tg_has_rt_tasks() iterates over all linked tasks.
    On systems with a large number of tasks, this may
    take a lot of time.
    
    I observed a hard LOCKUP on a machine with 20000+ processes
    after a write to "cpu.rt_period_us" of a cpu cgroup with
    39 children. The problem occurred because tasklist_lock
    is held for a long time and other processes can't fork().
    
    PID: 1036268  TASK: ffff88766c310000  CPU: 36  COMMAND: "criu"
     #0 [ffff887f7f408e48] crash_nmi_callback at ffffffff81050601
     #1 [ffff887f7f408e58] nmi_handle at ffffffff816e0cc7
     #2 [ffff887f7f408eb0] do_nmi at ffffffff816e0fb0
     #3 [ffff887f7f408ef0] end_repeat_nmi at ffffffff816e00b9
        [exception RIP: tg_rt_schedulable+463]
        RIP: ffffffff810bf49f  RSP: ffff886537ad7d50  RFLAGS: 00000202
        RAX: 0000000000000000  RBX: 000000003b9aca00  RCX: ffff883e9cb4b1b0
        RDX: ffff887d0be43608  RSI: ffff886537ad7dd8  RDI: ffff8840a6ad0000
        RBP: ffff886537ad7d68   R8: ffff887d0be431b0   R9: 00000000000e7ef0
        R10: ffff88164fc39400  R11: 0000000000023380  R12: ffffffff81ef8d00
        R13: ffffffff810bea40  R14: 0000000000000000  R15: ffff8840a6ad0000
        ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
---
 kernel/sched/core.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

Patch hide | download patch | download mbox

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f14bbf5c8ed6..5987f85379a9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8955,17 +8955,27 @@  void sched_move_task(struct task_struct *tsk)
  */
 static DEFINE_MUTEX(rt_constraints_mutex);
 
-/* Must be called with tasklist_lock held */
 static inline int tg_has_rt_tasks(struct task_group *tg)
 {
-	struct task_struct *g, *p;
+	struct cgroup *cgroup;
+	struct task_struct *p;
+	struct cgroup_iter it;
+	int ret = 0;
 
-	do_each_thread(g, p) {
-		if (rt_task(p) && task_rq(p)->rt.tg == tg)
-			return 1;
-	} while_each_thread(g, p);
+	if (task_group_is_autogroup(tg))
+                return 0;
 
-	return 0;
+	cgroup = tg->css.cgroup;
+
+	cgroup_iter_start(cgroup, &it);
+	while ((p = cgroup_iter_next(cgroup, &it)))
+		if (rt_task(p)) {
+			ret = 1;
+			break;
+		}
+	cgroup_iter_end(cgroup, &it);
+
+	return ret;
 }
 
 struct rt_schedulable_data {
@@ -9053,7 +9063,6 @@  static int tg_set_rt_bandwidth(struct task_group *tg,
 	int i, err = 0;
 
 	mutex_lock(&rt_constraints_mutex);
-	qread_lock(&tasklist_lock);
 	err = __rt_schedulable(tg, rt_period, rt_runtime);
 	if (err)
 		goto unlock;
@@ -9071,7 +9080,6 @@  static int tg_set_rt_bandwidth(struct task_group *tg,
 	}
 	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 unlock:
-	qread_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return err;
@@ -9130,9 +9138,7 @@  static int sched_rt_global_constraints(void)
 	int ret = 0;
 
 	mutex_lock(&rt_constraints_mutex);
-	qread_lock(&tasklist_lock);
 	ret = __rt_schedulable(NULL, 0, 0);
-	qread_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return ret;