[RHEL7,COMMIT] sched: Call calc_load_ve() out of jiffies_lock

Submitted by Konstantin Khorenko on July 19, 2018, 12:56 p.m.

Details

Message ID 201807191256.w6JCuvr0015893@finist_ce7.work
State New
Series "Make calc_load_ve() be executed out of jiffies_lock"
Headers show

Commit Message

Konstantin Khorenko July 19, 2018, 12:56 p.m.
The commit is pushed to "branch-rh7-3.10.0-862.6.3.vz7.62.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.6.3.vz7.62.4
------>
commit e8d11b283ec87d45ea0101430ab226a6f7c34e19
Author: Kirill Tkhai <ktkhai@virtuozzo.com>
Date:   Thu Jul 19 15:56:57 2018 +0300

    sched: Call calc_load_ve() out of jiffies_lock
    
    jiffies_lock is a big global seqlock, which is used in many
    places. In combination with another actions like smp call
    functions and readers of this seqlock, system may hang for
    a long time. There have already been a couple of hard lockups
    caused by the long iteration in calc_load_ve() with jiffies_lock
    held, which made readers of this seqlock spin for a long time.
    
    This patch makes calc_load_ve() use a separate lock,
    which relaxes jiffies_lock. I think this should be enough
    to resolve the problem, since both of the crashes I saw contain
    readers of the seqlock on parallel cpus, and we won't have
    to relax further (say, by moving calc_load_ve() to softirq).
    
    Note that the principal change this patch makes is that
    jiffies_lock readers on parallel cpus won't wait till calc_load_ve()
    finishes; so instead of (n_readers + 1) cpus waiting till
    this function completes, there will be only 1 cpu doing that.
    
    https://jira.sw.ru/browse/PSBM-84967
    
    Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
    
    =========================
    Patchset description:
    Make calc_load_ve() be executed out of jiffies_lock
    
    https://jira.sw.ru/browse/PSBM-84967
    
    Kirill Tkhai (3):
          sched: Make calc_global_load() return true when it's need to update ve statistic
          sched: Export calc_load_ve()
          sched: Call calc_load_ve() out of jiffies_lock
---
 kernel/sched/core.c       | 13 ++++++++-----
 kernel/time/tick-common.c |  8 +++++++-
 kernel/time/tick-sched.c  |  5 ++++-
 kernel/time/timekeeping.c |  5 ++++-
 4 files changed, 23 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 693823a1bd36..26f4959f9ab4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2885,10 +2885,16 @@  static LIST_HEAD(ve_root_list);
 
 void calc_load_ve(void)
 {
+	static DEFINE_SPINLOCK(load_ve_lock);
 	unsigned long nr_unint, nr_active;
 	struct task_group *tg;
 	int i;
 
+	/*
+	 * This is called without jiffies_lock, and here we protect
+	 * against very rare parallel execution on two or more cpus.
+	 */
+	spin_lock(&load_ve_lock);
 	rcu_read_lock();
 	list_for_each_entry_rcu(tg, &ve_root_list, ve_root_list) {
 		nr_active = 0;
@@ -2913,16 +2919,13 @@  void calc_load_ve(void)
 	rcu_read_unlock();
 
 	nr_unint = nr_uninterruptible() * FIXED_1;
-	/*
-	 * This is called from do_timer() only, which can't be excuted
-	 * in parallel on two or more cpus. So, we have to protect
-	 * the below modifications from readers only.
-	 */
+
 	write_seqcount_begin(&kstat_glob.nr_unint_avg_seq);
 	CALC_LOAD(kstat_glob.nr_unint_avg[0], EXP_1, nr_unint);
 	CALC_LOAD(kstat_glob.nr_unint_avg[1], EXP_5, nr_unint);
 	CALC_LOAD(kstat_glob.nr_unint_avg[2], EXP_15, nr_unint);
 	write_seqcount_end(&kstat_glob.nr_unint_avg_seq);
+	spin_unlock(&load_ve_lock);
 }
 #endif /* CONFIG_VE */
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ed88d128c5ce..35462b2d236d 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -63,13 +63,19 @@  int tick_is_oneshot_available(void)
 static void tick_periodic(int cpu)
 {
 	if (tick_do_timer_cpu == cpu) {
+		bool calc_ve;
+
 		write_seqlock(&jiffies_lock);
 
 		/* Keep track of the next tick event */
 		tick_next_period = ktime_add(tick_next_period, tick_period);
 
-		do_timer(1);
+		calc_ve = do_timer(1);
 		write_sequnlock(&jiffies_lock);
+
+		if (calc_ve)
+			calc_load_ve();
+
 		update_wall_time();
 	}
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 85c7fe06eace..baba7c990290 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -52,6 +52,7 @@  struct tick_sched *tick_get_tick_sched(int cpu)
 static void tick_do_update_jiffies64(ktime_t now)
 {
 	unsigned long ticks = 0;
+	bool calc_ve = false;
 	ktime_t delta;
 
 	/*
@@ -80,7 +81,7 @@  static void tick_do_update_jiffies64(ktime_t now)
 			last_jiffies_update = ktime_add_ns(last_jiffies_update,
 							   incr * ticks);
 		}
-		do_timer(++ticks);
+		calc_ve = do_timer(++ticks);
 
 		/* Keep the tick_next_period variable up to date */
 		tick_next_period = ktime_add(last_jiffies_update, tick_period);
@@ -89,6 +90,8 @@  static void tick_do_update_jiffies64(ktime_t now)
 		return;
 	}
 	write_sequnlock(&jiffies_lock);
+	if (calc_ve)
+		calc_load_ve();
 	update_wall_time();
 }
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 774651652076..be6dbff71d48 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2151,8 +2151,11 @@  EXPORT_SYMBOL(hardpps);
  */
 void xtime_update(unsigned long ticks)
 {
+	bool calc_ve;
 	write_seqlock(&jiffies_lock);
-	do_timer(ticks);
+	calc_ve = do_timer(ticks);
 	write_sequnlock(&jiffies_lock);
+	if (calc_ve)
+		calc_load_ve();
 	update_wall_time();
 }