[Devel,RH7] sysctl: panic only on softlockup_panic-th consequent softlockup

Submitted by Pavel Tikhomirov on Sept. 21, 2016, 2:27 p.m.

Details

Message ID 1474468040-14674-1-git-send-email-ptikhomirov@virtuozzo.com
State New
Series "sysctl: panic only on softlockup_panic-th consequent softlockup"
Headers show

Commit Message

Pavel Tikhomirov Sept. 21, 2016, 2:27 p.m.
https://jira.sw.ru/browse/PSBM-52199

Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 kernel/sysctl.c   |  3 ++-
 kernel/watchdog.c | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c8f7bc3..40dfc98 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -123,6 +123,7 @@  EXPORT_SYMBOL(ve_allow_module_load);
 #ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
 static int neg_one = -1;
+static int int_max = INT_MAX;
 #endif
 
 static int zero;
@@ -910,7 +911,7 @@  static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
-		.extra2		= &one,
+		.extra2		= &int_max,
 	},
 #ifdef CONFIG_SMP
 	{
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index ba61141..171255b 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -304,6 +304,40 @@  static void watchdog_interrupt_count(void)
 static int watchdog_nmi_enable(unsigned int cpu);
 static void watchdog_nmi_disable(unsigned int cpu);
 
+/*
+ * Delay the panic until softlockup_panic soft lockups happen in a row.
+ * Soft lockups count as "in a row" if they all fit into a window of
+ * 2 * softlockup_panic * softlockup_thresh. (They most likely come one
+ * right after another with a period of softlockup_thresh; the factor
+ * of two covers the accumulating error.) Returns 1 if the caller should
+ * panic, 0 otherwise. The lock is dropped on every path before return.
+ */
+static int need_panic_on_softlockup(void)
+{
+	static raw_spinlock_t lock = __RAW_SPIN_LOCK_UNLOCKED(lock);
+	static unsigned long intend;
+	static int count;
+	static int thresh;
+	unsigned long now = get_timestamp();
+	int slp = softlockup_panic;
+	int ret;
+
+	if (!slp)
+		return 0;
+
+	raw_spin_lock(&lock);
+	if (!intend || time_after(now, intend)) {
+		/* unsigned long math: slp may be as large as INT_MAX */
+		intend = now + 2UL * slp * get_softlockup_thresh();
+		thresh = slp;
+		count = 0;
+	}
+	ret = ++count >= thresh;
+	raw_spin_unlock(&lock);
+
+	return ret;
+}
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -390,7 +424,7 @@  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		}
 
 		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
-		if (softlockup_panic)
+		if (need_panic_on_softlockup())
 			panic("softlockup: hung tasks");
 		__this_cpu_write(soft_watchdog_warn, true);
 	} else

Comments

Konstantin Khorenko Sept. 23, 2016, 2:27 p.m.
Pasha,

1) please write a good description. Why it's needed? How to use?
2) i thought we came up to the variant when sysctl defines not the number of softlockups in a row,
    but the delay in seconds?
3) please put the reviewer

Thank you.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 09/21/2016 05:27 PM, Pavel Tikhomirov wrote:
> https://jira.sw.ru/browse/PSBM-52199
>
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> ---
>  kernel/sysctl.c   |  3 ++-
>  kernel/watchdog.c | 36 +++++++++++++++++++++++++++++++++++-
>  2 files changed, 37 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index c8f7bc3..40dfc98 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -123,6 +123,7 @@ EXPORT_SYMBOL(ve_allow_module_load);
>  #ifdef CONFIG_LOCKUP_DETECTOR
>  static int sixty = 60;
>  static int neg_one = -1;
> +static int int_max = INT_MAX;
>  #endif
>
>  static int zero;
> @@ -910,7 +911,7 @@ static struct ctl_table kern_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec_minmax,
>  		.extra1		= &zero,
> -		.extra2		= &one,
> +		.extra2		= &int_max,
>  	},
>  #ifdef CONFIG_SMP
>  	{
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index ba61141..171255b 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -304,6 +304,40 @@ static void watchdog_interrupt_count(void)
>  static int watchdog_nmi_enable(unsigned int cpu);
>  static void watchdog_nmi_disable(unsigned int cpu);
>
> +/*
> + * Delay panic until softlockup_panic softlockups in raw.
> + * Consider softlockups are in raw if they fit in interval
> + * of twice as softlockup_panic * softlockup_thresh.
> + * (Soft lockups most likely will come one just after another
> + * with period of softlockup_thresh, but to cover the accumulating
> + * error give them twice as much the time as teoreticaly needed.)
> + */
> +
> +static int need_panic_on_softlockup(void)
> +{
> +	static raw_spinlock_t lock = __RAW_SPIN_LOCK_UNLOCKED(lock);
> +	static unsigned long intend;
> +	static int count;
> +	static int thresh;
> +	unsigned long now = get_timestamp();
> +	int slp = softlockup_panic;
> +
> +	if (!slp)
> +		return 0;
> +
> +	raw_spin_lock(&lock);
> +	if (!intend || time_after(now, intend)) {
> +		intend = now + 2 * slp * get_softlockup_thresh();
> +		thresh = slp;
> +		count = 0;
> +	}
> +	if (++count >= thresh)
> +		return 1;
> +	raw_spin_unlock(&lock);
> +
> +	return 0;
> +}
> +
>  /* watchdog kicker functions */
>  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  {
> @@ -390,7 +424,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  		}
>
>  		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
> -		if (softlockup_panic)
> +		if (need_panic_on_softlockup())
>  			panic("softlockup: hung tasks");
>  		__this_cpu_write(soft_watchdog_warn, true);
>  	} else
>