[Devel,RH7,v3,1/2] fence-watchdog: log wdog actions directly to special file

Submitted by Pavel Tikhomirov on July 20, 2017, 2:58 p.m.

Details

Message ID 20170720145848.8970-1-ptikhomirov@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Pavel Tikhomirov July 20, 2017, 2:58 p.m.
If watchdog was triggered, that means that userspace is unreliable
and we can't expect kernel messages are logged by userspace logger,
thus we need ourselfs write info about watchdog triggered to special
file configured through sysfs /sys/kernel/watchdog_log_path, so that
users can see that watchdog was triggered but not some other event.

Note: we schedule do_halt_or_reboot to workqueue, which consists of
pool of kernel threads. Manager thread adds more worker threads when
all existing workers sleep, restricted by number of max_active. For
our system_wq, max_active == 256. So we can not fear that waiting for
fsync in our work will deadlock with writeback work.

Print timestamp in format "HH:MM:SS/YYYY-MM-DD" local time.

v2: use kernel_write helper
v3: add timestamp to message, control string operation len

https://jira.sw.ru/browse/PSBM-54747
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
 kernel/fence-watchdog.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)

Patch hide | download patch | download mbox

diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
index f5c2347f..c35b3ad 100644
--- a/kernel/fence-watchdog.c
+++ b/kernel/fence-watchdog.c
@@ -23,6 +23,8 @@ 
 #include <linux/fence-watchdog.h>
 #include <linux/device.h>
 #include <linux/kmsg_dump.h>
+#include <linux/fs.h>
+#include <linux/string.h>
 
 #define MAX_U64			(~(u64)0)
 #define MAX_JIFFIES_DELTA	(10 * 365UL * 24UL * 3600UL * HZ)
@@ -41,11 +43,81 @@  const char *action_names[] = {"crash", "reboot", "halt", "netfilter", NULL};
 DEFINE_VVAR(volatile unsigned long, fence_wdog_jiffies64) = MAX_U64;
 static int fence_wdog_action = FENCE_WDOG_CRASH;
 static atomic_t not_fenced = ATOMIC_INIT(-1);
+static char fence_wdog_log_path[PATH_MAX] = "/fence_wdog.log";
+
+#define SECS_PER_MIN	60
+#define PREFIX_LEN	39
+
+static int print_prefix(char *msg) {
+	struct timeval tv;
+	struct tm tm;
+
+	do_gettimeofday(&tv);
+	time_to_tm(tv.tv_sec - sys_tz.tz_minuteswest * SECS_PER_MIN, 0, &tm);
+
+	return snprintf(msg, PREFIX_LEN, "[%02d:%02d:%02d/%04ld-%02d-%02d] fence-watchdog: ",
+			tm.tm_hour, tm.tm_min, tm.tm_sec,
+			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
+}
+
+#define MSG_LEN (PREFIX_LEN + 10)
+
+void fence_wdog_log(void)
+{
+	char msg[MSG_LEN];
+	struct file *file;
+	int ret, len;
+
+	ret = print_prefix(msg);
+	if (ret < 0)
+		return;
+
+	len = strlen(msg);
+
+	ret = snprintf(msg + len, MSG_LEN - len, "%s\n", action_names[fence_wdog_action]);
+	if (ret != strlen(action_names[fence_wdog_action]) + 1) {
+		printk(KERN_EMERG"fence-watchdog: Failed to sprintf msg\n");
+		return;
+	}
+
+	file = filp_open(fence_wdog_log_path,
+			 O_CREAT | O_WRONLY | O_APPEND | O_NOFOLLOW | O_LARGEFILE,
+			 0600);
+	if (IS_ERR(file)) {
+		printk(KERN_EMERG"fence-watchdog: Failed to open log path\n");
+		return;
+	}
+
+	if (!S_ISREG(file_inode(file)->i_mode)) {
+		printk(KERN_EMERG"fence-watchdog: Wrong type of log file\n");
+		goto close;
+	}
+
+	ret = kernel_write(file, msg, strlen(msg), file->f_pos);
+	if (ret < 0) {
+		printk(KERN_EMERG"fence-watchdog: Failed to write msg, ret=%d\n", ret);
+		goto close;
+	}
+
+	ret = vfs_fsync(file, 0);
+	if (ret < 0)
+		printk(KERN_EMERG"fence-watchdog: Failed to fsync log file ret=%d\n", ret);
+
+close:
+	ret = filp_close(file, NULL);
+	if (ret < 0)
+		printk(KERN_EMERG"fence-watchdog: Failed to close log file ret=%d\n", ret);
+
+	return;
+}
 
 static void do_halt_or_reboot(struct work_struct *dummy)
 {
 	printk(KERN_EMERG"fence-watchdog: %s\n",
 	       action_names[fence_wdog_action]);
+
+	fence_wdog_log();
+
 	switch (fence_wdog_action) {
 	case FENCE_WDOG_REBOOT:
 		emergency_restart();
@@ -166,6 +238,30 @@  static ssize_t fence_wdog_available_actions_show(struct kobject *kobj,
 	return ret;
 }
 
+static ssize_t fence_wdog_log_path_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", fence_wdog_log_path);
+}
+
+#define STORE_FORMAT_LEN 16
+
+static ssize_t fence_wdog_log_path_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	char format[STORE_FORMAT_LEN];
+	int ret;
+
+	ret = snprintf(format, STORE_FORMAT_LEN, "%%%ds", PATH_MAX - 1);
+	if (ret < 0)
+		return ret;
+
+
+	if (sscanf(buf, format, fence_wdog_log_path) != 1)
+		return -EINVAL;
+	return 0;
+}
+
 static struct kobj_attribute fence_wdog_timer_attr =
 	__ATTR(watchdog_timer, 0644,
 		fence_wdog_timer_show, fence_wdog_timer_store);
@@ -178,10 +274,15 @@  static struct kobj_attribute fence_wdog_available_actions_attr =
 	__ATTR(watchdog_available_actions, 0644,
 		fence_wdog_available_actions_show, NULL);
 
+static struct kobj_attribute fence_wdog_log_path_attr =
+	__ATTR(watchdog_log_path, 0644,
+		fence_wdog_log_path_show, fence_wdog_log_path_store);
+
 static struct attribute *fence_wdog_attrs[] = {
 	&fence_wdog_timer_attr.attr,
 	&fence_wdog_action_attr.attr,
 	&fence_wdog_available_actions_attr.attr,
+	&fence_wdog_log_path_attr.attr,
 	NULL,
 };
 

Comments

Kirill Tkhai July 20, 2017, 3 p.m.
On 20.07.2017 17:58, Pavel Tikhomirov wrote:
> If watchdog was triggered, that means that userspace is unreliable
> and we can't expect kernel messages are logged by userspace logger,
> thus we need ourselfs write info about watchdog triggered to special
> file configured through sysfs /sys/kernel/watchdog_log_path, so that
> users can see that watchdog was triggered but not some other event.
> 
> Note: we schedule do_halt_or_reboot to workqueue, which consists of
> pool of kernel threads. Manager thread adds more worker threads when
> all existing workers sleep, restricted by number of max_active. For
> our system_wq, max_active == 256. So we can not fear that waiting for
> fsync in our work will deadlock with writeback work.
> 
> Print timestamp in format "HH:MM:SS/YYYY-MM-DD" local time.
> 
> v2: use kernel_write helper
> v3: add timestamp to message, control string operation len
> 
> https://jira.sw.ru/browse/PSBM-54747
> Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>

Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>

> ---
>  kernel/fence-watchdog.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 101 insertions(+)
> 
> diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
> index f5c2347f..c35b3ad 100644
> --- a/kernel/fence-watchdog.c
> +++ b/kernel/fence-watchdog.c
> @@ -23,6 +23,8 @@
>  #include <linux/fence-watchdog.h>
>  #include <linux/device.h>
>  #include <linux/kmsg_dump.h>
> +#include <linux/fs.h>
> +#include <linux/string.h>
>  
>  #define MAX_U64			(~(u64)0)
>  #define MAX_JIFFIES_DELTA	(10 * 365UL * 24UL * 3600UL * HZ)
> @@ -41,11 +43,81 @@ const char *action_names[] = {"crash", "reboot", "halt", "netfilter", NULL};
>  DEFINE_VVAR(volatile unsigned long, fence_wdog_jiffies64) = MAX_U64;
>  static int fence_wdog_action = FENCE_WDOG_CRASH;
>  static atomic_t not_fenced = ATOMIC_INIT(-1);
> +static char fence_wdog_log_path[PATH_MAX] = "/fence_wdog.log";
> +
> +#define SECS_PER_MIN	60
> +#define PREFIX_LEN	39
> +
> +static int print_prefix(char *msg) {
> +	struct timeval tv;
> +	struct tm tm;
> +
> +	do_gettimeofday(&tv);
> +	time_to_tm(tv.tv_sec - sys_tz.tz_minuteswest * SECS_PER_MIN, 0, &tm);
> +
> +	return snprintf(msg, PREFIX_LEN, "[%02d:%02d:%02d/%04ld-%02d-%02d] fence-watchdog: ",
> +			tm.tm_hour, tm.tm_min, tm.tm_sec,
> +			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
> +}
> +
> +#define MSG_LEN (PREFIX_LEN + 10)
> +
> +void fence_wdog_log(void)
> +{
> +	char msg[MSG_LEN];
> +	struct file *file;
> +	int ret, len;
> +
> +	ret = print_prefix(msg);
> +	if (ret < 0)
> +		return;
> +
> +	len = strlen(msg);
> +
> +	ret = snprintf(msg + len, MSG_LEN - len, "%s\n", action_names[fence_wdog_action]);
> +	if (ret != strlen(action_names[fence_wdog_action]) + 1) {
> +		printk(KERN_EMERG"fence-watchdog: Failed to sprintf msg\n");
> +		return;
> +	}
> +
> +	file = filp_open(fence_wdog_log_path,
> +			 O_CREAT | O_WRONLY | O_APPEND | O_NOFOLLOW | O_LARGEFILE,
> +			 0600);
> +	if (IS_ERR(file)) {
> +		printk(KERN_EMERG"fence-watchdog: Failed to open log path\n");
> +		return;
> +	}
> +
> +	if (!S_ISREG(file_inode(file)->i_mode)) {
> +		printk(KERN_EMERG"fence-watchdog: Wrong type of log file\n");
> +		goto close;
> +	}
> +
> +	ret = kernel_write(file, msg, strlen(msg), file->f_pos);
> +	if (ret < 0) {
> +		printk(KERN_EMERG"fence-watchdog: Failed to write msg, ret=%d\n", ret);
> +		goto close;
> +	}
> +
> +	ret = vfs_fsync(file, 0);
> +	if (ret < 0)
> +		printk(KERN_EMERG"fence-watchdog: Failed to fsync log file ret=%d\n", ret);
> +
> +close:
> +	ret = filp_close(file, NULL);
> +	if (ret < 0)
> +		printk(KERN_EMERG"fence-watchdog: Failed to close log file ret=%d\n", ret);
> +
> +	return;
> +}
>  
>  static void do_halt_or_reboot(struct work_struct *dummy)
>  {
>  	printk(KERN_EMERG"fence-watchdog: %s\n",
>  	       action_names[fence_wdog_action]);
> +
> +	fence_wdog_log();
> +
>  	switch (fence_wdog_action) {
>  	case FENCE_WDOG_REBOOT:
>  		emergency_restart();
> @@ -166,6 +238,30 @@ static ssize_t fence_wdog_available_actions_show(struct kobject *kobj,
>  	return ret;
>  }
>  
> +static ssize_t fence_wdog_log_path_show(struct kobject *kobj,
> +		struct kobj_attribute *attr, char *buf)
> +{
> +	return snprintf(buf, PAGE_SIZE, "%s\n", fence_wdog_log_path);
> +}
> +
> +#define STORE_FORMAT_LEN 16
> +
> +static ssize_t fence_wdog_log_path_store(struct kobject *kobj,
> +		struct kobj_attribute *attr, const char *buf, size_t count)
> +{
> +	char format[STORE_FORMAT_LEN];
> +	int ret;
> +
> +	ret = snprintf(format, STORE_FORMAT_LEN, "%%%ds", PATH_MAX - 1);
> +	if (ret < 0)
> +		return ret;
> +
> +
> +	if (sscanf(buf, format, fence_wdog_log_path) != 1)
> +		return -EINVAL;
> +	return 0;
> +}
> +
>  static struct kobj_attribute fence_wdog_timer_attr =
>  	__ATTR(watchdog_timer, 0644,
>  		fence_wdog_timer_show, fence_wdog_timer_store);
> @@ -178,10 +274,15 @@ static struct kobj_attribute fence_wdog_available_actions_attr =
>  	__ATTR(watchdog_available_actions, 0644,
>  		fence_wdog_available_actions_show, NULL);
>  
> +static struct kobj_attribute fence_wdog_log_path_attr =
> +	__ATTR(watchdog_log_path, 0644,
> +		fence_wdog_log_path_show, fence_wdog_log_path_store);
> +
>  static struct attribute *fence_wdog_attrs[] = {
>  	&fence_wdog_timer_attr.attr,
>  	&fence_wdog_action_attr.attr,
>  	&fence_wdog_available_actions_attr.attr,
> +	&fence_wdog_log_path_attr.attr,
>  	NULL,
>  };
>  
>