[rh7,2/2] ve/fs/proc: Make per-thread and per-process allocation latencies.

Submitted by Andrey Ryabinin on Feb. 19, 2018, 12:28 p.m.

Details

Message ID 20180219122837.4441-2-aryabinin@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Andrey Ryabinin Feb. 19, 2018, 12:28 p.m.
Follow-up for 6d9a9210395e ("ve/page_alloc, kstat: account allocation latencies per-task")
Make per-thread and per-process allocation latencies:

  - /proc/<pid>/vz_latency - cumulative for a thread group
  - /proc/<pid>/tasks/<pid>/vz_latency - thread-specific

During allocation we collect per-thread latency. When thread dies,
it submits its own latencies into shared task->signal.alloc_lat struct.
/proc/<pid>/vz_latency - sums allocation latencies over all live threads
plus latencies of already dead tasks from task->signal.alloc_lat.

https://jira.sw.ru/browse/PSBM-81395
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Pavel Borzenkov <Pavel.Borzenkov@acronis.com>
---
 fs/proc/base.c        | 76 +++++++++++++++++++++++++++++++++++----------------
 include/linux/sched.h |  3 ++
 kernel/exit.c         | 16 +++++++++++
 3 files changed, 71 insertions(+), 24 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5646a351c076..989121776d43 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -583,24 +583,23 @@  static void lastlat_seq_show(struct seq_file *m,
 	seq_printf(m, "%-12s %20Lu %20lu\n", name,
 			snap->totlat, snap->count);
 }
+static const char *alloc_descr[] = {
+	"allocatomic:",
+	"alloc:",
+	"allocmp:",
+};
+static const int alloc_types[] = {
+	KSTAT_ALLOCSTAT_ATOMIC,
+	KSTAT_ALLOCSTAT_LOW,
+	KSTAT_ALLOCSTAT_LOW_MP,
+};
 
-static int vz_lat_show_proc(struct seq_file *m, void *v)
+static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
+			struct pid *pid, struct task_struct *task)
 {
 	int i;
-	struct inode *inode = m->private;
-	struct task_struct *task = get_proc_task(inode);
-	static const char *alloc_descr[] = {
-		"allocatomic:",
-		"alloc:",
-		"allocmp:",
-	};
-	static const int alloc_types[] = {
-		KSTAT_ALLOCSTAT_ATOMIC,
-		KSTAT_ALLOCSTAT_LOW,
-		KSTAT_ALLOCSTAT_LOW_MP,
-	};
 
-	seq_printf(m, "%-11s %20s %20s\n",
+	seq_printf(m, "%-12s %20s %20s\n",
 			"Type", "Total_lat", "Calls");
 
 	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
@@ -609,17 +608,43 @@  static int vz_lat_show_proc(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int vz_lat_open(struct inode *inode, struct file *file)
+static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
+			struct pid *pid, struct task_struct *task)
 {
-	return single_open(file, vz_lat_show_proc, inode);
-}
+	int i;
+	unsigned long flags;
+	u64 lat[3];
+	u64 count[3];
 
-static const struct file_operations proc_vz_lat_operations = {
-	.open		= vz_lat_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+		lat[i] = task->alloc_lat[alloc_types[i]].totlat;
+		count[i] = task->alloc_lat[alloc_types[i]].count;
+	}
+
+	if (lock_task_sighand(task, &flags)) {
+		struct task_struct *t = task;
+		while_each_thread(task, t) {
+			for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+				lat[i] += t->alloc_lat[alloc_types[i]].totlat;
+				count[i] += t->alloc_lat[alloc_types[i]].count;
+			}
+		}
+		for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
+			lat[i] += t->signal->alloc_lat[alloc_types[i]].totlat;
+			count[i] += t->signal->alloc_lat[alloc_types[i]].count;
+		}
+		unlock_task_sighand(task, &flags);
+	}
+
+	seq_printf(m, "%-12s %20s %20s\n",
+			"Type", "Total_lat", "Calls");
+
+	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
+		seq_printf(m, "%-12s %20Lu %20Lu\n", alloc_descr[i],
+			lat[i], count[i]);
+
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_CGROUPS
@@ -3060,7 +3085,7 @@  static const struct pid_entry tgid_base_stuff[] = {
 	REG("aio",	  S_IRUGO|S_IWUSR, proc_aio_operations),
 #endif
 #ifdef CONFIG_VE
-	REG("vz_latency", S_IRUGO, proc_vz_lat_operations),
+	ONE("vz_latency", S_IRUGO, proc_tgid_vz_lat),
 #endif
 };
 
@@ -3410,6 +3435,9 @@  static const struct pid_entry tid_base_stuff[] = {
 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
 	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
 #endif
+#ifdef CONFIG_VE
+	ONE("vz_latency", S_IRUGO, proc_tid_vz_lat),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file * filp,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1595be347b6d..9189d1b160c8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -710,6 +710,9 @@  struct signal_struct {
 #ifdef CONFIG_TASKSTATS
 	struct taskstats *stats;
 #endif
+#ifdef CONFIG_VE
+	struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
+#endif
 #ifdef CONFIG_AUDIT
 	unsigned audit_tty;
 	unsigned audit_tty_log_passwd;
diff --git a/kernel/exit.c b/kernel/exit.c
index ce5d456b590b..1f243edca11a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -736,6 +736,20 @@  static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
+void kstat_add_dying(struct task_struct *tsk)
+{
+#ifdef CONFIG_VE
+	int i;
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
+		tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
+		tsk->signal->alloc_lat[i].count += tsk->alloc_lat[i].count;
+	}
+	spin_unlock_irq(&tsk->sighand->siglock);
+#endif
+}
+
 void do_exit(long code)
 {
 	struct task_struct *tsk = current;
@@ -808,6 +822,8 @@  void do_exit(long code)
 		exit_itimers(tsk->signal);
 		if (tsk->mm)
 			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
+	} else {
+		kstat_add_dying(tsk);
 	}
 	acct_collect(code, group_dead);
 	if (group_dead)

Comments

Kirill Tkhai Feb. 19, 2018, 2:02 p.m.
On 19.02.2018 15:28, Andrey Ryabinin wrote:
> Follow-up for 6d9a9210395e ("ve/page_alloc, kstat: account allocation latencies per-task")
> Make per-thread and per-process allocation latencies:
> 
>   - /proc/<pid>/vz_latency - cumulative for a thread group
>   - /proc/<pid>/tasks/<pid>/vz_latency - thread-specific
> 
> During allocation we collect per-thread latency. When thread dies,
> it submits its own latencies into shared task->signal.alloc_lat struct.
> /proc/<pid>/vz_latency - sums allocation latencies over all live threads
> plus latencies of already dead tasks from task->signal.alloc_lat.
> 
> https://jira.sw.ru/browse/PSBM-81395
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> Cc: Pavel Borzenkov <Pavel.Borzenkov@acronis.com>
> ---
>  fs/proc/base.c        | 76 +++++++++++++++++++++++++++++++++++----------------
>  include/linux/sched.h |  3 ++
>  kernel/exit.c         | 16 +++++++++++
>  3 files changed, 71 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 5646a351c076..989121776d43 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -583,24 +583,23 @@ static void lastlat_seq_show(struct seq_file *m,
>  	seq_printf(m, "%-12s %20Lu %20lu\n", name,
>  			snap->totlat, snap->count);
>  }
> +static const char *alloc_descr[] = {
> +	"allocatomic:",
> +	"alloc:",
> +	"allocmp:",
> +};
> +static const int alloc_types[] = {
> +	KSTAT_ALLOCSTAT_ATOMIC,
> +	KSTAT_ALLOCSTAT_LOW,
> +	KSTAT_ALLOCSTAT_LOW_MP,
> +};
>  
> -static int vz_lat_show_proc(struct seq_file *m, void *v)
> +static int proc_tid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
> +			struct pid *pid, struct task_struct *task)
>  {
>  	int i;
> -	struct inode *inode = m->private;
> -	struct task_struct *task = get_proc_task(inode);
> -	static const char *alloc_descr[] = {
> -		"allocatomic:",
> -		"alloc:",
> -		"allocmp:",
> -	};
> -	static const int alloc_types[] = {
> -		KSTAT_ALLOCSTAT_ATOMIC,
> -		KSTAT_ALLOCSTAT_LOW,
> -		KSTAT_ALLOCSTAT_LOW_MP,
> -	};
>  
> -	seq_printf(m, "%-11s %20s %20s\n",
> +	seq_printf(m, "%-12s %20s %20s\n",
>  			"Type", "Total_lat", "Calls");
>  
>  	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
> @@ -609,17 +608,43 @@ static int vz_lat_show_proc(struct seq_file *m, void *v)
>  	return 0;
>  }
>  
> -static int vz_lat_open(struct inode *inode, struct file *file)
> +static int proc_tgid_vz_lat(struct seq_file *m, struct pid_namespace *ns,
> +			struct pid *pid, struct task_struct *task)
>  {
> -	return single_open(file, vz_lat_show_proc, inode);
> -}
> +	int i;
> +	unsigned long flags;
> +	u64 lat[3];
> +	u64 count[3];

This hard-coded 3 and ARRAY_SIZE(alloc_types) below look a little bit ambiguous
for me. If someone decides to increase the array, he will have to remember about
this place. What do you think about ARRAY_SIZE(alloc_types) or something like
this above?
  
> -static const struct file_operations proc_vz_lat_operations = {
> -	.open		= vz_lat_open,
> -	.read		= seq_read,
> -	.llseek		= seq_lseek,
> -	.release	= single_release,
> -};
> +	for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> +		lat[i] = task->alloc_lat[alloc_types[i]].totlat;
> +		count[i] = task->alloc_lat[alloc_types[i]].count;
> +	}
> +
> +	if (lock_task_sighand(task, &flags)) {
> +		struct task_struct *t = task;
> +		while_each_thread(task, t) {
> +			for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> +				lat[i] += t->alloc_lat[alloc_types[i]].totlat;
> +				count[i] += t->alloc_lat[alloc_types[i]].count;
> +			}
> +		}
> +		for (i = 0; i < ARRAY_SIZE(alloc_types); i++) {
> +			lat[i] += t->signal->alloc_lat[alloc_types[i]].totlat;
> +			count[i] += t->signal->alloc_lat[alloc_types[i]].count;
> +		}
> +		unlock_task_sighand(task, &flags);
> +	}
> +
> +	seq_printf(m, "%-12s %20s %20s\n",
> +			"Type", "Total_lat", "Calls");
> +
> +	for (i = 0; i < ARRAY_SIZE(alloc_types); i++)
> +		seq_printf(m, "%-12s %20Lu %20Lu\n", alloc_descr[i],
> +			lat[i], count[i]);
> +
> +	return 0;
> +}
>  #endif
>  
>  #ifdef CONFIG_CGROUPS
> @@ -3060,7 +3085,7 @@ static const struct pid_entry tgid_base_stuff[] = {
>  	REG("aio",	  S_IRUGO|S_IWUSR, proc_aio_operations),
>  #endif
>  #ifdef CONFIG_VE
> -	REG("vz_latency", S_IRUGO, proc_vz_lat_operations),
> +	ONE("vz_latency", S_IRUGO, proc_tgid_vz_lat),
>  #endif
>  };
>  
> @@ -3410,6 +3435,9 @@ static const struct pid_entry tid_base_stuff[] = {
>  	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
>  	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
>  #endif
> +#ifdef CONFIG_VE
> +	ONE("vz_latency", S_IRUGO, proc_tid_vz_lat),
> +#endif
>  };
>  
>  static int proc_tid_base_readdir(struct file * filp,
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 1595be347b6d..9189d1b160c8 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -710,6 +710,9 @@ struct signal_struct {
>  #ifdef CONFIG_TASKSTATS
>  	struct taskstats *stats;
>  #endif
> +#ifdef CONFIG_VE
> +	struct kstat_lat_snap_struct alloc_lat[KSTAT_ALLOCSTAT_NR];
> +#endif
>  #ifdef CONFIG_AUDIT
>  	unsigned audit_tty;
>  	unsigned audit_tty_log_passwd;
> diff --git a/kernel/exit.c b/kernel/exit.c
> index ce5d456b590b..1f243edca11a 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -736,6 +736,20 @@ static void check_stack_usage(void)
>  static inline void check_stack_usage(void) {}
>  #endif
>  
> +void kstat_add_dying(struct task_struct *tsk)
> +{
> +#ifdef CONFIG_VE
> +	int i;
> +
> +	spin_lock_irq(&tsk->sighand->siglock);
> +	for (i = 0; i < KSTAT_ALLOCSTAT_NR; i++) {
> +		tsk->signal->alloc_lat[i].totlat += tsk->alloc_lat[i].totlat;
> +		tsk->signal->alloc_lat[i].count += tsk->alloc_lat[i].count;
> +	}
> +	spin_unlock_irq(&tsk->sighand->siglock);
> +#endif
> +}
> +
>  void do_exit(long code)
>  {
>  	struct task_struct *tsk = current;
> @@ -808,6 +822,8 @@ void do_exit(long code)
>  		exit_itimers(tsk->signal);
>  		if (tsk->mm)
>  			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
> +	} else {
> +		kstat_add_dying(tsk);
>  	}
>  	acct_collect(code, group_dead);
>  	if (group_dead)
>