[RHEL7,COMMIT] fs/fuse kio: implement iostat statistics info

Submitted by Konstantin Khorenko on May 29, 2019, 2:24 p.m.

Details

Message ID 201905291424.x4TEOgCS009519@finist-ce7.sw.ru
State New
Series "Statistics for vstorage kernel fast-path"
Headers show

Commit Message

Konstantin Khorenko May 29, 2019, 2:24 p.m.
The commit is pushed to "branch-rh7-3.10.0-957.12.2.vz7.96.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.12.2.vz7.96.7
------>
commit 2b6a2845c63c274e6a252bf913f5766b04694b9a
Author: Pavel Butsykin <pbutsykin@virtuozzo.com>
Date:   Wed May 29 17:24:41 2019 +0300

    fs/fuse kio: implement iostat statistics info
    
    This statistical information contains general information about all
    kio requests for the last STAT_TIMER_PERIOD time. Made by analogy
    with /.vstorage.info/iostat statistics of user-mode client.
    
    example:
    read         109      445645      26415104         4096    4096    4096
    write        0        0           0                0       0       0
    sync         0                    0
    
    Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
    
    =====================
    Patchset description:
    
    Statistics for vstorage kernel fast-path
    
    Vstorage provides plenty of statistics information via 'vstorage -c cl mnt-top',
    but when it comes to the kernel fast-path, it doesn't work. This is because the mnt-top
    command collects information from .vstorage.info directory, where vstorage-mount
    provides a lot of different statistical information in the form of files, but
    it was not implemented for the fast-path.
    
    This patch-set is aimed at implementing support for some of the statistics
    information files from .vstorage.info:
      cs_stats
      fstat
      fstat_lat
      iostat
      requests
    Which will be located at "/sys/fs/fuse/connections/*mnt_id*/kio_stat/". This
    will be enough to maintain vstorage mnt-top command for fast-path mount points.
    
    https://pmc.acronis.com/browse/VSTOR-20979
    
    Acked-by: Alexey Kuznetsov <kuznet@virtuozzo.com>
    
    Pavel Butsykin (14):
      fs/fuse: add conn_ctl to fuse_conn struct
      fs/fuse kio: create sysfs stat directory
      fs/fuse kio: implement iostat statistics info
      fs/fuse kio: make interface pcs_kio_file_list() for listing kio files
      fs/fuse kio: make interface pcs_kio_req_list() for listing kio reqs
      fs/fuse kio: add retry counter for kio requests
      fs/fuse kio: implement pcs_strerror()
      fs/fuse kio: implement requests statistics info
      fs/fuse kio: implement fstat statistics info
      fs/fuse kio: implement fstat_lat statistics info
      fs/fuse kio: remove useless pcs_cs initialization
      fs/fuse kio: implement cs statistics accounting
      fs/fuse kio: convert rpc state id to string
      fs/fuse kio: implement cs_stats statistics info
---
 fs/fuse/kio/pcs/fuse_stat.c | 288 ++++++++++++++++++++++++++++++++++++++++++--
 fs/fuse/kio/pcs/fuse_stat.h |  37 +++++-
 fs/fuse/kio/pcs/pcs_map.c   |   1 +
 3 files changed, 317 insertions(+), 9 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/kio/pcs/fuse_stat.c b/fs/fuse/kio/pcs/fuse_stat.c
index da8428ebe600..fc08a0294b3c 100644
--- a/fs/fuse/kio/pcs/fuse_stat.c
+++ b/fs/fuse/kio/pcs/fuse_stat.c
@@ -1,11 +1,236 @@ 
 #include <net/sock.h>
 #include <linux/types.h>
 
+#include "pcs_req.h"
 #include "fuse_stat.h"
 #include "pcs_cluster.h"
 
 extern struct super_block *fuse_control_sb;
 
+#define CURR_IDX(__iostat) atomic_read(&(__iostat)->idx)
+#define LAST_IDX(__iostat) !CURR_IDX(__iostat)
+
+#define CURR(__iostat) period[CURR_IDX(__iostat)]
+#define LAST(__iostat) period[LAST_IDX(__iostat)]
+
+#define STAT_SWITCH(__stat) \
+	atomic_set(&(__stat)->idx, !atomic_read(&(__stat)->idx));
+
+#define STAT_SEQ_READ_BARRIER(__stat)           \
+	while(read_seqretry(&(__stat)->seqlock, \
+			    read_seqbegin(&(__stat)->seqlock)));
+
+
+static inline void fuse_val_stat_update(struct fuse_val_stat *s, u64 val)
+{
+	preempt_disable();
+	if (!__this_cpu_read(s->events)) {
+		__this_cpu_write(s->val_min, val);
+		__this_cpu_write(s->val_max, val);
+	} else if (val < __this_cpu_read(s->val_min))
+		__this_cpu_write(s->val_min, val);
+	else if (val > __this_cpu_read(s->val_max))
+		__this_cpu_write(s->val_max, val);
+	this_cpu_add(s->val_total, val);
+	this_cpu_inc(s->events);
+	preempt_enable();
+}
+
+static inline unsigned long long fuse_evt_rate(struct fuse_val_stat const *s, unsigned period)
+{
+	return DIV_ROUND_UP(s->events, period);
+}
+
+static inline unsigned long long fuse_val_rate(struct fuse_val_stat const *s, unsigned period)
+{
+	return DIV_ROUND_UP(s->val_total, period);
+}
+
+static inline unsigned long long fuse_val_aver(struct fuse_val_stat const *s)
+{
+	return s->events ? s->val_total / s->events : 0;
+}
+
+static inline unsigned long long fuse_val_cnt_min(struct fuse_val_stat *s1,
+						  struct fuse_val_stat *s2)
+{
+	return min(s1->val_min, s2->val_min);
+}
+
+static inline unsigned long long fuse_val_cnt_max(struct fuse_val_stat *s1,
+						  struct fuse_val_stat *s2)
+{
+	return max(s1->val_max, s2->val_max);
+}
+
+#define EVT_RATE(s) fuse_evt_rate(&(s), STAT_TIMER_PERIOD)
+#define VAL_RATE(s) fuse_val_rate(&(s), STAT_TIMER_PERIOD)
+#define VAL_AVER(s) fuse_val_aver(&(s))
+#define CNT_MIN(_s1, _s2) fuse_val_cnt_min(&(_s1), &(_s2))
+#define CNT_MAX(_s1, _s2) fuse_val_cnt_max(&(_s1), &(_s2))
+
+static inline void fuse_val_stat_sum(struct fuse_val_stat *s,
+				     struct fuse_val_stat *add)
+{
+	if (!add->events)
+		return;
+
+	if (!s->events)
+		*s = *add;
+	else {
+		s->val_min = min(s->val_min, add->val_min);
+		s->val_max = max(s->val_max, add->val_max);
+		s->val_total += add->val_total;
+		s->events += add->events;
+	}
+}
+
+static void stat_period_read(struct pcs_fuse_io_stat __percpu *in,
+			     struct pcs_fuse_io_stat *out)
+{
+	int cpu;
+	bool inited = false;
+
+	for_each_possible_cpu(cpu) {
+		struct pcs_fuse_io_stat *stat = per_cpu_ptr(in, cpu);
+		if (inited) {
+			fuse_val_stat_sum(&out->read_bytes,
+					  &stat->read_bytes);
+			fuse_val_stat_sum(&out->write_bytes,
+					  &stat->write_bytes);
+			fuse_val_stat_sum(&out->flush_cnt,
+					  &stat->flush_cnt);
+		} else {
+			*out = *stat;
+			inited = true;
+		}
+	}
+}
+
+static void fuse_iostat_up(struct pcs_fuse_io_stat_sync *iostat)
+{
+	struct pcs_fuse_io_stat lstat;
+	int cpu;
+
+	spin_lock(&iostat->lock);
+	for_each_possible_cpu(cpu) {
+		struct pcs_fuse_io_stat *last =
+			per_cpu_ptr(iostat->LAST(iostat), cpu);
+		memset(last, 0, sizeof(*last));
+	}
+	STAT_SWITCH(iostat);
+	STAT_SEQ_READ_BARRIER(iostat);
+
+	stat_period_read(iostat->LAST(iostat), &lstat);
+
+	fuse_val_stat_sum(&iostat->glob.read_bytes, &lstat.read_bytes);
+	fuse_val_stat_sum(&iostat->glob.write_bytes, &lstat.write_bytes);
+	fuse_val_stat_sum(&iostat->glob.flush_cnt, &lstat.flush_cnt);
+	spin_unlock(&iostat->lock);
+}
+
+static int pcs_fuse_iostat_show(struct seq_file *m, void *v)
+{
+	struct inode *inode = m->private;
+	struct pcs_fuse_stat *stat;
+	struct pcs_fuse_io_stat_sync *iostat;
+	struct pcs_fuse_io_stat last_stat, glob_stat;
+
+	if (!inode)
+		return 0;
+
+	mutex_lock(&fuse_mutex);
+	stat = inode->i_private;
+	if (!stat)
+		goto out;
+
+	seq_printf(m, "# operation  ops/sec  bytes/sec   total            req_min req_avg req_max (bytes)\n");
+
+	iostat = &stat->io;
+
+	spin_lock(&iostat->lock);
+	stat_period_read(iostat->LAST(iostat), &last_stat);
+	glob_stat = iostat->glob;
+	spin_unlock(&iostat->lock);
+
+	seq_printf(m, "read         %-8llu %-11llu %-16llu %-6llu  %-6llu  %-6llu\n",
+		   EVT_RATE(last_stat.read_bytes), VAL_RATE(last_stat.read_bytes),
+		   glob_stat.read_bytes.val_total, last_stat.read_bytes.val_min,
+		   VAL_AVER(last_stat.read_bytes), last_stat.read_bytes.val_max);
+	seq_printf(m, "write        %-8llu %-11llu %-16llu %-6llu  %-6llu  %-6llu\n",
+		   EVT_RATE(last_stat.write_bytes), VAL_RATE(last_stat.write_bytes),
+		   glob_stat.write_bytes.val_total, last_stat.write_bytes.val_min,
+		   VAL_AVER(last_stat.write_bytes), last_stat.write_bytes.val_max);
+	seq_printf(m, "sync         %-8llu             %-16llu\n",
+		   EVT_RATE(last_stat.flush_cnt), glob_stat.flush_cnt.events);
+out:
+	mutex_unlock(&fuse_mutex);
+	return 0;
+}
+
+static int pcs_fuse_iostat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pcs_fuse_iostat_show, inode);
+}
+
+static const struct file_operations pcs_fuse_iostat_ops = {
+	.owner   = THIS_MODULE,
+	.open    = pcs_fuse_iostat_open,
+	.read    = seq_read,
+	.llseek	 = seq_lseek,
+	.release = single_release,
+};
+
+static inline
+struct fuse_val_stat *req_stat_entry(struct pcs_fuse_io_stat *io, u32 type)
+{
+	switch (type) {
+		case PCS_CS_READ_RESP:
+			return &io->read_bytes;
+		case PCS_CS_WRITE_SYNC_RESP:
+		case PCS_CS_WRITE_RESP:
+			return &io->write_bytes;
+		case PCS_CS_SYNC_RESP:
+			return &io->flush_cnt;
+		default:
+			break;
+	}
+	WARN_ON_ONCE(1);
+	return NULL;
+}
+
+static void fuse_iostat_count(struct pcs_fuse_io_stat_sync *iostat,
+			      u64 size, u32 type)
+{
+	struct fuse_val_stat *se;
+
+	write_seqlock(&iostat->seqlock);
+	se = req_stat_entry(iostat->CURR(iostat), type);
+	BUG_ON(!se);
+	fuse_val_stat_update(se, size);
+	write_sequnlock(&iostat->seqlock);
+}
+
+void pcs_fuse_stat_io_count(struct pcs_int_request *ireq, struct pcs_msg *resp)
+{
+	struct pcs_fuse_stat *stat = &ireq->cc->stat;
+	struct pcs_cs_iohdr *h = (struct pcs_cs_iohdr *)msg_inline_head(resp);
+	u64 size = h->hdr.type != PCS_CS_SYNC_RESP ? ireq->iochunk.size : 0;
+
+	fuse_iostat_count(&stat->io, size, h->hdr.type);
+}
+
+static void pcs_fuse_stat_work(struct work_struct *w)
+{
+	struct pcs_cluster_core *cc =
+		container_of(w, struct pcs_cluster_core, stat.work.work);
+	struct pcs_fuse_stat *stat = &cc->stat;
+
+	fuse_iostat_up(&stat->io);
+
+	mod_delayed_work(cc->wq, &cc->stat.work, STAT_TIMER_PERIOD * HZ);
+}
+
 static struct dentry *fuse_kio_add_dentry(struct dentry *parent,
 					  struct fuse_conn *fc,
 					  const char *name,
@@ -48,14 +273,45 @@  static void fuse_kio_rm_dentry(struct dentry *dentry)
 	dput(dentry);
 }
 
+int pcs_fuse_io_stat_alloc(struct pcs_fuse_io_stat_sync *iostat)
+{
+	atomic_set(&iostat->idx, 0);
+	iostat->CURR(iostat) = alloc_percpu(struct pcs_fuse_io_stat);
+	if (!iostat->CURR(iostat))
+		return -ENOMEM;
+
+	iostat->LAST(iostat) = alloc_percpu(struct pcs_fuse_io_stat);
+	if (!iostat->LAST(iostat))
+		goto fail;
+
+	memset(&iostat->glob, 0, sizeof(iostat->glob));
+
+	seqlock_init(&iostat->seqlock);
+	spin_lock_init(&iostat->lock);
+	return 0;
+fail:
+	free_percpu(iostat->CURR(iostat));
+	return -ENOMEM;
+}
+
+void pcs_fuse_io_stat_free(struct pcs_fuse_io_stat_sync *iostat)
+{
+	free_percpu(iostat->LAST(iostat));
+	free_percpu(iostat->CURR(iostat));
+}
+
 void pcs_fuse_stat_init(struct pcs_fuse_stat *stat)
 {
-	struct fuse_conn *fc =
-		container_of(stat, struct pcs_fuse_cluster, cc.stat)->fc;
+	struct pcs_cluster_core *cc =
+		container_of(stat, struct pcs_cluster_core, stat);
+	struct fuse_conn *fc = container_of(cc,struct pcs_fuse_cluster, cc)->fc;
 
 	mutex_lock(&fuse_mutex);
 	if (!fuse_control_sb)
-		goto out;
+		goto fail1;
+
+	if (pcs_fuse_io_stat_alloc(&stat->io))
+		goto fail1;
 
 	stat->kio_stat = fuse_kio_add_dentry(fc->conn_ctl, fc, "kio_stat",
 					     S_IFDIR | S_IXUSR, 2,
@@ -63,21 +319,37 @@  void pcs_fuse_stat_init(struct pcs_fuse_stat *stat)
 					     &simple_dir_operations, fc);
 	if (!stat->kio_stat) {
 		pr_err("kio: can't create kio stat directory");
-		goto out;
+		goto fail2;
 	}
 
-	/* Stat initialize */
-out:
+	INIT_DELAYED_WORK(&stat->work, pcs_fuse_stat_work);
+	mod_delayed_work(cc->wq, &stat->work, STAT_TIMER_PERIOD * HZ);
+
+	stat->iostat = fuse_kio_add_dentry(stat->kio_stat, fc, "iostat",
+					   S_IFREG | S_IRUSR, 1, NULL,
+					   &pcs_fuse_iostat_ops, stat);
+	mutex_unlock(&fuse_mutex);
+	return;
+
+fail2:
+	pcs_fuse_io_stat_free(&stat->io);
+fail1:
 	mutex_unlock(&fuse_mutex);
 }
 
 void pcs_fuse_stat_fini(struct pcs_fuse_stat *stat)
 {
+	mutex_lock(&fuse_mutex);
 	if (!stat->kio_stat)
 		return;
 
-	mutex_lock(&fuse_mutex);
-	if (fuse_control_sb)
+	if (fuse_control_sb) {
+		if (stat->iostat)
+			fuse_kio_rm_dentry(stat->iostat);
 		fuse_kio_rm_dentry(stat->kio_stat);
+	}
 	mutex_unlock(&fuse_mutex);
+
+	cancel_delayed_work_sync(&stat->work);
+	pcs_fuse_io_stat_free(&stat->io);
 }
diff --git a/fs/fuse/kio/pcs/fuse_stat.h b/fs/fuse/kio/pcs/fuse_stat.h
index 14687ffd83f2..edbc2c7b8361 100644
--- a/fs/fuse/kio/pcs/fuse_stat.h
+++ b/fs/fuse/kio/pcs/fuse_stat.h
@@ -1,11 +1,46 @@ 
 #ifndef _FUSE_STAT_H_
 #define _FUSE_STAT_H_ 1
 
+#define STAT_TIMER_PERIOD 5
+
+struct pcs_msg;
+struct pcs_int_request;
+
+struct fuse_val_stat {
+	u64 events;
+	u64 val_total;
+	u64 val_min;
+	u64 val_max;
+};
+
+struct pcs_fuse_io_stat {
+	struct fuse_val_stat read_bytes;
+	struct fuse_val_stat write_bytes;
+	struct fuse_val_stat flush_cnt;
+} ____cacheline_aligned;
+
+struct pcs_fuse_io_stat_sync {
+	struct pcs_fuse_io_stat glob;
+	struct pcs_fuse_io_stat __percpu *period[2];
+	atomic_t idx;
+	seqlock_t seqlock;
+	spinlock_t lock;
+};
+
 struct pcs_fuse_stat {
+	struct pcs_fuse_io_stat_sync io;
+	struct delayed_work work;
+
 	struct dentry *kio_stat;
+	struct dentry *iostat;
 };
 
 void pcs_fuse_stat_init(struct pcs_fuse_stat *stat);
 void pcs_fuse_stat_fini(struct pcs_fuse_stat *stat);
 
-#endif /* _FUSE_STAT_H_ */
\ No newline at end of file
+void pcs_fuse_stat_io_count(struct pcs_int_request *ireq, struct pcs_msg *resp);
+
+int pcs_fuse_io_stat_alloc(struct pcs_fuse_io_stat_sync *iostat);
+void pcs_fuse_io_stat_free(struct pcs_fuse_io_stat_sync *iostat);
+
+#endif /* _FUSE_STAT_H_ */
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index fb9e4ddae8a2..7b2adfd4dfc7 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -2711,6 +2711,7 @@  static int commit_sync_info(struct pcs_int_request *req,
 				max_iolat = srec->sync.ts_io;
 		}
 	}
+	pcs_fuse_stat_io_count(req, resp);
 	cs_log_io_times(req, resp, max_iolat);
 
 	evaluate_dirty_status(m);