[RFC,3/3] fs/fuse kio: implement iostat statistics info

Submitted by Pavel Butsykin on May 21, 2019, 3:27 p.m.

Details

Message ID 20190521152757.27649-4-pbutsykin@virtuozzo.com
State New
Series "Statistics for vstorage kernel fast-path"
Headers show

Commit Message

Pavel Butsykin May 21, 2019, 3:27 p.m.
This statistical information contains general information about all
kio requests for the last STAT_TIMER_PERIOD time. Made by analogy
with /.vstorage.info/iostat statistics of the user-mode client.

example:
read         109      445645      26415104         4096    4096    4096
write        0        0           0                0       0       0
sync         0                    0

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
---
 fs/fuse/kio/pcs/fuse_stat.c | 257 ++++++++++++++++++++++++++++++++++++++++++--
 fs/fuse/kio/pcs/fuse_stat.h |  29 ++++-
 fs/fuse/kio/pcs/pcs_map.c   |   1 +
 3 files changed, 278 insertions(+), 9 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/kio/pcs/fuse_stat.c b/fs/fuse/kio/pcs/fuse_stat.c
index da8428ebe600..4a7b4bf0d328 100644
--- a/fs/fuse/kio/pcs/fuse_stat.c
+++ b/fs/fuse/kio/pcs/fuse_stat.c
@@ -1,11 +1,219 @@ 
 #include <net/sock.h>
 #include <linux/types.h>
 
+#include "pcs_req.h"
 #include "fuse_stat.h"
 #include "pcs_cluster.h"
 
 extern struct super_block *fuse_control_sb;
 
+#define CURR(__stat) atomic_read(&__stat->io.idx)
+#define LAST(__stat) !CURR(__stat)
+
+static inline void fuse_val_stat_update(struct fuse_val_stat *s, u64 val)
+{
+	preempt_disable();
+	if (!__this_cpu_read(s->events)) {
+		__this_cpu_write(s->val_min, val);
+		__this_cpu_write(s->val_max, val);
+	} else if (val < __this_cpu_read(s->val_min))
+		__this_cpu_write(s->val_min, val);
+	else if (val > __this_cpu_read(s->val_max))
+		__this_cpu_write(s->val_max, val);
+	this_cpu_add(s->val_total, val);
+	this_cpu_inc(s->events);
+	preempt_enable();
+}
+
+static inline unsigned long long fuse_evt_rate(struct fuse_val_stat const *s, unsigned period)
+{
+	return DIV_ROUND_UP(s->events, period);
+}
+
+static inline unsigned long long fuse_val_rate(struct fuse_val_stat const *s, unsigned period)
+{
+	return DIV_ROUND_UP(s->val_total, period);
+}
+
+static inline unsigned long long fuse_val_aver(struct fuse_val_stat const *s)
+{
+	return s->events ? s->val_total / s->events : 0;
+}
+
+#define EVT_RATE(s)   fuse_evt_rate(&(s), STAT_TIMER_PERIOD)
+#define VAL_RATE(s)   fuse_val_rate(&(s), STAT_TIMER_PERIOD)
+#define VAL_AVER(s)   fuse_val_aver(&(s))
+
+static inline void fuse_val_stat_sum(struct fuse_val_stat *s,
+				     struct fuse_val_stat *add)
+{
+	if (!add->events)
+		return;
+
+	if (!s->events)
+		*s = *add;
+	else {
+		s->val_min = min(s->val_min, add->val_min);
+		s->val_max = max(s->val_max, add->val_max);
+		s->val_total += add->val_total;
+		s->events += add->events;
+	}
+}
+
+static inline void stat_period_switch(struct pcs_fuse_stat *stat)
+{
+	atomic_set(&stat->io.idx, !atomic_read(&stat->io.idx));
+}
+
+static inline void stat_period_seqcount_read_barrier(struct pcs_fuse_stat *stat)
+{
+	while(read_seqretry(&stat->seqlock, read_seqbegin(&stat->seqlock)));
+}
+
+static void stat_period_read(struct pcs_fuse_io_stat __percpu *in,
+			     struct pcs_fuse_io_stat *out)
+{
+	int cpu;
+	bool inited = false;
+
+	for_each_possible_cpu(cpu) {
+		struct pcs_fuse_io_stat *stat = per_cpu_ptr(in, cpu);
+		if (inited) {
+			fuse_val_stat_sum(&out->read_bytes,
+					  &stat->read_bytes);
+			fuse_val_stat_sum(&out->write_bytes,
+					  &stat->write_bytes);
+			fuse_val_stat_sum(&out->flush_cnt,
+					  &stat->flush_cnt);
+		} else {
+			*out = *stat;
+			inited = true;
+		}
+	}
+}
+
+static int pcs_fuse_iostat_show(struct seq_file *m, void *v)
+{
+	struct inode *inode = m->private;
+	struct pcs_fuse_stat *stat;
+	struct pcs_fuse_io_stat last_stat, glob_stat;
+
+	if (!inode)
+		return 0;
+
+	mutex_lock(&fuse_mutex);
+	stat = inode->i_private;
+	if (!stat)
+		goto out;
+
+	seq_printf(m, "# operation  ops/sec  bytes/sec   total            req_min req_avg req_max (bytes)\n");
+
+	spin_lock(&stat->lock);
+	stat_period_read(stat->io.period[LAST(stat)], &last_stat);
+	glob_stat = stat->io.glob;
+	spin_unlock(&stat->lock);
+
+	seq_printf(m, "read         %-8llu %-11llu %-16llu %-6llu  %-6llu  %-6llu\n",
+		   EVT_RATE(last_stat.read_bytes), VAL_RATE(last_stat.read_bytes),
+		   glob_stat.read_bytes.val_total, last_stat.read_bytes.val_min,
+		   VAL_AVER(last_stat.read_bytes), last_stat.read_bytes.val_max);
+	seq_printf(m, "write        %-8llu %-11llu %-16llu %-6llu  %-6llu  %-6llu\n",
+		   EVT_RATE(last_stat.write_bytes), VAL_RATE(last_stat.write_bytes),
+		   glob_stat.write_bytes.val_total, last_stat.write_bytes.val_min,
+		   VAL_AVER(last_stat.write_bytes), last_stat.write_bytes.val_max);
+	seq_printf(m, "sync         %-8llu             %-16llu\n",
+		   EVT_RATE(last_stat.flush_cnt), glob_stat.flush_cnt.events);
+out:
+	mutex_unlock(&fuse_mutex);
+	return 0;
+}
+
+static int pcs_fuse_iostat_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pcs_fuse_iostat_show, inode);
+}
+
/* File operations backing the read-only "iostat" control file. */
static const struct file_operations pcs_fuse_iostat_ops = {
	.owner   = THIS_MODULE,
	.open    = pcs_fuse_iostat_open,
	.read    = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
+
+static inline
+struct fuse_val_stat *req_stat_entry(struct pcs_fuse_io_stat *io, u32 type)
+{
+	switch (type) {
+		case PCS_CS_READ_RESP:
+			return &io->read_bytes;
+		case PCS_CS_WRITE_SYNC_RESP:
+		case PCS_CS_WRITE_RESP:
+			return &io->write_bytes;
+		case PCS_CS_SYNC_RESP:
+			return &io->flush_cnt;
+		default:
+			break;
+	}
+	WARN_ON_ONCE(1);
+	return NULL;
+}
+
+void pcs_fuse_stat_io_count(struct pcs_int_request *ireq, struct pcs_msg *resp)
+{
+	struct pcs_cluster_core *cc = ireq->cc;
+	struct pcs_fuse_stat *stat = &cc->stat;
+	struct pcs_cs_iohdr *h = (struct pcs_cs_iohdr *)msg_inline_head(resp);
+	u64 size = h->hdr.type != PCS_CS_SYNC_RESP ? ireq->iochunk.size : 0;
+	struct fuse_val_stat *se;
+
+	write_seqlock(&stat->seqlock);
+	se = req_stat_entry(stat->io.period[CURR(stat)], h->hdr.type);
+	BUG_ON(!se);
+	fuse_val_stat_update(se, size);
+	write_sequnlock(&stat->seqlock);
+}
+
/*
 * Fold a finished period's counters into the running global totals.
 *
 * This is the same merge as fuse_val_stat_sum(); delegate to it instead
 * of duplicating the min/max/total/events logic.  That also picks up its
 * empty-source guard: the old open-coded copy merged even when
 * up->events == 0, which let an idle period's zeroed val_min clamp the
 * accumulated minimum down to 0.
 */
static inline void fuse_io_stat_up(struct fuse_val_stat *stat,
				   struct fuse_val_stat *up)
{
	fuse_val_stat_sum(stat, up);
}
+
+static void pcs_fuse_stat_work(struct work_struct *w)
+{
+	struct pcs_cluster_core *cc =
+		container_of(w, struct pcs_cluster_core, stat.work.work);
+	struct pcs_fuse_stat *stat = &cc->stat;
+	struct pcs_fuse_io_stat last_stat;
+	int cpu;
+
+	spin_lock(&stat->lock);
+	for_each_possible_cpu(cpu) {
+		struct pcs_fuse_io_stat *last =
+			per_cpu_ptr(stat->io.period[LAST(stat)], cpu);
+		memset(last, 0, sizeof(*last));
+	}
+	stat_period_switch(stat);
+	stat_period_seqcount_read_barrier(stat);
+
+	stat_period_read(stat->io.period[LAST(stat)], &last_stat);
+
+	fuse_io_stat_up(&stat->io.glob.read_bytes, &last_stat.read_bytes);
+	fuse_io_stat_up(&stat->io.glob.write_bytes, &last_stat.write_bytes);
+	fuse_io_stat_up(&stat->io.glob.flush_cnt, &last_stat.flush_cnt);
+	spin_unlock(&stat->lock);
+
+	mod_delayed_work(cc->wq, &cc->stat.work, STAT_TIMER_PERIOD * HZ);
+}
+
 static struct dentry *fuse_kio_add_dentry(struct dentry *parent,
 					  struct fuse_conn *fc,
 					  const char *name,
@@ -50,12 +258,24 @@  static void fuse_kio_rm_dentry(struct dentry *dentry)
 
 void pcs_fuse_stat_init(struct pcs_fuse_stat *stat)
 {
-	struct fuse_conn *fc =
-		container_of(stat, struct pcs_fuse_cluster, cc.stat)->fc;
+	struct pcs_cluster_core *cc =
+		container_of(stat, struct pcs_cluster_core, stat);
+	struct fuse_conn *fc = container_of(cc,struct pcs_fuse_cluster, cc)->fc;
 
 	mutex_lock(&fuse_mutex);
 	if (!fuse_control_sb)
-		goto out;
+		goto fail1;
+
+	atomic_set(&stat->io.idx, 0);
+	stat->io.period[CURR(stat)] = alloc_percpu(struct pcs_fuse_io_stat);
+	if (!stat->io.period[CURR(stat)])
+		goto fail1;
+
+	stat->io.period[LAST(stat)] = alloc_percpu(struct pcs_fuse_io_stat);
+	if (!stat->io.period[LAST(stat)])
+		goto fail2;
+
+	memset(&stat->io.glob, 0, sizeof(stat->io.glob));
 
 	stat->kio_stat = fuse_kio_add_dentry(fc->conn_ctl, fc, "kio_stat",
 					     S_IFDIR | S_IXUSR, 2,
@@ -63,21 +283,42 @@  void pcs_fuse_stat_init(struct pcs_fuse_stat *stat)
 					     &simple_dir_operations, fc);
 	if (!stat->kio_stat) {
 		pr_err("kio: can't create kio stat directory");
-		goto out;
+		goto fail3;
 	}
 
-	/* Stat initialize */
-out:
+	seqlock_init(&stat->seqlock);
+	spin_lock_init(&stat->lock);
+	INIT_DELAYED_WORK(&stat->work, pcs_fuse_stat_work);
+	mod_delayed_work(cc->wq, &stat->work, STAT_TIMER_PERIOD * HZ);
+
+	stat->iostat = fuse_kio_add_dentry(stat->kio_stat, fc, "iostat",
+					   S_IFREG | S_IRUSR, 1, NULL,
+					   &pcs_fuse_iostat_ops, stat);
+	mutex_unlock(&fuse_mutex);
+	return;
+
+fail3:
+	free_percpu(stat->io.period[LAST(stat)]);
+fail2:
+	free_percpu(stat->io.period[CURR(stat)]);
+fail1:
 	mutex_unlock(&fuse_mutex);
 }
 
 void pcs_fuse_stat_fini(struct pcs_fuse_stat *stat)
 {
+	mutex_lock(&fuse_mutex);
 	if (!stat->kio_stat)
 		return;
 
-	mutex_lock(&fuse_mutex);
-	if (fuse_control_sb)
+	if (fuse_control_sb) {
+		if (stat->iostat)
+			fuse_kio_rm_dentry(stat->iostat);
 		fuse_kio_rm_dentry(stat->kio_stat);
+	}
 	mutex_unlock(&fuse_mutex);
+
+	cancel_delayed_work_sync(&stat->work);
+	free_percpu(stat->io.period[LAST(stat)]);
+	free_percpu(stat->io.period[CURR(stat)]);
 }
diff --git a/fs/fuse/kio/pcs/fuse_stat.h b/fs/fuse/kio/pcs/fuse_stat.h
index 14687ffd83f2..6c8a03acfd3c 100644
--- a/fs/fuse/kio/pcs/fuse_stat.h
+++ b/fs/fuse/kio/pcs/fuse_stat.h
@@ -1,11 +1,38 @@ 
 #ifndef _FUSE_STAT_H_
 #define _FUSE_STAT_H_ 1
 
/* Length of one statistics period, in seconds. */
#define STAT_TIMER_PERIOD 5

/* One counter: event count plus value total/min/max for one period. */
struct fuse_val_stat {
	u64 events;	/* number of accounted requests */
	u64 val_total;	/* sum of accounted values (bytes for r/w) */
	u64 val_min;	/* smallest value seen; meaningless if events == 0 */
	u64 val_max;	/* largest value seen; meaningless if events == 0 */
};

/* Per-CPU slice of I/O counters; cacheline-aligned to avoid false sharing. */
struct pcs_fuse_io_stat {
	struct fuse_val_stat read_bytes;
	struct fuse_val_stat write_bytes;
	struct fuse_val_stat flush_cnt;
} ____cacheline_aligned;

struct pcs_fuse_stat {
	struct {
		struct pcs_fuse_io_stat glob;		/* running global totals */
		struct pcs_fuse_io_stat __percpu *period[2]; /* active + retired buffers */
		atomic_t idx;				/* index of the active buffer */
	} io;
	seqlock_t seqlock;	/* lets the worker wait out in-flight updaters */
	spinlock_t lock;	/* serializes period switch vs. readers */
	struct delayed_work work;	/* periodic rollover (STAT_TIMER_PERIOD) */

	struct dentry *kio_stat;	/* "kio_stat" control directory */
	struct dentry *iostat;		/* "iostat" file; may be NULL */
};
 
 void pcs_fuse_stat_init(struct pcs_fuse_stat *stat);
 void pcs_fuse_stat_fini(struct pcs_fuse_stat *stat);
 
-#endif /* _FUSE_STAT_H_ */
\ No newline at end of file
+void pcs_fuse_stat_io_count(struct pcs_int_request *ireq, struct pcs_msg *resp);
+
+#endif /* _FUSE_STAT_H_ */
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 2e30f0461101..b795a45cc376 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -2701,6 +2701,7 @@  static int commit_sync_info(struct pcs_int_request *req,
 				max_iolat = srec->sync.ts_io;
 		}
 	}
+	pcs_fuse_stat_io_count(req, resp);
 	cs_log_io_times(req, resp, max_iolat);
 
 	evaluate_dirty_status(m);