[RHEL7,COMMIT] fs/fuse kio: share bandwidth/IOPS for prometheus stats

Submitted by Konstantin Khorenko on Nov. 11, 2019, 1:20 p.m.

Details

Message ID 201911111320.xABDK73m007478@finist-ce7.sw.ru
State New
Series "fs/fuse kio: share bandwidth/IOPS for prometheus stats"
Headers show

Commit Message

Konstantin Khorenko Nov. 11, 2019, 1:20 p.m.
The commit is pushed to "branch-rh7-3.10.0-1062.4.1.vz7.115.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.4.1.vz7.115.12
------>
commit 98927dceeb4bdcc9dc8aca846e28633a1ad77a93
Author: Sergey Lysanov <slysanov@virtuozzo.com>
Date:   Thu Nov 7 16:49:33 2019 +0300

    fs/fuse kio: share bandwidth/IOPS for prometheus stats
    
    Pass the following counters from KIO to prometheus through the debugfs "prometheus" file:
     - reads_total
     - read_bytes_total
     - writes_total
     - write_bytes_total
     - flushes_total
    
    Compatibility with the previous version of the prometheus proto is
    preserved: the size of the histograms is unchanged.
    
    https://pmc.acronis.com/browse/VSTOR-20601
    
    Signed-off-by: Sergey Lysanov <slysanov@virtuozzo.com>
    Reviewed-by: Ildar Ismagilov <ildar.ismagilov@virtuozzo.com>
---
 fs/fuse/fuse_i.h                       |   3 +-
 fs/fuse/kio/pcs/fuse_io.c              |  14 +++--
 fs/fuse/kio/pcs/fuse_ktrace.h          |   2 +-
 fs/fuse/kio/pcs/fuse_prometheus_prot.h |  35 ++++++++---
 fs/fuse/kio/pcs/pcs_cs.c               |   2 +-
 fs/fuse/kio/pcs/pcs_fuse_kdirect.c     | 109 ++++++++++++++++++++-------------
 6 files changed, 103 insertions(+), 62 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 092916ce8c0e..1e9ba641922e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1183,7 +1183,8 @@  struct fuse_req *fuse_generic_request_alloc(struct fuse_conn *fc,
 					    struct kmem_cache *cachep,
 					    unsigned npages, gfp_t flags);
 
-void fuse_stat_account(struct fuse_conn * fc, int op, ktime_t val);
+void fuse_stat_observe(struct fuse_conn *fc, int op, ktime_t val);
+void fuse_stat_account(struct fuse_conn *fc, int op, u64 val);
 
 
 int fuse_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/fuse/kio/pcs/fuse_io.c b/fs/fuse/kio/pcs/fuse_io.c
index fe70f6c02bc0..cbee5f2eff06 100644
--- a/fs/fuse/kio/pcs/fuse_io.c
+++ b/fs/fuse/kio/pcs/fuse_io.c
@@ -47,7 +47,8 @@  static void on_read_done(struct pcs_fuse_req *r, size_t size)
 			clear_highpage(r->exec.io.bvec[i].bv_page);
 		}
 	}
-	fuse_stat_account(pfc->fc, KFUSE_OP_READ, ktime_sub(ktime_get(), r->exec.ireq.ts));
+	fuse_stat_observe(pfc->fc, KFUSE_OP_READ, ktime_sub(ktime_get(), r->exec.ireq.ts));
+	fuse_stat_account(pfc->fc, KFUSE_OP_READ, size);
 	r->req.out.args[0].size = size;
 	fuse_read_dio_end(fi);
 	request_end(pfc->fc, &r->req);
@@ -58,7 +59,8 @@  static void on_sync_done(struct pcs_fuse_req *r)
 	struct pcs_fuse_cluster *pfc = cl_from_req(r);
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
-	fuse_stat_account(pfc->fc, KFUSE_OP_FSYNC, ktime_sub(ktime_get(), r->exec.ireq.ts));
+	fuse_stat_observe(pfc->fc, KFUSE_OP_FSYNC, ktime_sub(ktime_get(), r->exec.ireq.ts));
+	fuse_stat_account(pfc->fc, KFUSE_OP_FSYNC, 0);
 	request_end(pfc->fc, &r->req);
 }
 
@@ -71,7 +73,8 @@  static void on_write_done(struct pcs_fuse_req *r, off_t pos, size_t size)
 	out->size = size;
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
-	fuse_stat_account(pfc->fc, KFUSE_OP_WRITE, ktime_sub(ktime_get(), r->exec.ireq.ts));
+	fuse_stat_observe(pfc->fc, KFUSE_OP_WRITE, ktime_sub(ktime_get(), r->exec.ireq.ts));
+	fuse_stat_account(pfc->fc, KFUSE_OP_WRITE, size);
 	fuse_write_dio_end(fi);
 	request_end(pfc->fc, &r->req);
 }
@@ -82,7 +85,8 @@  static void on_fallocate_done(struct pcs_fuse_req *r, off_t pos, size_t size)
 	struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
-	fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), r->exec.ireq.ts));
+	fuse_stat_observe(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), r->exec.ireq.ts));
+	fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, 0);
 	fuse_write_dio_end(fi);
 
 	request_end(pfc->fc, &r->req);
@@ -268,7 +272,7 @@  static void falloc_req_complete(struct pcs_int_request *ireq)
 	spin_unlock(&di->kq_lock);
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
-	fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), ireq->ts));
+	fuse_stat_observe(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), ireq->ts));
 	fuse_write_dio_end(fi);
 
 	request_end(pfc->fc, &r->req);
diff --git a/fs/fuse/kio/pcs/fuse_ktrace.h b/fs/fuse/kio/pcs/fuse_ktrace.h
index 7cce9e26959a..45a4064aa6d0 100644
--- a/fs/fuse/kio/pcs/fuse_ktrace.h
+++ b/fs/fuse/kio/pcs/fuse_ktrace.h
@@ -19,7 +19,7 @@  struct fuse_ktrace
 	struct dentry				*dir;
 	unsigned long __percpu			*ovfl;
 	struct dentry				*prometheus_dentry;
-	struct kfuse_histogram * __percpu	*prometheus_hist;
+	struct kfuse_metrics __percpu	*prometheus_metrics;
 	u8 * __percpu				buf;
 };
 
diff --git a/fs/fuse/kio/pcs/fuse_prometheus_prot.h b/fs/fuse/kio/pcs/fuse_prometheus_prot.h
index e39f2337268f..2959b1e7ff14 100644
--- a/fs/fuse/kio/pcs/fuse_prometheus_prot.h
+++ b/fs/fuse/kio/pcs/fuse_prometheus_prot.h
@@ -8,23 +8,38 @@ 
 #define KFUSE_OP_CS_WRITE_ZERO	4
 #define KFUSE_OP_CS_FIEMAP	5
 
-#define KFUSE_OP_READ		6
-#define KFUSE_OP_WRITE		7
-#define KFUSE_OP_FSYNC		8
-#define KFUSE_OP_FALLOCATE	9
-#define KFUSE_OP_MAX		10
+#define KFUSE_OP_READ				6
+#define KFUSE_OP_WRITE				7
+#define KFUSE_OP_FSYNC				8
+#define KFUSE_OP_FALLOCATE			9
+#define KFUSE_OP_UNALIGNED_WRITE	10
+#define KFUSE_OP_UNALIGNED_READ		11
+#define KFUSE_OP_MAX				12
 
+/* Histograms contain latencies of all operations except unaligned
+ * writes and reads
+ */
+#define KFUSE_HISTOGRAM_MAX	10
 #define KFUSE_PROM_MAX		(9*5 + 2)
 
-struct kfuse_stat_rec
-{
+struct kfuse_histogram {
 	u64	buckets[KFUSE_PROM_MAX];
 	u64	sum;
 };
 
-struct kfuse_histogram
-{
-	struct kfuse_stat_rec	metrics[KFUSE_OP_MAX];
+struct kfuse_counter {
+	u64 events;
+	u64 val_total;
+};
+
+struct kfuse_metrics {
+	/* Histograms are compatible with old version of proto
+	 * between userspace and kio where the counters were skipped.
+	 */
+	struct kfuse_histogram	hists[KFUSE_HISTOGRAM_MAX];
+
+	/* Counters were added in 3.5 release */
+	struct kfuse_counter	cnts[KFUSE_OP_MAX];
 };
 
 #endif /* __FUSE_PROMETHEUS_PROT__ */
diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index c6ff456c59c2..e29039d03fea 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -289,7 +289,7 @@  void cs_log_io_times(struct pcs_int_request * ireq, struct pcs_msg * resp, unsig
 	struct pcs_cs_iohdr * h = (struct pcs_cs_iohdr *)msg_inline_head(resp);
 	int reqt = h->hdr.type != PCS_CS_SYNC_RESP ? ireq->iochunk.cmd : PCS_REQ_T_SYNC;
 
-	fuse_stat_account(fc, reqt, ktime_sub(ktime_get(), ireq->ts_sent));
+	fuse_stat_observe(fc, reqt, ktime_sub(ktime_get(), ireq->ts_sent));
 	if (fc->ktrace && fc->ktrace_level >= LOG_TRACE) {
 		int n = 1;
 		struct fuse_trace_hdr * t;
diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index 2bda2381bb8e..98dd0cf3ddd9 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -1350,17 +1350,8 @@  static void fuse_trace_free(struct fuse_ktrace *tr)
 	if (tr->prometheus_dentry) {
 		debugfs_remove(tr->prometheus_dentry);
 	}
-	if (tr->prometheus_hist) {
-		int cpu;
-
-		for_each_possible_cpu(cpu) {
-			struct kfuse_histogram ** histp;
-			histp = per_cpu_ptr(tr->prometheus_hist, cpu);
-			if (*histp)
-				free_page((unsigned long)*histp);
-		}
-		free_percpu(tr->prometheus_hist);
-	}
+	if (tr->prometheus_metrics)
+		free_percpu(tr->prometheus_metrics);
 	free_percpu(tr->buf);
 	debugfs_remove(tr->dir);
 	kfree(tr);
@@ -1408,20 +1399,20 @@  static struct rchan_callbacks relay_callbacks = {
 	.remove_buf_file	= remove_buf_file_callback,
 };
 
-void fuse_stat_account(struct fuse_conn * fc, int op, ktime_t val)
+void fuse_stat_observe(struct fuse_conn *fc, int op, ktime_t val)
 {
 	struct fuse_ktrace * tr = fc->ktrace;
 
-	BUG_ON(op >= KFUSE_OP_MAX);
+	BUG_ON(op >= KFUSE_HISTOGRAM_MAX);
 
 	if (tr) {
-		struct kfuse_histogram ** histp;
+		struct kfuse_metrics *metrics;
 		int cpu;
 
 		cpu = get_cpu();
-		histp = per_cpu_ptr(tr->prometheus_hist, cpu);
-		if (histp && *histp) {
-			struct kfuse_stat_rec * rec = (*histp)->metrics + op;
+		metrics = per_cpu_ptr(tr->prometheus_metrics, cpu);
+		if (metrics) {
+			struct kfuse_histogram *rec = &metrics->hists[op];
 			int bucket;
 			unsigned long long lat = ktime_to_ns(val)/1000;
 
@@ -1445,6 +1436,27 @@  void fuse_stat_account(struct fuse_conn * fc, int op, ktime_t val)
 	}
 }
 
+void fuse_stat_account(struct fuse_conn *fc, int op, u64 val)
+{
+	struct fuse_ktrace *tr = fc->ktrace;
+
+	BUG_ON(op >= KFUSE_OP_MAX);
+
+	if (tr) {
+		struct kfuse_metrics *metrics;
+		int cpu;
+
+		cpu = get_cpu();
+		metrics = per_cpu_ptr(tr->prometheus_metrics, cpu);
+		if (metrics) {
+			struct kfuse_counter *cnt = &metrics->cnts[op];
+			cnt->val_total += val;
+			++cnt->events;
+		}
+		put_cpu();
+	}
+}
+
 static int prometheus_file_open(struct inode *inode, struct file *filp)
 {
 	struct fuse_ktrace * tr = inode->i_private;
@@ -1465,48 +1477,57 @@  static int prometheus_file_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+/* NOTE: old versions of userspace could read only histograms */
 static ssize_t prometheus_file_read(struct file *filp,
 				    char __user *buffer,
 				    size_t count,
 				    loff_t *ppos)
 {
-	struct fuse_ktrace * tr = filp->private_data;
-	struct kfuse_histogram * hist;
+	struct fuse_ktrace *tr = filp->private_data;
+	struct kfuse_metrics *stats;
 	int cpu;
 
-	if (*ppos >= sizeof(struct kfuse_histogram))
+	if (*ppos >= sizeof(struct kfuse_metrics))
 		return 0;
-	if (*ppos + count > sizeof(struct kfuse_histogram))
-		count = sizeof(struct kfuse_histogram) - *ppos;
+	if (*ppos + count > sizeof(struct kfuse_metrics))
+		count = sizeof(struct kfuse_metrics) - *ppos;
 
-	hist = (void*)get_zeroed_page(GFP_KERNEL);
-	if (!hist)
+	stats = (void *)get_zeroed_page(GFP_KERNEL);
+	BUILD_BUG_ON(sizeof(*stats) > PAGE_SIZE);
+	if (!stats)
 		return -ENOMEM;
 
-	if (!tr->prometheus_hist)
+	if (!tr->prometheus_metrics)
 		return -EINVAL;
 
 	for_each_possible_cpu(cpu) {
-		struct kfuse_histogram ** histp;
+		struct kfuse_metrics *m;
 
-		histp = per_cpu_ptr(tr->prometheus_hist, cpu);
-		if (histp && *histp) {
+		m = per_cpu_ptr(tr->prometheus_metrics, cpu);
+		if (m) {
 			int i, k;
-			for (i = 0; i < KFUSE_OP_MAX; i++) {
+			/* aggregate histograms from each cpu */
+			for (i = 0; i < KFUSE_HISTOGRAM_MAX; i++) {
 				for (k = 0; k < KFUSE_PROM_MAX; k++) {
-					hist->metrics[i].buckets[k] += (*histp)->metrics[i].buckets[k];
+					stats->hists[i].buckets[k] += m->hists[i].buckets[k];
 				}
-				hist->metrics[i].sum += (*histp)->metrics[i].sum;
+				stats->hists[i].sum += m->hists[i].sum;
+			}
+
+			/* aggregate counters from each cpu */
+			for (i = 0; i < KFUSE_OP_MAX; i++) {
+				stats->cnts[i].events += m->cnts[i].events;
+				stats->cnts[i].val_total += m->cnts[i].val_total;
 			}
 		}
 	}
 
-	if (copy_to_user(buffer, (char*)hist + *ppos, count))
+	if (copy_to_user(buffer, (char *)stats + *ppos, count))
 		count = -EFAULT;
 	else
 		*ppos += count;
 
-	free_page((unsigned long)hist);
+	free_page((unsigned long)stats);
 	return count;
 }
 
@@ -1522,7 +1543,8 @@  static int fuse_ktrace_setup(struct fuse_conn * fc)
 	struct fuse_ktrace * tr = NULL;
 	struct fuse_ktrace * old_tr;
 	struct dentry * dir;
-	struct kfuse_histogram * __percpu * hist;
+	struct kfuse_metrics __percpu * metrics;
+	int cpu;
 	char name[16];
 
 	if (!fuse_trace_root)
@@ -1554,19 +1576,18 @@  static int fuse_ktrace_setup(struct fuse_conn * fc)
 
 	tr->prometheus_dentry = debugfs_create_file("prometheus", S_IFREG|0444, dir, tr,
 						    &prometheus_file_operations);
-	hist = (void*)alloc_percpu(void *);
-	if (hist) {
-		int cpu;
 
-		BUILD_BUG_ON(sizeof(struct kfuse_histogram) > PAGE_SIZE);
+	ret = -ENOMEM;
 
-		for_each_possible_cpu(cpu) {
-			struct kfuse_histogram ** histp;
-			histp = per_cpu_ptr(hist, cpu);
-			*histp = (void*)get_zeroed_page(GFP_KERNEL);
-		}
-		tr->prometheus_hist = hist;
+	metrics = alloc_percpu(struct kfuse_metrics);
+	if (!metrics)
+		goto err;
+	for_each_possible_cpu(cpu) {
+		struct kfuse_metrics *m;
+		m = per_cpu_ptr(metrics, cpu);
+		memset(m, 0, sizeof(*m));
 	}
+	tr->prometheus_metrics = metrics;
 
 	tr->buf = __alloc_percpu(KTRACE_LOG_BUF_SIZE, 16);