[RHEL7,COMMIT] fs/fuse kio: don't wait read requests in case of fsync/flush

Submitted by Konstantin Khorenko on Oct. 21, 2019, 11:12 a.m.

Details

Message ID 201910211112.x9LBCtkv009394@finist-ce7.sw.ru
State New
Series "fs/fuse kio: don't wait read requests in case of fsync/flush"
Headers show

Commit Message

Konstantin Khorenko Oct. 21, 2019, 11:12 a.m.
The commit is pushed to "branch-rh7-3.10.0-1062.1.2.vz7.114.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.1.2.vz7.114.7
------>
commit b9ae1b8f967a39d1961f6d254047b59ec7a512d7
Author: Ildar Ismagilov <ildar.ismagilov@virtuozzo.com>
Date:   Mon Oct 21 14:12:55 2019 +0300

    fs/fuse kio: don't wait read requests in case of fsync/flush
    
    This patch divides KIO requests into two types: read and write.
    On fsync/flush we now wait only for write requests to complete.
    
    https://pmc.acronis.com/browse/VSTOR-11372
    
    Signed-off-by: Ildar Ismagilov <ildar.ismagilov@virtuozzo.com>
    Acked-by: Alexey Kuznetsov <kuznet@acronis.com>
---
 fs/fuse/dir.c                      |  3 +-
 fs/fuse/file.c                     |  6 ++--
 fs/fuse/fuse_i.h                   | 57 ++++++++++++++++++++++++++++++++++++++
 fs/fuse/inode.c                    |  3 ++
 fs/fuse/kio/pcs/fuse_io.c          | 15 ++++++----
 fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 18 ++++++++----
 6 files changed, 89 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4974ce801279..ccaf6058c76d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1638,7 +1638,7 @@  void fuse_set_nowrite(struct inode *inode)
 	BUG_ON(fi->writectr < 0);
 	fi->writectr += FUSE_NOWRITE;
 	spin_unlock(&fi->lock);
-	inode_dio_wait(inode);
+	fuse_write_dio_wait(fi);
 	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
 }
 
@@ -1778,6 +1778,7 @@  int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 
 	if (is_truncate) {
 		fuse_set_nowrite(inode);
+		fuse_read_dio_wait(fi);
 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e46b8844dd49..cfafff050de8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -480,11 +480,10 @@  static int fuse_release(struct inode *inode, struct file *file)
 		/*
 		 * Flush pending requests before FUSE_RELEASE makes userspace
 		 * to drop the lease of the file. Otherwise, they never finish.
-		 * Keep in mind, that in kio case fuse_sync_writes() currently
-		 * waits all type of requests (not only write).
 		 */
 		mutex_lock(&inode->i_mutex);
 		fuse_sync_writes(inode);
+		fuse_read_dio_wait(fi);
 
 		if (fi->num_openers == 0 && ff->fc->kio.op->file_close)
 			ff->fc->kio.op->file_close(ff->fc, file, inode);
@@ -3730,6 +3729,7 @@  static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 				goto out;
 
 			fuse_sync_writes(inode);
+			fuse_read_dio_wait(fi);
 		}
 	}
 
@@ -3939,6 +3939,7 @@  int fuse_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
 	int err = 0;
 
 	if (is_bad_inode(inode))
@@ -3970,6 +3971,7 @@  int fuse_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	mutex_lock(&inode->i_mutex);
 
 	fuse_sync_writes(inode);
+	fuse_read_dio_wait(fi);
 
 	if (fieinfo->fi_extents_max == 0) {
 		err = fuse_request_fiemap(inode, 0, &start, &len, NULL, fieinfo);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cd9b997b885e..092916ce8c0e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -143,6 +143,13 @@  struct fuse_inode {
 
 	/** Private kdirect io context */
 	void *private;
+
+	/** Direct IO operations */
+	struct {
+		wait_queue_head_t waitq;
+		atomic_t read_count;
+		atomic_t write_count;
+	} dio;
 };
 
 /** FUSE inode state bits */
@@ -1060,6 +1067,56 @@  void fuse_flush_writepages(struct inode *inode);
 void fuse_set_nowrite(struct inode *inode);
 void fuse_release_nowrite(struct inode *inode);
 
+static inline void fuse_read_dio_begin(struct fuse_inode *fi)
+{
+	atomic_inc(&fi->dio.read_count);
+}
+
+static inline void fuse_read_dio_end(struct fuse_inode *fi)
+{
+	if (atomic_dec_and_test(&fi->dio.read_count))
+		wake_up(&fi->dio.waitq);
+}
+
+static inline void fuse_read_dio_wait(struct fuse_inode *fi)
+{
+	wait_event(fi->dio.waitq,
+			atomic_read(&fi->dio.read_count) == 0);
+}
+
+static inline int fuse_read_dio_count(struct fuse_inode *fi)
+{
+	return atomic_read(&fi->dio.read_count);
+}
+
+static inline void fuse_write_dio_begin(struct fuse_inode *fi)
+{
+	atomic_inc(&fi->dio.write_count);
+}
+
+static inline void fuse_write_dio_end(struct fuse_inode *fi)
+{
+	if (atomic_dec_and_test(&fi->dio.write_count))
+		wake_up(&fi->dio.waitq);
+}
+
+static inline void fuse_write_dio_wait(struct fuse_inode *fi)
+{
+	wait_event(fi->dio.waitq,
+			atomic_read(&fi->dio.write_count) == 0);
+}
+
+static inline int fuse_write_dio_count(struct fuse_inode *fi)
+{
+	return atomic_read(&fi->dio.write_count);
+}
+
+static inline void fuse_dio_wait(struct fuse_inode *fi)
+{
+	fuse_read_dio_wait(fi);
+	fuse_write_dio_wait(fi);
+}
+
 /**
  * File-system tells the kernel to invalidate cache for the given node id.
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 8c0d213c07b9..fc5f066d3e4d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -109,6 +109,9 @@  static struct inode *fuse_alloc_inode(struct super_block *sb)
 	fi->writepages = RB_ROOT;
 	init_waitqueue_head(&fi->page_waitq);
 	spin_lock_init(&fi->lock);
+	init_waitqueue_head(&fi->dio.waitq);
+	atomic_set(&fi->dio.read_count, 0);
+	atomic_set(&fi->dio.write_count, 0);
 	fi->forget = fuse_alloc_forget();
 	if (!fi->forget) {
 		kmem_cache_free(fuse_inode_cachep, inode);
diff --git a/fs/fuse/kio/pcs/fuse_io.c b/fs/fuse/kio/pcs/fuse_io.c
index ed5926eb5d4d..fe70f6c02bc0 100644
--- a/fs/fuse/kio/pcs/fuse_io.c
+++ b/fs/fuse/kio/pcs/fuse_io.c
@@ -36,6 +36,7 @@  static void intreq_complete(struct pcs_int_request *ireq)
 static void on_read_done(struct pcs_fuse_req *r, size_t size)
 {
 	struct pcs_fuse_cluster *pfc = cl_from_req(r);
+	struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
 
@@ -48,7 +49,7 @@  static void on_read_done(struct pcs_fuse_req *r, size_t size)
 	}
 	fuse_stat_account(pfc->fc, KFUSE_OP_READ, ktime_sub(ktime_get(), r->exec.ireq.ts));
 	r->req.out.args[0].size = size;
-	inode_dio_end(r->req.io_inode);
+	fuse_read_dio_end(fi);
 	request_end(pfc->fc, &r->req);
 }
 
@@ -65,22 +66,24 @@  static void on_write_done(struct pcs_fuse_req *r, off_t pos, size_t size)
 {
 	struct fuse_write_out *out = &r->req.misc.write.out;
 	struct pcs_fuse_cluster *pfc = cl_from_req(r);
+	struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
 
 	out->size = size;
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
 	fuse_stat_account(pfc->fc, KFUSE_OP_WRITE, ktime_sub(ktime_get(), r->exec.ireq.ts));
-	inode_dio_end(r->req.io_inode);
+	fuse_write_dio_end(fi);
 	request_end(pfc->fc, &r->req);
 }
 
 static void on_fallocate_done(struct pcs_fuse_req *r, off_t pos, size_t size)
 {
 	struct pcs_fuse_cluster *pfc = cl_from_req(r);
+	struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
 	fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), r->exec.ireq.ts));
-	inode_dio_end(r->req.io_inode);
+	fuse_write_dio_end(fi);
 
 	request_end(pfc->fc, &r->req);
 }
@@ -88,10 +91,11 @@  static void on_fallocate_done(struct pcs_fuse_req *r, off_t pos, size_t size)
 static void on_fiemap_done(struct pcs_fuse_req *r)
 {
 	struct pcs_fuse_cluster *pfc = cl_from_req(r);
+	struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
 
-	inode_dio_end(r->req.io_inode);
+	fuse_write_dio_end(fi);
 	request_end(pfc->fc, &r->req);
 }
 
@@ -255,6 +259,7 @@  static void falloc_req_complete(struct pcs_int_request *ireq)
 	struct pcs_fuse_req * r = ireq->completion_data.priv;
 	struct pcs_dentry_info *di = get_pcs_inode(r->req.io_inode);
 	struct pcs_fuse_cluster *pfc = cl_from_req(r);
+	struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
 
 	BUG_ON(ireq->type != PCS_IREQ_NOOP);
 
@@ -264,7 +269,7 @@  static void falloc_req_complete(struct pcs_int_request *ireq)
 
 	DTRACE("do fuse_request_end req:%p op:%d err:%d\n", &r->req, r->req.in.h.opcode, r->req.out.h.error);
 	fuse_stat_account(pfc->fc, KFUSE_OP_FALLOCATE, ktime_sub(ktime_get(), ireq->ts));
-	inode_dio_end(r->req.io_inode);
+	fuse_write_dio_end(fi);
 
 	request_end(pfc->fc, &r->req);
 }
diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index 90e12bf20e41..2bda2381bb8e 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -713,6 +713,7 @@  void ireq_destroy(struct pcs_int_request *ireq)
 static int submit_size_grow(struct inode *inode, unsigned long long size)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_file *ff;
 	struct fuse_setattr_in inarg;
 	struct fuse_attr_out outarg;
@@ -720,9 +721,9 @@  static int submit_size_grow(struct inode *inode, unsigned long long size)
 	int err;
 
 	/* Caller comes here w/o i_mutex, but vfs_truncate is blocked
-	   at inode_dio_wait() see fuse_set_nowrite
+	   at fuse_write_dio_wait see fuse_set_nowrite
 	 */
-	BUG_ON(!atomic_read(&inode->i_dio_count));
+	BUG_ON(!fuse_write_dio_count(fi));
 
 	TRACE("ino:%ld size:%lld \n",inode->i_ino, size);
 
@@ -880,11 +881,14 @@  static inline int req_wait_grow_queue(struct pcs_fuse_req *r,
 				      off_t offset, size_t size)
 {
 	struct pcs_dentry_info *di = get_pcs_inode(r->req.io_inode);
+	struct fuse_inode *fi = get_fuse_inode(r->req.io_inode);
 
 	if (!kqueue_insert(di, ff, &r->req))
 		return -EIO;
 
-	inode_dio_begin(r->req.io_inode);
+	BUG_ON(r->req.in.h.opcode != FUSE_WRITE && r->req.in.h.opcode != FUSE_FALLOCATE);
+	fuse_write_dio_begin(fi);
+
 	wait_grow(r, di, offset + size);
 	return 1;
 }
@@ -901,6 +905,7 @@  static int pcs_fuse_prep_rw(struct pcs_fuse_req *r, struct fuse_file *ff)
 {
 	struct fuse_req *req = &r->req;
 	struct pcs_dentry_info *di = get_pcs_inode(req->io_inode);
+	struct fuse_inode *fi = get_fuse_inode(req->io_inode);
 	int ret;
 
 	spin_lock(&di->lock);
@@ -1005,7 +1010,10 @@  static int pcs_fuse_prep_rw(struct pcs_fuse_req *r, struct fuse_file *ff)
 
 	if (!kqueue_insert(di, ff, req))
 		return -EIO;
-	inode_dio_begin(req->io_inode);
+	if (req->in.h.opcode == FUSE_READ)
+		fuse_read_dio_begin(fi);
+	else
+		fuse_write_dio_begin(fi);
 	return 0;
 fail:
 pending:
@@ -1205,7 +1213,7 @@  static void pcs_kio_setattr_handle(struct fuse_inode *fi, struct fuse_req *req)
 	if (di->size.op == PCS_SIZE_SHRINK) {
 		BUG_ON(!mutex_is_locked(&req->io_inode->i_mutex));
 		/* wait for aio reads in flight */
-		inode_dio_wait(req->io_inode);
+		fuse_dio_wait(fi);
 
 		req->end = _pcs_shrink_end;
 	} else