[Devel,RHEL7,COMMIT] fuse: queue work for aio_complete (v3)

Submitted by Konstantin Khorenko on Nov. 3, 2016, 8:57 a.m.

Details

Message ID 201611030857.uA38vVPd029803@finist_cl7.x64_64.work.ct
State New
Series "fuse: queue work for aio_complete (v3)"
Headers show

Commit Message

Konstantin Khorenko Nov. 3, 2016, 8:57 a.m.
The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.19.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.36.1.vz7.19.5
------>
commit 1a258c698ada93f8cbca12ed6717093f849ab5b4
Author: Maxim Patlasov <mpatlasov@virtuozzo.com>
Date:   Thu Nov 3 12:57:30 2016 +0400

    fuse: queue work for aio_complete (v3)
    
    We cannot simply call aio_complete() from fuse_aio_complete() becase rhel7
    doesn't call __fput from a separate context as it used to be on rhel6.
    Otherwise a deadlock for single-threaded fuse daemon can happen: if process
    who generated AIO is already close(2) the file, fput() called from aio_complete
    will be the last fput(); hence, it will send flush_mtime (or release) request
    to userspace who is busy now writing ACK for given AIO to in-kernel fuse.
    
    Changed in v2 (thanks azaitsev@ for suggestion):
     - call aio_complete() with increased f_count; this allows to avoid extra
       context switch when the user still holds the file opened.
    Changed in v3 (thanks pborzenkov@ for fixing):
     - avoid io->iocb use-after-free ('io->iocb' struct is already freed at
       the time fuse_fput_routine() is called. Use 'io->file' member to get
       proper 'struct file' instead.)
    
    https://jira.sw.ru/browse/PSBM-54547
    
    Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
---
 fs/fuse/dev.c    | 11 ++++++++++-
 fs/fuse/file.c   | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/fuse/fuse_i.h |  1 +
 3 files changed, 61 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0b6d000..d6029ca 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -25,6 +25,7 @@  MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
 
 static struct kmem_cache *fuse_req_cachep;
+extern struct workqueue_struct *fuse_fput_wq;
 
 static struct fuse_conn *fuse_get_conn(struct file *file)
 {
@@ -2243,11 +2244,16 @@  static struct miscdevice fuse_miscdevice = {
 int __init fuse_dev_init(void)
 {
 	int err = -ENOMEM;
+
+	fuse_fput_wq = create_workqueue("fuse_fput");
+	if (!fuse_fput_wq)
+		goto out;
+
 	fuse_req_cachep = kmem_cache_create("fuse_request",
 					    sizeof(struct fuse_req),
 					    0, 0, NULL);
 	if (!fuse_req_cachep)
-		goto out;
+		goto out_destroq_wq;
 
 	err = misc_register(&fuse_miscdevice);
 	if (err)
@@ -2257,6 +2263,8 @@  int __init fuse_dev_init(void)
 
  out_cache_clean:
 	kmem_cache_destroy(fuse_req_cachep);
+ out_destroq_wq:
+	destroy_workqueue(fuse_fput_wq);
  out:
 	return err;
 }
@@ -2265,4 +2273,5 @@  void fuse_dev_cleanup(void)
 {
 	misc_deregister(&fuse_miscdevice);
 	kmem_cache_destroy(fuse_req_cachep);
+	destroy_workqueue(fuse_fput_wq);
 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 49ee3de..796638a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -19,6 +19,13 @@ 
 #include <linux/falloc.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/virtinfo.h>
+#include <linux/file.h>
+
+struct workqueue_struct *fuse_fput_wq;
+static DEFINE_SPINLOCK(fuse_fput_lock);
+static LIST_HEAD(fuse_fput_head);
+static void fuse_fput_routine(struct work_struct *);
+static DECLARE_WORK(fuse_fput_work, fuse_fput_routine);
 
 static const struct file_operations fuse_direct_io_file_operations;
 static void fuse_sync_writes(struct inode *inode);
@@ -761,6 +768,29 @@  static void fuse_release_user_pages(struct fuse_req *req, int write)
 	}
 }
 
+static void fuse_fput_routine(struct work_struct *data)
+{
+	spin_lock(&fuse_fput_lock);
+	while (likely(!list_empty(&fuse_fput_head))) {
+		struct fuse_io_priv *io = list_entry(fuse_fput_head.next,
+						     struct fuse_io_priv,
+						     list);
+		struct file *file = io->file;
+
+		list_del(&io->list);
+		spin_unlock(&fuse_fput_lock);
+
+		/* hack: __fput() is not visible outside fs/file_table.c */
+		BUG_ON(atomic_long_read(&file->f_count));
+		atomic_long_inc(&file->f_count);
+		fput(file);
+
+		kfree(io);
+		spin_lock(&fuse_fput_lock);
+	}
+	spin_unlock(&fuse_fput_lock);
+}
+
 /**
  * In case of short read, the caller sets 'pos' to the position of
  * actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -792,6 +822,7 @@  static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 
 	if (!left) {
 		long res;
+		struct file *file = io->iocb->ki_filp;
 
 		if (io->err)
 			res = io->err;
@@ -801,7 +832,7 @@  static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 			res = io->bytes < 0 ? io->size : io->bytes;
 
 			if (!is_sync_kiocb(io->iocb)) {
-				struct path *path = &io->iocb->ki_filp->f_path;
+				struct path *path = &file->f_path;
 				struct inode *inode = path->dentry->d_inode;
 				struct fuse_conn *fc = get_fuse_conn(inode);
 				struct fuse_inode *fi = get_fuse_inode(inode);
@@ -819,8 +850,25 @@  static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 			       io, err, pos, io->err, io->bytes,
 			       io->size, is_sync_kiocb(io->iocb), res,
 			       io->iocb->ki_opcode, io->iocb->ki_pos);
+
+		/* We have to bump f_count here to avoid deadlock for
+		 * single-threaded fuse daemon: if process who generated
+		 * AIO is already close(2) the file, fput() called from
+		 * aio_complete will be the last fput(); hence, it will send
+		 * flush_mtime (or release) request to userspace who is busy
+		 * now writing ACK for given AIO to in-kernel fuse */
+		get_file(file);
+		BUG_ON(io->file != io->iocb->ki_filp);
 		aio_complete(io->iocb, res, 0);
-		kfree(io);
+
+		if (unlikely(atomic_long_dec_and_test(&file->f_count))) {
+			spin_lock(&fuse_fput_lock);
+			list_add(&io->list, &fuse_fput_head);
+			spin_unlock(&fuse_fput_lock);
+			queue_work(fuse_fput_wq, &fuse_fput_work);
+		} else {
+			kfree(io);
+		}
 	}
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 24aaeb7..1d24bf6 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -277,6 +277,7 @@  struct fuse_io_priv {
 	int err;
 	struct kiocb *iocb;
 	struct file *file;
+	struct list_head list;
 };
 
 /**