[Devel,vz7] fuse: fuse_writepage_locked must check for FUSE_INVALIDATE_FILES (v2)

Submitted by Maxim Patlasov on Dec. 27, 2016, 2:11 a.m.

Details

Message ID 148280462576.10259.12891881271110366594.stgit@maxim-thinkpad
State New
Series "fuse: fuse_writepage_locked must check for FUSE_INVALIDATE_FILES"
Headers show

Commit Message

Maxim Patlasov Dec. 27, 2016, 2:11 a.m.
The patch fixes another race dealing with fuse_invalidate_files,
this time when it races with truncate(2):

Thread A: the flusher performs writeback as usual:

  fuse_writepages -->
    fuse_send_writepages -->
      end_page_writeback

but before fuse_send_writepages acquires fc->lock and calls fuse_flush_writepages,
some innocent user process re-dirty-es the page.

Thread B: truncate(2) attempts to truncate (shrink) file as usual:

  fuse_do_setattr -->
    invalidate_inode_pages2

(This is possible because Thread A has not incremented fi->writectr yet.) But
invalidate_inode_pages2 finds that re-dirty-ed page and sticks in:

  invalidate_inode_pages2 -->
    fuse_launder_page -->
      fuse_writepage_locked -->
	fuse_wait_on_page_writeback

Thread A: the flusher proceeds with fuse_flush_writepages, sends write request
to userspace fuse daemon, but the daemon is not obliged to fulfill it immediately.
So, thread B waits now for thread A, while thread A waits for userspace.

Now fuse_invalidate_files steps in sticking in filemap_write_and_wait on the
page locked by Thread B (launder_page always work on a locked page). Deadlock.

The patch fixes deadlock by waking up fuse_writepage_locked after marking
files with FAIL_IMMEDIATELY flag.

Changed in v2:
  - instead of flagging "fail_immediately", let fuse_writepage_locked return
    fuse_file pointer, then the caller (fuse_launder_page) can use it for
    conditional wait on __fuse_wait_on_page_writeback_or_invalidate. This is
    important because otherwise fuse_invalidate_files may deadlock when
    launder waits for fuse writeback.

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
---
 fs/fuse/file.c |   51 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 6 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 0ffc806..34e75c2 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1963,7 +1963,8 @@  static struct fuse_file *fuse_write_file(struct fuse_conn *fc,
 }
 
 static int fuse_writepage_locked(struct page *page,
-				 struct writeback_control *wbc)
+				 struct writeback_control *wbc,
+				 struct fuse_file **ff_pp)
 {
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
@@ -1971,13 +1972,30 @@  static int fuse_writepage_locked(struct page *page,
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_req *req;
 	struct page *tmp_page;
+	struct fuse_file *ff;
+	int err = 0;
 
 	if (fuse_page_is_writeback(inode, page->index)) {
 		if (wbc->sync_mode != WB_SYNC_ALL) {
 			redirty_page_for_writepage(wbc, page);
 			return 0;
 		}
-		fuse_wait_on_page_writeback(inode, page->index);
+
+		/* we can acquire ff here because we do have locked pages here! */
+		ff = fuse_write_file(fc, get_fuse_inode(inode));
+		if (!ff)
+			goto dummy_end_page_wb_err;
+
+		/* FUSE_NOTIFY_INVAL_FILES must be able to wake us up */
+		__fuse_wait_on_page_writeback_or_invalidate(inode, ff, page->index);
+
+		if (test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state)) {
+			if (ff_pp)
+				*ff_pp = ff;
+			goto dummy_end_page_wb;
+		}
+
+		fuse_release_ff(inode, ff);
 	}
 
 	if (test_set_page_writeback(page))
@@ -1995,6 +2013,8 @@  static int fuse_writepage_locked(struct page *page,
 	req->ff = fuse_write_file(fc, fi);
 	if (!req->ff)
 		goto err_nofile;
+	if (ff_pp)
+		*ff_pp = fuse_file_get(req->ff);
 	fuse_write_fill(req, req->ff, page_offset(page), 0);
 	fuse_account_request(fc, PAGE_CACHE_SIZE);
 
@@ -2029,13 +2049,23 @@  err_free:
 err:
 	end_page_writeback(page);
 	return -ENOMEM;
+
+dummy_end_page_wb_err:
+	printk("FUSE: page under fwb dirtied on dead file\n");
+	err = -EIO;
+	/* fall through ... */
+dummy_end_page_wb:
+	if (test_set_page_writeback(page))
+		BUG();
+	end_page_writeback(page);
+	return err;
 }
 
 static int fuse_writepage(struct page *page, struct writeback_control *wbc)
 {
 	int err;
 
-	err = fuse_writepage_locked(page, wbc);
+	err = fuse_writepage_locked(page, wbc, NULL);
 	unlock_page(page);
 
 	return err;
@@ -2423,9 +2453,18 @@  static int fuse_launder_page(struct page *page)
 		struct writeback_control wbc = {
 			.sync_mode = WB_SYNC_ALL,
 		};
-		err = fuse_writepage_locked(page, &wbc);
-		if (!err)
-			fuse_wait_on_page_writeback(inode, page->index);
+		struct fuse_file *ff = NULL;
+		err = fuse_writepage_locked(page, &wbc, &ff);
+		if (!err) {
+			/*
+			 * We need to check FAIL_IMMEDIATELY because otherwise
+			 * fuse_do_setattr may stick in invalidate_inode_pages2
+			 * forever (if fuse_invalidate_files is in progress).
+			 */
+			__fuse_wait_on_page_writeback_or_invalidate(inode,
+								    ff, page->index);
+			fuse_release_ff(inode, ff);
+		}
 	}
 	return err;
 }

Comments

Dmitry Monakhov Jan. 12, 2017, 10:04 a.m.
Maxim Patlasov <mpatlasov@virtuozzo.com> writes:

> The patch fixes another race dealing with fuse_invalidate_files,
> this time when it races with truncate(2):
>
> Thread A: the flusher performs writeback as usual:
>
>   fuse_writepages -->
>     fuse_send_writepages -->
>       end_page_writeback
>
> but before fuse_send_writepages acquires fc->lock and calls fuse_flush_writepages,
> some innocent user process re-dirty-es the page.
>
> Thread B: truncate(2) attempts to truncate (shrink) file as usual:
>
>   fuse_do_setattr -->
>     invalidate_inode_pages2
>
> (This is possible because Thread A has not incremented fi->writectr yet.) But
> invalidate_inode_pages2 finds that re-dirty-ed page and sticks in:
>
>   invalidate_inode_pages2 -->
>     fuse_launder_page -->
>       fuse_writepage_locked -->
> 	fuse_wait_on_page_writeback
>
> Thread A: the flusher proceeds with fuse_flush_writepages, sends write request
> to userspace fuse daemon, but the daemon is not obliged to fulfill it immediately.
> So, thread B waits now for thread A, while thread A waits for userspace.
>
> Now fuse_invalidate_files steps in sticking in filemap_write_and_wait on the
> page locked by Thread B (launder_page always work on a locked page). Deadlock.
>
> The patch fixes deadlock by waking up fuse_writepage_locked after marking
> files with FAIL_IMMEDIATELY flag.
>
> Changed in v2:
>   - instead of flagging "fail_immediately", let fuse_writepage_locked return
>     fuse_file pointer, then the caller (fuse_launder_page) can use it for
>     conditional wait on __fuse_wait_on_page_writeback_or_invalidate. This is
>     important because otherwise fuse_invalidate_files may deadlock when
>     launder waits for fuse writeback.
ACK-by: dmonakhov@openvz.org
>
> Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
> ---
>  fs/fuse/file.c |   51 +++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 45 insertions(+), 6 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 0ffc806..34e75c2 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1963,7 +1963,8 @@ static struct fuse_file *fuse_write_file(struct fuse_conn *fc,
>  }
>  
>  static int fuse_writepage_locked(struct page *page,
> -				 struct writeback_control *wbc)
> +				 struct writeback_control *wbc,
> +				 struct fuse_file **ff_pp)
>  {
>  	struct address_space *mapping = page->mapping;
>  	struct inode *inode = mapping->host;
> @@ -1971,13 +1972,30 @@ static int fuse_writepage_locked(struct page *page,
>  	struct fuse_inode *fi = get_fuse_inode(inode);
>  	struct fuse_req *req;
>  	struct page *tmp_page;
> +	struct fuse_file *ff;
> +	int err = 0;
>  
>  	if (fuse_page_is_writeback(inode, page->index)) {
>  		if (wbc->sync_mode != WB_SYNC_ALL) {
>  			redirty_page_for_writepage(wbc, page);
>  			return 0;
>  		}
> -		fuse_wait_on_page_writeback(inode, page->index);
> +
> +		/* we can acquire ff here because we do have locked pages here! */
> +		ff = fuse_write_file(fc, get_fuse_inode(inode));
> +		if (!ff)
> +			goto dummy_end_page_wb_err;
> +
> +		/* FUSE_NOTIFY_INVAL_FILES must be able to wake us up */
> +		__fuse_wait_on_page_writeback_or_invalidate(inode, ff, page->index);
> +
> +		if (test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state)) {
> +			if (ff_pp)
> +				*ff_pp = ff;
> +			goto dummy_end_page_wb;
> +		}
> +
> +		fuse_release_ff(inode, ff);
>  	}
>  
>  	if (test_set_page_writeback(page))
> @@ -1995,6 +2013,8 @@ static int fuse_writepage_locked(struct page *page,
>  	req->ff = fuse_write_file(fc, fi);
>  	if (!req->ff)
>  		goto err_nofile;
> +	if (ff_pp)
> +		*ff_pp = fuse_file_get(req->ff);
>  	fuse_write_fill(req, req->ff, page_offset(page), 0);
>  	fuse_account_request(fc, PAGE_CACHE_SIZE);
>  
> @@ -2029,13 +2049,23 @@ err_free:
>  err:
>  	end_page_writeback(page);
>  	return -ENOMEM;
> +
> +dummy_end_page_wb_err:
> +	printk("FUSE: page under fwb dirtied on dead file\n");
> +	err = -EIO;
> +	/* fall through ... */
> +dummy_end_page_wb:
> +	if (test_set_page_writeback(page))
> +		BUG();
> +	end_page_writeback(page);
> +	return err;
>  }
>  
>  static int fuse_writepage(struct page *page, struct writeback_control *wbc)
>  {
>  	int err;
>  
> -	err = fuse_writepage_locked(page, wbc);
> +	err = fuse_writepage_locked(page, wbc, NULL);
>  	unlock_page(page);
>  
>  	return err;
> @@ -2423,9 +2453,18 @@ static int fuse_launder_page(struct page *page)
>  		struct writeback_control wbc = {
>  			.sync_mode = WB_SYNC_ALL,
>  		};
> -		err = fuse_writepage_locked(page, &wbc);
> -		if (!err)
> -			fuse_wait_on_page_writeback(inode, page->index);
> +		struct fuse_file *ff = NULL;
> +		err = fuse_writepage_locked(page, &wbc, &ff);
> +		if (!err) {
> +			/*
> +			 * We need to check FAIL_IMMEDIATELY because otherwise
> +			 * fuse_do_setattr may stick in invalidate_inode_pages2
> +			 * forever (if fuse_invalidate_files is in progress).
> +			 */
> +			__fuse_wait_on_page_writeback_or_invalidate(inode,
> +								    ff, page->index);
> +			fuse_release_ff(inode, ff);
> +		}
>  	}
>  	return err;
>  }