[RHEL7,COMMIT] ms/ext4: do not ask jbd2 to write data for delalloc buffers

Submitted by Konstantin Khorenko on Oct. 25, 2019, 10:03 p.m.

Details

Message ID 201910252203.x9PM3Ik9006931@finist-ce7.sw.ru
State New
Series "ms/ext4: do not ask jbd2 to write data for delalloc buffers"
Headers show

Commit Message

Konstantin Khorenko Oct. 25, 2019, 10:03 p.m.
The commit is pushed to "branch-rh7-3.10.0-1062.1.2.vz7.114.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1062.1.2.vz7.114.10
------>
commit 9cf08f4c6962a6beeb13696dcc50c6de0a6f9adb
Author: Jan Kara <jack@suse.cz>
Date:   Sat Oct 26 01:03:18 2019 +0300

    ms/ext4: do not ask jbd2 to write data for delalloc buffers
    
    Currently we ask jbd2 to write all dirty allocated buffers before
    committing a transaction when doing writeback of delay allocated blocks.
    However this is unnecessary since we move all pages to writeback state
    before dropping a transaction handle and then submit all the necessary
    IO. We still need the transaction commit to wait for all the outstanding
    writeback before flushing disk caches during transaction commit to avoid
    data exposure issues though. Use the new jbd2 capability and ask it to
    only wait for outstanding writeback during transaction commit when
    writing back data in ext4_writepages().
    
    Tested-by: "HUANG Weller (CM/ESW12-CN)" <Weller.Huang@cn.bosch.com>
    Signed-off-by: Jan Kara <jack@suse.cz>
    Signed-off-by: Theodore Ts'o <tytso@mit.edu>
    (cherry picked from commit ee0876bc69ee8d24d524fb2e9e41e3682aaebb11)
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 fs/ext4/ext4.h        |  3 +++
 fs/ext4/ext4_jbd2.h   | 12 +++++++++++-
 fs/ext4/inode.c       | 10 +++++++---
 fs/ext4/move_extent.c |  2 +-
 4 files changed, 22 insertions(+), 5 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 14406898f1ae..6f57c1a85878 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -574,6 +574,9 @@  enum {
 #define EXT4_GET_BLOCKS_ZERO			0x0200
 #define EXT4_GET_BLOCKS_CREATE_ZERO		(EXT4_GET_BLOCKS_CREATE |\
 					EXT4_GET_BLOCKS_ZERO)
+	/* Caller will submit data before dropping transaction handle. This
+	 * allows jbd2 to avoid submitting data before commit. */
+#define EXT4_GET_BLOCKS_IO_SUBMIT		0x0400
 
 /*
  * The bit position of these flags must not overlap with any of the
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 4688225d8b23..523b93eadc85 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -361,7 +361,8 @@  static inline int ext4_journal_force_commit(journal_t *journal)
 	return 0;
 }
 
-static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
+static inline int ext4_jbd2_inode_add_write(handle_t *handle,
+					    struct inode *inode)
 {
 	if (ext4_handle_valid(handle))
 		return jbd2_journal_inode_add_write(handle,
@@ -369,6 +370,15 @@  static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
 	return 0;
 }
 
+static inline int ext4_jbd2_inode_add_wait(handle_t *handle,
+					   struct inode *inode)
+{
+	if (ext4_handle_valid(handle))
+		return jbd2_journal_inode_add_wait(handle,
+						   EXT4_I(inode)->jinode);
+	return 0;
+}
+
 static inline void ext4_update_inode_fsync_trans(handle_t *handle,
 						 struct inode *inode,
 						 int datasync)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ae559ad227ef..4d0917fad919 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -701,7 +701,10 @@  int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		    !(flags & EXT4_GET_BLOCKS_ZERO) &&
 		    !IS_NOQUOTA(inode) &&
 		    ext4_should_order_data(inode)) {
-			ret = ext4_jbd2_file_inode(handle, inode);
+			if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
+				ret = ext4_jbd2_inode_add_wait(handle, inode);
+			else
+				ret = ext4_jbd2_inode_add_write(handle, inode);
 			if (ret)
 				return ret;
 		}
@@ -2116,7 +2119,8 @@  static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
 	 * the data was copied into the page cache.
 	 */
 	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
-			   EXT4_GET_BLOCKS_METADATA_NOFAIL;
+			   EXT4_GET_BLOCKS_METADATA_NOFAIL |
+			   EXT4_GET_BLOCKS_IO_SUBMIT;
 	dioread_nolock = ext4_should_dioread_nolock(inode);
 	if (dioread_nolock)
 		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
@@ -3663,7 +3667,7 @@  static int __ext4_block_zero_page_range(handle_t *handle,
 		err = 0;
 		mark_buffer_dirty(bh);
 		if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE))
-			err = ext4_jbd2_file_inode(handle, inode);
+			err = ext4_jbd2_inode_add_write(handle, inode);
 	}
 
 unlock:
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 51e4542933ae..fcaa0835dc47 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -403,7 +403,7 @@  move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 
 	/* Even in case of data=writeback it is reasonable to pin
 	 * inode to transaction, to prevent unexpected data loss */
-	*err = ext4_jbd2_file_inode(handle, orig_inode);
+	*err = ext4_jbd2_inode_add_write(handle, orig_inode);
 
 unlock_pages:
 	unlock_page(pagep[0]);