[RHEL7,COMMIT] ms/ext4: move handling of list of shrinkable inodes into extent status code

Submitted by Konstantin Khorenko on April 13, 2018, 4:22 p.m.

Details

Message ID 201804131622.w3DGMLoU012414@finist_ce7.work
State New
Series "ext4: speedup shrinking non-delay extents"
Headers show

Commit Message

Konstantin Khorenko April 13, 2018, 4:22 p.m.
The commit is pushed to "branch-rh7-3.10.0-693.21.1.vz7.46.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.21.1.vz7.46.5
------>
commit 4a45bc07cbf78806045c5b19bf6e2652ce000d23
Author: Jan Kara <jack@suse.cz>
Date:   Fri Apr 13 19:22:21 2018 +0300

    ms/ext4: move handling of list of shrinkable inodes into extent status code
    
    Currently callers adding extents to extent status tree were responsible
    for adding the inode to the list of inodes with freeable extents. This
    is error prone and puts list handling in unnecessarily many places.
    
    Just add inode to the list automatically when the first non-delay extent
    is added to the tree and remove inode from the list when the last
    non-delay extent is removed.
    
    Signed-off-by: Jan Kara <jack@suse.cz>
    Signed-off-by: Theodore Ts'o <tytso@mit.edu>
    (cherry picked from commit b0dea4c1651f3cdb6d17604fa473e72cb74cdc6b)
    
    https://jira.sw.ru/browse/PSBM-83335
    
    We do face a situation when all (32) cpus on a node content on sbi->s_es_lock
    shrinking extents on a single superblock and
    shrinking extents goes very slow (180 sec in average!).
    
    crash> struct ext4_sb_info 0xffff882fcb7ca800 -p
    
      s_es_nr_inode = 3173832,
      s_es_stats = {
        es_stats_shrunk = 70,
        es_stats_cache_hits = 35182748,
        es_stats_cache_misses = 2622931,
        es_stats_scan_time = 182642303461,
        es_stats_max_scan_time = 276290979674,
    
    This patch should help a bit because it decreases sbi->s_es_nr_inode right
    in __es_shrink() as a side effect, thus cpus which comes later to __es_shrink()
    will loop less cycles.
    
    Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
    Acked-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/extents.c        |  2 --
 fs/ext4/extents_status.c | 10 ++++++----
 fs/ext4/extents_status.h |  2 --
 fs/ext4/inode.c          |  2 --
 fs/ext4/ioctl.c          |  2 --
 fs/ext4/super.c          |  1 -
 6 files changed, 6 insertions(+), 13 deletions(-)

Patch hide | download patch | download mbox

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a8675aea44ad..ccbb952482e8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4658,7 +4658,6 @@  int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 	trace_ext4_ext_map_blocks_exit(inode, flags, map,
 				       err ? err : allocated);
-	ext4_es_list_add(inode);
 	return err ? err : allocated;
 }
 
@@ -5248,7 +5247,6 @@  int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		error = ext4_fill_fiemap_extents(inode, start_blk,
 						 len_blks, fieinfo);
 	}
-	ext4_es_list_add(inode);
 	return error;
 }
 
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index d632a3e43994..77f44d382aa5 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -297,7 +297,7 @@  void ext4_es_find_delayed_extent_range(struct inode *inode,
 	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 
-void ext4_es_list_add(struct inode *inode)
+static void ext4_es_list_add(struct inode *inode)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -313,7 +313,7 @@  void ext4_es_list_add(struct inode *inode)
 	spin_unlock(&sbi->s_es_lock);
 }
 
-void ext4_es_list_del(struct inode *inode)
+static void ext4_es_list_del(struct inode *inode)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -343,7 +343,8 @@  ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 	 * We don't count delayed extent because we never try to reclaim them
 	 */
 	if (!ext4_es_is_delayed(es)) {
-		EXT4_I(inode)->i_es_shk_nr++;
+		if (!EXT4_I(inode)->i_es_shk_nr++)
+			ext4_es_list_add(inode);
 		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
 					s_es_stats.es_stats_shk_cnt);
 	}
@@ -362,7 +363,8 @@  static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 	/* Decrease the shrink counter when this es is not delayed */
 	if (!ext4_es_is_delayed(es)) {
 		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
-		EXT4_I(inode)->i_es_shk_nr--;
+		if (!--EXT4_I(inode)->i_es_shk_nr)
+			ext4_es_list_del(inode);
 		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
 					s_es_stats.es_stats_shk_cnt);
 	}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 0e6a33e81e5f..b0b78b95f481 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -150,7 +150,5 @@  static inline void ext4_es_store_pblock_status(struct extent_status *es,
 
 extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
-extern void ext4_es_list_add(struct inode *inode);
-extern void ext4_es_list_del(struct inode *inode);
 
 #endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1853dccc88c7..e633c707b119 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -500,7 +500,6 @@  int ext4_map_blocks(handle_t *handle, struct inode *inode,
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
-		ext4_es_list_add(inode);
 		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
 			map->m_pblk = ext4_es_pblock(&es) +
 					map->m_lblk - es.es_lblk;
@@ -1519,7 +1518,6 @@  static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 
 	/* Lookup extent status tree firstly */
 	if (ext4_es_lookup_extent(inode, iblock, &es)) {
-		ext4_es_list_add(inode);
 		if (ext4_es_is_hole(&es)) {
 			retval = 0;
 			down_read(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f81c7105e046..affd0a4fb440 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -79,8 +79,6 @@  static void swap_inode_data(struct inode *inode1, struct inode *inode2)
 	memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
 	ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
 	ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
-	ext4_es_list_del(inode1);
-	ext4_es_list_del(inode2);
 
 	isize = i_size_read(inode1);
 	i_size_write(inode1, i_size_read(inode2));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32709698fb23..a8aae06a9336 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1109,7 +1109,6 @@  void ext4_clear_inode(struct inode *inode)
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
-	ext4_es_list_del(inode);
 	if (EXT4_I(inode)->jinode) {
 		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
 					       EXT4_I(inode)->jinode);