[RHEL7,COMMIT] mm/vmscan: shrink tcache, tswap upfront everything else

Submitted by Konstantin Khorenko on Oct. 22, 2018, 12:34 p.m.

Details

Message ID 201810221234.w9MCYKws009533@finist-ce7.sw.ru
State New
Series "mm/vmscan: shrink tcache, tswap upfront everything else."
Headers show

Commit Message

Konstantin Khorenko Oct. 22, 2018, 12:34 p.m.
The commit is pushed to "branch-rh7-3.10.0-862.14.4.vz7.72.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.14.4.vz7.72.13
------>
commit fe87c15bf3ac7a82fae04cf336880906de606072
Author: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date:   Thu Oct 18 19:13:49 2018 +0300

    mm/vmscan: shrink tcache, tswap upfront everything else
    
    We don't want to evict page cache or anon to swap while
    there are a lot of reclaimable pages in tcache/tswap.
    Reclaim them first, and only after that go to traditional reclaim.
    
    Notes:
     1) we keep tcache and tswap generic shrinkers so if new tcache/tswap
     are generated heavily, background kswapd thread does not forget to
     shrink tcache/tswap
    
     2) in shrink_tcrutches() we don't break for_each_node_mask() cycle even
     in case shrinking first node gives us enough nr_reclaimed.
     We want to make similar memory pressure on all nodes and not to trash
     only the first one and stop.
    
    https://jira.sw.ru/browse/PSBM-89403
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
    Reviewed-by: Konstantin Khorenko <khorenko@virtuozzo.com>
---
 mm/internal.h | 32 ++++++++++++++++++++++++++++++++
 mm/tcache.c   |  4 ++--
 mm/tswap.c    |  2 +-
 mm/vmscan.c   | 43 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 77 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/mm/internal.h b/mm/internal.h
index 2072b9b04b6b..c7265beced97 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -384,6 +384,38 @@  unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 enum ttu_flags;
 struct tlbflush_unmap_batch;
 
+#ifdef CONFIG_TSWAP
+unsigned long tswap_shrink_scan(struct shrinker *shrinker,
+				struct shrink_control *sc);
+
+static inline unsigned long tswap_shrink(struct shrink_control *sc)
+{
+	unsigned long ret = tswap_shrink_scan(NULL, sc);
+	if (ret == SHRINK_STOP)
+		ret = 0;
+	return ret;
+}
+#else
+static inline unsigned long tswap_shrink(struct shrink_control *sc)
+{ return 0; }
+#endif
+
+#ifdef CONFIG_TCACHE
+unsigned long tcache_shrink_scan(struct shrinker *shrinker,
+			struct shrink_control *sc);
+
+static inline unsigned long tcache_shrink(struct shrink_control *sc)
+{
+	unsigned long ret = tcache_shrink_scan(NULL, sc);
+	if (ret == SHRINK_STOP)
+		ret = 0;
+	return ret;
+}
+#else
+static inline unsigned long tcache_shrink(struct shrink_control *sc)
+{ return 0; }
+#endif
+
+
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
diff --git a/mm/tcache.c b/mm/tcache.c
index eb9c9dea4e51..61f4a6ea26b3 100644
--- a/mm/tcache.c
+++ b/mm/tcache.c
@@ -1210,7 +1210,7 @@  static unsigned long tcache_shrink_count(struct shrinker *shrink,
 #define TCACHE_SCAN_BATCH 128UL
 static DEFINE_PER_CPU(struct page * [TCACHE_SCAN_BATCH], tcache_page_vec);
 
-static unsigned long tcache_shrink_scan(struct shrinker *shrink,
+unsigned long tcache_shrink_scan(struct shrinker *shrink,
 					struct shrink_control *sc)
 {
 	long nr_isolated, nr_reclaimed;
@@ -1218,7 +1218,7 @@  static unsigned long tcache_shrink_scan(struct shrinker *shrink,
 
 	pages = get_cpu_var(tcache_page_vec); /* Implies rcu_read_lock_sched() */
 
-	if (WARN_ON(sc->nr_to_scan > TCACHE_SCAN_BATCH))
+	if (sc->nr_to_scan > TCACHE_SCAN_BATCH)
 		sc->nr_to_scan = TCACHE_SCAN_BATCH;
 
 	nr_isolated = tcache_lru_isolate(sc->nid, pages, sc->nr_to_scan);
diff --git a/mm/tswap.c b/mm/tswap.c
index e6804dcba6e2..73b1f85d5279 100644
--- a/mm/tswap.c
+++ b/mm/tswap.c
@@ -236,7 +236,7 @@  static int tswap_evict_page(struct page *page)
 	return err;
 }
 
-static unsigned long tswap_shrink_scan(struct shrinker *shrink,
+unsigned long tswap_shrink_scan(struct shrinker *shrink,
 				       struct shrink_control *sc)
 {
 	struct tswap_lru *lru = &tswap_lru_node[sc->nid];
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2481caa15ec1..43f761c39eac 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2799,6 +2799,37 @@  static bool all_unreclaimable(struct zonelist *zonelist,
 	return true;
 }
 
+static void shrink_tcrutches(struct scan_control *scan_ctrl)
+{
+	int nid;
+	unsigned long shrunk;
+	nodemask_t *nodemask = scan_ctrl->nodemask ? : &node_online_map;
+
+	do {
+		shrunk = 0;
+
+		for_each_node_mask(nid, *nodemask) {
+			struct shrink_control sc = {
+				.gfp_mask = scan_ctrl->gfp_mask,
+				.nid = nid,
+				.memcg = NULL,
+				.nr_to_scan = scan_ctrl->nr_to_reclaim -
+					      scan_ctrl->nr_reclaimed,
+			};
+			shrunk = tcache_shrink(&sc);
+			if (!shrunk)
+				shrunk = tswap_shrink(&sc);
+			scan_ctrl->nr_reclaimed += shrunk;
+			/*
+			 * We scan all nodes even if we reclaim more than
+			 * nr_to_reclaim, we want to make similar memory
+			 * pressure on all nodes and not to trash only the
+			 * first one and stop.
+			 */
+		}
+	} while (shrunk && scan_ctrl->nr_reclaimed < scan_ctrl->nr_to_reclaim);
+}
+
 /*
  * This is the main entry point to direct page reclaim.
  *
@@ -2829,8 +2860,12 @@  static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	{KSTAT_PERF_ENTER(ttfp);
 	delayacct_freepages_start();
 
-	if (global_reclaim(sc))
+	if (global_reclaim(sc)) {
 		count_vm_event(ALLOCSTALL);
+		shrink_tcrutches(sc);
+		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
+			goto out;
+	}
 
 	do {
 		vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
@@ -3472,6 +3507,12 @@  static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 		if (sc.priority < DEF_PRIORITY - 2)
 			sc.may_writepage = 1;
 
+		shrink_tcrutches(&sc);
+		if (sc.nr_reclaimed >= sc.nr_to_reclaim &&
+			pgdat_balanced(pgdat, order, *classzone_idx))
+			goto out;
+
 		/*
 		 * Now scan the zone in the dma->highmem direction, stopping
 		 * at the last zone which needs scanning.