[Devel,rh7,19/21] ms/mm: memcontrol: revert use of root_mem_cgroup res_counter

Submitted by Andrey Ryabinin on Nov. 1, 2016, 9:21 a.m.

Details

Message ID 1477992100-18263-19-git-send-email-aryabinin@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Andrey Ryabinin Nov. 1, 2016, 9:21 a.m.
From: Johannes Weiner <hannes@cmpxchg.org>

Dave Hansen reports a massive scalability regression in an uncontained
page fault benchmark with more than 30 concurrent threads, which he
bisected down to 05b843012335 ("mm: memcontrol: use root_mem_cgroup
res_counter") and pin-pointed on res_counter spinlock contention.

That change relied on the per-cpu charge caches to mostly swallow the
res_counter costs, but it's apparent that the caches don't scale yet.

Revert memcg back to bypassing res_counters on the root level in order
to restore performance for uncontained workloads.

Reported-by: Dave Hansen <dave@sr71.net>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

https://jira.sw.ru/browse/PSBM-51558
(cherry picked from commit ce00a967377baadf2481521e131771adc7652856)
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 mm/memcontrol.c | 82 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 43 insertions(+), 39 deletions(-)

Patch hide | download patch | download mbox

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 11c06d3..cb7657e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2814,13 +2814,14 @@  done:
 
 static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	if (!mem_cgroup_is_root(memcg)) {
-		unsigned long bytes = nr_pages * PAGE_SIZE;
+	unsigned long bytes = nr_pages * PAGE_SIZE;
 
-		res_counter_uncharge(&memcg->res, bytes);
-		if (do_swap_account)
-			res_counter_uncharge(&memcg->memsw, bytes);
-	}
+	if (mem_cgroup_is_root(memcg))
+		return;
+
+	res_counter_uncharge(&memcg->res, bytes);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, bytes);
 }
 
 /*
@@ -4183,7 +4184,6 @@  out:
 	return retval;
 }
 
-
 static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
 					       enum mem_cgroup_stat_index idx)
 {
@@ -4199,6 +4199,30 @@  static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
 	return val;
 }
 
+static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
+{
+	u64 val;
+
+	if (!mem_cgroup_is_root(memcg)) {
+		if (!swap)
+			return res_counter_read_u64(&memcg->res, RES_USAGE);
+		else
+			return res_counter_read_u64(&memcg->memsw, RES_USAGE);
+	}
+
+	/*
+	 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
+	 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
+	 */
+	val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
+
+	if (swap)
+		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
+
+	return val << PAGE_SHIFT;
+}
+
 void mem_cgroup_fill_meminfo(struct mem_cgroup *memcg, struct meminfo *mi)
 {
 	int nid;
@@ -4236,30 +4260,6 @@  int mem_cgroup_enough_memory(struct mem_cgroup *memcg, long pages)
 	return free < pages ? -ENOMEM : 0;
 }
 
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
-{
-	u64 val;
-
-	if (!mem_cgroup_is_root(memcg)) {
-		if (!swap)
-			return res_counter_read_u64(&memcg->res, RES_USAGE);
-		else
-			return res_counter_read_u64(&memcg->memsw, RES_USAGE);
-	}
-
-	/*
-	 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
-	 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
-	 */
-	val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
-	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
-
-	if (swap)
-		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
-
-	return val << PAGE_SHIFT;
-}
-
 static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
 			       struct file *file, char __user *buf,
 			       size_t nbytes, loff_t *ppos)
@@ -6429,7 +6429,7 @@  static void __mem_cgroup_clear_mc(void)
 		/* uncharge swap account from the old cgroup */
 		if (!mem_cgroup_is_root(mc.from))
 			res_counter_uncharge(&mc.from->memsw,
-						PAGE_SIZE * mc.moved_swap);
+					     PAGE_SIZE * mc.moved_swap);
 
 		for (i = 0; i < mc.moved_swap; i++)
 			css_put(&mc.from->css);
@@ -6811,7 +6811,8 @@  void mem_cgroup_uncharge_swap(swp_entry_t entry)
 	rcu_read_lock();
 	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+		if (!mem_cgroup_is_root(memcg))
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_swap_statistics(memcg, false);
 		css_put(&memcg->css);
 	}
@@ -6970,12 +6971,15 @@  static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 {
 	unsigned long flags;
 
-	if (nr_mem)
-		res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
-	if (nr_memsw)
-		res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
-
-	memcg_oom_recover(memcg);
+	if (!mem_cgroup_is_root(memcg)) {
+		if (nr_mem)
+			res_counter_uncharge(&memcg->res,
+					     nr_mem * PAGE_SIZE);
+		if (nr_memsw)
+			res_counter_uncharge(&memcg->memsw,
+					     nr_memsw * PAGE_SIZE);
+		memcg_oom_recover(memcg);
+	}
 
 	local_irq_save(flags);
 	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);