[RHEL8,COMMIT] mm, memcg: add oom counter to memory.stat memcgroup file

Submitted by Konstantin Khorenko on Oct. 5, 2020, 10:18 a.m.

Details

Message ID 202010051018.095AIeKI142623@finist-co8.sw.ru
State New
Series "mm, memcg: add oom counter to memory.stat memcgroup file"
Headers show

Commit Message

Konstantin Khorenko Oct. 5, 2020, 10:18 a.m.
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.11
------>
commit 67a87abe411ef0fbea509e04c9115f23fbb072f2
Author: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date:   Mon Oct 5 13:18:40 2020 +0300

    mm, memcg: add oom counter to memory.stat memcgroup file
    
    Add oom counter to memory.stat file. oom shows the number of oom kills
    triggered due to the cgroup's memory limit. total_oom shows the total sum of
    oom kills triggered due to the cgroup's and its sub-groups' memory limits.
    
    memory.stat in the root cgroup counts global oom kills.
    
    E.g:
     # mkdir /sys/fs/cgroup/memory/test/
     # echo 100M > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
     # echo 100M > /sys/fs/cgroup/memory/test/memory.memsw.limit_in_bytes
     # echo $$ > /sys/fs/cgroup/memory/test/tasks
     # ./vm-scalability/usemem -O 200M
     # grep oom /sys/fs/cgroup/memory/test/memory.stat
       oom 1
       total_oom 1
     # echo -1 > /sys/fs/cgroup/memory/test/memory.memsw.limit_in_bytes
     # echo -1 > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
     # ./vm-scalability/usemem -O 1000G
     # grep oom /sys/fs/cgroup/memory/memory.stat
        oom 1
        total_oom 2
    
    https://jira.sw.ru/browse/PSBM-108287
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
    
    khorenko@ notes:
    1) non-root memcg:
       * oom - number of OOMs caused by limit exceeded by this particular memcg
       * total_oom - number of OOMs caused by limit exceeded by this particular
         memcg and all nested memory cgroups.
    
       Note: since the current memcg is not root, this is a "local" OOM,
       so processes are killed in the same memcg that caused it.
    
    2) root memcg:
       * oom - the number of global OOMs that happened
       * total_oom - the number of global OOMs + number of OOMs caused by limit
         exceeded in all nested memory cgroups
    
    Note: root memory cgroup cannot be limited => no OOMs can be caused by its limit.
---
 include/linux/memcontrol.h |  2 ++
 mm/memcontrol.c            | 33 ++++++++++++++++++++++++++-------
 2 files changed, 28 insertions(+), 7 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b097f137a3df..eb8634128a81 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -75,6 +75,8 @@  struct accumulated_stats {
 	unsigned long stat[MEMCG_NR_STAT];
 	unsigned long events[NR_VM_EVENT_ITEMS];
 	unsigned long lru_pages[NR_LRU_LISTS];
+	unsigned long oom;
+	unsigned long oom_kill;
 	const unsigned int *stats_array;
 	const unsigned int *events_array;
 	int stats_size;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a2d9f74ef77c..71e0697ff838 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3144,6 +3144,8 @@  void accumulate_memcg_tree(struct mem_cgroup *memcg,
 		for (i = 0; i < NR_LRU_LISTS; i++)
 			acc->lru_pages[i] +=
 				mem_cgroup_nr_lru_pages(mi, BIT(i));
+		acc->oom += atomic_long_read(&mi->memory_events[MEMCG_OOM]);
+		acc->oom_kill += atomic_long_read(&mi->memory_events[MEMCG_OOM_KILL]);
 
 		cond_resched();
 	}
@@ -3899,6 +3901,13 @@  static int memcg_stat_show(struct seq_file *m, void *v)
 	BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
 	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
 
+	memset(&acc, 0, sizeof(acc));
+	acc.stats_size = ARRAY_SIZE(memcg1_stats);
+	acc.stats_array = memcg1_stats;
+	acc.events_size = ARRAY_SIZE(memcg1_events);
+	acc.events_array = memcg1_events;
+	accumulate_memcg_tree(memcg, &acc);
+
 	for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
 		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
 			continue;
@@ -3911,6 +3920,18 @@  static int memcg_stat_show(struct seq_file *m, void *v)
 		seq_printf(m, "%s %lu\n", memcg1_event_names[i],
 			   memcg_sum_events(memcg, memcg1_events[i]));
 
+	/*
+	 * For root_mem_cgroup we want to account global ooms as well.
+	 * The diff between all MEMCG_OOM_KILL and MEMCG_OOM events
+	 * should give us the global ooms count.
+	 */
+	if (memcg == root_mem_cgroup)
+		seq_printf(m, "oom %lu\n", acc.oom_kill - acc.oom +
+			atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
+	else
+		seq_printf(m, "oom %lu\n",
+			atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
+
 	for (i = 0; i < NR_LRU_LISTS; i++)
 		seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
 			   mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE);
@@ -3927,13 +3948,6 @@  static int memcg_stat_show(struct seq_file *m, void *v)
 		seq_printf(m, "hierarchical_memsw_limit %llu\n",
 			   (u64)memsw * PAGE_SIZE);
 
-	memset(&acc, 0, sizeof(acc));
-	acc.stats_size = ARRAY_SIZE(memcg1_stats);
-	acc.stats_array = memcg1_stats;
-	acc.events_size = ARRAY_SIZE(memcg1_events);
-	acc.events_array = memcg1_events;
-	accumulate_memcg_tree(memcg, &acc);
-
 	for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
 		if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
 			continue;
@@ -3945,6 +3959,11 @@  static int memcg_stat_show(struct seq_file *m, void *v)
 		seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
 			   (u64)acc.events[i]);
 
+	if (memcg == root_mem_cgroup)
+		seq_printf(m, "total_oom %lu\n", acc.oom_kill);
+	else
+		seq_printf(m, "total_oom %lu\n", acc.oom);
+
 	for (i = 0; i < NR_LRU_LISTS; i++)
 		seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
 			   (u64)acc.lru_pages[i] * PAGE_SIZE);