[RHEL7,COMMIT] mm/memcontrol: fix memory.high

Submitted by Konstantin Khorenko on July 8, 2019, 10:28 a.m.

Details

Message ID 201907081028.x68AScMm020158@finist-ce7.sw.ru
State New
Series "Series without cover letter"
Headers show

Commit Message

Konstantin Khorenko July 8, 2019, 10:28 a.m.
The commit is pushed to "vz7.96.12" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-957.12.2.vz7.96.12
------>
commit 222af0e107e39792184bb516e793a583ab386fe7
Author: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date:   Mon Jul 8 13:28:38 2019 +0300

    mm/memcontrol: fix memory.high
    
    Our commit b607d3e1a953 ("ms/mm: memcontrol: fold mem_cgroup_do_charge()")
    accidentally lost the piece of code making memory.high work. Bring it back.
    
    Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 include/linux/memcontrol.h |  2 ++
 include/linux/sched.h      |  3 +++
 include/linux/tracehook.h  |  3 +++
 mm/memcontrol.c            | 65 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index aa8cef097055..265b5e350779 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -138,6 +138,8 @@  extern void mem_cgroup_note_oom_kill(struct mem_cgroup *memcg,
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 					struct task_struct *p);
 
+void mem_cgroup_handle_over_high(void);
+
 unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 					   int nid, unsigned int lru_mask);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea08dfd17448..74e34bcd1e2d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1824,6 +1824,9 @@  struct task_struct {
 #endif
 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
 	unsigned int memcg_kmem_skip_account;
+
+	/* Number of pages to reclaim on returning to userland: */
+	unsigned int			memcg_nr_pages_over_high;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 1e98b5530425..f4e830954674 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -47,6 +47,7 @@ 
 #define _LINUX_TRACEHOOK_H	1
 
 #include <linux/sched.h>
+#include <linux/memcontrol.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
 #include <linux/task_work.h>
@@ -194,6 +195,8 @@  static inline void tracehook_notify_resume(struct pt_regs *regs)
 	smp_mb__after_clear_bit();
 	if (unlikely(current->task_works))
 		task_work_run();
+
+	mem_cgroup_handle_over_high();
 }
 
 #endif	/* <linux/tracehook.h> */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d11aa24ed207..0e6911b2dcc3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -55,6 +55,7 @@ 
 #include <linux/oom.h>
 #include <linux/virtinfo.h>
 #include <linux/migrate.h>
+#include <linux/tracehook.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -311,6 +312,7 @@  struct mem_cgroup {
 
 	/* vmpressure notifications */
 	struct vmpressure vmpressure;
+	struct work_struct high_work;
 
 	/*
 	 * the counter to account for kernel memory usage.
@@ -2996,6 +2998,44 @@  static bool kmem_reclaim_is_low(struct mem_cgroup *memcg)
 	return dcache_is_low(memcg);
 }
 
+static void reclaim_high(struct mem_cgroup *memcg,
+			 unsigned int nr_pages,
+			 gfp_t gfp_mask)
+{
+	do {
+		if (page_counter_read(&memcg->memory) <= memcg->high)
+			continue;
+
+		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 0);
+	} while ((memcg = parent_mem_cgroup(memcg)));
+}
+
+static void high_work_func(struct work_struct *work)
+{
+	struct mem_cgroup *memcg;
+
+	memcg = container_of(work, struct mem_cgroup, high_work);
+	reclaim_high(memcg, CHARGE_BATCH, GFP_KERNEL);
+}
+
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+	unsigned int nr_pages = current->memcg_nr_pages_over_high;
+	struct mem_cgroup *memcg;
+
+	if (likely(!nr_pages))
+		return;
+
+	memcg = get_mem_cgroup_from_mm(current->mm);
+	reclaim_high(memcg, nr_pages, GFP_KERNEL);
+	css_put(&memcg->css);
+	current->memcg_nr_pages_over_high = 0;
+}
+
 /**
  * mem_cgroup_try_charge - try charging a memcg
  * @memcg: memcg to charge
@@ -3195,6 +3235,28 @@  static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, bool kmem_charge
 	if (batch > nr_pages)
 		refill_stock(memcg, batch - nr_pages);
 done:
+	/*
+	 * If the hierarchy is above the normal consumption range, schedule
+	 * reclaim on returning to userland.  We can perform reclaim here
+	 * if __GFP_RECLAIM but let's always punt for simplicity and so that
+	 * GFP_KERNEL can consistently be used during reclaim.  @memcg is
+	 * not recorded as it most likely matches current's and won't
+	 * change in the meantime.  As high limit is checked again before
+	 * reclaim, the cost of mismatch is negligible.
+	 */
+	do {
+		if (page_counter_read(&memcg->memory) > memcg->high) {
+			/* Don't bother a random interrupted task */
+			if (in_interrupt()) {
+				schedule_work(&memcg->high_work);
+				break;
+			}
+			current->memcg_nr_pages_over_high += batch;
+			set_notify_resume(current);
+			break;
+		}
+	} while ((memcg = parent_mem_cgroup(memcg)));
+
 	return 0;
 }
 
@@ -6435,6 +6497,7 @@  mem_cgroup_css_alloc(struct cgroup *cont)
 	memcg->last_scanned_node = MAX_NUMNODES;
 	INIT_LIST_HEAD(&memcg->oom_notify);
 	memcg->move_charge_at_immigrate = 0;
+	INIT_WORK(&memcg->high_work, high_work_func);
 	mutex_init(&memcg->thresholds_lock);
 	spin_lock_init(&memcg->move_lock);
 	vmpressure_init(&memcg->vmpressure);
@@ -6625,6 +6688,8 @@  static void mem_cgroup_css_free(struct cgroup *cont)
 	mem_cgroup_reparent_charges(memcg);
 
 	vmpressure_cleanup(&memcg->vmpressure);
+	cancel_work_sync(&memcg->high_work);
+
 	memcg_destroy_kmem(memcg);
 	memcg_free_shrinker_maps(memcg);
 	__mem_cgroup_free(memcg);