[2/2] cgroup, memcg: Rework css_stacks debugging.

Submitted by Andrey Ryabinin on May 18, 2020, 3:14 p.m.

Details

Message ID 20200518151447.4377-2-aryabinin@virtuozzo.com
State New
Series "Series without cover letter"
Headers show

Commit Message

Andrey Ryabinin May 18, 2020, 3:14 p.m.
Current css_stacks debugging isn't very usefull, it records only ~128 stacks
which is not enough and doesn't allow us to identify the problem.
Instead of recording full stacks, record only instruction pointer
and the counter of how often we hit it.

Example of output:
 css_relese: css_get/put ips
  1, [ffffffffb9713c68] __ub_set_css+0x98/0xe0
  851675, [ffffffffb985a208] get_mem_cgroup_from_mm+0x58/0xb0
  776, [ffffffffb985a3e3] __memcg_kmem_get_cache+0x183/0x1a0
  776, [ffffffffb985a3b8] __memcg_kmem_get_cache+0x158/0x1a0
  70664, [ffffffffb985f678] __memcg_kmem_newpage_charge+0x148/0x1c0
  166228, [ffffffffb985f15b] __mem_cgroup_try_charge+0x12b/0x190
  776, [ffffffffb9859c00] memcg_kmem_cache_create_func+0x50/0x80
  614007, [ffffffffb985f508] __memcg_kmem_put_cache+0x48/0x70
  116613, [ffffffffb9713f48] __ub_get_css+0x68/0x190
  116137, [ffffffffb9716648] ub_enough_memory+0xe8/0x110
  352, [ffffffffb9714764] ub_total_pages+0x64/0xa0
  51, [ffffffffb9715f88] bc_fill_sysinfo.part.0+0x58/0x80
  51, [ffffffffb97161c8] bc_mem_notify+0x218/0x270
  22, [ffffffffb9714538] ub_sync_memcg+0x58/0x80
  1, [ffffffffb9859b80] mem_cgroup_force_empty_write+0x40/0x70
  1, [ffffffffb985bfb4] mem_cgroup_css_offline+0x264/0x2a0
  1, [ffffffffb97369e0] cgroup_offline_fn+0x170/0x1a0
  1, [ffffffffb9713c78] __ub_set_css+0xa8/0xe0

https://jira.sw.ru/browse/PSBM-98148
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---
 include/linux/cgroup.h | 22 ++++++++++++++--------
 kernel/cgroup.c        | 37 ++++++++++++++++++-------------------
 mm/memcontrol.c        | 18 ++++++++++--------
 3 files changed, 42 insertions(+), 35 deletions(-)

Patch hide | download patch | download mbox

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1c1ee0c458e0..919cddd78168 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -117,9 +117,11 @@  extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
 
 extern int proc_cgroup_show(struct seq_file *, void *);
 
+#define CSS_IPS_COUNT 512
+/* Instruction pointer and count */
 struct css_stacks {
-	atomic_t offset;
-	unsigned long stacks[(PAGE_SIZE*2)/8 - 1];
+	unsigned long ips[CSS_IPS_COUNT];
+	atomic_t count[CSS_IPS_COUNT];
 };
 
 /* Per-subsystem/per-cgroup state maintained by the system. */
@@ -154,7 +156,7 @@  enum {
 extern struct static_key css_stacks_on;
 void __save_css_stack(struct cgroup_subsys_state *css);
 
-static inline void save_css_stack(struct cgroup_subsys_state *css)
+static __always_inline void save_css_stack(struct cgroup_subsys_state *css)
 {
 	if (static_key_false(&css_stacks_on))
 		__save_css_stack(css);
@@ -166,7 +168,7 @@  static inline void save_css_stack(struct cgroup_subsys_state *css)
  * - an existing ref-counted reference to the css
  * - task->cgroups for a locked task
  */
-static inline void css_get(struct cgroup_subsys_state *css)
+static __always_inline void css_get(struct cgroup_subsys_state *css)
 {
 	/* We don't need to reference count the root state */
 	if (!(css->flags & CSS_ROOT)) {
@@ -182,12 +184,16 @@  static inline void css_get(struct cgroup_subsys_state *css)
  * the css has been destroyed.
  */
 
-static inline bool css_tryget(struct cgroup_subsys_state *css)
+static __always_inline bool css_tryget(struct cgroup_subsys_state *css)
 {
+	bool ret;
+
 	if (css->flags & CSS_ROOT)
 		return true;
-	save_css_stack(css);
-	return percpu_ref_tryget_live(&css->refcnt);
+	ret = percpu_ref_tryget_live(&css->refcnt);
+	if (ret)
+		save_css_stack(css);
+	return ret;
 }
 
 /*
@@ -195,7 +201,7 @@  static inline bool css_tryget(struct cgroup_subsys_state *css)
  * css_get() or css_tryget()
  */
 
-static inline void css_put(struct cgroup_subsys_state *css)
+static __always_inline void css_put(struct cgroup_subsys_state *css)
 {
 	if (!(css->flags & CSS_ROOT)) {
 		save_css_stack(css);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bf218e88ac9d..389128d37334 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4150,7 +4150,7 @@  static void css_dput_fn(struct work_struct *work)
 
 	css_stacks = css->stacks;
 	if (css_stacks)
-		free_pages((unsigned long)css_stacks, 1);
+		kfree(css_stacks);
 
 	percpu_ref_exit(&css->refcnt);
 	cgroup_dput(css->cgroup);
@@ -4221,9 +4221,9 @@  static void init_cgroup_css(struct cgroup_subsys_state *css,
 {
 	css->cgroup = cgrp;
 	css->flags = 0;
-	if (static_key_false(&css_stacks_on) && slab_is_available())
-		css->stacks = (struct css_stacks *)
-			__get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1);
+	if (static_key_false(&css_stacks_on) && slab_is_available() &&
+	    ss == &mem_cgroup_subsys)
+		css->stacks = kzalloc(sizeof(*css->stacks), GFP_KERNEL);
 	else
 		css->stacks = 0;
 	if (cgrp == dummytop)
@@ -4261,28 +4261,27 @@  static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
 
 void __save_css_stack(struct cgroup_subsys_state *css)
 {
-	unsigned long entries[8];
-	unsigned int offset;
+	int i;
 	struct css_stacks *css_stacks;
-	struct stack_trace trace = {
-		.nr_entries = 0,
-		.entries = entries,
-		.max_entries = 8,
-		.skip = 0
-	};
+	unsigned long ip = _RET_IP_;
 
 	css_stacks = css->stacks;
 	if (!css_stacks)
 		return;
 
-	memset(entries, 0, sizeof(entries));
-	offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2);
-	if (offset == 0) {
-		offset += 8;
-		trace.max_entries = 7;
+	for (i = 0; i < CSS_IPS_COUNT; i++) {
+		if (css_stacks->ips[i] == 0) {
+			if (cmpxchg(&css_stacks->ips[i], 0, ip) != 0)
+				continue;
+			atomic_inc(&css_stacks->count[i]);
+			break;
+		}
+		if (css_stacks->ips[i] == ip) {
+			atomic_inc(&css_stacks->count[i]);
+			break;
+		}
 	}
-	save_stack_trace(&trace);
-	memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8);
+	WARN(i == CSS_IPS_COUNT, "css_ips overflow %p %pS\n", css, (void *)ip);
 }
 EXPORT_SYMBOL(__save_css_stack);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c3586e8e27ca..6c8989f1260d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3002,17 +3002,19 @@  void memcg_css_release_check_kmem(struct cgroup_subsys_state *css)
 
 		css_stacks = css->stacks;
 		if (css_stacks) {
-			pr_err("css_relese: css_stacks offset %d\n",
-			       atomic_read(&css_stacks->offset));
-			for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) {
-				if (css_stacks->stacks[i])
-					pr_err("\t%pS\n",
-					       (void *)css_stacks->stacks[i]);
+			pr_err("css_relese: css_get/put ips\n");
+			for (i = 0; i < CSS_IPS_COUNT; i++) {
+				if (css_stacks->ips[i])
+					pr_err("\t%d, [%lx] %pS\n",
+						atomic_read(&css_stacks->count[i]),
+						css_stacks->ips[i],
+						(void *)css_stacks->ips[i]);
 				else
-					continue;
+					break;
 			}
+			BUG();
 		}
-		BUG();
+
 	}
 
 }

Comments

Evgenii Shatokhin May 18, 2020, 4:09 p.m.
On 18.05.2020 18:14, Andrey Ryabinin wrote:
> Current css_stacks debugging isn't very usefull, it records only ~128 stacks
> which is not enough and doesn't allow us to identify the problem.
> Instead of recording full stacks, record only instruction pointer
> and the counter of how often we hit it.
> 
> Example of output:
>   css_relese: css_get/put ips
>    1, [ffffffffb9713c68] __ub_set_css+0x98/0xe0
>    851675, [ffffffffb985a208] get_mem_cgroup_from_mm+0x58/0xb0
>    776, [ffffffffb985a3e3] __memcg_kmem_get_cache+0x183/0x1a0
>    776, [ffffffffb985a3b8] __memcg_kmem_get_cache+0x158/0x1a0
>    70664, [ffffffffb985f678] __memcg_kmem_newpage_charge+0x148/0x1c0
>    166228, [ffffffffb985f15b] __mem_cgroup_try_charge+0x12b/0x190
>    776, [ffffffffb9859c00] memcg_kmem_cache_create_func+0x50/0x80
>    614007, [ffffffffb985f508] __memcg_kmem_put_cache+0x48/0x70
>    116613, [ffffffffb9713f48] __ub_get_css+0x68/0x190
>    116137, [ffffffffb9716648] ub_enough_memory+0xe8/0x110
>    352, [ffffffffb9714764] ub_total_pages+0x64/0xa0
>    51, [ffffffffb9715f88] bc_fill_sysinfo.part.0+0x58/0x80
>    51, [ffffffffb97161c8] bc_mem_notify+0x218/0x270
>    22, [ffffffffb9714538] ub_sync_memcg+0x58/0x80
>    1, [ffffffffb9859b80] mem_cgroup_force_empty_write+0x40/0x70
>    1, [ffffffffb985bfb4] mem_cgroup_css_offline+0x264/0x2a0
>    1, [ffffffffb97369e0] cgroup_offline_fn+0x170/0x1a0
>    1, [ffffffffb9713c78] __ub_set_css+0xa8/0xe0
> 
> https://jira.sw.ru/browse/PSBM-98148
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
>   include/linux/cgroup.h | 22 ++++++++++++++--------
>   kernel/cgroup.c        | 37 ++++++++++++++++++-------------------
>   mm/memcontrol.c        | 18 ++++++++++--------
>   3 files changed, 42 insertions(+), 35 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 1c1ee0c458e0..919cddd78168 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -117,9 +117,11 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
>   
>   extern int proc_cgroup_show(struct seq_file *, void *);
>   
> +#define CSS_IPS_COUNT 512
> +/* Instruction pointer and count */
>   struct css_stacks {
> -	atomic_t offset;
> -	unsigned long stacks[(PAGE_SIZE*2)/8 - 1];
> +	unsigned long ips[CSS_IPS_COUNT];
> +	atomic_t count[CSS_IPS_COUNT];
>   };
>   
>   /* Per-subsystem/per-cgroup state maintained by the system. */
> @@ -154,7 +156,7 @@ enum {
>   extern struct static_key css_stacks_on;
>   void __save_css_stack(struct cgroup_subsys_state *css);
>   
> -static inline void save_css_stack(struct cgroup_subsys_state *css)
> +static __always_inline void save_css_stack(struct cgroup_subsys_state *css)
>   {
>   	if (static_key_false(&css_stacks_on))
>   		__save_css_stack(css);
> @@ -166,7 +168,7 @@ static inline void save_css_stack(struct cgroup_subsys_state *css)
>    * - an existing ref-counted reference to the css
>    * - task->cgroups for a locked task
>    */
> -static inline void css_get(struct cgroup_subsys_state *css)
> +static __always_inline void css_get(struct cgroup_subsys_state *css)
>   {
>   	/* We don't need to reference count the root state */
>   	if (!(css->flags & CSS_ROOT)) {
> @@ -182,12 +184,16 @@ static inline void css_get(struct cgroup_subsys_state *css)
>    * the css has been destroyed.
>    */
>   
> -static inline bool css_tryget(struct cgroup_subsys_state *css)
> +static __always_inline bool css_tryget(struct cgroup_subsys_state *css)
>   {
> +	bool ret;
> +
>   	if (css->flags & CSS_ROOT)
>   		return true;
> -	save_css_stack(css);
> -	return percpu_ref_tryget_live(&css->refcnt);
> +	ret = percpu_ref_tryget_live(&css->refcnt);
> +	if (ret)
> +		save_css_stack(css);
> +	return ret;
>   }
>   
>   /*
> @@ -195,7 +201,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
>    * css_get() or css_tryget()
>    */
>   
> -static inline void css_put(struct cgroup_subsys_state *css)
> +static __always_inline void css_put(struct cgroup_subsys_state *css)
>   {
>   	if (!(css->flags & CSS_ROOT)) {
>   		save_css_stack(css);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index bf218e88ac9d..389128d37334 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4150,7 +4150,7 @@ static void css_dput_fn(struct work_struct *work)
>   
>   	css_stacks = css->stacks;
>   	if (css_stacks)
> -		free_pages((unsigned long)css_stacks, 1);
> +		kfree(css_stacks);
>   
>   	percpu_ref_exit(&css->refcnt);
>   	cgroup_dput(css->cgroup);
> @@ -4221,9 +4221,9 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
>   {
>   	css->cgroup = cgrp;
>   	css->flags = 0;
> -	if (static_key_false(&css_stacks_on) && slab_is_available())
> -		css->stacks = (struct css_stacks *)
> -			__get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1);
> +	if (static_key_false(&css_stacks_on) && slab_is_available() &&
> +	    ss == &mem_cgroup_subsys)
> +		css->stacks = kzalloc(sizeof(*css->stacks), GFP_KERNEL);
>   	else
>   		css->stacks = 0;
>   	if (cgrp == dummytop)
> @@ -4261,28 +4261,27 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
>   
>   void __save_css_stack(struct cgroup_subsys_state *css)
>   {
> -	unsigned long entries[8];
> -	unsigned int offset;
> +	int i;
>   	struct css_stacks *css_stacks;
> -	struct stack_trace trace = {
> -		.nr_entries = 0,
> -		.entries = entries,
> -		.max_entries = 8,
> -		.skip = 0
> -	};
> +	unsigned long ip = _RET_IP_;
>   
>   	css_stacks = css->stacks;
>   	if (!css_stacks)
>   		return;
>   
> -	memset(entries, 0, sizeof(entries));
> -	offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2);
> -	if (offset == 0) {
> -		offset += 8;
> -		trace.max_entries = 7;
> +	for (i = 0; i < CSS_IPS_COUNT; i++) {
> +		if (css_stacks->ips[i] == 0) {
> +			if (cmpxchg(&css_stacks->ips[i], 0, ip) != 0)
> +				continue;
> +			atomic_inc(&css_stacks->count[i]);
> +			break;
> +		}
> +		if (css_stacks->ips[i] == ip) {
> +			atomic_inc(&css_stacks->count[i]);
> +			break;
> +		}
>   	}
> -	save_stack_trace(&trace);
> -	memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8);
> +	WARN(i == CSS_IPS_COUNT, "css_ips overflow %p %pS\n", css, (void *)ip);
>   }
>   EXPORT_SYMBOL(__save_css_stack);
>   
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c3586e8e27ca..6c8989f1260d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3002,17 +3002,19 @@ void memcg_css_release_check_kmem(struct cgroup_subsys_state *css)
>   
>   		css_stacks = css->stacks;
>   		if (css_stacks) {
> -			pr_err("css_relese: css_stacks offset %d\n",
> -			       atomic_read(&css_stacks->offset));
> -			for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) {
> -				if (css_stacks->stacks[i])
> -					pr_err("\t%pS\n",
> -					       (void *)css_stacks->stacks[i]);
> +			pr_err("css_relese: css_get/put ips\n");
> +			for (i = 0; i < CSS_IPS_COUNT; i++) {
> +				if (css_stacks->ips[i])
> +					pr_err("\t%d, [%lx] %pS\n",
> +						atomic_read(&css_stacks->count[i]),
> +						css_stacks->ips[i],
> +						(void *)css_stacks->ips[i]);

If the function that called css_get/put was in a ReadyKernel patch 
module, and the patch was unloaded or replaced since then, the ip would 
be meaningless here.

As a crazy idea, we could perhaps move the code that outputs the 
collected data from memcg_css_release_check_kmem into a separate 
exported function and call it for each css when the RK patch module is 
unloaded. Not sure about races here though. Also not sure what to do 
with the accumulated ips after that - clear or keep.

Or, may be, there is a better idea?

>   				else
> -					continue;
> +					break;
>   			}
> +			BUG();
>   		}
> -		BUG();
> +
>   	}
>   
>   }
>
Kirill Tkhai May 21, 2020, 1:34 p.m.
On 18.05.2020 18:14, Andrey Ryabinin wrote:
> Current css_stacks debugging isn't very usefull, it records only ~128 stacks
> which is not enough and doesn't allow us to identify the problem.
> Instead of recording full stacks, record only instruction pointer
> and the counter of how often we hit it.
> 
> Example of output:
>  css_relese: css_get/put ips
>   1, [ffffffffb9713c68] __ub_set_css+0x98/0xe0
>   851675, [ffffffffb985a208] get_mem_cgroup_from_mm+0x58/0xb0
>   776, [ffffffffb985a3e3] __memcg_kmem_get_cache+0x183/0x1a0
>   776, [ffffffffb985a3b8] __memcg_kmem_get_cache+0x158/0x1a0
>   70664, [ffffffffb985f678] __memcg_kmem_newpage_charge+0x148/0x1c0
>   166228, [ffffffffb985f15b] __mem_cgroup_try_charge+0x12b/0x190
>   776, [ffffffffb9859c00] memcg_kmem_cache_create_func+0x50/0x80
>   614007, [ffffffffb985f508] __memcg_kmem_put_cache+0x48/0x70
>   116613, [ffffffffb9713f48] __ub_get_css+0x68/0x190
>   116137, [ffffffffb9716648] ub_enough_memory+0xe8/0x110
>   352, [ffffffffb9714764] ub_total_pages+0x64/0xa0
>   51, [ffffffffb9715f88] bc_fill_sysinfo.part.0+0x58/0x80
>   51, [ffffffffb97161c8] bc_mem_notify+0x218/0x270
>   22, [ffffffffb9714538] ub_sync_memcg+0x58/0x80
>   1, [ffffffffb9859b80] mem_cgroup_force_empty_write+0x40/0x70
>   1, [ffffffffb985bfb4] mem_cgroup_css_offline+0x264/0x2a0
>   1, [ffffffffb97369e0] cgroup_offline_fn+0x170/0x1a0
>   1, [ffffffffb9713c78] __ub_set_css+0xa8/0xe0
> 
> https://jira.sw.ru/browse/PSBM-98148
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>

Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>

> ---
>  include/linux/cgroup.h | 22 ++++++++++++++--------
>  kernel/cgroup.c        | 37 ++++++++++++++++++-------------------
>  mm/memcontrol.c        | 18 ++++++++++--------
>  3 files changed, 42 insertions(+), 35 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 1c1ee0c458e0..919cddd78168 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -117,9 +117,11 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
>  
>  extern int proc_cgroup_show(struct seq_file *, void *);
>  
> +#define CSS_IPS_COUNT 512
> +/* Instruction pointer and count */
>  struct css_stacks {
> -	atomic_t offset;
> -	unsigned long stacks[(PAGE_SIZE*2)/8 - 1];
> +	unsigned long ips[CSS_IPS_COUNT];
> +	atomic_t count[CSS_IPS_COUNT];
>  };
>  
>  /* Per-subsystem/per-cgroup state maintained by the system. */
> @@ -154,7 +156,7 @@ enum {
>  extern struct static_key css_stacks_on;
>  void __save_css_stack(struct cgroup_subsys_state *css);
>  
> -static inline void save_css_stack(struct cgroup_subsys_state *css)
> +static __always_inline void save_css_stack(struct cgroup_subsys_state *css)
>  {
>  	if (static_key_false(&css_stacks_on))
>  		__save_css_stack(css);
> @@ -166,7 +168,7 @@ static inline void save_css_stack(struct cgroup_subsys_state *css)
>   * - an existing ref-counted reference to the css
>   * - task->cgroups for a locked task
>   */
> -static inline void css_get(struct cgroup_subsys_state *css)
> +static __always_inline void css_get(struct cgroup_subsys_state *css)
>  {
>  	/* We don't need to reference count the root state */
>  	if (!(css->flags & CSS_ROOT)) {
> @@ -182,12 +184,16 @@ static inline void css_get(struct cgroup_subsys_state *css)
>   * the css has been destroyed.
>   */
>  
> -static inline bool css_tryget(struct cgroup_subsys_state *css)
> +static __always_inline bool css_tryget(struct cgroup_subsys_state *css)
>  {
> +	bool ret;
> +
>  	if (css->flags & CSS_ROOT)
>  		return true;
> -	save_css_stack(css);
> -	return percpu_ref_tryget_live(&css->refcnt);
> +	ret = percpu_ref_tryget_live(&css->refcnt);
> +	if (ret)
> +		save_css_stack(css);
> +	return ret;
>  }
>  
>  /*
> @@ -195,7 +201,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
>   * css_get() or css_tryget()
>   */
>  
> -static inline void css_put(struct cgroup_subsys_state *css)
> +static __always_inline void css_put(struct cgroup_subsys_state *css)
>  {
>  	if (!(css->flags & CSS_ROOT)) {
>  		save_css_stack(css);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index bf218e88ac9d..389128d37334 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4150,7 +4150,7 @@ static void css_dput_fn(struct work_struct *work)
>  
>  	css_stacks = css->stacks;
>  	if (css_stacks)
> -		free_pages((unsigned long)css_stacks, 1);
> +		kfree(css_stacks);
>  
>  	percpu_ref_exit(&css->refcnt);
>  	cgroup_dput(css->cgroup);
> @@ -4221,9 +4221,9 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
>  {
>  	css->cgroup = cgrp;
>  	css->flags = 0;
> -	if (static_key_false(&css_stacks_on) && slab_is_available())
> -		css->stacks = (struct css_stacks *)
> -			__get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1);
> +	if (static_key_false(&css_stacks_on) && slab_is_available() &&
> +	    ss == &mem_cgroup_subsys)
> +		css->stacks = kzalloc(sizeof(*css->stacks), GFP_KERNEL);
>  	else
>  		css->stacks = 0;
>  	if (cgrp == dummytop)
> @@ -4261,28 +4261,27 @@ static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
>  
>  void __save_css_stack(struct cgroup_subsys_state *css)
>  {
> -	unsigned long entries[8];
> -	unsigned int offset;
> +	int i;
>  	struct css_stacks *css_stacks;
> -	struct stack_trace trace = {
> -		.nr_entries = 0,
> -		.entries = entries,
> -		.max_entries = 8,
> -		.skip = 0
> -	};
> +	unsigned long ip = _RET_IP_;
>  
>  	css_stacks = css->stacks;
>  	if (!css_stacks)
>  		return;
>  
> -	memset(entries, 0, sizeof(entries));
> -	offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2);
> -	if (offset == 0) {
> -		offset += 8;
> -		trace.max_entries = 7;
> +	for (i = 0; i < CSS_IPS_COUNT; i++) {

I hope this won't too slow.

> +		if (css_stacks->ips[i] == 0) {
> +			if (cmpxchg(&css_stacks->ips[i], 0, ip) != 0)
> +				continue;
> +			atomic_inc(&css_stacks->count[i]);
> +			break;
> +		}
> +		if (css_stacks->ips[i] == ip) {
> +			atomic_inc(&css_stacks->count[i]);
> +			break;
> +		}
>  	}
> -	save_stack_trace(&trace);
> -	memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8);
> +	WARN(i == CSS_IPS_COUNT, "css_ips overflow %p %pS\n", css, (void *)ip);
>  }
>  EXPORT_SYMBOL(__save_css_stack);
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c3586e8e27ca..6c8989f1260d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3002,17 +3002,19 @@ void memcg_css_release_check_kmem(struct cgroup_subsys_state *css)
>  
>  		css_stacks = css->stacks;
>  		if (css_stacks) {
> -			pr_err("css_relese: css_stacks offset %d\n",
> -			       atomic_read(&css_stacks->offset));
> -			for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) {
> -				if (css_stacks->stacks[i])
> -					pr_err("\t%pS\n",
> -					       (void *)css_stacks->stacks[i]);
> +			pr_err("css_relese: css_get/put ips\n");
> +			for (i = 0; i < CSS_IPS_COUNT; i++) {
> +				if (css_stacks->ips[i])
> +					pr_err("\t%d, [%lx] %pS\n",
> +						atomic_read(&css_stacks->count[i]),
> +						css_stacks->ips[i],
> +						(void *)css_stacks->ips[i]);
>  				else
> -					continue;
> +					break;
>  			}
> +			BUG();
>  		}
> -		BUG();
> +
>  	}
>  
>  }
>