[rh7] mm/memcg: Fix race on mem cgroup offline/kmem deactivation.

Submitted by Andrey Ryabinin on May 8, 2020, 4:10 p.m.

Details

Message ID 20200508161000.31280-1-aryabinin@virtuozzo.com
State New
Series "mm/memcg: Fix race on mem cgroup offline/kmem deactivation."
Headers show

Commit Message

Andrey Ryabinin May 8, 2020, 4:10 p.m.
The following race could happen

  Initial state: kmem = 0, cgroup is online

             CPU0:
   charge_kmem(1); //kmem = 1
   memcg_uncharge_kmem()
   {
   ...
        kmem = page_counter_uncharge(&memcg->kmem, nr_pages); //kmem = 0

----------------------------------------------------------------------

             CPU1:

    charge_kmem(1); //kmem=1 again
    ....
    mem_cgroup_css_offline();
       memcg_deactivate_kmem() {
           ...
           css_get();
           memcg_kmem_mark_dead(memcg);
           if (page_counter_read(&memcg->kmem)) //kmem=1, thus return
                   return;
       }
----------------------------------------------------------------------

               CPU0:

        /* Not down to 0 */
        if (kmem)
                return;
        if (memcg_kmem_test_and_clear_dead(memcg))
                css_put(&memcg->css);              //kill memcg with kmem != 0

   }

Fix this by making a fake kmem charge on cgroup creation, and uncharging it
back during the offline stage. This way we guarantee that kmem is never 0
while cgroup is online, so the race above isn't possible anymore.

https://jira.sw.ru/browse/PSBM-98148
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Suggested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 mm/memcontrol.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4846e7a9bf63..08aa00e0661a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4794,6 +4794,43 @@  static int __memcg_activate_kmem(struct mem_cgroup *memcg,
 	memcg->kmemcg_id = memcg_id;
 	set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
 	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+
+	/*
+	 * Fake charge to keep kmem > 0 while cgroup is online. This prevents
+	 * premature cgroup destruction due to the following race:
+	 * CPU0:
+	 *       charge_kmem(1); //kmem = 1
+	 *       memcg_uncharge_kmem()
+	 *       {
+	 *       ...
+	 *            kmem = page_counter_uncharge(&memcg->kmem, nr_pages); //kmem = 0
+	 *
+	 *    ----------------------------------------------------------------------
+	 *
+	 *                 CPU1:
+	 *
+	 *        charge_kmem(1); //kmem=1 again
+	 *        ....
+	 *        mem_cgroup_css_offline();
+	 *           memcg_deactivate_kmem() {
+	 *               ...
+	 *               css_get();
+	 *               memcg_kmem_mark_dead(memcg);
+	 *               if (page_counter_read(&memcg->kmem)) //kmem=1, thus return
+	 *                       return;
+	 *           }
+	 *    ----------------------------------------------------------------------
+	 *
+	 *                   CPU0:
+	 *
+	 *            if (kmem)
+	 *                    return;
+	 *            if (memcg_kmem_test_and_clear_dead(memcg))
+	 *                    css_put(&memcg->css);              //kill memcg with kmem != 0
+	 *
+	 *       }
+	 */
+	page_counter_charge(&memcg->kmem, 1);
 out:
 	return err;
 }
@@ -6112,7 +6149,8 @@  static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
 
 	memcg_kmem_mark_dead(memcg);
 
-	if (page_counter_read(&memcg->kmem))
+	/* Uncharge fake charge from __memcg_activate_kmem() */
+	if (page_counter_uncharge(&memcg->kmem, 1))
 		return;
 
 	/*

Comments

Kirill Tkhai May 8, 2020, 4:16 p.m.
On 08.05.2020 19:10, Andrey Ryabinin wrote:
> The following race could happen
> 
>   Initial state: kmem = 0, cgroup is online
> 
>              CPU0:
>    charge_kmem(1); //kmem = 1
>    memcg_uncharge_kmem()
>    {
>    ...
>         kmem = page_counter_uncharge(&memcg->kmem, nr_pages); //kmem = 0
> 
> ----------------------------------------------------------------------
> 
>              CPU1:
> 
>     charge_kmem(1); //kmem=1 again
>     ....
>     mem_cgroup_css_offline();
>        memcg_deactivate_kmem() {
>            ...
>            css_get();
>            memcg_kmem_mark_dead(memcg);
>            if (page_counter_read(&memcg->kmem)) //kmem=1, thus return
>                    return;
>        }
> ----------------------------------------------------------------------
> 
>                CPU0:
> 
>         /* Not down to 0 */
>         if (kmem)
>                 return;
>         if (memcg_kmem_test_and_clear_dead(memcg))
>                 css_put(&memcg->css);              //kill memcg with kmem != 0
> 
>    }
> 
> Fix this by making a fake kmem charge on cgroup creation, and uncharging it
> back during the offline stage. This way we guarantee that kmem is never 0
> while cgroup is online, so the race above isn't possible anymore.
> 
> https://jira.sw.ru/browse/PSBM-98148
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> Suggested-by: Kirill Tkhai <ktkhai@virtuozzo.com>

Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>

> ---
>  mm/memcontrol.c | 40 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 4846e7a9bf63..08aa00e0661a 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4794,6 +4794,43 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
>  	memcg->kmemcg_id = memcg_id;
>  	set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
>  	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
> +
> +	/*
> +	 * Fake charge to keep kmem > 0 while cgroup is online. This prevents
> +	 * premature cgroup destruction due to the following race:
> +	 * CPU0:
> +	 *       charge_kmem(1); //kmem = 1
> +	 *       memcg_uncharge_kmem()
> +	 *       {
> +	 *       ...
> +	 *            kmem = page_counter_uncharge(&memcg->kmem, nr_pages); //kmem = 0
> +	 *
> +	 *    ----------------------------------------------------------------------
> +	 *
> +	 *                 CPU1:
> +	 *
> +	 *        charge_kmem(1); //kmem=1 again
> +	 *        ....
> +	 *        mem_cgroup_css_offline();
> +	 *           memcg_deactivate_kmem() {
> +	 *               ...
> +	 *               css_get();
> +	 *               memcg_kmem_mark_dead(memcg);
> +	 *               if (page_counter_read(&memcg->kmem)) //kmem=1, thus return
> +	 *                       return;
> +	 *           }
> +	 *    ----------------------------------------------------------------------
> +	 *
> +	 *                   CPU0:
> +	 *
> +	 *            if (kmem)
> +	 *                    return;
> +	 *            if (memcg_kmem_test_and_clear_dead(memcg))
> +	 *                    css_put(&memcg->css);              //kill memcg with kmem != 0
> +	 *
> +	 *       }
> +	 */
> +	page_counter_charge(&memcg->kmem, 1);
>  out:
>  	return err;
>  }
> @@ -6112,7 +6149,8 @@ static void memcg_deactivate_kmem(struct mem_cgroup *memcg)
>  
>  	memcg_kmem_mark_dead(memcg);
>  
> -	if (page_counter_read(&memcg->kmem))
> +	/* Uncharge fake charge from __memcg_activate_kmem() */
> +	if (page_counter_uncharge(&memcg->kmem, 1))
>  		return;
>  
>  	/*
>