[rh7,v4,4/9] mm/mem_cgroup_iter: Always assign iter->last_visited under rcu

Submitted by Konstantin Khorenko on Feb. 24, 2021, 6:55 p.m.

Details

Message ID 20210224185541.862-5-khorenko@virtuozzo.com
State New
Series "mm/mem_cgroup_iter: Reduce the number of iterator restarts upon cgroup removals"
Headers show

Commit Message

Konstantin Khorenko Feb. 24, 2021, 6:55 p.m.
It's quite strange to have an rcu section in mem_cgroup_iter(),
but not use rcu_dereference/rcu_assign_pointer for the pointers being protected.

We plan to access/assign '->last_visited' during iterator invalidation,
so we'll need the protection there anyway.

https://jira.sw.ru/browse/PSBM-123655

Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
---
 mm/memcontrol.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

Patch hide | download patch | download mbox

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8040f09425bf..d0251d27de00 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -194,8 +194,18 @@  struct mem_cgroup_reclaim_iter {
 	/*
 	 * last scanned hierarchy member. Valid only if last_dead_count
 	 * matches memcg->dead_count of the hierarchy root group.
+	 *
+	 * Memory pointed by 'last_visited' is freed not earlier than
+	 * one rcu period after we accessed it:
+	 *   cgroup_offline_fn()
+	 *    offline_css()
+	 *    list_del_rcu()
+	 *    dput()
+	 *    ...
+	 *     cgroup_diput()
+	 *      call_rcu(&cgrp->rcu_head, cgroup_free_rcu)
 	 */
-	struct mem_cgroup *last_visited;
+	struct mem_cgroup __rcu *last_visited;
 	unsigned long last_dead_count;
 
 	/* scan generation, increased every round-trip */
@@ -1591,8 +1601,7 @@  mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
 	 */
 	*sequence = atomic_read(&root->dead_count);
 	if (iter->last_dead_count == *sequence) {
-		smp_rmb();
-		position = iter->last_visited;
+		position = rcu_dereference(iter->last_visited);
 
 		/*
 		 * We cannot take a reference to root because we might race
@@ -1620,8 +1629,7 @@  static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
 	 * don't lose destruction events in between.  We could have
 	 * raced with the destruction of @new_position after all.
 	 */
-	iter->last_visited = new_position;
-	smp_wmb();
+	rcu_assign_pointer(iter->last_visited, new_position);
 	iter->last_dead_count = sequence;
 
 	/* root reference counting symmetric to mem_cgroup_iter_load */
@@ -1681,7 +1689,7 @@  struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 			mz = mem_cgroup_zoneinfo(root, nid, zid);
 			iter = &mz->reclaim_iter[reclaim->priority];
 			if (prev && reclaim->generation != iter->generation) {
-				iter->last_visited = NULL;
+				rcu_assign_pointer(iter->last_visited, NULL);
 				goto out_unlock;
 			}
 

Comments

Kirill Tkhai Feb. 26, 2021, 9:12 a.m.
On 24.02.2021 21:55, Konstantin Khorenko wrote:
> It's quite strange to have rcu section in mem_cgroup_iter(),
> but do not use rcu_dereference/rcu_assign for pointers being defended.
> 
> We plan to access/assign '->last_visited' during iterator invalidation,
> so we'll need the protection there anyway.
> 
> https://jira.sw.ru/browse/PSBM-123655
> 
> Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
> ---
>  mm/memcontrol.c | 20 ++++++++++++++------
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 8040f09425bf..d0251d27de00 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -194,8 +194,18 @@ struct mem_cgroup_reclaim_iter {
>  	/*
>  	 * last scanned hierarchy member. Valid only if last_dead_count
>  	 * matches memcg->dead_count of the hierarchy root group.
> +	 *
> +	 * Memory pointed by 'last_visited' is freed not earlier than
> +	 * one rcu period after we accessed it:
> +	 *   cgroup_offline_fn()
> +	 *    offline_css()
> +	 *    list_del_rcu()
> +	 *    dput()
> +	 *    ...
> +	 *     cgroup_diput()
> +	 *      call_rcu(&cgrp->rcu_head, cgroup_free_rcu)
>  	 */
> -	struct mem_cgroup *last_visited;
> +	struct mem_cgroup __rcu *last_visited;
>  	unsigned long last_dead_count;
>  
>  	/* scan generation, increased every round-trip */
> @@ -1591,8 +1601,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
>  	 */
>  	*sequence = atomic_read(&root->dead_count);
>  	if (iter->last_dead_count == *sequence) {
> -		smp_rmb();
> -		position = iter->last_visited;
> +		position = rcu_dereference(iter->last_visited);
>  
>  		/*
>  		 * We cannot take a reference to root because we might race
> @@ -1620,8 +1629,7 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
>  	 * don't lose destruction events in between.  We could have
>  	 * raced with the destruction of @new_position after all.
>  	 */
> -	iter->last_visited = new_position;
> -	smp_wmb();

We can't remove barriers in this patch, this makes the patch wrong.
We should remove barriers somewhere in [9/9].

> +	rcu_assign_pointer(iter->last_visited, new_position);
>  	iter->last_dead_count = sequence;
>  
>  	/* root reference counting symmetric to mem_cgroup_iter_load */
> @@ -1681,7 +1689,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
>  			mz = mem_cgroup_zoneinfo(root, nid, zid);
>  			iter = &mz->reclaim_iter[reclaim->priority];
>  			if (prev && reclaim->generation != iter->generation) {
> -				iter->last_visited = NULL;
> +				rcu_assign_pointer(iter->last_visited, NULL);
>  				goto out_unlock;
>  			}
>  
>
Konstantin Khorenko Feb. 26, 2021, 10:22 a.m.
On 02/26/2021 12:12 PM, Kirill Tkhai wrote:
> On 24.02.2021 21:55, Konstantin Khorenko wrote:
>> It's quite strange to have rcu section in mem_cgroup_iter(),
>> but do not use rcu_dereference/rcu_assign for pointers being defended.
>>
>> We plan to access/assign '->last_visited' during iterator invalidation,
>> so we'll need the protection there anyway.
>>
>> https://jira.sw.ru/browse/PSBM-123655
>>
>> Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
>> ---
>>  mm/memcontrol.c | 20 ++++++++++++++------
>>  1 file changed, 14 insertions(+), 6 deletions(-)
>>
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 8040f09425bf..d0251d27de00 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -194,8 +194,18 @@ struct mem_cgroup_reclaim_iter {
>>  	/*
>>  	 * last scanned hierarchy member. Valid only if last_dead_count
>>  	 * matches memcg->dead_count of the hierarchy root group.
>> +	 *
>> +	 * Memory pointed by 'last_visited' is freed not earlier than
>> +	 * one rcu period after we accessed it:
>> +	 *   cgroup_offline_fn()
>> +	 *    offline_css()
>> +	 *    list_del_rcu()
>> +	 *    dput()
>> +	 *    ...
>> +	 *     cgroup_diput()
>> +	 *      call_rcu(&cgrp->rcu_head, cgroup_free_rcu)
>>  	 */
>> -	struct mem_cgroup *last_visited;
>> +	struct mem_cgroup __rcu *last_visited;
>>  	unsigned long last_dead_count;
>>
>>  	/* scan generation, increased every round-trip */
>> @@ -1591,8 +1601,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
>>  	 */
>>  	*sequence = atomic_read(&root->dead_count);
>>  	if (iter->last_dead_count == *sequence) {
>> -		smp_rmb();
>> -		position = iter->last_visited;
>> +		position = rcu_dereference(iter->last_visited);
>>
>>  		/*
>>  		 * We cannot take a reference to root because we might race
>> @@ -1620,8 +1629,7 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
>>  	 * don't lose destruction events in between.  We could have
>>  	 * raced with the destruction of @new_position after all.
>>  	 */
>> -	iter->last_visited = new_position;
>> -	smp_wmb();
>
> We can't remove barriers in this patch, this makes the patch wrong.
> We should remove barriers somewhere in [9/9].

Ok, will resend.

Thank you very much!

>
>> +	rcu_assign_pointer(iter->last_visited, new_position);
>>  	iter->last_dead_count = sequence;
>>
>>  	/* root reference counting symmetric to mem_cgroup_iter_load */
>> @@ -1681,7 +1689,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
>>  			mz = mem_cgroup_zoneinfo(root, nid, zid);
>>  			iter = &mz->reclaim_iter[reclaim->priority];
>>  			if (prev && reclaim->generation != iter->generation) {
>> -				iter->last_visited = NULL;
>> +				rcu_assign_pointer(iter->last_visited, NULL);
>>  				goto out_unlock;
>>  			}
>>
>>
>
> .
>
Konstantin Khorenko Feb. 26, 2021, 2:12 p.m.
--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 02/26/2021 12:12 PM, Kirill Tkhai wrote:
> On 24.02.2021 21:55, Konstantin Khorenko wrote:
>> It's quite strange to have rcu section in mem_cgroup_iter(),
>> but do not use rcu_dereference/rcu_assign for pointers being defended.
>>
>> We plan to access/assign '->last_visited' during iterator invalidation,
>> so we'll need the protection there anyway.
>>
>> https://jira.sw.ru/browse/PSBM-123655
>>
>> Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
>> ---
>>  mm/memcontrol.c | 20 ++++++++++++++------
>>  1 file changed, 14 insertions(+), 6 deletions(-)
>>
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 8040f09425bf..d0251d27de00 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -194,8 +194,18 @@ struct mem_cgroup_reclaim_iter {
>>  	/*
>>  	 * last scanned hierarchy member. Valid only if last_dead_count
>>  	 * matches memcg->dead_count of the hierarchy root group.
>> +	 *
>> +	 * Memory pointed by 'last_visited' is freed not earlier than
>> +	 * one rcu period after we accessed it:
>> +	 *   cgroup_offline_fn()
>> +	 *    offline_css()
>> +	 *    list_del_rcu()
>> +	 *    dput()
>> +	 *    ...
>> +	 *     cgroup_diput()
>> +	 *      call_rcu(&cgrp->rcu_head, cgroup_free_rcu)
>>  	 */
>> -	struct mem_cgroup *last_visited;
>> +	struct mem_cgroup __rcu *last_visited;
>>  	unsigned long last_dead_count;
>>
>>  	/* scan generation, increased every round-trip */
>> @@ -1591,8 +1601,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
>>  	 */
>>  	*sequence = atomic_read(&root->dead_count);
>>  	if (iter->last_dead_count == *sequence) {
>> -		smp_rmb();
>> -		position = iter->last_visited;
>> +		position = rcu_dereference(iter->last_visited);
>>
>>  		/*
>>  		 * We cannot take a reference to root because we might race
>> @@ -1620,8 +1629,7 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
>>  	 * don't lose destruction events in between.  We could have
>>  	 * raced with the destruction of @new_position after all.
>>  	 */
>> -	iter->last_visited = new_position;
>> -	smp_wmb();
>
> We can't remove barriers in this patch, this makes the patch wrong.
> We should remove barriers somewhere in [9/9].

JFYI: i've moved this not to [9/9], but to
[PATCH rh7 v4 7/9] mm/mem_cgroup_iter: Don't bother checking 'dead_count' anymore

>
>> +	rcu_assign_pointer(iter->last_visited, new_position);
>>  	iter->last_dead_count = sequence;
>>
>>  	/* root reference counting symmetric to mem_cgroup_iter_load */
>> @@ -1681,7 +1689,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
>>  			mz = mem_cgroup_zoneinfo(root, nid, zid);
>>  			iter = &mz->reclaim_iter[reclaim->priority];
>>  			if (prev && reclaim->generation != iter->generation) {
>> -				iter->last_visited = NULL;
>> +				rcu_assign_pointer(iter->last_visited, NULL);
>>  				goto out_unlock;
>>  			}
>>
>>
>
> .
>