[Devel,rh7,4/9] tcache: Make tcache_lru_isolate() keep ni->lock less

Submitted by Kirill Tkhai on Aug. 15, 2017, 11:23 a.m.

Details

Message ID 150279623042.12806.16552870327591239511.stgit@localhost.localdomain
State New
Series "Manage LRU lists under per-filesystem lock"
Headers show

Commit Message

Kirill Tkhai Aug. 15, 2017, 11:23 a.m.
Grab pool using RCU technics, and do not use ni->lock.
This refactors the function and will be used in further.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 mm/tcache.c |   36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/mm/tcache.c b/mm/tcache.c
index f2a9f439afb..cecaf02b365 100644
--- a/mm/tcache.c
+++ b/mm/tcache.c
@@ -1031,31 +1031,51 @@  tcache_lru_isolate(int nid, struct page **pages, int nr_to_isolate)
 	int nr, nr_isolated = 0;
 	struct rb_node *rbn;
 
-	spin_lock_irq(&ni->lock);
+	rcu_read_lock();
 again:
 	rbn = rb_first(&ni->reclaim_tree);
-	if (!rbn)
+	if (!rbn) {
+		rcu_read_unlock();
 		goto out;
-
-	rb_erase(rbn, &ni->reclaim_tree);
-	RB_CLEAR_NODE(rbn);
+	}
 
 	pni = rb_entry(rbn, struct tcache_pool_nodeinfo, reclaim_node);
-	if (!tcache_grab_pool(pni->pool))
+	if (!tcache_grab_pool(pni->pool)) {
+		spin_lock_irq(&ni->lock);
+		if (!RB_EMPTY_NODE(rbn) && list_empty(&pni->lru)) {
+			rb_erase(rbn, &ni->reclaim_tree);
+			RB_CLEAR_NODE(rbn);
+		}
+		spin_unlock_irq(&ni->lock);
 		goto again;
+	}
+	rcu_read_unlock();
 
+	spin_lock_irq(&ni->lock);
 	spin_lock(&pni->lock);
 	nr = __tcache_lru_isolate(pni, pages, nr_to_isolate);
-	ni->nr_pages -= nr;
 	nr_isolated += nr;
 
+	if (!nr) {
+		spin_unlock(&pni->lock);
+		spin_unlock_irq(&ni->lock);
+		goto out_put;
+	}
+
+	ni->nr_pages -= nr;
+
+	if (!RB_EMPTY_NODE(rbn)) {
+		rb_erase(rbn, &ni->reclaim_tree);
+		RB_CLEAR_NODE(rbn);
+	}
 	if (!list_empty(&pni->lru))
 		__tcache_insert_reclaim_node(ni, pni);
 
 	spin_unlock(&pni->lock);
+	spin_unlock_irq(&ni->lock);
+out_put:
 	tcache_put_pool(pni->pool);
 out:
-	spin_unlock_irq(&ni->lock);
 	return nr_isolated;
 }
 

Comments

Andrey Ryabinin Aug. 15, 2017, 1:01 p.m.
On 08/15/2017 02:23 PM, Kirill Tkhai wrote:
> Grab pool using RCU technics, and do not use ni->lock.
> This refactors the function and will be used in further.
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  mm/tcache.c |   36 ++++++++++++++++++++++++++++--------
>  1 file changed, 28 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/tcache.c b/mm/tcache.c
> index f2a9f439afb..cecaf02b365 100644
> --- a/mm/tcache.c
> +++ b/mm/tcache.c
> @@ -1031,31 +1031,51 @@ tcache_lru_isolate(int nid, struct page **pages, int nr_to_isolate)
>  	int nr, nr_isolated = 0;
>  	struct rb_node *rbn;
>  
> -	spin_lock_irq(&ni->lock);
> +	rcu_read_lock();

I don't get this. RCU lock can't protect from concurrent rb_erase(),
how is this supposed to work?

>  again:
>  	rbn = rb_first(&ni->reclaim_tree);
> -	if (!rbn)
> +	if (!rbn) {
> +		rcu_read_unlock();
>  		goto out;
> -
> -	rb_erase(rbn, &ni->reclaim_tree);
> -	RB_CLEAR_NODE(rbn);
> +	}
>  
>  	pni = rb_entry(rbn, struct tcache_pool_nodeinfo, reclaim_node);
> -	if (!tcache_grab_pool(pni->pool))
> +	if (!tcache_grab_pool(pni->pool)) {
> +		spin_lock_irq(&ni->lock);
> +		if (!RB_EMPTY_NODE(rbn) && list_empty(&pni->lru)) {
> +			rb_erase(rbn, &ni->reclaim_tree);
> +			RB_CLEAR_NODE(rbn);
> +		}
> +		spin_unlock_irq(&ni->lock);
>  		goto again;
> +	}
> +	rcu_read_unlock();
>  
> +	spin_lock_irq(&ni->lock);
>  	spin_lock(&pni->lock);
>  	nr = __tcache_lru_isolate(pni, pages, nr_to_isolate);
> -	ni->nr_pages -= nr;
>  	nr_isolated += nr;
>  
> +	if (!nr) {
> +		spin_unlock(&pni->lock);
> +		spin_unlock_irq(&ni->lock);
> +		goto out_put;
> +	}
> +
> +	ni->nr_pages -= nr;
> +
> +	if (!RB_EMPTY_NODE(rbn)) {
> +		rb_erase(rbn, &ni->reclaim_tree);
> +		RB_CLEAR_NODE(rbn);
> +	}
>  	if (!list_empty(&pni->lru))
>  		__tcache_insert_reclaim_node(ni, pni);
>  
>  	spin_unlock(&pni->lock);
> +	spin_unlock_irq(&ni->lock);
> +out_put:
>  	tcache_put_pool(pni->pool);
>  out:
> -	spin_unlock_irq(&ni->lock);
>  	return nr_isolated;
>  }
>  
>
Kirill Tkhai Aug. 15, 2017, 1:59 p.m.
On 15.08.2017 16:01, Andrey Ryabinin wrote:
> On 08/15/2017 02:23 PM, Kirill Tkhai wrote:
>> Grab pool using RCU technics, and do not use ni->lock.
>> This refactors the function and will be used in further.
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>> ---
>>  mm/tcache.c |   36 ++++++++++++++++++++++++++++--------
>>  1 file changed, 28 insertions(+), 8 deletions(-)
>>
>> diff --git a/mm/tcache.c b/mm/tcache.c
>> index f2a9f439afb..cecaf02b365 100644
>> --- a/mm/tcache.c
>> +++ b/mm/tcache.c
>> @@ -1031,31 +1031,51 @@ tcache_lru_isolate(int nid, struct page **pages, int nr_to_isolate)
>>  	int nr, nr_isolated = 0;
>>  	struct rb_node *rbn;
>>  
>> -	spin_lock_irq(&ni->lock);
>> +	rcu_read_lock();
> 
> I don't get this. RCU lock can't protect from concurrent rb_erase(),
> how is this supposed to work?

I thought, rb_first() is only one memory dereference. It seems, we may
introduce rb_leftmost pointer and assign it via RCU to handle such situation.

>>  again:
>>  	rbn = rb_first(&ni->reclaim_tree);
>> -	if (!rbn)
>> +	if (!rbn) {
>> +		rcu_read_unlock();
>>  		goto out;
>> -
>> -	rb_erase(rbn, &ni->reclaim_tree);
>> -	RB_CLEAR_NODE(rbn);
>> +	}
>>  
>>  	pni = rb_entry(rbn, struct tcache_pool_nodeinfo, reclaim_node);
>> -	if (!tcache_grab_pool(pni->pool))
>> +	if (!tcache_grab_pool(pni->pool)) {
>> +		spin_lock_irq(&ni->lock);
>> +		if (!RB_EMPTY_NODE(rbn) && list_empty(&pni->lru)) {
>> +			rb_erase(rbn, &ni->reclaim_tree);
>> +			RB_CLEAR_NODE(rbn);
>> +		}
>> +		spin_unlock_irq(&ni->lock);
>>  		goto again;
>> +	}
>> +	rcu_read_unlock();
>>  
>> +	spin_lock_irq(&ni->lock);
>>  	spin_lock(&pni->lock);
>>  	nr = __tcache_lru_isolate(pni, pages, nr_to_isolate);
>> -	ni->nr_pages -= nr;
>>  	nr_isolated += nr;
>>  
>> +	if (!nr) {
>> +		spin_unlock(&pni->lock);
>> +		spin_unlock_irq(&ni->lock);
>> +		goto out_put;
>> +	}
>> +
>> +	ni->nr_pages -= nr;
>> +
>> +	if (!RB_EMPTY_NODE(rbn)) {
>> +		rb_erase(rbn, &ni->reclaim_tree);
>> +		RB_CLEAR_NODE(rbn);
>> +	}
>>  	if (!list_empty(&pni->lru))
>>  		__tcache_insert_reclaim_node(ni, pni);
>>  
>>  	spin_unlock(&pni->lock);
>> +	spin_unlock_irq(&ni->lock);
>> +out_put:
>>  	tcache_put_pool(pni->pool);
>>  out:
>> -	spin_unlock_irq(&ni->lock);
>>  	return nr_isolated;
>>  }
>>  
>>