[Devel,RHEL7,COMMIT] ve/sched: Do not loose boosting during set_next_entity() called for not busted task

Submitted by Konstantin Khorenko on Dec. 9, 2016, 12:11 p.m.

Details

Message ID 201612091211.uB9CBcow013875@finist_cl7.x64_64.work.ct
State New
Series "sched: Do not loose boosting during set_next_entity() called for not busted task"
Headers show

Commit Message

Konstantin Khorenko Dec. 9, 2016, 12:11 p.m.
The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.20.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.36.1.vz7.20.13
------>
commit 4bd37ae69b5864a02dd9643a8e02a465e60c3b97
Author: Kirill Tkhai <ktkhai@virtuozzo.com>
Date:   Fri Dec 9 16:11:38 2016 +0400

    ve/sched: Do not loose boosting during set_next_entity() called for not busted task
    
    Imagine the situation. Not boosted task A is running on cfs_rqA,
    and this moment boosted task B wakes up and becomes queued:
       rq,cfs_rq0
            |
         cfs_rq1
           ...
            |
         cfs_rqN
         /    \
    cfs_rqA  cfs_rqB
       |        |
       A        B
    
    As B is boosted, cfs_rq1 and cfs_rq2 entities are boosted also.
    Than, someone calls sched_setscheduler() for task A:
    
    dequeue_task_fair()  /* call dequeue_entity() only for B and cfs_rqB,
                            as cfs_rqN have other queue is queued.
                            So that, their on_rq remains 1. */
    put_prev_task_fair() /* does not change boosting of cfs_rq1-...cfs_rqN,
                            as they have boosted children */
    ...
    
    set_curr_task_fair() /* dequeues cfs_rq1-...-cfs_rqN from boosted lists,
                            as they remained on_rq after dequeue_task_fair().
                            @Problem is here@ */
    
    enqueue_task_fair()  /* does not change boosting, as it's called for not boosted task */
    
    So, we have cfs_rqB and task B are both boosted and queued on busted lists,
    while their (grand)parents are not on busted lists.
    
    The patch fixes reason of this broken hierary.
    
    https://jira.sw.ru/browse/PSBM-56465
    
    Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 kernel/sched/fair.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

Patch hide | download patch | download mbox

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 96fe1d5..5f276ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3176,7 +3176,7 @@  check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 }
 
 static void
-set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, bool boosted)
 {
 	/* 'current' is not kept within the tree. */
 	if (se->on_rq) {
@@ -3187,7 +3187,7 @@  set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 */
 		update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
-		if (entity_boosted(se))
+		if (entity_boosted(se) && boosted)
 			__dequeue_boosted_entity(cfs_rq, se);
 	}
 
@@ -5050,7 +5050,15 @@  static struct task_struct *pick_next_task_fair(struct rq *rq)
 
 	do {
 		se = pick_next_entity(cfs_rq);
-		set_next_entity(cfs_rq, se);
+		/*
+		 * If se can be dequeued from boosted node,
+		 * we will pick a boosted task at the end,
+		 * because we pick boosted tasks firstly.
+		 * So, "true" covers this case.
+		 * If se is not boosted, "true" does not
+		 * affect any way. Thus, pass it unconditionally.
+		 */
+		set_next_entity(cfs_rq, se, true);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
 
@@ -7859,11 +7867,16 @@  static void switched_to_fair(struct rq *rq, struct task_struct *p)
 static void set_curr_task_fair(struct rq *rq)
 {
 	struct sched_entity *se = &rq->curr->se;
+	bool boosted = false;
+
+#ifdef CONFIG_CFS_BANDWIDTH
+	boosted = se->boosted;
+#endif
 
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-		set_next_entity(cfs_rq, se);
+		set_next_entity(cfs_rq, se, boosted);
 		/* ensure bandwidth has been allocated on our new cfs_rq */
 		account_cfs_rq_runtime(cfs_rq, 0);
 	}

Comments

Kirill Tkhai Dec. 12, 2016, 6:53 p.m.
Konstantin, please, revert this. The patch is not correct, because we always dequeue
boosted parents of current task, while it's running, and queue it back, when it's put.

On 09.12.2016 15:11, Konstantin Khorenko wrote:
> The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.20.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
> after rh7-3.10.0-327.36.1.vz7.20.13
> ------>
> commit 4bd37ae69b5864a02dd9643a8e02a465e60c3b97
> Author: Kirill Tkhai <ktkhai@virtuozzo.com>
> Date:   Fri Dec 9 16:11:38 2016 +0400
> 
>     ve/sched: Do not loose boosting during set_next_entity() called for not busted task
>     
>     Imagine the situation. Not boosted task A is running on cfs_rqA,
>     and this moment boosted task B wakes up and becomes queued:
>        rq,cfs_rq0
>             |
>          cfs_rq1
>            ...
>             |
>          cfs_rqN
>          /    \
>     cfs_rqA  cfs_rqB
>        |        |
>        A        B
>     
>     As B is boosted, cfs_rq1 and cfs_rq2 entities are boosted also.
>     Than, someone calls sched_setscheduler() for task A:
>     
>     dequeue_task_fair()  /* call dequeue_entity() only for B and cfs_rqB,
>                             as cfs_rqN have other queue is queued.
>                             So that, their on_rq remains 1. */
>     put_prev_task_fair() /* does not change boosting of cfs_rq1-...cfs_rqN,
>                             as they have boosted children */
>     ...
>     
>     set_curr_task_fair() /* dequeues cfs_rq1-...-cfs_rqN from boosted lists,
>                             as they remained on_rq after dequeue_task_fair().
>                             @Problem is here@ */
>     
>     enqueue_task_fair()  /* does not change boosting, as it's called for not boosted task */
>     
>     So, we have cfs_rqB and task B are both boosted and queued on busted lists,
>     while their (grand)parents are not on busted lists.
>     
>     The patch fixes reason of this broken hierary.
>     
>     https://jira.sw.ru/browse/PSBM-56465
>     
>     Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  kernel/sched/fair.c | 21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 96fe1d5..5f276ab 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3176,7 +3176,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
>  }
>  
>  static void
> -set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, bool boosted)
>  {
>  	/* 'current' is not kept within the tree. */
>  	if (se->on_rq) {
> @@ -3187,7 +3187,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  		 */
>  		update_stats_wait_end(cfs_rq, se);
>  		__dequeue_entity(cfs_rq, se);
> -		if (entity_boosted(se))
> +		if (entity_boosted(se) && boosted)
>  			__dequeue_boosted_entity(cfs_rq, se);
>  	}
>  
> @@ -5050,7 +5050,15 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
>  
>  	do {
>  		se = pick_next_entity(cfs_rq);
> -		set_next_entity(cfs_rq, se);
> +		/*
> +		 * If se can be dequeued from boosted node,
> +		 * we will pick a boosted task at the end,
> +		 * because we pick boosted tasks firstly.
> +		 * So, "true" covers this case.
> +		 * If se is not boosted, "true" does not
> +		 * affect any way. Thus, pass it unconditionally.
> +		 */
> +		set_next_entity(cfs_rq, se, true);
>  		cfs_rq = group_cfs_rq(se);
>  	} while (cfs_rq);
>  
> @@ -7859,11 +7867,16 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
>  static void set_curr_task_fair(struct rq *rq)
>  {
>  	struct sched_entity *se = &rq->curr->se;
> +	bool boosted = false;
> +
> +#ifdef CONFIG_CFS_BANDWIDTH
> +	boosted = se->boosted;
> +#endif
>  
>  	for_each_sched_entity(se) {
>  		struct cfs_rq *cfs_rq = cfs_rq_of(se);
>  
> -		set_next_entity(cfs_rq, se);
> +		set_next_entity(cfs_rq, se, boosted);
>  		/* ensure bandwidth has been allocated on our new cfs_rq */
>  		account_cfs_rq_runtime(cfs_rq, 0);
>  	}
>