@@ -3079,7 +3079,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
se->vruntime = max_vruntime(se->vruntime, vruntime);
}
-static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
+static void check_enqueue_throttle(struct cfs_rq *cfs_rq, int flags);
static inline void check_schedstat_required(void)
{
@@ -3139,8 +3139,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (cfs_rq->nr_running == 1) {
list_add_leaf_cfs_rq(cfs_rq);
- if (!(flags & ENQUEUE_BOOST))
- check_enqueue_throttle(cfs_rq);
+ check_enqueue_throttle(cfs_rq, flags);
}
}
@@ -3975,7 +3974,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
* expired/exceeded, otherwise it may be allowed to steal additional ticks of
* runtime as update_curr() throttling can not not trigger until it's on-rq.
*/
-static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
+static void check_enqueue_throttle(struct cfs_rq *cfs_rq, int flags)
{
WARN_ON(cfs_rq_has_boosted_entities(cfs_rq));
@@ -4002,6 +4001,9 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
}
}
+ if (flags & ENQUEUE_BOOST)
+ return;
+
/* an active group must be handled by the update_curr()->put() path */
if (!cfs_rq->runtime_enabled || cfs_rq->curr)
return;
@@ -4174,7 +4176,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
-static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
+static void check_enqueue_throttle(struct cfs_rq *cfs_rq, int flags) {}
static inline void sync_throttle(struct task_group *tg, int cpu) {}
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
The commit is pushed to "branch-rh7-3.10.0-514.10.2.vz7.29.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-514.10.2.vz7.29.16 ------> commit d70c7dafc992d7094908764ecd69e7649c718fd7 Author: Kirill Tkhai <ktkhai@virtuozzo.com> Date: Fri Apr 14 20:14:31 2017 +0400 sched/fair: Synchronize throttle_count on boosted wakeups Currently, we skip updating of throttle count during boosted wakeups in enqueue_entity(). It's because of boosted tasks make cfs_rq's boosted too, and throttling is impossible there. But it's wrong. Imagine the below situation. Boosted task enqueued as first task on cfs_rq skips updating throttle_count. Then not-boosted task wakes up, and the update is not called too, because there are two tasks on the cfs_rq. Later, the first task goes to sleep in kernel (e.g., call sys_poll()), the second task is working long and then throttling happen (because the second task is not boosted and runtime expires). Now, the time for the third not-boosted task to be moved on this cpu. The update does happen again, as there are two tasks on the cfs_rq, but check_preempt_wakeup() works as well. It skips throttled_hierarchy() check as the throttle_count was not updated, and finds matching se between the first task and rq->curr. Then, the third task's runtime is good enough to replace rq->curr, set_next_buddy() is called. When rq->curr.se depth is less than the cfs_rq's depth, cfs_rq's (grand)parent se is marked as next buddy, though it does not contain queued children (the cfs_rq is throttled and dequeued). So, further pick_next_entity() gets NULL pointer on dereferrence of parent_cfs_rq->rb_leftmost, and crashes. https://jira.sw.ru/browse/PSBM-62208 Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> --- kernel/sched/fair.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-)