diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 2067080bb..8629b37d1 100755
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -31,7 +31,7 @@ static inline void autogroup_destroy(struct kref *kref)
 	ag->tg->rt_se = NULL;
 	ag->tg->rt_rq = NULL;
 #endif
-	sched_offline_group(ag->tg);
+	sched_release_group(ag->tg);
 	sched_destroy_group(ag->tg);
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d6a5e5ab2..ae04a165a 100755
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7924,6 +7924,22 @@ static void sched_free_group(struct task_group *tg)
 	kmem_cache_free(task_group_cache, tg);
 }
 
+static void sched_free_group_rcu(struct rcu_head *rcu)
+{
+	sched_free_group(container_of(rcu, struct task_group, rcu));
+}
+
+static void sched_unregister_group(struct task_group *tg)
+{
+	unregister_fair_sched_group(tg);
+	unregister_rt_sched_group(tg);
+	/*
+	 * We have to wait for yet another RCU grace period to expire, as
+	 * print_cfs_stats() might run concurrently.
+	 */
+	call_rcu(&tg->rcu, sched_free_group_rcu);
+}
+
 /* allocate runqueue etc for a new task group */
 struct task_group *sched_create_group(struct task_group *parent)
 {
@@ -7967,25 +7983,35 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void sched_free_group_rcu(struct rcu_head *rhp)
+static void sched_unregister_group_rcu(struct rcu_head *rhp)
 {
 	/* Now it should be safe to free those cfs_rqs: */
-	sched_free_group(container_of(rhp, struct task_group, rcu));
+	sched_unregister_group(container_of(rhp, struct task_group, rcu));
 }
 
 void sched_destroy_group(struct task_group *tg)
 {
 	/* Wait for possible concurrent references to cfs_rqs complete: */
-	call_rcu(&tg->rcu, sched_free_group_rcu);
+	call_rcu(&tg->rcu, sched_unregister_group_rcu);
 }
 
-void sched_offline_group(struct task_group *tg)
+void sched_release_group(struct task_group *tg)
 {
 	unsigned long flags;
 
-	/* End participation in shares distribution: */
-	unregister_fair_sched_group(tg);
-
+	/*
+	 * Unlink first, to avoid walk_tg_tree_from() finding us (via
+	 * sched_cfs_period_timer()).
+	 *
+	 * For this to be effective, we have to wait for all pending users of
+	 * this task group to leave their RCU critical section to ensure no new
+	 * user will see our dying task group any more. Specifically ensure
+	 * that tg_unthrottle_up() won't add decayed cfs_rq's to it.
+	 *
+	 * We therefore defer calling unregister_fair_sched_group() to
+	 * sched_unregister_group(), which is guaranteed to be called only after
+	 * the current RCU grace period has expired.
+	 */
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
@@ -8105,7 +8131,7 @@ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
 
-	sched_offline_group(tg);
+	sched_release_group(tg);
 }
 
 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
@@ -8115,7 +8141,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 	/*
 	 * Relies on the RCU grace period between css_released() and this.
 	 */
-	sched_free_group(tg);
+	sched_unregister_group(tg);
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d6ef2b01e..806eb067b 100755
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11483,8 +11483,6 @@ void free_fair_sched_group(struct task_group *tg)
 {
 	int i;
 
-	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
-
 	for_each_possible_cpu(i) {
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
@@ -11561,6 +11559,8 @@ void unregister_fair_sched_group(struct task_group *tg)
 	struct rq *rq;
 	int cpu;
 
+	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
 	for_each_possible_cpu(cpu) {
 		if (tg->se[cpu])
 			remove_entity_load_avg(tg->se[cpu]);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f49db5d9c..d6e52fd04 100755
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -144,13 +144,17 @@ static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
 	return rt_rq->rq;
 }
 
+void unregister_rt_sched_group(struct task_group *tg)
+{
+	if (tg->rt_se)
+		destroy_rt_bandwidth(&tg->rt_bandwidth);
+
+}
+
 void free_rt_sched_group(struct task_group *tg)
 {
 	int i;
 
-	if (tg->rt_se)
-		destroy_rt_bandwidth(&tg->rt_bandwidth);
-
 	for_each_possible_cpu(i) {
 		if (tg->rt_rq)
 			kfree(tg->rt_rq[i]);
@@ -257,6 +261,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 	return &rq->rt;
 }
 
+void unregister_rt_sched_group(struct task_group *tg) { }
+
 void free_rt_sched_group(struct task_group *tg) { }
 
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6de9ccc79..9c56d1af6 100755
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -491,6 +491,7 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
 extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
 
+extern void unregister_rt_sched_group(struct task_group *tg);
 extern void free_rt_sched_group(struct task_group *tg);
 extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
@@ -506,7 +507,7 @@ extern struct task_group *sched_create_group(struct task_group *parent);
 extern void sched_online_group(struct task_group *tg,
 			       struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
-extern void sched_offline_group(struct task_group *tg);
+extern void sched_release_group(struct task_group *tg);
 
 extern void sched_move_task(struct task_struct *tsk);
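
Note: the hunks above split the old single-stage teardown into a release -> unregister -> free sequence separated by RCU grace periods, so concurrent walkers (sched_cfs_period_timer(), tg_unthrottle_up(), print_cfs_stats()) can never see a half-destroyed task group. Below is a minimal userspace sketch of that pattern, not kernel code: it assumes liburcu's classic API (link with -lurcu, or -lurcu-memb depending on the liburcu version), uses synchronize_rcu() synchronously where the kernel chains call_rcu() callbacks, and the names struct group, release_group(), unregister_group() and free_group() are hypothetical stand-ins for the task-group helpers touched by this patch.

/*
 * Sketch of the two-stage RCU-deferred teardown (userspace model, liburcu).
 * All names below are illustrative stand-ins, not kernel interfaces.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>		/* rcu_read_lock(), synchronize_rcu(), ... */
#include <urcu/list.h>		/* struct cds_list_head, CDS_LIST_HEAD()   */
#include <urcu/rculist.h>	/* cds_list_*_rcu() helpers                */

struct group {
	struct cds_list_head node;
	int id;
};

static CDS_LIST_HEAD(groups);
static pthread_mutex_t groups_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stage 1 (~ sched_release_group): unlink so walkers can't find us. */
static void release_group(struct group *g)
{
	pthread_mutex_lock(&groups_lock);
	cds_list_del_rcu(&g->node);
	pthread_mutex_unlock(&groups_lock);
}

/* Stage 2 (~ sched_unregister_group): after a grace period, stop the
 * machinery that could still repopulate the dying group. */
static void unregister_group(struct group *g)
{
	printf("unregistering group %d\n", g->id);
}

/* Stage 3 (~ sched_free_group): after one more grace period, free it. */
static void free_group(struct group *g)
{
	free(g);
}

int main(void)
{
	struct group *g = calloc(1, sizeof(*g));
	struct group *it;

	if (!g)
		return 1;

	rcu_register_thread();
	g->id = 1;

	pthread_mutex_lock(&groups_lock);
	cds_list_add_rcu(&g->node, &groups);
	pthread_mutex_unlock(&groups_lock);

	/* A reader walks the list under rcu_read_lock(). */
	rcu_read_lock();
	cds_list_for_each_entry_rcu(it, &groups, node)
		printf("reader sees group %d\n", it->id);
	rcu_read_unlock();

	release_group(g);	/* css_released() path          */
	synchronize_rcu();	/* existing readers drain       */
	unregister_group(g);	/* css_free() path              */
	synchronize_rcu();	/* e.g. a concurrent stats dump */
	free_group(g);		/* final call_rcu() callback    */

	rcu_unregister_thread();
	return 0;
}

The ordering is the point: unlink under the lock first so no new walker can find the object, let one grace period drain existing readers before tearing down the per-CPU machinery (the bandwidth timers, in the kernel case), and only free the memory after a further grace period, mirroring the css_released() -> css_free() -> call_rcu() chain in the patch.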