#include #include "../sched.h" #include "ems.h" #include #include #include #define TINY_TASK_RATIO_SHIFT 3 /* 12.5% */ #define BUSY_GROUP_RATIO_SHIFT 2 /* 25% */ struct lb_env { struct rq *src_rq; struct rq *dst_rq; struct task_struct *push_task; u64 flags; }; static struct lb_env __percpu *lb_env; static struct cpu_stop_work __percpu *lb_work; static bool slowest_has_misfit(int dst_cpu) { int cpu; if (cpumask_test_cpu(dst_cpu, cpu_slowest_mask())) return false; for_each_cpu(cpu, cpu_slowest_mask()) if (ems_rq_nr_misfited(cpu_rq(cpu))) return true; return false; } void lb_enqueue_misfit_task(struct task_struct *p, struct rq *rq) { bool cur_misfit = !ems_task_fits_max_cap(p, cpu_of(rq)); if (cur_misfit) ems_rq_update_nr_misfited(rq, true); else ems_task_misfited(p) = false; trace_lb_update_misfit(p, true, cur_misfit, cpu_of(rq), ems_rq_nr_misfited(rq), "enqueue"); } void lb_dequeue_misfit_task(struct task_struct *p, struct rq *rq) { ems_rq_update_nr_misfited(rq, false); trace_lb_update_misfit(p, true, true, cpu_of(rq), ems_rq_nr_misfited(rq), "dequeue"); } void lb_update_misfit_status(struct task_struct *p, struct rq *rq, bool *need_update) { bool old_misfit, cur_misfit; if (!p) return; old_misfit = ems_task_misfited(p); cur_misfit = !ems_task_fits_max_cap(p, rq->cpu); if (cur_misfit != old_misfit) { ems_task_misfited(p) = cur_misfit; ems_rq_update_nr_misfited(rq, cur_misfit); } trace_lb_update_misfit(p, old_misfit, cur_misfit, cpu_of(rq), ems_rq_nr_misfited(rq), "update"); } void lb_nohz_balancer_kick(struct rq *rq, unsigned int *flag, int *done) { *done = true; /* * tick path migration takes care of misfit task. * so we have to check for nr_running >= 2 here. */ if (rq->nr_running >= 2 && cpu_overutilized(rq->cpu)) { *flag = NOHZ_KICK_MASK; trace_lb_nohz_balancer_kick(rq); } } static bool _lb_can_migrate_task(struct task_struct *p, int src_cpu, int dst_cpu) { /* boosted task can't migrate to slower cpu */ if (capacity_orig_of(dst_cpu) < capacity_orig_of(src_cpu)) { if (is_boosted_tex_task(p)) { trace_lb_can_migrate_task(p, dst_cpu, false, "tex"); return false; } if (sysbusy_boost_task(p)) { trace_lb_can_migrate_task(p, dst_cpu, false, "sysbusy-boost"); return false; } } if (!ontime_can_migrate_task(p, dst_cpu)) { trace_lb_can_migrate_task(p, dst_cpu, false, "ontime"); return false; } if (!cpumask_test_cpu(dst_cpu, cpus_binding_mask(p))) { trace_lb_can_migrate_task(p, dst_cpu, false, "ems-binded"); return false; } if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr)) { trace_lb_can_migrate_task(p, dst_cpu, false, "cpus-allowed"); return false; } trace_lb_can_migrate_task(p, dst_cpu, true, "can-migrate"); return true; } bool lb_busiest_queue_pre_condition(struct rq *rq, bool check_overutil) { /* if there is no fair task, we can't balance */ if (!rq->cfs.h_nr_running) return false; /* if rq is in the active balance, will be less busy */ if (rq->active_balance) return false; if (check_overutil && !cpu_overutilized(cpu_of(rq))) return false; return true; } /* * This function called when src rq has only one task, * and need to check whether need an active balance or not */ bool lb_queue_need_active_mgt(struct rq *src, struct rq *dst) { if (!ems_rq_nr_misfited(src)) return false; if (!available_idle_cpu(cpu_of(dst))) return false; return true; } bool lb_group_is_busy(struct cpumask *cpus, int nr_task, unsigned long util) { unsigned long capacity = capacity_orig_of(cpumask_first(cpus)); int nr_cpus = cpumask_weight(cpus); /* * if there is a available cpu in the group, * this group is not busy */ if 
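/*
 * A group is considered busy when it has more runnable tasks than cpus
 * and its summed utilization, with a 25% margin added, reaches the
 * total capacity of the group.
 */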
bool lb_group_is_busy(struct cpumask *cpus, int nr_task, unsigned long util)
{
	unsigned long capacity = capacity_orig_of(cpumask_first(cpus));
	int nr_cpus = cpumask_weight(cpus);

	/*
	 * If there is an available cpu in the group,
	 * the group is not busy.
	 */
	if (nr_task <= nr_cpus)
		return false;

	if ((util + (util >> BUSY_GROUP_RATIO_SHIFT)) < (capacity * nr_cpus))
		return false;

	return true;
}

static void lb_find_busiest_faster_queue(int dst_cpu,
		struct cpumask *src_cpus, struct rq **busiest)
{
	int src_cpu, busiest_cpu = -1;
	struct cpumask candidate_mask;
	unsigned long util, busiest_util = 0;
	unsigned long util_sum = 0, nr_task_sum = 0;
	unsigned long tiny_task_util;

	tiny_task_util = capacity_orig_of(cpumask_first(src_cpus)) >> TINY_TASK_RATIO_SHIFT;

	cpumask_and(&candidate_mask, src_cpus, cpu_active_mask);
	cpumask_and(&candidate_mask, &candidate_mask, ecs_available_cpus());
	if (cpumask_empty(&candidate_mask))
		return;

	for_each_cpu(src_cpu, &candidate_mask) {
		struct rq *src_rq = cpu_rq(src_cpu);

		trace_lb_cpu_util(src_cpu, "faster");

		util = ml_cpu_util(src_cpu) + cpu_util_rt(src_rq);
		util_sum += util;
		nr_task_sum += src_rq->cfs.h_nr_running;

		if (!lb_busiest_queue_pre_condition(src_rq, true))
			continue;

		if (src_rq->cfs.h_nr_running < 2)
			continue;

		if (src_rq->cfs.h_nr_running == 2 &&
		    ml_task_util(src_rq->curr) < tiny_task_util)
			continue;

		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = src_cpu;
	}

	/*
	 * Don't allow migrating to a lower cluster unless
	 * this faster cluster is sufficiently loaded.
	 */
	if (!lb_group_is_busy(&candidate_mask, nr_task_sum, util_sum))
		return;

	if (busiest_cpu != -1)
		*busiest = cpu_rq(busiest_cpu);
}

static void lb_find_busiest_slower_queue(int dst_cpu,
		struct cpumask *src_cpus, struct rq **busiest)
{
	int src_cpu, busiest_cpu = -1;
	unsigned long util, busiest_util = 0;

	for_each_cpu_and(src_cpu, src_cpus, cpu_active_mask) {
		struct rq *src_rq = cpu_rq(src_cpu);

		trace_lb_cpu_util(src_cpu, "slower");

		if (!lb_busiest_queue_pre_condition(src_rq, true))
			continue;

		if (src_rq->cfs.h_nr_running == 1 &&
		    !lb_queue_need_active_mgt(src_rq, cpu_rq(dst_cpu)))
			continue;

		util = ml_cpu_util(src_cpu) + cpu_util_rt(src_rq);
		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = src_cpu;
	}

	if (busiest_cpu != -1)
		*busiest = cpu_rq(busiest_cpu);
}

static void lb_find_busiest_equivalent_queue(int dst_cpu,
		struct cpumask *src_cpus, struct rq **busiest)
{
	int src_cpu, busiest_cpu = -1;
	unsigned long util, busiest_util = 0;

	for_each_cpu_and(src_cpu, src_cpus, cpu_active_mask) {
		struct rq *src_rq = cpu_rq(src_cpu);

		trace_lb_cpu_util(src_cpu, "equal");

		if (!lb_busiest_queue_pre_condition(src_rq, false))
			continue;

		if (src_rq->nr_running < 2)
			continue;

		/* find the highest util cpu */
		util = ml_cpu_util(src_cpu) + cpu_util_rt(src_rq);
		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = src_cpu;
	}

	if (busiest_cpu != -1)
		*busiest = cpu_rq(busiest_cpu);
}

static void __lb_find_busiest_queue(int dst_cpu,
		struct cpumask *src_cpus, struct rq **busiest)
{
	unsigned long dst_capacity, src_capacity;

	src_capacity = capacity_orig_of(cpumask_first(src_cpus));
	dst_capacity = capacity_orig_of(dst_cpu);

	if (dst_capacity == src_capacity)
		lb_find_busiest_equivalent_queue(dst_cpu, src_cpus, busiest);
	else if (dst_capacity > src_capacity)
		lb_find_busiest_slower_queue(dst_cpu, src_cpus, busiest);
	else
		lb_find_busiest_faster_queue(dst_cpu, src_cpus, busiest);
}
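/*
 * Pick the busiest runqueue among the cpus of @group that are also in
 * @env_cpus; when the group spans a single cpu, that cpu's runqueue is
 * chosen directly.
 */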
void lb_find_busiest_queue(int dst_cpu, struct sched_group *group,
		struct cpumask *env_cpus, struct rq **busiest, int *done)
{
	int cpu = -1;
	struct cpumask src_cpus;

	*done = true;

	cpumask_and(&src_cpus, sched_group_span(group), env_cpus);

	/* if the group has only one cpu, there is no need to traverse it */
	if (cpumask_weight(&src_cpus) == 1)
		*busiest = cpu_rq(cpumask_first(&src_cpus));
	else
		__lb_find_busiest_queue(dst_cpu, &src_cpus, busiest);

	if (*busiest)
		cpu = cpu_of(*busiest);

	trace_lb_find_busiest_queue(dst_cpu, cpu, &src_cpus);
}

void lb_can_migrate_task(struct task_struct *p, int dst_cpu, int *can_migrate)
{
	int src_cpu = task_cpu(p);

	if (_lb_can_migrate_task(p, src_cpu, dst_cpu))
		return;

	*can_migrate = false;
}

static int lb_active_migration_stop(void *data)
{
	struct lb_env *env = data;
	struct rq *src_rq = env->src_rq, *dst_rq = env->dst_rq;
	struct rq_flags rf;
	int ret;

	rq_lock_irq(src_rq, &rf);
	ret = detach_one_task(src_rq, dst_rq, env->push_task);
	src_rq->active_balance = 0;
	ems_rq_migrated(dst_rq) = false;
	rq_unlock(src_rq, &rf);

	if (ret)
		attach_one_task(dst_rq, env->push_task);

	local_irq_enable();

	return 0;
}

static bool lb_task_need_active_mgt(struct task_struct *p, int dst_cpu, int src_cpu)
{
	if (capacity_orig_of(dst_cpu) <= capacity_orig_of(src_cpu))
		return false;

	if (!ems_task_misfited(p))
		return false;

	return true;
}

static int lb_idle_pull_tasks(int dst_cpu, int src_cpu)
{
	struct lb_env *env = per_cpu_ptr(lb_env, src_cpu);
	struct rq *dst_rq = cpu_rq(dst_cpu);
	struct rq *src_rq = cpu_rq(src_cpu);
	struct task_struct *pulled_task = NULL, *p;
	bool active_balance = false;
	unsigned long flags;

	raw_spin_rq_lock_irqsave(src_rq, flags);
	list_for_each_entry_reverse(p, &src_rq->cfs_tasks, se.group_node) {
		if (!_lb_can_migrate_task(p, src_cpu, dst_cpu))
			continue;

		if (task_running(src_rq, p)) {
			if (lb_task_need_active_mgt(p, dst_cpu, src_cpu)) {
				pulled_task = p;
				active_balance = true;
				break;
			}
			continue;
		}

		if (can_migrate(p, dst_cpu)) {
			update_rq_clock(src_rq);
			detach_task(src_rq, dst_rq, p);
			pulled_task = p;
			break;
		}
	}

	if (!pulled_task) {
		raw_spin_rq_unlock_irqrestore(src_rq, flags);
		return 0;
	}

	if (active_balance) {
		if (src_rq->active_balance) {
			raw_spin_rq_unlock_irqrestore(src_rq, flags);
			return 0;
		}

		src_rq->active_balance = 1;
		ems_rq_migrated(dst_rq) = true;
		env->src_rq = src_rq;
		env->dst_rq = dst_rq;
		env->push_task = pulled_task;

		/* the lock must be dropped before waking the stopper */
		raw_spin_rq_unlock_irqrestore(src_rq, flags);

		trace_lb_active_migration(p, src_cpu, dst_cpu, "idle");

		stop_one_cpu_nowait(src_cpu, lb_active_migration_stop,
				env, per_cpu_ptr(lb_work, src_cpu));

		/* we did not pull any task here */
		return 0;
	}

	raw_spin_rq_unlock(src_rq);

	attach_one_task(dst_rq, pulled_task);

	local_irq_restore(flags);

	return 1;
}

static int compute_range(int cpu, int cl_idx, bool short_idle)
{
	int range = get_pe_list(cl_idx)->num_of_cpus;
	bool slowest_misfit = slowest_has_misfit(cpu);
	bool fastest = cpumask_test_cpu(cpu, cpu_fastest_mask());
	bool slowest = cpumask_test_cpu(cpu, cpu_slowest_mask());

	if (short_idle && !slowest_misfit)
		return 1;

	if ((range >= MAX_CLUSTER_NUM) && (fastest || slowest))
		range -= 1;

	return range;
}

#define NIB_AVG_IDLE_THRESHOLD	500000
static bool determine_short_idle(u64 avg_idle)
{
	u64 idle_threshold = NIB_AVG_IDLE_THRESHOLD;

	if (emstune_should_spread())
		idle_threshold >>= 2;

	return avg_idle < idle_threshold;
}
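/*
 * Helpers for the RT idle-pull path: pick the highest-priority pushable
 * RT task on a runqueue that is not currently running and is allowed to
 * run on the destination cpu.
 */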
static int lb_has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

static int lb_pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    cpumask_test_cpu(cpu, p->cpus_ptr))
		return 1;

	return 0;
}

static struct task_struct *lb_pick_highest_pushable_task(struct rq *rq, int cpu)
{
	struct plist_head *head = &rq->rt.pushable_tasks;
	struct task_struct *p;

	if (!lb_has_pushable_tasks(rq))
		return NULL;

	plist_for_each_entry(p, head, pushable_tasks) {
		if (lb_pick_rt_task(rq, p, cpu))
			return p;
	}

	return NULL;
}

#define MIN_RUNNABLE_THRESHOLD	(500000)
static bool lb_short_runnable(struct task_struct *p)
{
	return ktime_get_ns() - ems_last_waked(p) < MIN_RUNNABLE_THRESHOLD;
}

static int lb_idle_pull_tasks_rt(struct rq *dst_rq)
{
	struct rq *src_rq;
	struct task_struct *pulled_task = NULL;
	int cpu, src_cpu = -1, dst_cpu = dst_rq->cpu, ret = 0;

	if (sched_rt_runnable(dst_rq))
		return 0;

	for_each_possible_cpu(cpu) {
		if (lb_has_pushable_tasks(cpu_rq(cpu))) {
			src_cpu = cpu;
			break;
		}
	}

	if (src_cpu == -1)
		return 0;

	if (src_cpu == dst_cpu)
		return 0;

	src_rq = cpu_rq(src_cpu);

	double_lock_balance(dst_rq, src_rq);
	if (sched_rt_runnable(dst_rq))
		goto out;

	pulled_task = lb_pick_highest_pushable_task(src_rq, dst_cpu);
	if (!pulled_task)
		goto out;

	if (lb_short_runnable(pulled_task))
		goto out;

	deactivate_task(src_rq, pulled_task, 0);
	set_task_cpu(pulled_task, dst_cpu);
	activate_task(dst_rq, pulled_task, 0);
	ret = 1;

out:
	double_unlock_balance(dst_rq, src_rq);
	trace_lb_idle_pull_tasks_rt(src_cpu, pulled_task, ret);

	return ret;
}
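/*
 * Newly-idle balance: before @dst_rq goes idle, try to pull a runnable
 * RT task first and then a fair task from the busiest runqueue within
 * the cluster range computed by compute_range(). The result is reported
 * through @pulled_task.
 */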
void lb_newidle_balance(struct rq *dst_rq, struct rq_flags *rf,
		int *pulled_task, int *done)
{
	int dst_cpu = dst_rq->cpu;
	int cl_idx = ems_rq_cluster_idx(dst_rq);
	bool short_idle;
	struct rq *busiest = NULL;
	int i, range = 0, src_cpu = -1;

	dst_rq->misfit_task_load = 0;
	*done = true;

	short_idle = determine_short_idle(dst_rq->avg_idle);

	dst_rq->idle_stamp = rq_clock(dst_rq);

	/*
	 * Do not pull tasks towards !active CPUs...
	 */
	if (!cpu_active(dst_cpu))
		return;

	rq_unpin_lock(dst_rq, rf);

	/*
	 * There is a task waiting to run. No need to search for one.
	 * Return 0; the task will be enqueued when switching to idle.
	 */
	if (dst_rq->ttwu_pending)
		goto out;

	if (dst_rq->nr_running)
		goto out;

	if (lb_idle_pull_tasks_rt(dst_rq))
		goto out;

	/* check again; the rq lock may have been dropped during the RT balance */
	if (dst_rq->nr_running)
		goto out;

	if (!READ_ONCE(dst_rq->rd->overload))
		goto out;

	if (atomic_read(&dst_rq->nr_iowait) && short_idle)
		goto out;

	/* if the system is in a busy state */
	if (sysbusy_on_somac())
		goto out;

	raw_spin_rq_unlock(dst_rq);

	range = compute_range(dst_cpu, cl_idx, short_idle);

	for (i = 0; i < range; i++) {
		struct pe_list *pl = get_pe_list(cl_idx);

		if (!pl)
			break;

		/* if this rq has a task, stop the idle-pull */
		if (dst_rq->nr_running > 0)
			break;

		__lb_find_busiest_queue(dst_cpu, &(pl->cpus[i]), &busiest);
		if (busiest) {
			src_cpu = cpu_of(busiest);
			if (dst_cpu != src_cpu)
				*pulled_task = lb_idle_pull_tasks(dst_cpu, src_cpu);
			if (*pulled_task)
				break;
		}
	}

	raw_spin_rq_lock(dst_rq);

out:
	/*
	 * While browsing the domains, we released the rq lock, a task could
	 * have been enqueued in the meantime. Since we're not going idle,
	 * pretend we pulled a task.
	 */
	if (dst_rq->cfs.h_nr_running && !*pulled_task)
		*pulled_task = 1;

	/* Is there a task of a high priority class? */
	if (dst_rq->nr_running != dst_rq->cfs.h_nr_running)
		*pulled_task = -1;

	if (*pulled_task)
		dst_rq->idle_stamp = 0;

	rq_repin_lock(dst_rq, rf);

	trace_lb_newidle_balance(dst_cpu, src_cpu, *pulled_task, range, short_idle);
}

/*
 * Active migration to push the misfit task to a more capable cpu.
 */
void lb_tick(struct rq *src_rq)
{
	int src_cpu = src_rq->cpu, dst_cpu;
	struct task_struct *p = src_rq->curr;
	struct lb_env *env = per_cpu_ptr(lb_env, src_cpu);
	struct rq *dst_rq;
	unsigned long rq_flags;

	/* if the system is in a busy state */
	if (sysbusy_on_somac())
		return;

	/* if the src cpu is idle, we don't need to push the task */
	if (available_idle_cpu(src_cpu))
		return;

	/* if there is no misfit task on this cpu */
	if (!ems_rq_nr_misfited(src_rq))
		return;

	/* if the current task is not in the fair class */
	if (get_sched_class(p) != EMS_SCHED_FAIR)
		return;

	if (!ems_task_misfited(p))
		return;

	raw_spin_rq_lock_irqsave(src_rq, rq_flags);

	dst_cpu = __ems_select_task_rq_fair(p, src_cpu, 0, 0);

	/* stop the migration if there is no dst cpu or the dst cpu is not idle */
	if (dst_cpu < 0 || !available_idle_cpu(dst_cpu))
		goto out_unlock;

	/* stop the migration if the dst cpu has the same or lower capacity */
	if (src_cpu == dst_cpu ||
	    capacity_orig_of(dst_cpu) <= capacity_orig_of(src_cpu))
		goto out_unlock;

	if (src_rq->active_balance)
		goto out_unlock;

	dst_rq = cpu_rq(dst_cpu);

	src_rq->active_balance = 1;
	ems_rq_migrated(dst_rq) = true;
	env->src_rq = src_rq;
	env->dst_rq = dst_rq;
	env->push_task = p;

	trace_lb_active_migration(p, src_cpu, dst_cpu, "tick-balanced");

	raw_spin_rq_unlock_irqrestore(src_rq, rq_flags);

	stop_one_cpu_nowait(src_cpu, lb_active_migration_stop,
			env, per_cpu_ptr(lb_work, src_cpu));

	wake_up_if_idle(dst_cpu);

	return;

out_unlock:
	raw_spin_rq_unlock_irqrestore(src_rq, rq_flags);
	trace_lb_active_migration(p, src_cpu, dst_cpu, "tick-no-balanced");
}

void lb_init(void)
{
	lb_env = alloc_percpu(struct lb_env);
	lb_work = alloc_percpu(struct cpu_stop_work);
}