kernel_samsung_a53x/kernel/sched/ems/balance.c

#include <dt-bindings/soc/samsung/ems.h>
#include "../sched.h"
#include "ems.h"
#include <trace/hooks/sched.h>
#include <trace/events/ems.h>
#include <trace/events/ems_debug.h>

#define TINY_TASK_RATIO_SHIFT	3	/* 12.5% */
#define BUSY_GROUP_RATIO_SHIFT	2	/* 25% */
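
/*
 * Per-cpu active-migration context: lb_env carries the source/destination
 * runqueues and the task to push, lb_work is the cpu_stop_work item handed
 * to stop_one_cpu_nowait() when the stopper thread is kicked.
 */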
struct lb_env {
	struct rq *src_rq;
	struct rq *dst_rq;
	struct task_struct *push_task;
	u64 flags;
};

static struct lb_env __percpu *lb_env;
static struct cpu_stop_work __percpu *lb_work;
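
/*
 * Returns true if @dst_cpu is outside the slowest cluster and at least one
 * cpu in the slowest cluster currently has misfit tasks queued.
 */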
static bool slowest_has_misfit(int dst_cpu)
{
	int cpu;

	if (cpumask_test_cpu(dst_cpu, cpu_slowest_mask()))
		return false;

	for_each_cpu(cpu, cpu_slowest_mask())
		if (ems_rq_nr_misfited(cpu_rq(cpu)))
			return true;

	return false;
}
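
/*
 * Enqueue-time misfit bookkeeping: if @p does not fit the maximum capacity
 * of @rq's cpu, bump the rq misfit counter; otherwise clear the task's
 * misfit flag.
 */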
void lb_enqueue_misfit_task(struct task_struct *p, struct rq *rq)
{
	bool cur_misfit = !ems_task_fits_max_cap(p, cpu_of(rq));

	if (cur_misfit)
		ems_rq_update_nr_misfited(rq, true);
	else
		ems_task_misfited(p) = false;

	trace_lb_update_misfit(p, true, cur_misfit,
			cpu_of(rq), ems_rq_nr_misfited(rq), "enqueue");
}

void lb_dequeue_misfit_task(struct task_struct *p, struct rq *rq)
{
	ems_rq_update_nr_misfited(rq, false);

	trace_lb_update_misfit(p, true, true,
			cpu_of(rq), ems_rq_nr_misfited(rq), "dequeue");
}
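
/*
 * Re-evaluate the misfit state of @p and, when it has changed, update both
 * the per-task flag and the rq misfit counter.
 */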
void lb_update_misfit_status(struct task_struct *p,
			struct rq *rq, bool *need_update)
{
	bool old_misfit, cur_misfit;

	if (!p)
		return;

	old_misfit = ems_task_misfited(p);
	cur_misfit = !ems_task_fits_max_cap(p, rq->cpu);

	if (cur_misfit != old_misfit) {
		ems_task_misfited(p) = cur_misfit;
		ems_rq_update_nr_misfited(rq, cur_misfit);
	}

	trace_lb_update_misfit(p, old_misfit, cur_misfit,
			cpu_of(rq), ems_rq_nr_misfited(rq), "update");
}

void lb_nohz_balancer_kick(struct rq *rq, unsigned int *flag, int *done)
{
	*done = true;

	/*
	 * The tick-path migration already takes care of misfit tasks,
	 * so here we only kick nohz balancing when there are two or
	 * more runnable tasks on an overutilized cpu.
	 */
	if (rq->nr_running >= 2 && cpu_overutilized(rq->cpu)) {
		*flag = NOHZ_KICK_MASK;
		trace_lb_nohz_balancer_kick(rq);
	}
}
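
/*
 * Common migration veto checks: boosted tex / sysbusy-boosted tasks may not
 * move to a lower-capacity cpu, and the destination must be accepted by
 * ontime, by the ems binding mask and by the task's own cpus_ptr affinity.
 */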
static bool _lb_can_migrate_task(struct task_struct *p, int src_cpu, int dst_cpu)
{
	/* boosted task can't migrate to slower cpu */
	if (capacity_orig_of(dst_cpu) < capacity_orig_of(src_cpu)) {
		if (is_boosted_tex_task(p)) {
			trace_lb_can_migrate_task(p, dst_cpu, false, "tex");
			return false;
		}

		if (sysbusy_boost_task(p)) {
			trace_lb_can_migrate_task(p, dst_cpu, false, "sysbusy-boost");
			return false;
		}
	}

	if (!ontime_can_migrate_task(p, dst_cpu)) {
		trace_lb_can_migrate_task(p, dst_cpu, false, "ontime");
		return false;
	}

	if (!cpumask_test_cpu(dst_cpu, cpus_binding_mask(p))) {
		trace_lb_can_migrate_task(p, dst_cpu, false, "ems-binded");
		return false;
	}

	if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr)) {
		trace_lb_can_migrate_task(p, dst_cpu, false, "cpus-allowed");
		return false;
	}

	trace_lb_can_migrate_task(p, dst_cpu, true, "can-migrate");

	return true;
}

bool lb_busiest_queue_pre_condition(struct rq *rq, bool check_overutil)
{
	/* if there is no fair task, we can't balance */
	if (!rq->cfs.h_nr_running)
		return false;

	/* if the rq is already doing active balance, it will become less busy */
	if (rq->active_balance)
		return false;

	if (check_overutil && !cpu_overutilized(cpu_of(rq)))
		return false;

	return true;
}

/*
 * Called when the src rq has only one task to decide whether an
 * active balance is needed to move that task.
 */
bool lb_queue_need_active_mgt(struct rq *src, struct rq *dst)
{
	if (!ems_rq_nr_misfited(src))
		return false;

	if (!available_idle_cpu(cpu_of(dst)))
		return false;

	return true;
}
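
/*
 * A group counts as busy only when it has more runnable tasks than cpus and
 * its utilization plus a 25% margin (BUSY_GROUP_RATIO_SHIFT) reaches the
 * summed original capacity of the group's cpus.
 */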
bool lb_group_is_busy(struct cpumask *cpus,
			int nr_task, unsigned long util)
{
	unsigned long capacity = capacity_orig_of(cpumask_first(cpus));
	int nr_cpus = cpumask_weight(cpus);

	/*
	 * if there is an available cpu in the group,
	 * this group is not busy
	 */
	if (nr_task <= nr_cpus)
		return false;

	if ((util + (util >> BUSY_GROUP_RATIO_SHIFT)) < (capacity * nr_cpus))
		return false;

	return true;
}
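
/*
 * Find the busiest overutilized rq among faster @src_cpus.  Queues with
 * fewer than two cfs tasks, or exactly two where the running task is tiny
 * (below 1/8 of cluster capacity, TINY_TASK_RATIO_SHIFT), are skipped, and
 * no queue is reported unless the faster group as a whole is busy.
 */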
static void lb_find_busiest_faster_queue(int dst_cpu,
		struct cpumask *src_cpus, struct rq **busiest)
{
	int src_cpu, busiest_cpu = -1;
	struct cpumask candidate_mask;
	unsigned long util, busiest_util = 0;
	unsigned long util_sum = 0, nr_task_sum = 0;
	unsigned long tiny_task_util;

	tiny_task_util = capacity_orig_of(cpumask_first(src_cpus)) >> TINY_TASK_RATIO_SHIFT;

	cpumask_and(&candidate_mask, src_cpus, cpu_active_mask);
	cpumask_and(&candidate_mask, &candidate_mask, ecs_available_cpus());
	if (cpumask_empty(&candidate_mask))
		return;

	for_each_cpu(src_cpu, &candidate_mask) {
		struct rq *src_rq = cpu_rq(src_cpu);

		trace_lb_cpu_util(src_cpu, "faster");

		util = ml_cpu_util(src_cpu) + cpu_util_rt(src_rq);
		util_sum += util;
		nr_task_sum += src_rq->cfs.h_nr_running;

		if (!lb_busiest_queue_pre_condition(src_rq, true))
			continue;

		if (src_rq->cfs.h_nr_running < 2)
			continue;

		if (src_rq->cfs.h_nr_running == 2 &&
				ml_task_util(src_rq->curr) < tiny_task_util)
			continue;

		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = src_cpu;
	}

	/*
	 * Don't allow migrating to lower cluster unless
	 * this faster cluster is sufficiently loaded.
	 */
	if (!lb_group_is_busy(&candidate_mask, nr_task_sum, util_sum))
		return;

	if (busiest_cpu != -1)
		*busiest = cpu_rq(busiest_cpu);
}
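
/*
 * Find the busiest overutilized rq among slower @src_cpus.  A queue with a
 * single cfs task is only eligible when it holds a misfit task and the
 * destination cpu is idle (lb_queue_need_active_mgt()).
 */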
static void lb_find_busiest_slower_queue(int dst_cpu,
		struct cpumask *src_cpus, struct rq **busiest)
{
	int src_cpu, busiest_cpu = -1;
	unsigned long util, busiest_util = 0;

	for_each_cpu_and(src_cpu, src_cpus, cpu_active_mask) {
		struct rq *src_rq = cpu_rq(src_cpu);

		trace_lb_cpu_util(src_cpu, "slower");

		if (!lb_busiest_queue_pre_condition(src_rq, true))
			continue;

		if (src_rq->cfs.h_nr_running == 1 &&
				!lb_queue_need_active_mgt(src_rq, cpu_rq(dst_cpu)))
			continue;

		util = ml_cpu_util(src_cpu) + cpu_util_rt(src_rq);
		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = src_cpu;
	}

	if (busiest_cpu != -1)
		*busiest = cpu_rq(busiest_cpu);
}
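
/*
 * Find the highest-utilization rq among cpus of the same capacity.  Only
 * queues with at least two runnable tasks are considered and the
 * overutilization check is skipped.
 */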
static void lb_find_busiest_equivalent_queue(int dst_cpu,
		struct cpumask *src_cpus, struct rq **busiest)
{
	int src_cpu, busiest_cpu = -1;
	unsigned long util, busiest_util = 0;

	for_each_cpu_and(src_cpu, src_cpus, cpu_active_mask) {
		struct rq *src_rq = cpu_rq(src_cpu);

		trace_lb_cpu_util(src_cpu, "equal");

		if (!lb_busiest_queue_pre_condition(src_rq, false))
			continue;

		if (src_rq->nr_running < 2)
			continue;

		/* find highest util cpu */
		util = ml_cpu_util(src_cpu) + cpu_util_rt(src_rq);
		if (util < busiest_util)
			continue;

		busiest_util = util;
		busiest_cpu = src_cpu;
	}

	if (busiest_cpu != -1)
		*busiest = cpu_rq(busiest_cpu);
}

static void __lb_find_busiest_queue(int dst_cpu,
		struct cpumask *src_cpus, struct rq **busiest)
{
	unsigned long dst_capacity, src_capacity;

	src_capacity = capacity_orig_of(cpumask_first(src_cpus));
	dst_capacity = capacity_orig_of(dst_cpu);

	if (dst_capacity == src_capacity)
		lb_find_busiest_equivalent_queue(dst_cpu, src_cpus, busiest);
	else if (dst_capacity > src_capacity)
		lb_find_busiest_slower_queue(dst_cpu, src_cpus, busiest);
	else
		lb_find_busiest_faster_queue(dst_cpu, src_cpus, busiest);
}

void lb_find_busiest_queue(int dst_cpu, struct sched_group *group,
		struct cpumask *env_cpus, struct rq **busiest, int *done)
{
	int cpu = -1;
	struct cpumask src_cpus;

	*done = true;

	cpumask_and(&src_cpus, sched_group_span(group), env_cpus);

	/* if the group has only one cpu, there is no need to traverse it */
	if (cpumask_weight(&src_cpus) == 1)
		*busiest = cpu_rq(cpumask_first(&src_cpus));
	else
		__lb_find_busiest_queue(dst_cpu, &src_cpus, busiest);

	if (*busiest)
		cpu = cpu_of(*busiest);

	trace_lb_find_busiest_queue(dst_cpu, cpu, &src_cpus);
}

void lb_can_migrate_task(struct task_struct *p, int dst_cpu, int *can_migrate)
{
	int src_cpu = task_cpu(p);

	if (_lb_can_migrate_task(p, src_cpu, dst_cpu))
		return;

	*can_migrate = false;
}
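
/*
 * Stopper callback for active migration: detach the chosen task from the
 * source rq, clear the active_balance/migrated markers and, if the detach
 * succeeded, attach the task to the destination rq.
 */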
static int lb_active_migration_stop(void *data)
{
	struct lb_env *env = data;
	struct rq *src_rq = env->src_rq, *dst_rq = env->dst_rq;
	struct rq_flags rf;
	int ret;

	rq_lock_irq(src_rq, &rf);
	ret = detach_one_task(src_rq, dst_rq, env->push_task);
	src_rq->active_balance = 0;
	ems_rq_migrated(dst_rq) = false;
	rq_unlock(src_rq, &rf);

	if (ret)
		attach_one_task(dst_rq, env->push_task);

	local_irq_enable();

	return 0;
}

static bool lb_task_need_active_mgt(struct task_struct *p, int dst_cpu, int src_cpu)
{
	if (capacity_orig_of(dst_cpu) <= capacity_orig_of(src_cpu))
		return false;

	if (!ems_task_misfited(p))
		return false;

	return true;
}
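
/*
 * Newly-idle pull from @src_cpu to @dst_cpu: walk the source cfs_tasks list
 * backwards and detach the first migratable task that is not running, or
 * fall back to active migration via the stopper when the running task is a
 * misfit that needs a bigger cpu.  Returns 1 only when a task was pulled
 * directly.
 */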
static int lb_idle_pull_tasks(int dst_cpu, int src_cpu)
{
	struct lb_env *env = per_cpu_ptr(lb_env, src_cpu);
	struct rq *dst_rq = cpu_rq(dst_cpu);
	struct rq *src_rq = cpu_rq(src_cpu);
	struct task_struct *pulled_task = NULL, *p;
	bool active_balance = false;
	unsigned long flags;

	raw_spin_rq_lock_irqsave(src_rq, flags);
	list_for_each_entry_reverse(p, &src_rq->cfs_tasks, se.group_node) {
		if (!_lb_can_migrate_task(p, src_cpu, dst_cpu))
			continue;

		if (task_running(src_rq, p)) {
			if (lb_task_need_active_mgt(p, dst_cpu, src_cpu)) {
				pulled_task = p;
				active_balance = true;
				break;
			}
			continue;
		}

		if (can_migrate(p, dst_cpu)) {
			update_rq_clock(src_rq);
			detach_task(src_rq, dst_rq, p);
			pulled_task = p;
			break;
		}
	}

	if (!pulled_task) {
		raw_spin_rq_unlock_irqrestore(src_rq, flags);
		return 0;
	}

	if (active_balance) {
		if (src_rq->active_balance) {
			raw_spin_rq_unlock_irqrestore(src_rq, flags);
			return 0;
		}

		src_rq->active_balance = 1;
		ems_rq_migrated(dst_rq) = true;
		env->src_rq = src_rq;
		env->dst_rq = dst_rq;
		env->push_task = pulled_task;

		/* lock must be dropped before waking the stopper */
		raw_spin_rq_unlock_irqrestore(src_rq, flags);

		trace_lb_active_migration(p, src_cpu, dst_cpu, "idle");
		stop_one_cpu_nowait(src_cpu, lb_active_migration_stop,
				env, per_cpu_ptr(lb_work, src_cpu));

		/* we did not pull any task here */
		return 0;
	}

	raw_spin_rq_unlock(src_rq);

	attach_one_task(dst_rq, pulled_task);

	local_irq_restore(flags);

	return 1;
}
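
/*
 * Number of pe_list levels to scan on newly-idle balance: just one when the
 * cpu has been idle only briefly and the slowest cluster has no misfit task,
 * otherwise the full pe_list size, trimmed by one for cpus in the fastest or
 * slowest cluster when it covers every cluster.
 */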
static int compute_range(int cpu, int cl_idx, bool short_idle)
{
	int range = get_pe_list(cl_idx)->num_of_cpus;
	bool slowest_misfit = slowest_has_misfit(cpu);
	bool fastest = cpumask_test_cpu(cpu, cpu_fastest_mask());
	bool slowest = cpumask_test_cpu(cpu, cpu_slowest_mask());

	if (short_idle && !slowest_misfit)
		return 1;

	if ((range >= MAX_CLUSTER_NUM) && (fastest || slowest))
		range -= 1;

	return range;
}

#define NIB_AVG_IDLE_THRESHOLD	500000
static bool determine_short_idle(u64 avg_idle)
{
	u64 idle_threshold = NIB_AVG_IDLE_THRESHOLD;

	if (emstune_should_spread())
		idle_threshold >>= 2;

	return avg_idle < idle_threshold;
}

static int lb_has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

static int lb_pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    cpumask_test_cpu(cpu, p->cpus_ptr))
		return 1;

	return 0;
}

static struct task_struct *lb_pick_highest_pushable_task(struct rq *rq, int cpu)
{
	struct plist_head *head = &rq->rt.pushable_tasks;
	struct task_struct *p;

	if (!lb_has_pushable_tasks(rq))
		return NULL;

	plist_for_each_entry(p, head, pushable_tasks) {
		if (lb_pick_rt_task(rq, p, cpu))
			return p;
	}

	return NULL;
}

#define MIN_RUNNABLE_THRESHOLD	(500000)
static bool lb_short_runnable(struct task_struct *p)
{
	return ktime_get_ns() - ems_last_waked(p) < MIN_RUNNABLE_THRESHOLD;
}
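
/*
 * Pull one pushable RT task onto the newly-idle @dst_rq.  The first cpu
 * found with pushable tasks is used as the source, and tasks that became
 * runnable less than MIN_RUNNABLE_THRESHOLD ago are skipped.
 */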
static int lb_idle_pull_tasks_rt(struct rq *dst_rq)
{
	struct rq *src_rq;
	struct task_struct *pulled_task = NULL;
	int cpu, src_cpu = -1, dst_cpu = dst_rq->cpu, ret = 0;

	if (sched_rt_runnable(dst_rq))
		return 0;

	for_each_possible_cpu(cpu) {
		if (lb_has_pushable_tasks(cpu_rq(cpu))) {
			src_cpu = cpu;
			break;
		}
	}

	if (src_cpu == -1)
		return 0;

	if (src_cpu == dst_cpu)
		return 0;

	src_rq = cpu_rq(src_cpu);

	double_lock_balance(dst_rq, src_rq);
	if (sched_rt_runnable(dst_rq))
		goto out;

	pulled_task = lb_pick_highest_pushable_task(src_rq, dst_cpu);
	if (!pulled_task)
		goto out;

	if (lb_short_runnable(pulled_task))
		goto out;

	deactivate_task(src_rq, pulled_task, 0);
	set_task_cpu(pulled_task, dst_cpu);
	activate_task(dst_rq, pulled_task, 0);
	ret = 1;

out:
	double_unlock_balance(dst_rq, src_rq);
	trace_lb_idle_pull_tasks_rt(src_cpu, pulled_task, ret);

	return ret;
}
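
/*
 * Newly-idle balance: try an RT pull first, then walk the pe_list levels
 * computed by compute_range() and attempt an idle pull from the busiest
 * queue found at each level, bailing out as soon as work shows up on this
 * rq.
 */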
void lb_newidle_balance(struct rq *dst_rq, struct rq_flags *rf,
		int *pulled_task, int *done)
{
	int dst_cpu = dst_rq->cpu;
	int cl_idx = ems_rq_cluster_idx(dst_rq);
	bool short_idle;
	struct rq *busiest = NULL;
	int i, range = 0, src_cpu = -1;

	dst_rq->misfit_task_load = 0;
	*done = true;

	short_idle = determine_short_idle(dst_rq->avg_idle);

	dst_rq->idle_stamp = rq_clock(dst_rq);

	/*
	 * Do not pull tasks towards !active CPUs...
	 */
	if (!cpu_active(dst_cpu))
		return;

	rq_unpin_lock(dst_rq, rf);

	/*
	 * There is a task waiting to run. No need to search for one;
	 * it will be enqueued when we switch away from idle.
	 */
	if (dst_rq->ttwu_pending)
		goto out;

	if (dst_rq->nr_running)
		goto out;

	if (lb_idle_pull_tasks_rt(dst_rq))
		goto out;

	/* check again: the rq lock was dropped during the RT balance */
	if (dst_rq->nr_running)
		goto out;

	if (!READ_ONCE(dst_rq->rd->overload))
		goto out;

	if (atomic_read(&dst_rq->nr_iowait) && short_idle)
		goto out;

	/* if the system is in a busy state */
	if (sysbusy_on_somac())
		goto out;

	raw_spin_rq_unlock(dst_rq);

	range = compute_range(dst_cpu, cl_idx, short_idle);
	for (i = 0; i < range; i++) {
		struct pe_list *pl = get_pe_list(cl_idx);

		if (!pl)
			break;

		/* if this rq has a task, stop the idle pull */
		if (dst_rq->nr_running > 0)
			break;

		__lb_find_busiest_queue(dst_cpu, &(pl->cpus[i]), &busiest);
		if (busiest) {
			src_cpu = cpu_of(busiest);
			if (dst_cpu != src_cpu)
				*pulled_task = lb_idle_pull_tasks(dst_cpu, src_cpu);
			if (*pulled_task)
				break;
		}
	}
	raw_spin_rq_lock(dst_rq);

out:
	/*
	 * While browsing the domains, we released the rq lock, a task could
	 * have been enqueued in the meantime. Since we're not going idle,
	 * pretend we pulled a task.
	 */
	if (dst_rq->cfs.h_nr_running && !*pulled_task)
		*pulled_task = 1;

	/* Is there a task of a high priority class? */
	if (dst_rq->nr_running != dst_rq->cfs.h_nr_running)
		*pulled_task = -1;

	if (*pulled_task)
		dst_rq->idle_stamp = 0;

	rq_repin_lock(dst_rq, rf);

	trace_lb_newidle_balance(dst_cpu, src_cpu, *pulled_task, range, short_idle);
}

/*
 * Active migration from the tick to push the running misfit task
 * to a bigger, idle cpu.
 */
void lb_tick(struct rq *src_rq)
{
	int src_cpu = src_rq->cpu, dst_cpu;
	struct task_struct *p = src_rq->curr;
	struct lb_env *env = per_cpu_ptr(lb_env, src_cpu);
	struct rq *dst_rq;
	unsigned long rq_flags;

	/* if the system is in a busy state */
	if (sysbusy_on_somac())
		return;

	/* if the src cpu is idle, we don't need to push the task */
	if (available_idle_cpu(src_cpu))
		return;

	/* if there is no misfit task on this cpu */
	if (!ems_rq_nr_misfited(src_rq))
		return;

	/* if the current task is not a fair-class task */
	if (get_sched_class(p) != EMS_SCHED_FAIR)
		return;

	if (!ems_task_misfited(p))
		return;

	raw_spin_rq_lock_irqsave(src_rq, rq_flags);

	dst_cpu = __ems_select_task_rq_fair(p, src_cpu, 0, 0);

	/* stop migration if there is no dst cpu or the dst cpu is not idle */
	if (dst_cpu < 0 || !available_idle_cpu(dst_cpu))
		goto out_unlock;

	/* stop migration if the dst cpu has the same or lower capacity */
	if (src_cpu == dst_cpu || capacity_orig_of(dst_cpu) <= capacity_orig_of(src_cpu))
		goto out_unlock;

	if (src_rq->active_balance)
		goto out_unlock;

	dst_rq = cpu_rq(dst_cpu);
	src_rq->active_balance = 1;
	ems_rq_migrated(dst_rq) = true;
	env->src_rq = src_rq;
	env->dst_rq = dst_rq;
	env->push_task = p;

	trace_lb_active_migration(p, src_cpu, dst_cpu, "tick-balanced");
	raw_spin_rq_unlock_irqrestore(src_rq, rq_flags);

	stop_one_cpu_nowait(src_cpu, lb_active_migration_stop,
			env, per_cpu_ptr(lb_work, src_cpu));

	wake_up_if_idle(dst_cpu);

	return;

out_unlock:
	raw_spin_rq_unlock_irqrestore(src_rq, rq_flags);
	trace_lb_active_migration(p, src_cpu, dst_cpu, "tick-no-balanced");
}
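
/* Allocate the per-cpu active-migration context and stopper work items. */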
void lb_init(void)
{
	lb_env = alloc_percpu(struct lb_env);
	lb_work = alloc_percpu(struct cpu_stop_work);
}