kernel_samsung_a53x/kernel/sched/ems/core.c
/*
* Exynos Mobile Scheduler CPU selection
*
* Copyright (C) 2020 Samsung Electronics Co., Ltd
*/
#include <dt-bindings/soc/samsung/ems.h>
#include "../sched.h"
#include "ems.h"
#include <trace/events/ems.h>
#include <trace/events/ems_debug.h>
#define INVALID_CPU -1
#define cpu_selected(cpu) ((cpu) > INVALID_CPU)
/******************************************************************************
* TEX (Task EXpress) *
******************************************************************************/
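/*
 * TEX tasks are queued on a per-rq "qjump" list and picked ahead of the
 * normal CFS vruntime order. Each task gets TEX_WINDOW_COUNT windows of
 * TEX_WINDOW runtime; once those chances are spent, it drops to NOT_TEX.
 */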
struct {
int enabled[CGROUP_COUNT];
int prio;
} tex;
bool is_boosted_tex_task(struct task_struct *p)
{
	if (ems_boosted_tex(p))
		return true;

	if (emstune_get_cur_level() == 2 && cpuctl_task_group_idx(p) == CGROUP_TOPAPP) {
		/* RenderThread and zygote ("main") boost in top-app */
		if (ems_render(p) == 1 || strcmp(p->comm, "main") == 0)
			return true;
	}
return false;
}
static bool is_binder_tex_task(struct task_struct *p)
{
return ems_binder_task(p);
}
static bool is_prio_tex_task(struct task_struct *p)
{
int group_idx, sched_class;
group_idx = cpuctl_task_group_idx(p);
	if (!tex.enabled[group_idx])
		return false;

	sched_class = get_sched_class(p);
	if (sched_class != EMS_SCHED_FAIR)
		return false;
return p->prio <= tex.prio;
}
static bool is_expired_tex(struct task_struct *p)
{
return ems_tex_chances(p) <= 0;
}
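/*
 * Classify @p: an expired task is NOT_TEX regardless of class; otherwise
 * the strongest matching class wins (boosted > binder > prio).
 */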
int get_tex_level(struct task_struct *p)
{
if (is_expired_tex(p))
return NOT_TEX;
if (is_boosted_tex_task(p))
return BOOSTED_TEX;
if (is_binder_tex_task(p))
return BINDER_TEX;
if (is_prio_tex_task(p))
return PRIO_TEX;
return NOT_TEX;
}
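/*
 * Insert @p into @cpu's qjump list, which is kept sorted by ascending
 * ems_tex_level(). With @preempt set, @p is placed ahead of entries at
 * the same level instead of behind them.
 */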
static void tex_insert_to_qjump_list(struct task_struct *p, int cpu, bool preempt)
{
struct list_head *qjump_list = ems_qjump_list(cpu_rq(cpu)), *node;
struct task_struct *qjump_task;
list_for_each(node, qjump_list) {
qjump_task = ems_qjump_list_entry(node);
if (ems_tex_level(p) < ems_tex_level(qjump_task))
break;
else if ((ems_tex_level(p) == ems_tex_level(qjump_task)) && preempt)
break;
}
list_add(ems_qjump_node(p), node->prev);
trace_tex_insert_to_qjump_list(p);
}
static void tex_remove_from_qjump_list(struct task_struct *p)
{
list_del_init(ems_qjump_node(p));
trace_tex_remove_from_qjump_list(p);
}
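/*
 * Enqueue-side bookkeeping: track the per-rq prio-TEX task count and, if
 * @p currently qualifies as TEX, add it to the qjump list. The runtime
 * baseline is re-armed only when no window runtime has accumulated yet.
 */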
void tex_enqueue_task(struct task_struct *p, int cpu)
{
struct rq *rq = cpu_rq(cpu);
int tex_level = get_tex_level(p);
if (get_sched_class(p) != EMS_SCHED_FAIR)
return;
ems_prio_tex(p) = is_prio_tex_task(p);
if (ems_prio_tex(p))
ems_rq_nr_prio_tex(rq) += 1;
if (tex_level == NOT_TEX)
return;
ems_tex_level(p) = tex_level;
tex_insert_to_qjump_list(p, cpu, task_running(rq, p));
if (!ems_tex_runtime(p))
ems_tex_last_update(p) = p->se.sum_exec_runtime;
}
void tex_dequeue_task(struct task_struct *p, int cpu)
{
struct rq *rq = cpu_rq(cpu);
bool is_tex = !list_empty(ems_qjump_node(p)) && ems_qjump_node(p)->next;
if (get_sched_class(p) != EMS_SCHED_FAIR)
return;
if (ems_prio_tex(p))
ems_rq_nr_prio_tex(rq) -= 1;
if (is_tex) {
tex_remove_from_qjump_list(p);
ems_tex_level(p) = NOT_TEX;
}
if (p->state != TASK_RUNNING) {
ems_tex_runtime(p) = 0;
ems_tex_chances(p) = TEX_WINDOW_COUNT;
}
}
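/*
 * Pick-next override: if this rq's qjump list is non-empty, repick its
 * first entry (and its sched_entity) instead of CFS's vruntime choice.
 */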
void tex_replace_next_task_fair(struct rq *rq, struct task_struct **p_ptr,
struct sched_entity **se_ptr, bool *repick,
bool simple, struct task_struct *prev)
{
struct task_struct *p = NULL;
if (list_empty(ems_qjump_list(rq)))
return;
p = ems_qjump_first_entry(ems_qjump_list(rq));
*p_ptr = p;
*se_ptr = &p->se;
*repick = true;
trace_tex_qjump_pick_next_task(p);
}
static unsigned int get_idle_exit_latency(struct rq *rq)
{
struct cpuidle_state *idle;
unsigned int exit_latency;
rcu_read_lock();
idle = idle_get_state(rq);
exit_latency = idle ? idle->exit_latency : 0;
rcu_read_unlock();
return exit_latency;
}
static bool is_perf_task(struct task_struct *p)
{
return cpuctl_task_group_idx(p) == CGROUP_TOPAPP;
}
static bool can_sync_to_this_cpu(struct tp_env *env, int this_cpu)
{
if (uclamp_latency_sensitive(env->p))
return false;
if (is_perf_task(env->p) && is_perf_task(cpu_rq(this_cpu)->curr))
return false;
if (!cpumask_test_cpu(this_cpu, &env->cpus_allowed))
return false;
if (capacity_cpu_orig(this_cpu) < env->base_cap)
return false;
return true;
}
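/*
 * Select a single fit cpu for a BOOSTED_TEX task: honour sync wakeups to
 * this cpu when possible, otherwise walk the pe_list preferring the
 * shallowest idle cpu and falling back to the busy cpu with the most
 * spare capacity. Cpus already targeted by a migration or running
 * another BOOSTED_TEX task are skipped.
 */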
static int tex_boosted_fit_cpus(struct tp_env *env)
{
struct pe_list *pl = get_pe_list(env->init_index);
int index, cpu = smp_processor_id(), prev_cpu = task_cpu(env->p);
int max_spare_cap_cpu = -1, shallowest_idle_cpu = -1;
long spare_cap, max_spare_cap = LONG_MIN, idle_max_spare_cap = 0;
unsigned int exit_latency, min_exit_latency = UINT_MAX;
unsigned long cpu_util;
cpumask_clear(&env->fit_cpus);
if (env->sync && can_sync_to_this_cpu(env, cpu)) {
cpumask_set_cpu(cpu, &env->fit_cpus);
goto out;
}
for (index = 0; index < pl->num_of_cpus; index++) {
for_each_cpu_and(cpu, &pl->cpus[index], &env->cpus_allowed) {
cpu_util = ml_cpu_util_without(cpu, env->p);
cpu_util = min(cpu_util, capacity_cpu_orig(cpu));
spare_cap = capacity_cpu_orig(cpu) - cpu_util;
if (ems_rq_migrated(cpu_rq(cpu)))
continue;
if (get_tex_level(cpu_rq(cpu)->curr) == BOOSTED_TEX)
continue;
if (available_idle_cpu(cpu)) {
exit_latency = get_idle_exit_latency(cpu_rq(cpu));
if (exit_latency > min_exit_latency)
continue;
				if (exit_latency == min_exit_latency) {
					/*
					 * On a latency tie, keep prev_cpu if
					 * already chosen and only switch to a
					 * candidate with strictly more spare
					 * capacity.
					 */
					if ((shallowest_idle_cpu == prev_cpu) ||
					    (spare_cap <= idle_max_spare_cap))
						continue;
				}
shallowest_idle_cpu = cpu;
idle_max_spare_cap = spare_cap;
min_exit_latency = exit_latency;
}
else {
if (spare_cap < max_spare_cap)
continue;
max_spare_cap_cpu = cpu;
max_spare_cap = spare_cap;
}
}
if ((max_spare_cap_cpu != -1) || (shallowest_idle_cpu != -1))
break;
}
max_spare_cap_cpu = (shallowest_idle_cpu != -1) ? shallowest_idle_cpu : max_spare_cap_cpu;
if (cpu_selected(max_spare_cap_cpu)) {
cpumask_set_cpu(max_spare_cap_cpu, &env->fit_cpus);
env->reason_of_selection = FAIR_EXPRESS;
}
out:
trace_tex_boosted_fit_cpus(env->p, env->sync, &env->fit_cpus);
return cpumask_weight(&env->fit_cpus);
}
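/*
 * Consume one TEX chance per fully elapsed TEX_WINDOW of runtime; the
 * sub-window remainder is carried over until the chances run out.
 */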
static void tex_adjust_window(struct task_struct *p)
{
u64 remainder;
int nr_expired = div64_u64_rem(ems_tex_runtime(p), TEX_WINDOW, &remainder);
if (nr_expired < ems_tex_chances(p)) {
ems_tex_chances(p) -= nr_expired;
ems_tex_runtime(p) = remainder;
}
else {
ems_tex_chances(p) = 0;
ems_tex_runtime(p) = 0;
}
}
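/*
 * Charge @p's recent execution time to its current TEX window. When the
 * window expires, the task is re-queued on the qjump list if it still
 * has chances left, or demoted to NOT_TEX otherwise.
 */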
void tex_update_stats(struct rq *rq, struct task_struct *p)
{
s64 delta, now = p->se.sum_exec_runtime;
lockdep_assert_held(&rq->lock);
delta = now - ems_tex_last_update(p);
if (delta < 0)
delta = 0;
ems_tex_runtime(p) += delta;
ems_tex_last_update(p) = now;
if (ems_tex_runtime(p) < TEX_WINDOW) {
trace_tex_update_stats(p, "stay");
return;
}
/* current window is expired here */
tex_adjust_window(p);
tex_remove_from_qjump_list(p);
if (ems_tex_chances(p) > 0) {
tex_insert_to_qjump_list(p, rq->cpu, false);
trace_tex_update_stats(p, "re-queue");
}
else {
ems_tex_level(p) = NOT_TEX;
trace_tex_update_stats(p, "remove");
}
}
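/*
 * Wakeup-preemption hook: a TEX waker preempts a non-TEX current task;
 * if current is itself TEX, its window is updated first and preemption
 * is ignored only while it remains at the head of the qjump list.
 */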
void tex_check_preempt_wakeup(struct rq *rq, struct task_struct *p,
bool *preempt, bool *ignore)
{
struct task_struct *curr = rq->curr;
bool is_p_tex = !list_empty(ems_qjump_node(p)) && ems_qjump_node(p)->next;
bool is_curr_tex = !list_empty(ems_qjump_node(curr)) && ems_qjump_node(curr)->next;
if (!is_curr_tex && !is_p_tex)
return;
if (!is_curr_tex && is_p_tex) {
*preempt = true;
goto out;
}
tex_update_stats(rq, curr);
if (curr == ems_qjump_first_entry(ems_qjump_list(rq)))
*ignore = true;
else
*preempt = true;
out:
trace_tex_check_preempt_wakeup(curr, p, *preempt, *ignore);
}
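/*
 * Update the running task's TEX window under the rq lock and force a
 * reschedule if it is no longer at the head of the qjump list while
 * other fair tasks are runnable.
 */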
void tex_update(struct rq *rq)
{
struct task_struct *curr = rq->curr;
bool is_tex;
if (get_sched_class(curr) != EMS_SCHED_FAIR)
return;
raw_spin_lock(&rq->lock);
is_tex = !list_empty(ems_qjump_node(curr)) && ems_qjump_node(curr)->next;
if (!is_tex) {
raw_spin_unlock(&rq->lock);
return;
}
tex_update_stats(rq, curr);
if ((curr != ems_qjump_first_entry(ems_qjump_list(rq))) && (rq->cfs.h_nr_running > 1))
resched_curr(rq);
raw_spin_unlock(&rq->lock);
}
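/*
 * A yielding TEX task is removed from the qjump list and has its level,
 * runtime and chances reset to their initial state.
 */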
void tex_do_yield(struct task_struct *p)
{
bool is_tex = !list_empty(ems_qjump_node(p)) && ems_qjump_node(p)->next;
if (get_sched_class(p) != EMS_SCHED_FAIR)
return;
if (!is_tex)
return;
tex_remove_from_qjump_list(p);
ems_tex_level(p) = NOT_TEX;
ems_tex_runtime(p) = 0;
ems_tex_chances(p) = TEX_WINDOW_COUNT;
}
static int tex_emstune_notifier_call(struct notifier_block *nb,
unsigned long val, void *v)
{
struct emstune_set *cur_set = (struct emstune_set *)v;
int i;
for (i = 0; i < CGROUP_COUNT; i++)
tex.enabled[i] = cur_set->tex.enabled[i];
tex.prio = cur_set->tex.prio;
return NOTIFY_OK;
}
static struct notifier_block tex_emstune_notifier = {
.notifier_call = tex_emstune_notifier_call,
};
void tex_task_init(struct task_struct *p)
{
INIT_LIST_HEAD(ems_qjump_node(p));
ems_tex_level(p) = NOT_TEX;
ems_tex_last_update(p) = 0;
ems_tex_runtime(p) = 0;
ems_tex_chances(p) = TEX_WINDOW_COUNT;
ems_prio_tex(p) = 0;
ems_boosted_tex(p) = 0;
ems_binder_task(p) = 0;
}
static void tex_init(void)
{
int i;
for (i = 0; i < CGROUP_COUNT; i++)
tex.enabled[i] = 0;
tex.prio = 0;
emstune_register_notifier(&tex_emstune_notifier);
}
/******************************************************************************
* cpus_binding *
******************************************************************************/
struct {
unsigned long target_sched_class;
struct cpumask mask[CGROUP_COUNT];
} cpus_binding;
const struct cpumask *cpus_binding_mask(struct task_struct *p)
{
int group_idx;
int sched_class;
sched_class = get_sched_class(p);
if (!(cpus_binding.target_sched_class & sched_class))
return cpu_active_mask;
group_idx = cpuctl_task_group_idx(p);
if (unlikely(cpumask_empty(&cpus_binding.mask[group_idx])))
return cpu_active_mask;
return &cpus_binding.mask[group_idx];
}
static int cpus_binding_emstune_notifier_call(struct notifier_block *nb,
unsigned long val, void *v)
{
struct emstune_set *cur_set = (struct emstune_set *)v;
int i;
cpus_binding.target_sched_class =
cur_set->cpus_binding.target_sched_class;
for (i = 0; i < CGROUP_COUNT; i++)
cpumask_copy(&cpus_binding.mask[i],
&cur_set->cpus_binding.mask[i]);
return NOTIFY_OK;
}
static struct notifier_block cpus_binding_emstune_notifier = {
.notifier_call = cpus_binding_emstune_notifier_call,
};
static void cpus_binding_init(void)
{
int i;
cpus_binding.target_sched_class = 0;
for (i = 0; i < CGROUP_COUNT; i++)
cpumask_setall(&cpus_binding.mask[i]);
emstune_register_notifier(&cpus_binding_emstune_notifier);
}
/******************************************************************************
* cpu weight / idle weight *
******************************************************************************/
static int active_weight[CGROUP_COUNT][VENDOR_NR_CPUS];
static int idle_weight[CGROUP_COUNT][VENDOR_NR_CPUS];
static int cpu_weight_get(struct task_struct *p, int cpu, int idle)
{
int group_idx = cpuctl_task_group_idx(p);
	if (idle == NON_IDLE)
		return active_weight[group_idx][cpu];

	return idle_weight[group_idx][cpu];
}
static int cpu_weight_emstune_notifier_call(struct notifier_block *nb,
unsigned long val, void *v)
{
struct emstune_set *cur_set = (struct emstune_set *)v;
int i, cpu;
for (i = 0; i < CGROUP_COUNT; i++) {
for_each_possible_cpu(cpu) {
active_weight[i][cpu] = cur_set->active_weight.ratio[i][cpu];
idle_weight[i][cpu] = cur_set->idle_weight.ratio[i][cpu];
}
}
return NOTIFY_OK;
}
static struct notifier_block cpu_weight_emstune_notifier = {
.notifier_call = cpu_weight_emstune_notifier_call,
};
#define DEFAULT_WEIGHT (100)
static void cpu_weight_init(void)
{
int i, cpu;
for (i = 0; i < CGROUP_COUNT; i++) {
for_each_possible_cpu(cpu) {
active_weight[i][cpu] = DEFAULT_WEIGHT;
idle_weight[i][cpu] = DEFAULT_WEIGHT;
}
}
emstune_register_notifier(&cpu_weight_emstune_notifier);
}
/******************************************************************************
* best energy cpu selection *
******************************************************************************/
static LIST_HEAD(csd_head);
struct cs_domain __percpu **__pcpu_csd;
#define pcpu_csd(cpu) (*per_cpu_ptr(__pcpu_csd, cpu))
static int get_idle_state(int cpu)
{
struct cpuidle_state *state;
int ret = NON_IDLE;
rcu_read_lock();
state = idle_get_state(cpu_rq(cpu));
if (!state)
goto out;
	/* WFI is the shallowest state; anything deeper counts as C2 */
	if (!strcmp(state->name, "WFI"))
		ret = IDLE_C1;
	else
		ret = IDLE_C2;
out:
rcu_read_unlock();
return ret;
}
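/*
 * Snapshot task and per-cpu utilization (with and without @p), runnable
 * and load averages, nr_running and idle state into the tp_env, so the
 * selection logic below works on one consistent view.
 */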
static void take_util_snapshot(struct tp_env *env)
{
int cpu;
	/*
	 * Don't let task util be taken as 0: keeping a non-zero util lets
	 * the energy model apply the task's active power when computing
	 * energy.
	 */
env->task_util = ml_task_util_est(env->p);
env->task_util_clamped = ml_uclamp_task_util(env->p);
env->task_load_avg = ml_task_load_avg(env->p);
/* fill cpu util */
for_each_cpu(cpu, cpu_active_mask) {
struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq = &rq->cfs;
unsigned long capacity = capacity_cpu_orig(cpu);
unsigned long extra_util;
env->cpu_stat[cpu].rt_util = cpu_util_rt(rq);
env->cpu_stat[cpu].dl_util = cpu_util_dl(rq);
extra_util = env->cpu_stat[cpu].rt_util + env->cpu_stat[cpu].dl_util;
env->cpu_stat[cpu].util_wo = min(ml_cpu_util_est_without(cpu, env->p) + extra_util,
capacity);
env->cpu_stat[cpu].util_with = min(ml_cpu_util_est_with(env->p, cpu) + extra_util,
capacity);
env->cpu_stat[cpu].runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
env->cpu_stat[cpu].load_avg = ml_cpu_load_avg(cpu);
if (cpu == env->prev_cpu)
env->cpu_stat[cpu].util = env->cpu_stat[cpu].util_with;
else
env->cpu_stat[cpu].util = env->cpu_stat[cpu].util_wo;
env->cpu_stat[cpu].nr_running = rq->nr_running;
env->cpu_stat[cpu].idle = get_idle_state(cpu);
env->weight[cpu] = cpu_weight_get(env->p, cpu, env->cpu_stat[cpu].idle);
trace_ems_take_util_snapshot(cpu, env);
}
}
static unsigned long prev_cpu_advantage(unsigned long cpu_util, unsigned long task_util)
{
long util = cpu_util;
/*
* subtract cpu util by 12.5% of task util to give advantage to
* prev cpu when computing energy.
*/
util -= (task_util >> 3);
return max(util, (long)0);
}
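/*
 * Estimate total system energy with @p placed on @dst_cpu: project the
 * next capacity for each cs_domain, fill the per-cpu energy states, and
 * discount the utilization slightly when @dst_cpu is the task's previous
 * cpu (see prev_cpu_advantage()) before summing.
 */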
static unsigned int compute_system_energy(struct tp_env *env, int dst_cpu,
struct energy_backup *backup)
{
struct cs_domain *csd;
struct energy_state states[VENDOR_NR_CPUS] = { 0, };
list_for_each_entry(csd, &csd_head, list) {
unsigned long capacity;
capacity = cpufreq_get_next_cap(env, &csd->cpus, dst_cpu);
et_fill_energy_state(env, &csd->cpus, states, capacity, dst_cpu);
}
if (env->prev_cpu == dst_cpu)
states[dst_cpu].util = prev_cpu_advantage(states[dst_cpu].util, env->task_util);
return et_compute_system_energy(&csd_head, states, dst_cpu, backup);
}
static int find_min_util_cpu(struct tp_env *env, const struct cpumask *mask, bool among_idle)
{
int cpu, min_cpu = INVALID_CPU;
unsigned long min_util = ULONG_MAX;
for_each_cpu_and(cpu, &env->fit_cpus, mask) {
unsigned long cpu_util;
		/*
		 * If among_idle is true, consider only idle cpus and skip
		 * the non-idle ones.
		 */
if (among_idle && !env->cpu_stat[cpu].idle)
continue;
cpu_util = env->cpu_stat[cpu].util_with;
if (cpu == env->prev_cpu)
cpu_util = prev_cpu_advantage(cpu_util, env->task_util);
if (cpu_util < min_util) {
min_util = cpu_util;
min_cpu = cpu;
}
}
return min_cpu;
}
static int __find_energy_cpu(struct tp_env *env, const struct cpumask *candidates)
{
struct energy_backup backup[VENDOR_NR_CPUS] = { 0, };
int cpu, energy_cpu = INVALID_CPU, min_util = INT_MAX;
unsigned int min_energy = UINT_MAX;
for_each_cpu(cpu, candidates) {
unsigned int energy;
int cpu_util = env->cpu_stat[cpu].util_with;
energy = compute_system_energy(env, cpu, backup);
trace_ems_compute_system_energy(env->p, candidates, cpu, energy);
if (energy > min_energy)
continue;
else if (energy < min_energy)
goto found_min_util_cpu;
if (cpu_util >= min_util)
continue;
found_min_util_cpu:
/*
* energy_cpu has the lowest energy,
* or the lowest util among the same energy.
*/
min_energy = energy;
min_util = cpu_util;
energy_cpu = cpu;
}
return energy_cpu;
}
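/*
 * Among the slowest-cluster fit cpus, pick the idle cpu with the lowest
 * utilization, breaking ties toward the shallower idle state.
 */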
static int get_best_idle_cpu(struct tp_env *env)
{
int cpu, best_idle_cpu = -1, best_idle_state = INT_MAX;
unsigned long best_idle_util = ULONG_MAX;
for_each_cpu_and(cpu, &env->fit_cpus, cpu_slowest_mask()) {
unsigned long util = env->cpu_stat[cpu].util_wo;
int state = env->cpu_stat[cpu].idle;
if (!available_idle_cpu(cpu))
continue;
if (util > best_idle_util)
continue;
if ((util == best_idle_util) && (state > best_idle_state))
continue;
best_idle_cpu = cpu;
best_idle_util = util;
best_idle_state = state;
}
return best_idle_cpu;
}
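/*
 * Energy-aware placement: pick the min-util cpu of each cs_domain as a
 * candidate, then choose the candidate with the lowest estimated system
 * energy. If it lands in the slowest cluster, a perf task or a tiny task
 * (util < 12.5% of a slowest cpu's capacity) may be redirected to an
 * idle cpu there instead.
 */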
static int find_energy_cpu(struct tp_env *env)
{
struct cs_domain *csd;
struct cpumask candidates;
int energy_cpu, adv_energy_cpu = INVALID_CPU;
take_util_snapshot(env);
/* set candidates cpu to find energy cpu */
cpumask_clear(&candidates);
/* Pick minimum utilization cpu from each domain */
list_for_each_entry(csd, &csd_head, list) {
int min_cpu = find_min_util_cpu(env, &csd->cpus, false);
if (cpu_selected(min_cpu))
cpumask_set_cpu(min_cpu, &candidates);
}
if (cpumask_weight(&candidates) == 1) {
energy_cpu = cpumask_any(&candidates);
goto out;
}
/* find min energy cpu */
energy_cpu = __find_energy_cpu(env, &candidates);
/*
* Slowest cpumask is usually the coregroup that includes the boot
* processor(cpu0), has low power consumption but also low performance
* efficiency. If selected cpu belongs to slowest cpumask and task is
* tiny enough not to increase system energy, reselect min energy cpu
* among idle cpu within slowest cpumask for faster task processing.
* (tiny task criteria = task util < 12.5% of slowest cpu capacity)
*/
if (cpumask_test_cpu(energy_cpu, cpu_slowest_mask())) {
int best_idle_cpu = get_best_idle_cpu(env);
if (is_perf_task(env->p) && cpu_selected(best_idle_cpu))
adv_energy_cpu = best_idle_cpu;
		else if (env->task_util < (capacity_cpu(0) >> 3))
adv_energy_cpu = find_min_util_cpu(env, cpu_slowest_mask(), true);
}
out:
trace_ems_find_energy_cpu(env->p, &candidates, energy_cpu, adv_energy_cpu);
if (cpu_selected(adv_energy_cpu) || cpu_selected(energy_cpu))
env->reason_of_selection = FAIR_ENERGY;
if (cpu_selected(adv_energy_cpu))
return adv_energy_cpu;
return energy_cpu;
}
/******************************************************************************
* best performance cpu selection *
******************************************************************************/
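/*
 * Performance placement: scan clusters from init_index. Idle cpus are
 * preferred by smallest exit latency, then largest spare capacity; busy
 * cpus by largest spare capacity, while prio-TEX tasks are spread toward
 * cpus running fewer of them.
 */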
static int find_best_perf_cpu(struct tp_env *env)
{
struct pe_list *pe_list = get_pe_list(env->init_index);
int cluster, cpu;
int best_cpu = INVALID_CPU;
int best_idle_cpu = INVALID_CPU;
int best_active_cpu = INVALID_CPU;
unsigned long max_active_spare = 0;
bool is_prio_tex = is_prio_tex_task(env->p);
for (cluster = 0; cluster < pe_list->num_of_cpus; cluster++) {
unsigned long long cluster_min_exit_latency = ULLONG_MAX;
unsigned long cluster_max_idle_spare = 0, cluster_max_active_spare = 0;
int cluster_best_idle_cpu = INVALID_CPU;
int cluster_best_active_cpu = INVALID_CPU;
int cluster_best_nr_prio_tex = INT_MAX;
rcu_read_lock();
for_each_cpu_and(cpu, &env->fit_cpus, &pe_list->cpus[cluster]) {
struct rq *rq = cpu_rq(cpu);
unsigned long extra_util = cpu_util_rt(rq) + cpu_util_dl(rq);
unsigned long util = ml_cpu_util_without(cpu, env->p);
unsigned long capacity = capacity_cpu_orig(cpu);
unsigned long spare;
int nr_prio_tex_tasks = ems_rq_nr_prio_tex(rq);
util = min(util + extra_util, capacity);
spare = capacity - util;
if (get_tex_level(env->p) == NOT_TEX &&
get_tex_level(rq->curr) != NOT_TEX)
continue;
if (available_idle_cpu(cpu)) {
unsigned int exit_latency = get_idle_exit_latency(cpu_rq(cpu));
if (exit_latency < cluster_min_exit_latency) {
cluster_max_idle_spare = spare;
cluster_min_exit_latency = exit_latency;
cluster_best_idle_cpu = cpu;
} else if (exit_latency == cluster_min_exit_latency) {
if (spare > cluster_max_idle_spare) {
cluster_max_idle_spare = spare;
cluster_best_idle_cpu = cpu;
}
}
} else {
/* Spread prio_tex tasks */
if (is_prio_tex) {
if (nr_prio_tex_tasks > cluster_best_nr_prio_tex)
continue;
if ((nr_prio_tex_tasks == cluster_best_nr_prio_tex) &&
(spare < cluster_max_active_spare))
continue;
}
if (spare > cluster_max_active_spare) {
cluster_max_active_spare = spare;
cluster_best_active_cpu = cpu;
cluster_best_nr_prio_tex = nr_prio_tex_tasks;
}
}
}
rcu_read_unlock();
if (cpu_selected(cluster_best_idle_cpu))
best_idle_cpu = cluster_best_idle_cpu;
else if (cpu_selected(cluster_best_active_cpu)) {
if (cluster_max_active_spare > max_active_spare) {
best_active_cpu = cluster_best_active_cpu;
max_active_spare = cluster_max_active_spare;
}
}
if (cpu_selected(best_idle_cpu))
break;
if ((cluster >= env->end_index) &&
(cpu_selected(best_idle_cpu) || cpu_selected(best_active_cpu)))
break;
}
if (cpu_selected(best_idle_cpu))
best_cpu = best_idle_cpu;
else if (cpu_selected(best_active_cpu))
best_cpu = best_active_cpu;
trace_ems_find_best_perf_cpu(env, best_cpu);
if (cpu_selected(best_cpu))
env->reason_of_selection = FAIR_PERFORMANCE;
return best_cpu;
}
/******************************************************************************
* best cpu selection *
******************************************************************************/
static int find_best_cpu(struct tp_env *env)
{
int best_cpu;
switch (env->sched_policy) {
case SCHED_POLICY_PERF:
best_cpu = find_best_perf_cpu(env);
break;
case SCHED_POLICY_ENERGY:
best_cpu = find_energy_cpu(env);
break;
default:
best_cpu = INVALID_CPU;
}
return best_cpu;
}
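/*
 * Build env->overcap_cpus. If every allowed cpu would be over capacity,
 * the cpu with the most spare capacity is exempted so that at least one
 * candidate survives.
 */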
static void find_overcap_cpus(struct tp_env *env)
{
struct rq *rq;
unsigned long cpu_util_with, cpu_util_wo, extra_util, capacity;
unsigned long spare_cap, max_spare_cap = 0;
int cpu, max_spare_cap_cpu = -1;
cpumask_clear(&env->overcap_cpus);
	/*
	 * Find cpus that become over-capacity with the given task:
	 * overcap_cpus = { cpu : cpu capacity < cpu util + task util }
	 */
for_each_cpu(cpu, &env->cpus_allowed) {
rq = cpu_rq(cpu);
capacity = capacity_cpu_orig(cpu);
extra_util = cpu_util_rt(rq) + cpu_util_dl(rq);
cpu_util_wo = ml_cpu_util_without(cpu, env->p) + extra_util;
cpu_util_wo = min(cpu_util_wo, capacity);
spare_cap = capacity - cpu_util_wo;
if (spare_cap > max_spare_cap) {
max_spare_cap = spare_cap;
max_spare_cap_cpu = cpu;
}
cpu_util_with = ml_cpu_util_with(env->p, cpu) + extra_util;
if (cpu_util_with > capacity)
cpumask_set_cpu(cpu, &env->overcap_cpus);
}
if (cpu_selected(max_spare_cap_cpu) &&
cpumask_equal(&env->overcap_cpus, &env->cpus_allowed))
cpumask_clear_cpu(max_spare_cap_cpu, &env->overcap_cpus);
}
static void find_migrating_cpus(struct tp_env *env)
{
struct rq *rq;
int cpu;
/* Find cpus that will receive the task through migration */
cpumask_clear(&env->migrating_cpus);
for_each_cpu(cpu, &env->fit_cpus) {
rq = cpu_rq(cpu);
if (!ems_rq_migrated(rq))
continue;
cpumask_set_cpu(cpu, &env->migrating_cpus);
}
}
enum task_class {
BOOSTED_TEX_CLASS,
BOOSTED_CLASS,
NORMAL_CLASS,
};
static bool is_boosted_task(struct task_struct *p)
{
if (emstune_sched_boost() && is_perf_task(p))
return true;
	/*
	 * At emstune level 2 with spreading enabled, low-priority tasks
	 * (prio > DEFAULT_PRIO) are not treated as boosted.
	 */
if (emstune_get_cur_level() == 2 && p->prio > DEFAULT_PRIO && emstune_should_spread())
return false;
if (is_gsc_task(p))
return true;
if (is_prio_tex_task(p))
return true;
if (emstune_support_uclamp()) {
if (uclamp_boosted(p) && !is_small_task(p))
return true;
}
return false;
}
static enum task_class get_task_class(struct task_struct *p)
{
if (is_boosted_tex_task(p))
return BOOSTED_TEX_CLASS;
else if (is_boosted_task(p))
return BOOSTED_CLASS;
return NORMAL_CLASS;
}
static int get_init_index(struct tp_env *env)
{
struct cpumask mask;
int s_index = 0;
int e_index = get_pe_list_size();
int ot_index = 0, index;
if (e_index == 1) {
/* 1 cluster */
return s_index;
} else if (env->sched_policy == SCHED_POLICY_EXPRESS) {
s_index = e_index - 1;
return s_index;
} else if (env->sched_policy == SCHED_POLICY_PERF) {
s_index = 1;
}
/* Adjust ontime_fit_cpus to start_index of pe_list */
ontime_select_fit_cpus(env->p, &mask);
for (index = 0; index < e_index; index++) {
struct pe_list *pe_list = get_pe_list(index);
if (cpumask_intersects(&mask, &pe_list->cpus[0])) {
ot_index = index;
break;
}
}
s_index = max(s_index, ot_index);
cpumask_and(&env->fit_cpus, &env->fit_cpus, &mask);
/* Raise the start_index until the capacity is enough */
for (index = s_index; index < e_index; index++) {
struct pe_list *pe_list = get_pe_list(index);
unsigned long capacity = capacity_orig_of(cpumask_first(&pe_list->cpus[0]));
if (ems_task_fits_capacity(env->p, capacity))
break;
}
return min(index, e_index - 1);
}
static int get_end_index(struct tp_env *env)
{
if (!emstune_should_spread())
return 0;
if (env->sched_policy != SCHED_POLICY_PERF)
return 0;
return 1;
}
static unsigned long get_base_cap(struct tp_env *env)
{
struct pe_list *pl = get_pe_list(env->init_index);
int cpu = cpumask_first(&pl->cpus[0]);
return capacity_cpu_orig(cpu);
}
static void update_tp_env(struct tp_env *env)
{
env->task_class = get_task_class(env->p);
switch (env->task_class) {
case BOOSTED_TEX_CLASS:
env->sched_policy = SCHED_POLICY_EXPRESS;
break;
case BOOSTED_CLASS:
env->sched_policy = SCHED_POLICY_PERF;
break;
case NORMAL_CLASS:
default:
env->sched_policy = SCHED_POLICY_ENERGY;
break;
}
cpumask_copy(&env->fit_cpus, &env->cpus_allowed);
env->init_index = get_init_index(env);
env->end_index = get_end_index(env);
env->base_cap = get_base_cap(env);
}
static int sysbusy_fit_cpus(struct tp_env *env)
{
int target_cpu;
if (!sysbusy_activated())
return 0;
cpumask_clear(&env->fit_cpus);
target_cpu = sysbusy_schedule(env);
if (cpu_selected(target_cpu)) {
cpumask_set_cpu(target_cpu, &env->fit_cpus);
env->reason_of_selection = FAIR_SYSBUSY;
}
return cpumask_weight(&env->fit_cpus);
}
#define PRIO_FOR_PERF 110
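/*
 * Fast track keeps a non-energy-policy task on its idle prev_cpu when
 * that cpu's capacity matches the chosen base capacity. Perf tasks and
 * foreground tasks with prio <= PRIO_FOR_PERF are not fast-tracked onto
 * the slowest cluster.
 */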
static bool can_use_fast_track(struct tp_env *env)
{
if (env->sched_policy == SCHED_POLICY_ENERGY)
return false;
if (!cpumask_test_cpu(env->prev_cpu, &env->cpus_allowed))
return false;
if (!available_idle_cpu(env->prev_cpu))
return false;
if (capacity_cpu_orig(env->prev_cpu) != env->base_cap)
return false;
if (cpumask_test_cpu(env->prev_cpu, cpu_slowest_mask())) {
if (is_perf_task(env->p))
return false;
if ((env->cgroup_idx == CGROUP_FOREGROUND) && (env->p->prio <= PRIO_FOR_PERF))
return false;
}
return true;
}
static int normal_fit_cpus(struct tp_env *env)
{
struct cpumask *fit_cpus = &env->fit_cpus;
int this_cpu = smp_processor_id(), cpu;
	/*
	 * If cl_sync is set and this cpu's coregroup overlaps fit_cpus,
	 * restrict fit_cpus to that coregroup.
	 */
if (env->cl_sync) {
struct cpumask *coregroup_cpus = &pcpu_csd(this_cpu)->cpus;
if (cpumask_intersects(fit_cpus, coregroup_cpus)) {
cpumask_and(fit_cpus, fit_cpus, coregroup_cpus);
if (cpumask_weight(fit_cpus) == 1)
goto out;
}
}
/* Handle sync flag */
if (env->sync && can_sync_to_this_cpu(env, this_cpu)) {
cpumask_clear(fit_cpus);
cpumask_set_cpu(this_cpu, fit_cpus);
env->reason_of_selection = FAIR_SYNC;
goto out;
}
if (can_use_fast_track(env)) {
cpumask_clear(fit_cpus);
cpumask_set_cpu(task_cpu(env->p), fit_cpus);
env->reason_of_selection = FAIR_FAST_TRACK;
goto out;
}
	/*
	 * Exclude over-capacity cpus from fit_cpus. If one or zero fit
	 * cpus remain, there is nothing further to narrow down.
	 */
find_overcap_cpus(env);
cpumask_andnot(&env->fit_cpus, &env->fit_cpus, &env->overcap_cpus);
if (cpumask_weight(&env->fit_cpus) <= 1)
goto out;
/* Exclude migrating cpus from fit cpus */
find_migrating_cpus(env);
cpumask_andnot(&env->fit_cpus, &env->fit_cpus, &env->migrating_cpus);
if (!cpumask_weight(&env->fit_cpus))
cpumask_or(&env->fit_cpus, &env->fit_cpus, &env->migrating_cpus);
for_each_cpu(cpu, &env->fit_cpus) {
struct task_struct *curr = cpu_rq(cpu)->curr;
if (is_boosted_tex_task(curr))
cpumask_clear_cpu(cpu, &env->fit_cpus);
}
out:
return cpumask_weight(fit_cpus);
}
static int find_fit_cpus(struct tp_env *env)
{
int num_of_cpus;
update_tp_env(env);
if (env->sched_policy == SCHED_POLICY_EXPRESS)
num_of_cpus = tex_boosted_fit_cpus(env);
else
num_of_cpus = normal_fit_cpus(env);
trace_ems_find_fit_cpus(env);
return num_of_cpus;
}
/*
* Return number of CPUs allowed.
*/
int find_cpus_allowed(struct tp_env *env)
{
struct cpumask mask[4];
/*
* take a snapshot of cpumask to get CPUs allowed
* - mask0 : p->cpus_ptr
* - mask1 : cpu_active_mask
* - mask2 : ecs_cpus_allowed
* - mask3 : cpus_binding_mask
*/
cpumask_copy(&mask[0], env->p->cpus_ptr);
cpumask_copy(&mask[1], cpu_active_mask);
cpumask_copy(&mask[2], ecs_cpus_allowed(env->p));
cpumask_copy(&mask[3], cpus_binding_mask(env->p));
cpumask_copy(&env->cpus_allowed, &mask[0]);
if (env->per_cpu_kthread)
goto out;
if (!cpumask_intersects(&env->cpus_allowed, &mask[1]))
goto out;
cpumask_and(&env->cpus_allowed, &env->cpus_allowed, &mask[1]);
if (cpumask_intersects(&env->cpus_allowed, &mask[2]))
cpumask_and(&env->cpus_allowed, &env->cpus_allowed, &mask[2]);
if (cpumask_intersects(&env->cpus_allowed, &mask[3]))
cpumask_and(&env->cpus_allowed, &env->cpus_allowed, &mask[3]);
out:
trace_ems_find_cpus_allowed(env, mask);
return cpumask_weight(&env->cpus_allowed);
}
extern char *fair_causes_name[END_OF_FAIR_CAUSES];
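/*
 * EMS cpu selection for the fair class. Candidates are narrowed step by
 * step: cpus_allowed -> sysbusy override -> policy-dependent fit cpus ->
 * best cpu (perf or energy); on failure, fall back to prev_cpu or any
 * allowed cpu, recording the reason of selection for tracing.
 */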
int __ems_select_task_rq_fair(struct task_struct *p, int prev_cpu,
int sd_flag, int wake_flag)
{
struct tp_env env = {
.p = p,
.cgroup_idx = cpuctl_task_group_idx(p),
.per_cpu_kthread = is_per_cpu_kthread(p),
.cl_sync = (wake_flag & WF_ANDROID_VENDOR) &&
!(current->flags & PF_EXITING),
.sync = (wake_flag & WF_SYNC) &&
!(current->flags & PF_EXITING),
.prev_cpu = task_cpu(p),
.task_util = ml_task_util_est(p),
.task_util_clamped = ml_uclamp_task_util(p),
.reason_of_selection = FAIR_FAILED,
};
int target_cpu = INVALID_CPU;
int num_of_cpus;
/* Find mandatory conditions for task allocation */
num_of_cpus = find_cpus_allowed(&env);
if (num_of_cpus == 0) {
		/* No CPU is allowed; give up finding a new cpu */
env.reason_of_selection = FAIR_ALLOWED_0;
goto out;
} else if (num_of_cpus == 1) {
/* There is only one CPU allowed, no need to find cpu */
target_cpu = cpumask_any(&env.cpus_allowed);
env.reason_of_selection = FAIR_ALLOWED_1;
goto out;
}
/* When sysbusy is detected, do scheduling under other policies */
num_of_cpus = sysbusy_fit_cpus(&env);
if (num_of_cpus > 0) {
target_cpu = cpumask_any(&env.fit_cpus);
goto out;
}
/* Find cpu candidates suitable for task operation */
num_of_cpus = find_fit_cpus(&env);
if (num_of_cpus == 0) {
		/* There are no fit cpus; keep the task on its prev cpu */
if (cpumask_test_cpu(prev_cpu, &env.cpus_allowed)) {
target_cpu = prev_cpu;
env.reason_of_selection = FAIR_FIT_0;
goto out;
}
/*
* If prev_cpu is not allowed,
* copy cpus_allowed to fit_cpus to find appropriate cpu among cpus_allowed.
*/
cpumask_copy(&env.fit_cpus, &env.cpus_allowed);
} else if (num_of_cpus == 1) {
/* Only one cpu is fit. Select this cpu. */
target_cpu = cpumask_any(&env.fit_cpus);
if (env.reason_of_selection == FAIR_FAILED)
env.reason_of_selection = FAIR_FIT_1;
goto out;
}
target_cpu = find_best_cpu(&env);
if (cpu_selected(target_cpu)) {
if (!cpumask_test_cpu(target_cpu, &env.cpus_allowed)) {
pr_err("Disallowed cpu%d is selected (cpus_allowed=%*pbl)\n",
target_cpu, cpumask_pr_args(&env.cpus_allowed));
WARN_ON(1);
}
} else {
if (cpumask_test_cpu(prev_cpu, &env.cpus_allowed))
target_cpu = prev_cpu;
else
target_cpu = cpumask_any(&env.cpus_allowed);
env.reason_of_selection = FAIR_FAILED;
}
out:
update_fair_stat(smp_processor_id(), env.reason_of_selection);
trace_ems_select_task_rq(&env, target_cpu, fair_causes_name[env.reason_of_selection]);
return target_cpu;
}
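/*
 * Parse the "cpu-selection-domain" DT node into cs_domains mapped per
 * cpu, initialize the TEX, cpus_binding and cpu-weight subsystems, and
 * disable the TTWU_QUEUE scheduler feature.
 */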
int core_init(struct device_node *ems_dn)
{
struct device_node *dn, *child;
int index;
dn = of_find_node_by_name(ems_dn, "cpu-selection-domain");
if (unlikely(!dn))
return -ENODATA;
__pcpu_csd = alloc_percpu(struct cs_domain *);
for_each_child_of_node(dn, child) {
struct cs_domain *csd;
const char *buf;
int cpu;
if (of_property_read_string(child, "cpus", &buf))
return -ENODATA;
csd = kzalloc(sizeof(struct cs_domain), GFP_KERNEL);
if (unlikely(!csd))
return -ENOMEM;
cpulist_parse(buf, &csd->cpus);
list_add_tail(&csd->list, &csd_head);
for_each_cpu(cpu, &csd->cpus)
*per_cpu_ptr(__pcpu_csd, cpu) = csd;
}
tex_init();
cpus_binding_init();
cpu_weight_init();
index = match_string(sched_feat_names, __SCHED_FEAT_NR, "TTWU_QUEUE");
if (index >= 0) {
static_key_disable(&sched_feat_keys[index]);
sysctl_sched_features &= ~(1UL << index);
}
return 0;
}