#include "../sched.h" #include "../pelt.h" #include "ems.h" #include #include struct frt_dom { unsigned int active_ratio; struct cpumask cpus; struct list_head list; struct frt_dom *next; struct frt_dom *prev; /* kobject for sysfs group */ struct kobject kobj; }; struct cpumask available_mask; LIST_HEAD(frt_list); DEFINE_RAW_SPINLOCK(frt_lock); static struct frt_dom __percpu **frt_rqs; static struct cpumask __percpu **frt_local_cpu_mask; #define RATIO_SCALE_SHIFT 10 #define ratio_scale(v, r) (((v) * (r) * 10) >> RATIO_SCALE_SHIFT) #define cpu_selected(cpu) (cpu >= 0) static u64 frt_cpu_util(int cpu) { struct rq *rq = cpu_rq(cpu); u64 cpu_util; cpu_util = ml_cpu_util(cpu); cpu_util += READ_ONCE(rq->avg_rt.util_avg) + READ_ONCE(rq->avg_dl.util_avg); return cpu_util; } static const struct cpumask *get_available_cpus(void) { return &available_mask; } static int frt_mode_update_callback(struct notifier_block *nb, unsigned long val, void *v) { struct emstune_set *cur_set = (struct emstune_set *)v; struct frt_dom *dom; list_for_each_entry(dom, &frt_list, list) { int cpu = cpumask_first(&dom->cpus); dom->active_ratio = cur_set->frt.active_ratio[cpu]; } return NOTIFY_OK; } static struct notifier_block frt_mode_update_notifier = { .notifier_call = frt_mode_update_callback, }; int frt_init(struct kobject *ems_kobj) { struct frt_dom *cur, *prev = NULL, *head = NULL; struct device_node *dn; int cpu, tcpu; dn = of_find_node_by_path("/ems"); if (!dn) return 0; frt_rqs = alloc_percpu(struct frt_dom *); if (!frt_rqs) { pr_err("failed to alloc frt_rqs\n"); return -ENOMEM; } frt_local_cpu_mask = alloc_percpu(struct cpumask *); if (!frt_local_cpu_mask) { pr_err("failed to alloc frt_local_cpu_mask\n"); return -ENOMEM; } INIT_LIST_HEAD(&frt_list); cpumask_setall(&available_mask); for_each_possible_cpu(cpu) { *per_cpu_ptr(frt_local_cpu_mask, cpu) = NULL; if (cpu != cpumask_first(cpu_coregroup_mask(cpu))) continue; cur = kzalloc(sizeof(struct frt_dom), GFP_KERNEL); if (!cur) { pr_err("FRT(%s): failed to allocate dom\n", __func__); goto put_node; } cpumask_copy(&cur->cpus, cpu_coregroup_mask(cpu)); /* make linke between rt domains */ if (head == NULL) head = cur; if (prev) { prev->next = cur; cur->prev = prev; } cur->next = head; head->prev = cur; prev = cur; for_each_cpu(tcpu, &cur->cpus) *per_cpu_ptr(frt_rqs, tcpu) = cur; list_add_tail(&cur->list, &frt_list); } emstune_register_notifier(&frt_mode_update_notifier); pr_info("%s: frt initialized complete!\n", __func__); put_node: of_node_put(dn); return 0; } /*****************************************************************************/ /* SELECT WAKEUP CPU */ /*****************************************************************************/ void frt_update_available_cpus(struct rq *rq) { struct frt_dom *dom; struct cpumask mask, active_cpus; unsigned long flags; unsigned long capacity, dom_util_sum = 0; int cpu; if (rq->cpu != 0) return; if (!raw_spin_trylock_irqsave(&frt_lock, flags)) return; cpumask_clear(&mask); list_for_each_entry(dom, &frt_list, list) { cpumask_and(&active_cpus, &dom->cpus, cpu_active_mask); /* this domain is off */ if (cpumask_empty(&active_cpus)) continue; for_each_cpu(cpu, &active_cpus) dom_util_sum += frt_cpu_util(cpu); capacity = capacity_cpu_orig(cpumask_any(&active_cpus)) * cpumask_weight(&active_cpus); /* * domains with util greater than 80% of domain capacity are excluded * from available_mask. */ if (check_busy(dom_util_sum, capacity)) continue; cpumask_or(&mask, &mask, &dom->cpus); trace_frt_available_cpus(*(unsigned int *)cpumask_bits(&active_cpus), dom_util_sum, capacity * dom->active_ratio / 100, *(unsigned int *)cpumask_bits(&mask)); /* * If the percentage of domain util is less than active_ratio, * this domain is idle. */ if (dom_util_sum * 100 < capacity * dom->active_ratio) break; } cpumask_copy(&available_mask, &mask); raw_spin_unlock_irqrestore(&frt_lock, flags); } static int find_idle_cpu(struct task_struct *task) { int cpu, best_cpu = -1; int cpu_prio, max_prio = -1; u64 cpu_util, min_util = ULLONG_MAX; struct cpumask candidate_cpus; struct frt_dom *dom, *prefer_dom; int nr_cpus_allowed; struct tp_env env = { .p = task, }; nr_cpus_allowed = find_cpus_allowed(&env); if (!nr_cpus_allowed) return best_cpu; if (nr_cpus_allowed == 1) return cpumask_any(&env.cpus_allowed); cpumask_and(&candidate_cpus, &env.cpus_allowed, get_available_cpus()); if (unlikely(cpumask_empty(&candidate_cpus))) cpumask_copy(&candidate_cpus, &env.cpus_allowed); prefer_dom = dom = *per_cpu_ptr(frt_rqs, 0); do { for_each_cpu_and(cpu, &dom->cpus, &candidate_cpus) { if (!available_idle_cpu(cpu)) continue; cpu_prio = cpu_rq(cpu)->rt.highest_prio.curr; if (cpu_prio < max_prio) continue; cpu_util = frt_cpu_util(cpu); if ((cpu_prio > max_prio) || (cpu_util < min_util) || (cpu_util == min_util && task_cpu(task) == cpu)) { min_util = cpu_util; max_prio = cpu_prio; best_cpu = cpu; } } if (cpu_selected(best_cpu)) { trace_frt_select_task_rq(task, best_cpu, "IDLE-FIRST"); return best_cpu; } dom = dom->next; } while (dom != prefer_dom); return best_cpu; } static int find_recessive_cpu(struct task_struct *task, struct cpumask *lowest_mask) { int cpu, best_cpu = -1; u64 cpu_util, min_util= ULLONG_MAX; struct cpumask candidate_cpus; struct frt_dom *dom, *prefer_dom; int nr_cpus_allowed; int cpu_prio, max_prio = -1; struct tp_env env = { .p = task, }; if (unlikely(*this_cpu_ptr(frt_local_cpu_mask) == NULL)) { if (lowest_mask) { /* Save lowest_mask per CPU once */ *this_cpu_ptr(frt_local_cpu_mask) = lowest_mask; } else { /* Make sure the mask is initialized first */ trace_frt_select_task_rq(task, best_cpu, "NA LOWESTMSK"); return best_cpu; } } lowest_mask = *this_cpu_ptr(frt_local_cpu_mask); nr_cpus_allowed = find_cpus_allowed(&env); if (!nr_cpus_allowed) return best_cpu; if (nr_cpus_allowed == 1) return cpumask_any(&env.cpus_allowed); cpumask_and(&candidate_cpus, &env.cpus_allowed, get_available_cpus()); if (unlikely(cpumask_empty(&candidate_cpus))) return best_cpu; if (cpumask_intersects(&candidate_cpus, lowest_mask)) cpumask_and(&candidate_cpus, &candidate_cpus, lowest_mask); prefer_dom = dom = *per_cpu_ptr(frt_rqs, 0); do { for_each_cpu_and(cpu, &dom->cpus, &candidate_cpus) { cpu_util = frt_cpu_util(cpu); cpu_prio = cpu_rq(cpu)->rt.highest_prio.curr; if (cpu_prio < max_prio) continue; cpu_util = frt_cpu_util(cpu); if ((cpu_prio > max_prio) || (cpu_prio == max_prio && cpu_util < min_util)) { min_util = cpu_util; max_prio = cpu_prio; best_cpu = cpu; } } if (cpu_selected(best_cpu)) { trace_frt_select_task_rq(task, best_cpu, rt_task(cpu_rq(best_cpu)->curr) ? "RT-RECESS" : "FAIR-RECESS"); return best_cpu; } dom = dom->next; } while (dom != prefer_dom); return best_cpu; } int frt_find_lowest_rq(struct task_struct *task, struct cpumask *lowest_mask) { int best_cpu = -1; if (task->nr_cpus_allowed == 1) { trace_frt_select_task_rq(task, best_cpu, "NA ALLOWED"); update_rt_stat(smp_processor_id(), RT_ALLOWED_1); return best_cpu; } if (!rt_task(task)) return best_cpu; /* * Fluid Sched Core selection procedure: * * 1. idle CPU selection * 2. recessive task first */ /* 1. idle CPU selection */ best_cpu = find_idle_cpu(task); if (cpu_selected(best_cpu)) { update_rt_stat(smp_processor_id(), RT_IDLE); goto out; } /* 2. recessive task first */ best_cpu = find_recessive_cpu(task, lowest_mask); if (cpu_selected(best_cpu)) { update_rt_stat(smp_processor_id(), RT_RECESSIVE); goto out; } out: if (!cpu_selected(best_cpu)) { best_cpu = task_rq(task)->cpu; update_rt_stat(smp_processor_id(), RT_FAILED); } if (!cpumask_test_cpu(best_cpu, cpu_active_mask)) { trace_frt_select_task_rq(task, best_cpu, "NOTHING_VALID"); best_cpu = -1; } return best_cpu; } static inline bool frt_can_sync_to_cur_cpu(struct rq *cur_rq, struct task_struct *p, int cur_cpu) { if (!cpu_active(cur_cpu)) return false; if (!cpumask_test_cpu(cur_cpu, p->cpus_ptr)) return false; if (p->prio > cur_rq->rt.highest_prio.next) return false; if (cur_rq->rt.rt_nr_running > 2) return false; return true; } int frt_select_task_rq_rt(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags) { struct task_struct *curr; struct rq *rq, *cur_rq; bool sync = !!(wake_flags & WF_SYNC); int target_cpu = -1, cur_cpu; /* For anything but wake ups, just return the task_cpu */ if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) goto out; cur_cpu = raw_smp_processor_id(); cur_rq = cpu_rq(cur_cpu); /* Handle sync flag */ if (sync && frt_can_sync_to_cur_cpu(cur_rq, p, cur_cpu)) { target_cpu = cur_cpu; goto out; } rq = cpu_rq(prev_cpu); rcu_read_lock(); curr = READ_ONCE(rq->curr); /* unlocked access */ if (curr) { /* * Even though the destination CPU is running * a higher priority task, FluidRT can bother moving it * when its utilization is very small, and the other CPU is too busy * to accomodate the p in the point of priority and utilization. * * BTW, if the curr has higher priority than p, FluidRT tries to find * the other CPUs first. In the worst case, curr can be victim, if it * has very small utilization. */ int cpu = frt_find_lowest_rq(p, NULL); if (likely(cpu >= 0)) target_cpu = cpu; else target_cpu = prev_cpu; } rcu_read_unlock(); out: return target_cpu; }