/*
 * CPUFreq boost driver
 *
 * Copyright (C) 2020 Samsung Electronics Co., Ltd
 * Park Choonghoon
 */

#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/reciprocal_div.h>
#include <linux/sched/clock.h>
#include <linux/cgroup.h>
#include <linux/notifier.h>

#include "../sched.h"
#include "ems.h"

#include <trace/events/ems.h>
#include <trace/events/ems_debug.h>

/******************************************************************************
 *                          data structure and API                           *
 ******************************************************************************/
/*
 * Freqboost groups
 * Keep track of all the boost groups which impact on CPU, for example when a
 * CPU has two RUNNABLE tasks belonging to two different boost groups and thus
 * likely with different boost values.
 * Since on each system we expect only a limited number of boost groups, here
 * we use a simple array to keep track of the metrics required to compute the
 * maximum per-CPU boosting value.
 */
struct boost_groups {
        /* Maximum boost value for all RUNNABLE tasks on a CPU */
        int boost_max;
        u64 boost_ts;
        struct {
                /* The boost for tasks on that boost group */
                int boost;
                /* Count of RUNNABLE tasks on that boost group */
                unsigned int tasks;
                /* Count of woken-up tasks on that boost group */
                unsigned int wokenup_tasks;
                /* Timestamp of boost activation */
                u64 ts;
        } group[CGROUP_COUNT];
        int timeout;
};

static raw_spinlock_t __percpu *lock;

static inline bool freqboost_boost_timeout(u64 now, u64 ts, u64 timeout)
{
        return (now - ts) > timeout;
}

static inline bool
freqboost_boost_group_active(int idx, struct boost_groups *bg, u64 now)
{
        if (bg->group[idx].tasks)
                return true;

        return !freqboost_boost_timeout(now, bg->group[idx].ts, bg->timeout);
}

static void freqboost_group_update(struct boost_groups *bg, u64 now)
{
        int boost_max = INT_MIN;
        u64 boost_ts = 0;
        int idx;

        for (idx = 0; idx < CGROUP_COUNT; idx++) {
                /*
                 * A boost group affects a CPU only if it has RUNNABLE tasks
                 * on that CPU or it has a hold in effect from a previous task.
                 */
                if (!freqboost_boost_group_active(idx, bg, now))
                        continue;

                if (boost_max > bg->group[idx].boost)
                        continue;

                boost_max = bg->group[idx].boost;
                boost_ts = bg->group[idx].ts;
        }

        bg->boost_max = boost_max;
        if (boost_ts)
                bg->boost_ts = boost_ts;
}

struct reciprocal_value freqboost_spc_rdiv;

static long
freqboost_margin(unsigned long capacity, unsigned long signal, long boost)
{
        long long margin = 0;

        if (signal > capacity)
                return 0;

        /*
         * Signal proportional compensation (SPC)
         *
         * The Boost (B) value is used to compute a Margin (M) which is
         * proportional to the complement of the original Signal (S):
         *   M = B * (capacity - S)
         * The obtained M could be used by the caller to "boost" S.
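         *
         * Since freqboost_spc_rdiv is set up as reciprocal_value(100), B is
         * effectively a percentage. A worked example with assumed values,
         * for illustration only: with capacity = 1024, S = 400 and B = 30,
         *   M = 30 * (1024 - 400) / 100 = 187
         * so the caller ends up with a boosted signal of 400 + 187 = 587.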
         */
        if (boost >= 0) {
                margin  = capacity - signal;
                margin *= boost;
        } else
                margin = -signal * boost;

        margin = reciprocal_divide(margin, freqboost_spc_rdiv);

        if (boost < 0)
                margin *= -1;

        return margin;
}

static unsigned long
freqboost_boosted_util(struct boost_groups *bg, int cpu, unsigned long util)
{
        u64 now;
        long boost, margin = 0;

        now = sched_clock();

        /* Check to see if we have a hold in effect */
        if (freqboost_boost_timeout(now, bg->boost_ts, bg->timeout))
                freqboost_group_update(bg, now);

        boost = bg->boost_max;
        if (!boost || boost == INT_MIN)
                goto out;

        margin = freqboost_margin(capacity_cpu(cpu), util, boost);

out:
        trace_freqboost_boosted_util(cpu, boost, util, margin);

        return util + margin;
}

/******************************************************************************
 *                                freq boost                                 *
 ******************************************************************************/
/* Boost groups affecting each CPU in the system */
static struct boost_groups __percpu *freqboost_groups;

/* We hold freqboost in effect for at least this long */
#define FREQBOOST_HOLD_NS       50000000ULL     /* 50ms */

static inline bool freqboost_update_timestamp(struct task_struct *p)
{
        return task_has_rt_policy(p);
}

static inline void
freqboost_tasks_update(struct task_struct *p, int cpu, int idx,
                       int flags, int task_count)
{
        struct boost_groups *bg = per_cpu_ptr(freqboost_groups, cpu);
        int tasks = bg->group[idx].tasks + task_count;

        /* Update the boosted tasks count while avoiding making it negative */
        bg->group[idx].tasks = max(0, tasks);

        /* Update the boost-hold timestamp on enqueue */
        if (task_count > 0) {
                u64 now = sched_clock();

                if (freqboost_update_timestamp(p))
                        bg->group[idx].ts = now;

                /* Boost group activation on that RQ */
                if (bg->group[idx].tasks == 1)
                        freqboost_group_update(bg, now);
        }
}

unsigned long freqboost_cpu_boost(int cpu, unsigned long util)
{
        struct boost_groups *bg = per_cpu_ptr(freqboost_groups, cpu);

        return freqboost_boosted_util(bg, cpu, util);
}

/******************************************************************************
 *                             Heavy Task Boost                              *
 ******************************************************************************/
unsigned long heavytask_cpu_boost(int cpu, unsigned long util, int ratio)
{
        int boost = 0, hratio = profile_get_htask_ratio(cpu);
        long margin = 0;

        if (!hratio)
                goto out;

        boost = (hratio * ratio) >> SCHED_CAPACITY_SHIFT;
        boost = min(boost, 100);

        margin = freqboost_margin(capacity_cpu(cpu), util, boost);

out:
        trace_freqboost_htsk_boosted_util(cpu, hratio, ratio, boost, util, margin);

        return util + margin;
}
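/*
 * A worked example with assumed values, for illustration only: with a
 * heavy-task ratio hratio = 512 reported by profile_get_htask_ratio()
 * and ratio = 200 passed by the caller,
 *   boost = (512 * 200) >> SCHED_CAPACITY_SHIFT = 100
 * which already hits the min(boost, 100) clamp, so util is raised by
 * the full SPC margin for a 100% boost.
 */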
/******************************************************************************
 *                                  common                                   *
 ******************************************************************************/
static void
freqboost_enqdeq_task(struct task_struct *p, int cpu, int flags, int type)
{
        unsigned long irq_flags;
        int idx;

        /*
         * Boost group accounting is protected by a per-cpu lock and requires
         * interrupts to be disabled to avoid race conditions, for example on
         * do_exit()::cgroup_exit() and task migration.
         */
        raw_spin_lock_irqsave(per_cpu_ptr(lock, cpu), irq_flags);

        idx = cpuctl_task_group_idx(p);
        freqboost_tasks_update(p, cpu, idx, flags, type);

        raw_spin_unlock_irqrestore(per_cpu_ptr(lock, cpu), irq_flags);
}

#define ENQUEUE_TASK    1
#define DEQUEUE_TASK    -1

void freqboost_enqueue_task(struct task_struct *p, int cpu, int flags)
{
        freqboost_enqdeq_task(p, cpu, flags, ENQUEUE_TASK);
}

void freqboost_dequeue_task(struct task_struct *p, int cpu, int flags)
{
        freqboost_enqdeq_task(p, cpu, flags, DEQUEUE_TASK);
}
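/*
 * A minimal usage sketch, assuming hook points outside this file: the
 * scheduler enqueue and dequeue paths are expected to call
 *
 *      freqboost_enqueue_task(p, cpu, flags);
 *      ...
 *      freqboost_dequeue_task(p, cpu, flags);
 *
 * in matched pairs, so that group[idx].tasks reflects the number of
 * RUNNABLE tasks of each cgroup on each CPU.
 */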
int freqboost_can_attach(struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct cgroup_subsys_state *css;
        struct boost_groups *bg;
        struct rq_flags rq_flags;
        unsigned int cpu;
        struct rq *rq;
        int src_bg;     /* Source boost group index */
        int dst_bg;     /* Destination boost group index */
        int tasks;
        u64 now;

        cgroup_taskset_for_each(task, css, tset) {
                /*
                 * Lock the RQ of the CPU the task is enqueued on to avoid
                 * race conditions with migration code while the task is
                 * being accounted.
                 */
                rq = task_rq_lock(task, &rq_flags);

                if (!task->on_rq) {
                        task_rq_unlock(rq, task, &rq_flags);
                        continue;
                }

                /*
                 * Boost group accounting is protected by a per-cpu lock and
                 * requires interrupts to be disabled to avoid race
                 * conditions on...
                 */
                cpu = cpu_of(rq);
                bg = per_cpu_ptr(freqboost_groups, cpu);
                raw_spin_lock(per_cpu_ptr(lock, cpu));

                dst_bg = css->id - 1;
                /* If a customer adds a new group, use the last group */
                if (dst_bg >= CGROUP_COUNT)
                        dst_bg = CGROUP_COUNT - 1;

                src_bg = cpuctl_task_group_idx(task);

                /*
                 * The current task is not changing boost group, which can
                 * happen when the new hierarchy is in use.
                 */
                if (unlikely(dst_bg == src_bg)) {
                        raw_spin_unlock(per_cpu_ptr(lock, cpu));
                        task_rq_unlock(rq, task, &rq_flags);
                        continue;
                }

                /*
                 * This is the case of a RUNNABLE task which is switching
                 * its current boost group.
                 */

                /* Move task from src to dst boost group */
                tasks = bg->group[src_bg].tasks - 1;
                bg->group[src_bg].tasks = max(0, tasks);
                bg->group[dst_bg].tasks += 1;

                /* Update boost hold start for this group */
                now = sched_clock();
                bg->group[dst_bg].ts = now;

                /* Force boost group re-evaluation at next boost check */
                bg->boost_ts = now - FREQBOOST_HOLD_NS;

                raw_spin_unlock(per_cpu_ptr(lock, cpu));
                task_rq_unlock(rq, task, &rq_flags);
        }

        return 0;
}

int freqboost_get_task_ratio(struct task_struct *p)
{
        struct boost_groups *fbg = per_cpu_ptr(freqboost_groups, task_cpu(p));
        int st_idx;

        st_idx = cpuctl_task_group_idx(p);

        return fbg->group[st_idx].boost;
}

static int freqboost_emstune_notifier_call(struct notifier_block *nb,
                                           unsigned long val, void *v)
{
        struct emstune_set *cur_set = (struct emstune_set *)v;
        struct boost_groups *fbg;
        int i, cpu;

        for_each_possible_cpu(cpu) {
                fbg = per_cpu_ptr(freqboost_groups, cpu);
                for (i = 0; i < CGROUP_COUNT; i++)
                        fbg->group[i].boost = cur_set->freqboost.ratio[i][cpu];
        }

        return NOTIFY_OK;
}

static struct notifier_block freqboost_emstune_notifier = {
        .notifier_call = freqboost_emstune_notifier_call,
};

static inline void freqboost_init_cgroups(void)
{
        struct boost_groups *bg;
        int cpu;

        /* Initialize the per-CPU boost groups */
        for_each_possible_cpu(cpu) {
                bg = per_cpu_ptr(freqboost_groups, cpu);
                memset(bg, 0, sizeof(struct boost_groups));
                bg->timeout = FREQBOOST_HOLD_NS;
                raw_spin_lock_init(per_cpu_ptr(lock, cpu));
        }
}

/*
 * Initialize the per-CPU boost data and register for emstune updates
 */
int freqboost_init(void)
{
        lock = alloc_percpu(raw_spinlock_t);
        freqboost_groups = alloc_percpu(struct boost_groups);
        if (!lock || !freqboost_groups)
                return -ENOMEM;

        freqboost_spc_rdiv = reciprocal_value(100);

        freqboost_init_cgroups();

        emstune_register_notifier(&freqboost_emstune_notifier);

        return 0;
}
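/*
 * A minimal usage sketch of the two boost interfaces, assuming a
 * frequency-selection caller outside this file (variable names are
 * illustrative only):
 *
 *      unsigned long boosted;
 *
 *      boosted = freqboost_cpu_boost(cpu, util);
 *      boosted = heavytask_cpu_boost(cpu, boosted, ratio);
 *
 * Both helpers return the input utilization plus an additive SPC margin,
 * so they can be stacked or used independently by the caller.
 */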