/*
 * CPUFreq governor based on Energy-Step-Data And Scheduler-Event.
 *
 * Copyright (C) 2019 Samsung Electronics Corporation
 * Author: Youngtae Lee
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include
#include
#include
#include
#include
#include
#include

#include "../sched.h"
#include "ems.h"

#include
#include

struct esgov_policy {
	struct cpufreq_policy *policy;
	struct cpumask cpus;
	struct kobject kobj;
	raw_spinlock_t update_lock;
	struct rw_semaphore rwsem;
	bool enabled;			/* whether esg is the current cpufreq governor or not */
	bool running;			/* whether esg is running or not */

	unsigned int last_caller;
	unsigned int target_freq;	/* target frequency at the current status */
	int util;			/* target util */
	u64 last_freq_update_time;

	/* The next fields are for the tunables */
	int step;
	unsigned long step_power;	/* allowed energy at a step */
	s64 rate_delay_ns;
	int patient_mode;
	int pelt_margin;
	int pelt_boost;
	int up_rate_limit_ns;
	int down_rate_limit_ns;
	int rapid_scale_up;
	int rapid_scale_down;
	bool limits_changed;

	/* slack timer */
	int slack_expired_time_ms;

	/* no work freq press */
	int no_work_press_ratio;

	/* Tracking min/max information */
	int min_cap;			/* allowed min capacity */
	int max_cap;			/* allowed max capacity */
	int min;			/* min freq */
	int max;			/* max freq */

	/* The next fields are for frequency change work */
	bool work_in_progress;
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
};

struct esgov_cpu {
	struct update_util_data update_util;
	struct esgov_policy *esg_policy;
	unsigned int cpu;

	int util;		/* target util */
	int pelt_util;		/* pelt util */
	int step_util;		/* energy step util */
	int io_util;		/* io boost util */
	int active_ratio;
	int capacity;
	int last_idx;

	bool iowait_boost_pending;
	unsigned int iowait_boost;
	u64 last_update;

	unsigned long min;	/* min util matched with min_cap */
	bool no_work_press;
};

struct esgov_param {
	struct cpumask cpus;
	int step;
	int patient_mode;
	int pelt_margin;
	int pelt_boost;
	int up_rate_limit;
	int down_rate_limit;
	int rapid_scale_up;
	int rapid_scale_down;
};

struct kobject *esg_kobj;
static struct esgov_policy __percpu **esgov_policy;
static struct esgov_cpu __percpu *esgov_cpu;
static struct esgov_param __percpu **esgov_param;

/*************************************************************************/
/*                           HELPER FUNCTION                             */
/*************************************************************************/
int find_allowed_capacity(int cpu, unsigned int freq, int power)
{
	unsigned long cur_power = et_freq_to_dpower(cpu, freq);

	return et_dpower_to_cap(cpu, cur_power + power);
}

static void esg_update_freq_range(struct cpufreq_policy *data)
{
	unsigned int new_min, new_max, new_min_idx, new_max_idx;
	struct esgov_policy *esg_policy = *per_cpu_ptr(esgov_policy, data->cpu);
	struct cpufreq_policy *policy;

	if (unlikely(!esg_policy || !esg_policy->enabled))
		return;

	policy = esg_policy->policy;

	new_min = data->min;
	new_max = data->max;

	if (esg_policy->min == new_min && esg_policy->max == new_max)
		return;

	esg_policy->min = new_min;
	esg_policy->max = new_max;

	new_min_idx = cpufreq_frequency_table_target(policy,
					new_min, CPUFREQ_RELATION_L);
	new_max_idx = cpufreq_frequency_table_target(policy,
					new_max, CPUFREQ_RELATION_H);

	new_min = esg_policy->policy->freq_table[new_min_idx].frequency;
	new_max = esg_policy->policy->freq_table[new_max_idx].frequency;

	esg_policy->min_cap = find_allowed_capacity(policy->cpu, new_min, 0);
	esg_policy->max_cap = find_allowed_capacity(policy->cpu, new_max, 0);
	esg_policy->min_cap = min(esg_policy->max_cap, esg_policy->min_cap);

	trace_esg_update_limit(policy->cpu, esg_policy->min_cap, esg_policy->max_cap);
}
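/*
 * Illustrative note (not from the original source, numbers are hypothetical):
 * "step" divides the dynamic-power range of a cluster into equal energy
 * budgets.  With et_max_dpower() = 1000, et_min_dpower() = 200 and step = 4,
 * esg_update_step() below yields step_power = (1000 - 200) / 4 = 200, i.e.
 * each step may add at most ~200 units of dynamic power on top of the
 * current operating point (see find_allowed_capacity(), which converts
 * "current power + step_power" back into a capacity).
 */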
static void esg_update_step(struct esgov_policy *esg_policy, int step)
{
	int cpu = cpumask_first(&esg_policy->cpus);

	esg_policy->step = step;
	esg_policy->step_power = (et_max_dpower(cpu) - et_min_dpower(cpu)) / step;
}

static void esg_sync_param(struct esgov_policy *esg_policy, struct esgov_param *param)
{
	esg_update_step(esg_policy, param->step);
	esg_policy->patient_mode = param->patient_mode;
	esg_policy->pelt_margin = param->pelt_margin;
	esg_policy->pelt_boost = param->pelt_boost;
	esg_policy->up_rate_limit_ns = param->up_rate_limit * NSEC_PER_MSEC;
	esg_policy->down_rate_limit_ns = param->down_rate_limit * NSEC_PER_MSEC;
	esg_policy->rapid_scale_up = param->rapid_scale_up;
	esg_policy->rapid_scale_down = param->rapid_scale_down;
}

static int esg_mode_update_callback(struct notifier_block *nb,
				unsigned long val, void *v)
{
	struct emstune_set *cur_set = (struct emstune_set *)v;
	struct esgov_policy *esg_policy;
	struct esgov_param *param;
	int cpu;

	for_each_possible_cpu(cpu) {
		if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
			continue;

		param = *per_cpu_ptr(esgov_param, cpu);
		if (unlikely(!param))
			continue;

		param->step = cur_set->esg.step[cpu];
		param->pelt_margin = cur_set->esg.pelt_margin[cpu];
		param->patient_mode = cur_set->cpufreq_gov.patient_mode[cpu];
		param->pelt_boost = cur_set->cpufreq_gov.pelt_boost[cpu];
		param->up_rate_limit = 4;
		param->down_rate_limit = 4;
		param->rapid_scale_up = cur_set->cpufreq_gov.rapid_scale_up;
		param->rapid_scale_down = cur_set->cpufreq_gov.rapid_scale_down;

		esg_policy = *per_cpu_ptr(esgov_policy, cpu);
		if (unlikely(!esg_policy || !esg_policy->enabled))
			continue;

		esg_sync_param(esg_policy, param);
	}

	return NOTIFY_OK;
}

/*
 * Return the next maximum util of a group when a task moves to dst_cpu.
 * cpus: cpumask of the target group to get the next maximum util from
 * dst_cpu: destination cpu of the task
 */
static unsigned int esgov_calc_cpu_target_util(struct esgov_cpu *esg_cpu,
			int max, int org_pelt_util, int pelt_util_diff,
			int nr_running);

int esgov_get_gov_next_cap(struct tp_env *env, struct cpumask *cpus,
				int dst_cpu, bool apply_clamp)
{
	struct esgov_policy *esg_policy;
	int cpu, src_cpu = task_cpu(env->p);
	int max_util = 0;

	esg_policy = *per_cpu_ptr(esgov_policy, cpumask_any(cpus));
	if (unlikely(!esg_policy || !esg_policy->enabled))
		return -ENODEV;

	if (esg_policy->min_cap >= esg_policy->max_cap)
		return esg_policy->max_cap;

	/* get max util of the cluster of this cpu */
	for_each_cpu(cpu, esg_policy->policy->cpus) {
		struct esgov_cpu *esg_cpu = per_cpu_ptr(esgov_cpu, cpu);
		unsigned int max = capacity_cpu_orig(cpu);
		int cpu_util, pelt_util;
		int nr_running = env->cpu_stat[cpu].nr_running;

		if (!esg_cpu)
			continue;

		pelt_util = env->cpu_stat[cpu].util;
		if (cpu != dst_cpu && cpu != src_cpu) {
			/* util of a cpu not involved in the migration */
			cpu_util = esgov_calc_cpu_target_util(esg_cpu,
					max, pelt_util, 0, nr_running);
		} else if (cpu == dst_cpu && cpu != src_cpu) {
			/* util of dst_cpu (when migrating the task) */
			cpu_util = esgov_calc_cpu_target_util(esg_cpu,
					max, pelt_util, env->task_util, nr_running);
		} else if (cpu != dst_cpu && cpu == src_cpu) {
			/* util of src_cpu (when migrating the task) */
			cpu_util = esgov_calc_cpu_target_util(esg_cpu,
					max, pelt_util, -env->task_util, nr_running);
		} else {
			/* util of src_cpu (when the task stays on src_cpu) */
			cpu_util = esgov_calc_cpu_target_util(esg_cpu,
					max, pelt_util, 0, nr_running);
		}

		if (cpu_util > max_util)
			max_util = cpu_util;
	}

	if (apply_clamp) {
		/* max_util floor/ceiling depend on the CPUFreq min/max lock */
		max_util = max(esg_policy->min_cap, max_util);
		max_util = min(esg_policy->max_cap, max_util);
	}

	return max_util;
}
/*************************************************************************/
/*                             SLACK TIMER                               */
/*************************************************************************/
struct esgov_slack_timer {
	/* for slack timer */
	unsigned long min;
	int enabled;
	struct timer_list timer;
};

/* slack timer per cpu */
static struct esgov_slack_timer __percpu *esgov_timer;

static void slack_update_min(struct cpufreq_policy *policy)
{
	unsigned int cpu;
	unsigned long max_cap, min_cap;
	struct esgov_slack_timer *slack_timer;

	max_cap = capacity_cpu_orig(policy->cpu);

	/* min_cap is the smallest capacity that maps to a frequency above policy->min */
	min_cap = (max_cap * policy->min) / policy->max;
	min_cap -= 1;

	for_each_cpu(cpu, policy->cpus) {
		slack_timer = per_cpu_ptr(esgov_timer, cpu);
		if (!slack_timer)
			continue;

		slack_timer->min = min_cap;
	}
}

static void slack_nop_timer(struct timer_list *timer)
{
	/*
	 * The purpose of the slack timer is to wake up the CPU from IDLE,
	 * in order to decrease its frequency if it is not at the minimum
	 * already.
	 *
	 * This is important for platforms where a CPU at a higher frequency
	 * consumes more power even while IDLE.
	 */
	trace_cpufreq_gov_slack_func(smp_processor_id());
}

static int esgov_cpu_pm_callback(struct notifier_block *nb,
				unsigned long event, void *v)
{
	unsigned int cpu = raw_smp_processor_id();
	struct esgov_slack_timer *slack_timer = per_cpu_ptr(esgov_timer, cpu);
	struct timer_list *timer;
	struct esgov_cpu *esg_cpu = per_cpu_ptr(esgov_cpu, cpu);

	if (!esg_cpu || !slack_timer)
		return NOTIFY_OK;

	if (!esg_cpu->esg_policy || !esg_cpu->esg_policy->running)
		return NOTIFY_OK;

	timer = &slack_timer->timer;

	switch (event) {
	case CPU_PM_ENTER:
		if (timer_pending(timer))
			del_timer_sync(timer);

		if (esg_cpu->util > slack_timer->min) {
			timer->expires = jiffies +
				msecs_to_jiffies(esg_cpu->esg_policy->slack_expired_time_ms);
			add_timer_on(timer, cpu);

			trace_cpufreq_gov_slack(cpu, esg_cpu->util,
						slack_timer->min, event);
		}
		break;

	case CPU_PM_EXIT:
		if (timer_pending(timer)) {
			del_timer_sync(timer);

			trace_cpufreq_gov_slack(cpu, esg_cpu->util,
						slack_timer->min, event);
		}
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block esg_cpu_pm_notifier = {
	.notifier_call = esgov_cpu_pm_callback,
};

#define DEFAULT_SLACK_EXPIRED_TIME	(20)
static void esgov_init_slack_timer(struct cpufreq_policy *policy)
{
	int cpu;

	for_each_cpu(cpu, policy->related_cpus) {
		struct esgov_slack_timer *slack_timer = per_cpu_ptr(esgov_timer, cpu);

		if (!slack_timer)
			continue;

		/* Initialize slack timer */
		slack_timer->min = ULONG_MAX;
		timer_setup(&slack_timer->timer, slack_nop_timer, TIMER_PINNED);
	}
}
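/*
 * Illustrative timeline (not from the original source, numbers are
 * hypothetical): with slack_expired_time_ms = 20 and slack_timer->min derived
 * from policy->min, a CPU that enters idle (CPU_PM_ENTER) while its last
 * target util is still above that "min" threshold arms a pinned 20ms timer.
 * If the CPU stays idle that long, slack_nop_timer() fires and merely wakes
 * the CPU, giving the governor a chance to re-evaluate and drop the
 * frequency; leaving idle earlier (CPU_PM_EXIT) cancels the timer.
 */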
/*************************************************************************/
/*                             IOWAIT BOOST                              */
/*************************************************************************/
/**
 * esgov_iowait_reset() - Reset the IO boost status of a CPU.
 * @esg_cpu: the esgov data for the CPU to boost
 * @time: the update time from the caller
 * @set_iowait_boost: true if an IO boost has been requested
 *
 * The IO wait boost of a task is disabled after a tick since the last update
 * of a CPU. If a new IO wait boost is requested after more than a tick, then
 * we enable the boost starting from the minimum frequency, which improves
 * energy efficiency by ignoring sporadic wakeups from IO.
 */
static bool esgov_iowait_reset(struct esgov_cpu *esg_cpu, u64 time,
			       bool set_iowait_boost)
{
	s64 delta_ns = time - esg_cpu->last_update;

	/* Reset boost only if a tick has elapsed since last request */
	if (delta_ns <= TICK_NSEC)
		return false;

	esg_cpu->iowait_boost = set_iowait_boost ? esg_cpu->min : 0;
	esg_cpu->iowait_boost_pending = set_iowait_boost;

	return true;
}

/**
 * esgov_iowait_boost() - Updates the IO boost status of a CPU.
 * @esg_cpu: the esgov data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from the utilization of the minimum
 * OPP to the utilization of the maximum OPP.
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void esgov_iowait_boost(struct esgov_cpu *esg_cpu, u64 time,
			       unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (esg_cpu->iowait_boost &&
	    esgov_iowait_reset(esg_cpu, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure the boost doubles only once per request */
	if (esg_cpu->iowait_boost_pending)
		return;
	esg_cpu->iowait_boost_pending = true;

	/* Double the boost at each request */
	if (esg_cpu->iowait_boost) {
		esg_cpu->iowait_boost = min_t(unsigned int,
				esg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	esg_cpu->iowait_boost = esg_cpu->min;
}

/**
 * esgov_iowait_apply() - Apply the IO boost to a CPU.
 * @esg_cpu: the esgov data for the cpu to boost
 * @time: the update time from the caller
 * @max: the maximum value the utilization can be boosted to
 *
 * A CPU running a task which has woken up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * esgov_iowait_boost(), and it is instead decreased by this function,
 * each time an increase has not been requested (!iowait_boost_pending).
 *
 * A CPU which appears to have been idle for at least one tick also has its
 * IO boost utilization reset.
 *
 * This mechanism is designed to boost tasks that frequently wait on IO,
 * while being more conservative about tasks that perform only sporadic IO
 * operations.
 */
static unsigned long esgov_iowait_apply(struct esgov_cpu *esg_cpu, u64 time,
					unsigned long max)
{
	unsigned long boost;

	/* No boost currently required */
	if (!esg_cpu->iowait_boost)
		return 0;

	/* Reset boost if the CPU appears to have been idle enough */
	if (esgov_iowait_reset(esg_cpu, time, false))
		return 0;

	if (!esg_cpu->iowait_boost_pending) {
		/* No boost pending; reduce the boost value. */
		esg_cpu->iowait_boost >>= 1;
		if (esg_cpu->iowait_boost < esg_cpu->min) {
			esg_cpu->iowait_boost = 0;
			return 0;
		}
	}

	esg_cpu->iowait_boost_pending = false;

	boost = (esg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
	boost = boost + (boost >> 2);

	return boost;
}
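/*
 * Illustrative lifecycle (not from the original source, numbers are
 * hypothetical): assume esg_cpu->min = 128 and max = 1024.  The first IO
 * wakeup sets iowait_boost to 128; wakeups arriving at least once per tick
 * double it (256, 512, up to SCHED_CAPACITY_SCALE).  Each update without a
 * pending request halves it in esgov_iowait_apply(), and once it drops below
 * esg_cpu->min it is cleared.  The value actually applied is scaled to the
 * CPU's capacity and padded by 25%:
 *   boost = ((iowait_boost * max) >> SCHED_CAPACITY_SHIFT) * 1.25
 */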
static struct notifier_block esg_mode_update_notifier = {
	.notifier_call = esg_mode_update_callback,
};

struct esg_attr {
	struct attribute attr;
	ssize_t (*show)(struct kobject *, char *);
	ssize_t (*store)(struct kobject *, const char *, size_t count);
};

#define esg_attr_rw(name)				\
static struct esg_attr name##_attr =			\
__ATTR(name, 0644, show_##name, store_##name)

#define esg_show_step(name, related_val)				\
static ssize_t show_##name(struct kobject *k, char *buf)		\
{									\
	struct esgov_policy *esg_policy =				\
			container_of(k, struct esgov_policy, kobj);	\
									\
	return sprintf(buf, "step: %d (energy: %lu)\n",			\
			esg_policy->name, esg_policy->related_val);	\
}

#define esg_store_step(name)						\
static ssize_t store_##name(struct kobject *k, const char *buf, size_t count)	\
{									\
	struct esgov_policy *esg_policy =				\
			container_of(k, struct esgov_policy, kobj);	\
	int data;							\
									\
	if (!sscanf(buf, "%d", &data))					\
		return -EINVAL;						\
									\
	esg_update_##name(esg_policy, data);				\
									\
	return count;							\
}

#define esg_show(name)							\
static ssize_t show_##name(struct kobject *k, char *buf)		\
{									\
	struct esgov_policy *esg_policy =				\
			container_of(k, struct esgov_policy, kobj);	\
									\
	return sprintf(buf, "%d\n", esg_policy->name);			\
}

#define esg_store(name)							\
static ssize_t store_##name(struct kobject *k, const char *buf, size_t count)	\
{									\
	struct esgov_policy *esg_policy =				\
			container_of(k, struct esgov_policy, kobj);	\
	int data;							\
									\
	if (!sscanf(buf, "%d", &data))					\
		return -EINVAL;						\
									\
	esg_policy->name = data;					\
	return count;							\
}

esg_show(slack_expired_time_ms);
esg_store(slack_expired_time_ms);
esg_attr_rw(slack_expired_time_ms);

esg_show(no_work_press_ratio);
esg_store(no_work_press_ratio);
esg_attr_rw(no_work_press_ratio);

static ssize_t show(struct kobject *kobj, struct attribute *at, char *buf)
{
	struct esg_attr *fvattr = container_of(at, struct esg_attr, attr);

	return fvattr->show(kobj, buf);
}

static ssize_t store(struct kobject *kobj, struct attribute *at,
					const char *buf, size_t count)
{
	struct esg_attr *fvattr = container_of(at, struct esg_attr, attr);

	return fvattr->store(kobj, buf, count);
}

static const struct sysfs_ops esg_sysfs_ops = {
	.show	= show,
	.store	= store,
};

static struct attribute *esg_attrs[] = {
	&slack_expired_time_ms_attr.attr,
	&no_work_press_ratio_attr.attr,
	NULL
};

static struct kobj_type ktype_esg = {
	.sysfs_ops	= &esg_sysfs_ops,
	.default_attrs	= esg_attrs,
};

static struct esgov_policy *esgov_policy_alloc(struct cpufreq_policy *policy)
{
	struct esgov_policy *esg_policy;
	struct esgov_param *param;

	/* allocate esgov_policy */
	esg_policy = kzalloc(sizeof(struct esgov_policy), GFP_KERNEL);
	if (!esg_policy)
		goto init_failed;

	/* init cpumask */
	cpumask_copy(&esg_policy->cpus, policy->related_cpus);
	if (cpumask_weight(&esg_policy->cpus) == 0)
		goto free_allocation;

	param = *per_cpu_ptr(esgov_param, cpumask_any(&esg_policy->cpus));
	esg_sync_param(esg_policy, param);

	esgov_init_slack_timer(policy);

	esg_policy->rate_delay_ns = 4 * NSEC_PER_MSEC;

	/* init sysfs */
	if (kobject_init_and_add(&esg_policy->kobj, &ktype_esg, esg_kobj,
			"coregroup%d", cpumask_first(&esg_policy->cpus)))
		goto free_allocation;

	/* init spin lock */
	raw_spin_lock_init(&esg_policy->update_lock);
	init_rwsem(&esg_policy->rwsem);

	esg_policy->policy = policy;

	return esg_policy;

free_allocation:
	kfree(esg_policy);

init_failed:
	pr_warn("%s: Failed esgov_init(cpu%d)\n", __func__, policy->cpu);

	return NULL;
}
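/*
 * Note on the frequency-change path below (summary, not from the original
 * source): when fast switching is not available, esgov_update() raises
 * esg_policy->irq_work, esgov_irq_work() queues esg_policy->work on the
 * dedicated "esgov:%d" kthread, and esgov_work() finally calls
 * __cpufreq_driver_target() under policy->rwsem and work_lock.  With fast
 * switching enabled, the frequency is written directly via
 * cpufreq_driver_fast_switch() and the kthread is never created.
 */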
static void esgov_work(struct kthread_work *work)
{
	struct esgov_policy *esg_policy = container_of(work, struct esgov_policy, work);
	unsigned int freq;
	unsigned long flags;

	raw_spin_lock_irqsave(&esg_policy->update_lock, flags);
	freq = esg_policy->target_freq;
	esg_policy->work_in_progress = false;
	raw_spin_unlock_irqrestore(&esg_policy->update_lock, flags);

	down_write(&esg_policy->policy->rwsem);
	mutex_lock(&esg_policy->work_lock);
	__cpufreq_driver_target(esg_policy->policy, freq, CPUFREQ_RELATION_L);
	mutex_unlock(&esg_policy->work_lock);
	up_write(&esg_policy->policy->rwsem);
}

static void esgov_irq_work(struct irq_work *irq_work)
{
	struct esgov_policy *esg_policy;

	esg_policy = container_of(irq_work, struct esgov_policy, irq_work);

	kthread_queue_work(&esg_policy->worker, &esg_policy->work);
}

static int esgov_kthread_create(struct esgov_policy *esg_policy)
{
	struct task_struct *thread;
	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
	struct cpufreq_policy *policy = esg_policy->policy;
	struct device_node *dn;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&esg_policy->work, esgov_work);
	kthread_init_worker(&esg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &esg_policy->worker,
				"esgov:%d", cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create esgov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
		return ret;
	}

	dn = of_find_node_by_path("/ems/esg");
	if (dn) {
		struct cpumask mask;
		const char *buf;

		cpumask_copy(&mask, cpu_possible_mask);
		if (!of_property_read_string(dn, "thread-run-on", &buf))
			cpulist_parse(buf, &mask);
		set_cpus_allowed_ptr(thread, &mask);
		thread->flags |= PF_NO_SETAFFINITY;
	}

	esg_policy->thread = thread;
	init_irq_work(&esg_policy->irq_work, esgov_irq_work);
	mutex_init(&esg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void esgov_policy_free(struct esgov_policy *esg_policy)
{
	kfree(esg_policy);
}

static int esgov_init(struct cpufreq_policy *policy)
{
	struct esgov_policy *esg_policy;
	int ret = 0;
	int cpu;

	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	esg_policy = *per_cpu_ptr(esgov_policy, policy->cpu);
	if (esg_policy) {
		pr_info("%s: Already allocated esgov_policy\n", __func__);
		goto complete_esg_init;
	}

	esg_policy = esgov_policy_alloc(policy);
	if (!esg_policy) {
		ret = -ENOMEM;
		goto failed_to_init;
	}

	ret = esgov_kthread_create(esg_policy);
	if (ret)
		goto free_esg_policy;

	for_each_cpu(cpu, &esg_policy->cpus)
		*per_cpu_ptr(esgov_policy, cpu) = esg_policy;

complete_esg_init:
	down_write(&esg_policy->rwsem);
	policy->governor_data = esg_policy;
	esg_policy->min = policy->min;
	esg_policy->max = policy->max;
	esg_policy->min_cap = find_allowed_capacity(policy->cpu, policy->min, 0);
	esg_policy->max_cap = find_allowed_capacity(policy->cpu, policy->max, 0);
	esg_policy->enabled = true;
	esg_policy->last_caller = UINT_MAX;
	cpufreq_register_hook(esgov_get_gov_next_cap);
	up_write(&esg_policy->rwsem);

	return 0;

free_esg_policy:
	esgov_policy_free(esg_policy);

failed_to_init:
	pr_err("initialization failed (error %d)\n", ret);

	return ret;
}
static void esgov_exit(struct cpufreq_policy *policy)
{
	struct esgov_policy *esg_policy = *per_cpu_ptr(esgov_policy, policy->cpu);

	down_write(&esg_policy->rwsem);
	cpufreq_unregister_hook();
	esg_policy->enabled = false;
	policy->governor_data = NULL;
	up_write(&esg_policy->rwsem);

	cpufreq_disable_fast_switch(policy);
}

static unsigned int get_next_freq(struct esgov_policy *esg_policy,
				unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = esg_policy->policy;
	unsigned int freq;

	freq = (policy->cpuinfo.max_freq * util) / max;
	freq = cpufreq_driver_resolve_freq(policy, freq);
	freq = fclamp_apply(policy, freq);

	return clamp_val(freq, esg_policy->min, esg_policy->max);
}
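/*
 * Worked example for get_next_freq() (not from the original source, numbers
 * are hypothetical): with cpuinfo.max_freq = 2000000 kHz, max = 1024 and
 * util = 512, the raw request is 2000000 * 512 / 1024 = 1000000 kHz.
 * cpufreq_driver_resolve_freq() then snaps this to a real OPP, fclamp_apply()
 * applies any EMS frequency clamp, and the result is finally clamped to the
 * governor's current [min, max] frequency range.
 */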
static int esg_get_boost_pelt_util(int capacity, int util, int boost)
{
	long long margin;

	if (!boost)
		return util;

	if (boost > 0)
		margin = max(capacity - util, 0) * boost;
	else
		margin = util * boost;

	margin /= 100;

	return util + margin;
}

/*
 * Bit 31 of the nr_running argument is used as a flag: it is set only when
 * the caller is an actual frequency-update path, so that tracing and wakeup
 * boosting are applied there but not in the placement-estimation path.
 */
#define FREQ_UPDATE_BIT	(1 << 31)

/* return the target util of this cpu */
static unsigned int esgov_calc_cpu_target_util(struct esgov_cpu *esg_cpu,
			int max, int org_pelt_util, int pelt_util_diff,
			int nr_running)
{
	int util, step_util, pelt_util, io_util;
	int org_io_util, org_step_util;
	int pelt_margin, pelt_boost;

	if (unlikely(!esg_cpu->esg_policy))
		return org_pelt_util;

	org_io_util = esg_cpu->io_util;
	org_step_util = esg_cpu->step_util;
	pelt_margin = esg_cpu->esg_policy->pelt_margin;
	pelt_boost = esg_cpu->esg_policy->pelt_boost;

	/* calculate io boost util */
	io_util = org_io_util;

	/*
	 * calculate pelt_util:
	 * add pelt_util_diff and then apply the pelt margin to the sched util.
	 * pelt_util_diff: util diff caused by migrating the task
	 */
	pelt_util = org_pelt_util + pelt_util_diff;
	pelt_util = max(pelt_util, 0);
	if (pelt_util > 0) {
		pelt_util += pelt_util * pelt_margin / 100;
		pelt_util = esg_get_boost_pelt_util(capacity_cpu(esg_cpu->cpu),
							pelt_util, pelt_boost);
		pelt_util = min(max, pelt_util);
	}
	pelt_util = nr_running & ~FREQ_UPDATE_BIT ? pelt_util :
		pelt_util * esg_cpu->esg_policy->no_work_press_ratio / 100;

	/*
	 * calculate step util:
	 * if there is no running task, step util is always 0
	 */
	step_util = nr_running & ~FREQ_UPDATE_BIT ? org_step_util : 0;
	step_util = (esg_cpu->active_ratio == SCHED_CAPACITY_SCALE) ? step_util : 0;

	/* find max util */
	util = max(pelt_util, step_util);
	util = max(util, io_util);

	/* apply wakeup boost value */
	if ((nr_running & FREQ_UPDATE_BIT) && !pelt_boost)
		util = wakeboost_cpu_boost(esg_cpu->cpu, util);

	/* apply emstune boost */
	util = freqboost_cpu_boost(esg_cpu->cpu, util);

	if (nr_running & FREQ_UPDATE_BIT)
		trace_esg_cpu_util(esg_cpu->cpu, nr_running & ~FREQ_UPDATE_BIT,
				org_io_util, io_util, org_step_util, step_util,
				org_pelt_util, pelt_util, pelt_margin,
				pelt_boost, max, util);

	return util;
}

/* return max util of the cluster of this cpu */
static unsigned int esgov_get_target_util(struct esgov_policy *esg_policy,
					u64 time, unsigned long max)
{
	unsigned long max_util = 0;
	unsigned int cpu;

	/* get max util in the cluster */
	for_each_cpu(cpu, esg_policy->policy->cpus) {
		struct esgov_cpu *esg_cpu = per_cpu_ptr(esgov_cpu, cpu);
		int nr_running = cpu_rq(cpu)->nr_running;

		if (!esg_cpu)
			continue;

		esg_cpu->util = esgov_calc_cpu_target_util(esg_cpu, max,
					esg_cpu->pelt_util, 0,
					nr_running | FREQ_UPDATE_BIT);

		if (esg_cpu->util > max_util)
			max_util = esg_cpu->util;
	}

	return max_util;
}
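/*
 * Background for esgov_get_step_util() below (explanatory note, not from the
 * original source): "patient mode" holds back step-based frequency raises
 * until the CPU has been almost fully busy (active ratio above
 * PATIENT_MODE_BUSY_RATIO, roughly 93% on a 1024 scale) for patient_mode
 * consecutive MLT periods; only then is active_ratio reported as
 * SCHED_CAPACITY_SCALE, which lets the step util contribute in
 * esgov_calc_cpu_target_util().  The step util itself is the allowed
 * capacity (the current operating point plus one step_power worth of energy)
 * scaled by the average active ratio of the last two MLT periods.
 */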
/* update the step_util */
#define PATIENT_MODE_BUSY_RATIO	950
static unsigned long esgov_get_step_util(struct esgov_cpu *esg_cpu, unsigned long max)
{
	struct esgov_policy *esg_policy = *per_cpu_ptr(esgov_policy, esg_cpu->cpu);
	unsigned int freq;
	int active_ratio = 0, util, prev_idx, hist_count = 0, idx;
	int patient_tick, over_cnt = 0;
	int hist_idx = mlt_cur_period(esg_cpu->cpu);
	int cpu = esg_cpu->cpu;

	if (unlikely(!esg_policy || !esg_policy->step))
		return 0;

	if (esg_cpu->last_idx == hist_idx)
		return esg_cpu->step_util;

	/* get active ratio for patient mode */
	idx = esg_cpu->last_idx = hist_idx;
	patient_tick = esg_policy->patient_mode;
	while (hist_count++ < patient_tick) {
		if (mlt_art_value(cpu, idx) > PATIENT_MODE_BUSY_RATIO)
			over_cnt++;
		idx = mlt_prev_period(idx);
	}
	active_ratio = (patient_tick == over_cnt) ? SCHED_CAPACITY_SCALE : 0;
	esg_cpu->active_ratio = active_ratio;

	/* get active ratio for step util */
	prev_idx = mlt_prev_period(hist_idx);
	active_ratio = mlt_art_value(cpu, hist_idx) + mlt_art_value(cpu, prev_idx);
	active_ratio >>= 1;

	/* update the capacity */
	freq = esg_cpu->step_util * (esg_policy->policy->max / max);
	esg_cpu->capacity = find_allowed_capacity(esg_cpu->cpu, freq,
						esg_policy->step_power);

	/* calculate step_util */
	util = (esg_cpu->capacity * active_ratio) >> SCHED_CAPACITY_SHIFT;

	trace_esg_cpu_step_util(esg_cpu->cpu, esg_cpu->capacity,
					active_ratio, max, util);

	return util;
}

/* update cpu util */
static void esgov_update_cpu_util(struct esgov_policy *esg_policy, u64 time,
					unsigned long max)
{
	int cpu;

	for_each_cpu(cpu, esg_policy->policy->cpus) {
		struct esgov_cpu *esg_cpu = per_cpu_ptr(esgov_cpu, cpu);
		struct rq *rq = cpu_rq(cpu);

		if (!esg_cpu)
			continue;

		/* update iowait boost util */
		esg_cpu->io_util = esgov_iowait_apply(esg_cpu, time, max);

		/* update sched util */
		esg_cpu->pelt_util = ml_cpu_util(cpu) + cpu_util_rt(rq);

		/* update step_util; if the cpu is idle, we want to ignore step_util */
		esg_cpu->step_util = esgov_get_step_util(esg_cpu, max);
	}
}

static bool esgov_check_rate_delay(struct esgov_policy *esg_policy, u64 time)
{
	s64 delta_ns = time - esg_policy->last_freq_update_time;

	if (delta_ns < esg_policy->rate_delay_ns)
		return false;

	return true;
}

static unsigned int get_diff_num_levels(struct cpufreq_policy *policy,
						unsigned int freq)
{
	unsigned int index1, index2;

	index1 = cpufreq_frequency_table_get_index(policy, policy->cur);
	index2 = cpufreq_frequency_table_get_index(policy, freq);

	return abs(index1 - index2);
}

enum {
	RAPID_SCALE_UP = 1,
	RAPID_SCALE_DOWN,
	RAPID_SCALE_MUST,
};

#define ESG_MAX_DELAY_PERIODS	5
/*
 * Return true if we can delay the frequency update because the requested
 * frequency change is not large enough, and false if it is large enough.
 * The condition for "large enough" compares the number of frequency levels
 * to change against the time elapsed since the last frequency update. For
 * example, an ESG_MAX_DELAY_PERIODS of 5 means an immediate frequency change
 * is allowed only if the change in frequency level is greater than or equal
 * to 5; it also means a change of a single frequency level has to wait about
 * 5 rate-delay periods before it takes effect.
 */
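/*
 * Numeric walk-through (illustrative, not from the original source): with the
 * default rate_delay_ns of 4ms, margin is 1ms, so 10ms after the last update
 * num_periods = (10 + 1) / 4 = 2.  A request that is 3 or fewer levels away
 * from policy->cur (3 <= 5 - 2) is postponed, while a request 4 or more
 * levels away is applied immediately.
 */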
static bool esgov_postpone_freq_update(struct esgov_policy *esg_policy, u64 time,
				unsigned int target_freq, int rapid_scale)
{
	unsigned int diff_num_levels, num_periods, elapsed, margin;

	if (rapid_scale == RAPID_SCALE_MUST)
		return false;

	elapsed = time - esg_policy->last_freq_update_time;

	/* At this point target_freq differs from the current freq */
	if (esg_policy->policy->cur < target_freq) {
		u64 ramp_up_bound = esg_policy->up_rate_limit_ns;

		if (rapid_scale == RAPID_SCALE_UP)
			return false;

		if (elapsed < ramp_up_bound)
			return true;
	} else {
		u64 ramp_down_bound = esg_policy->down_rate_limit_ns;

		if (rapid_scale == RAPID_SCALE_DOWN)
			return false;

		if (elapsed < ramp_down_bound)
			return true;
	}

	/*
	 * If there is no pelt margin, it is better to increase the frequency
	 * immediately to prevent a performance drop.
	 */
	if (esg_policy->pelt_margin <= 0 && target_freq > esg_policy->policy->cur)
		return false;

	margin = esg_policy->rate_delay_ns >> 2;
	num_periods = (elapsed + margin) / esg_policy->rate_delay_ns;
	if (num_periods > ESG_MAX_DELAY_PERIODS)
		return false;

	diff_num_levels = get_diff_num_levels(esg_policy->policy, target_freq);
	if (diff_num_levels > ESG_MAX_DELAY_PERIODS - num_periods)
		return false;
	else
		return true;
}

static int esgov_check_rapid_scale(struct esgov_policy *esg_policy,
					struct esgov_cpu *esg_cpu)
{
	int cpu, nr_running = 0;

	if (esg_policy->limits_changed) {
		esg_policy->limits_changed = false;
		return RAPID_SCALE_MUST;
	}

	for_each_cpu(cpu, &esg_cpu->esg_policy->cpus)
		nr_running += cpu_rq(cpu)->nr_running;

	/* rapid scale up */
	if (esg_cpu->no_work_press && wakeboost_pending(esg_cpu->cpu)) {
		esg_cpu->no_work_press = false;
		if (esg_cpu->esg_policy->rapid_scale_up)
			return RAPID_SCALE_UP;
	}

	/* rapid scale down */
	if (!esg_cpu->no_work_press && !wakeboost_pending(esg_cpu->cpu)) {
		esg_cpu->no_work_press = true;
		if (esg_cpu->esg_policy->rapid_scale_down)
			return RAPID_SCALE_DOWN;
	}

	return 0;
}
static void esgov_update(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct esgov_cpu *esg_cpu = container_of(hook, struct esgov_cpu, update_util);
	struct esgov_policy *esg_policy = esg_cpu->esg_policy;
	unsigned long max = capacity_cpu_orig(esg_cpu->cpu);
	unsigned int target_util, target_freq;
	int rapid_scale;

	if (unlikely(!esg_policy))
		return;

	/* check iowait boost */
	esgov_iowait_boost(esg_cpu, time, flags);
	esg_cpu->last_update = time;

	if (!cpufreq_this_cpu_can_update(esg_policy->policy))
		return;

	/*
	 * Try to take the lock. If somebody else is holding it, this
	 * updater (cpu) skips the frequency update, because the holder has
	 * already updated, or is about to update, the last update time
	 * very close to now.
	 */
	if (!raw_spin_trylock(&esg_policy->update_lock))
		return;

	rapid_scale = esgov_check_rapid_scale(esg_policy, esg_cpu);

	/* check rate delay */
	if (!rapid_scale && !esgov_check_rate_delay(esg_policy, time))
		goto out;

	/* update cpu_util of this cluster */
	esgov_update_cpu_util(esg_policy, time, max);

	/* update target util of the cluster of this cpu */
	target_util = esgov_get_target_util(esg_policy, time, max);

	/* get target freq for the new target util */
	target_freq = get_next_freq(esg_policy, target_util, max);
	if (esg_policy->policy->cur == target_freq)
		goto out;

	/* inform et of the new freq */
	et_update_freq(esg_cpu->cpu, target_freq);

	if (esgov_postpone_freq_update(esg_policy, time, target_freq, rapid_scale))
		goto out;

	if (esg_policy->work_in_progress)
		goto out;

	esg_policy->last_caller = smp_processor_id();
	esg_policy->util = target_util;
	esg_policy->target_freq = target_freq;
	esg_policy->last_freq_update_time = time;

	trace_esg_req_freq(esg_policy->policy->cpu, esg_policy->util,
				esg_policy->target_freq, rapid_scale);

	if (esg_policy->policy->fast_switch_enabled) {
		cpufreq_driver_fast_switch(esg_policy->policy, target_freq);
	} else {
		esg_policy->work_in_progress = true;
		irq_work_queue(&esg_policy->irq_work);
	}

out:
	raw_spin_unlock(&esg_policy->update_lock);
}

static int esgov_start(struct cpufreq_policy *policy)
{
	struct esgov_policy *esg_policy = policy->governor_data;
	unsigned int cpu;

	/* TODO: We SHOULD implement FREQVAR-RATE-DELAY based on SchedTune */
	esg_policy->last_freq_update_time = 0;
	esg_policy->target_freq = 0;

	/* Initialize slack expired time */
	esg_policy->slack_expired_time_ms = DEFAULT_SLACK_EXPIRED_TIME;	/* default 20ms */

	/* Initialize no-work press ratio */
	esg_policy->no_work_press_ratio = 100;	/* default 100% */

	esg_policy->limits_changed = false;
	esg_policy->running = 1;

	for_each_cpu(cpu, policy->cpus) {
		struct esgov_cpu *esg_cpu = per_cpu_ptr(esgov_cpu, cpu);

		esg_cpu->esg_policy = esg_policy;
		esg_cpu->cpu = cpu;
		esg_cpu->min =
			(SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
			policy->cpuinfo.max_freq;
	}

	for_each_cpu(cpu, policy->cpus) {
		struct esgov_cpu *esg_cpu = per_cpu_ptr(esgov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &esg_cpu->update_util, esgov_update);
	}

	return 0;
}

static void esgov_stop(struct cpufreq_policy *policy)
{
	struct esgov_policy *esg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_rcu();

	if (!policy->fast_switch_enabled)
		irq_work_sync(&esg_policy->irq_work);

	esg_policy->running = 0;
}

static void esgov_limits(struct cpufreq_policy *policy)
{
	struct esgov_policy *esg_policy = policy->governor_data;
	unsigned long max = capacity_cpu_orig(policy->cpu);
	unsigned int target_util, target_freq = 0;

	/* These don't need to take work_lock */
	slack_update_min(policy);
	esg_update_freq_range(policy);

	if (!policy->fast_switch_enabled) {
		mutex_lock(&esg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);

		/* get target util of the cluster of this cpu */
		target_util = esgov_get_target_util(esg_policy, 0, max);

		/* get target freq for the new target util */
		target_freq = get_next_freq(esg_policy, target_util, max);

		/*
		 * After a freq limits change, CPUFreq's policy->cur can differ
		 * from ESG's target freq. In that case, explicitly change the
		 * current freq to ESG's target freq.
		 */
		if (policy->cur != target_freq)
			__cpufreq_driver_target(policy, target_freq, CPUFREQ_RELATION_L);

		mutex_unlock(&esg_policy->work_lock);
	} else
		esg_policy->limits_changed = true;
}
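/*
 * Usage note (not from the original source): once this governor is built in
 * and registered, it can be selected per policy through the standard CPUFreq
 * interface, e.g.:
 *   echo energy_step > /sys/devices/system/cpu/cpufreq/policy0/scaling_governor
 * or made the default via CONFIG_CPU_FREQ_DEFAULT_GOV_ENERGYSTEP (see
 * cpufreq_default_governor() below).
 */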
struct cpufreq_governor energy_step_gov = {
	.name			= "energy_step",
	.owner			= THIS_MODULE,
	.flags			= CPUFREQ_GOV_DYNAMIC_SWITCHING,
	.init			= esgov_init,
	.exit			= esgov_exit,
	.start			= esgov_start,
	.stop			= esgov_stop,
	.limits			= esgov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ENERGYSTEP
unsigned long cpufreq_governor_get_util(unsigned int cpu)
{
	struct esgov_cpu *esg_cpu = per_cpu_ptr(esgov_cpu, cpu);

	if (!esg_cpu)
		return 0;

	return esg_cpu->util;
}

unsigned int cpufreq_governor_get_freq(int cpu)
{
	struct esgov_policy *esg_policy;
	unsigned int freq;
	unsigned long flags;

	if (cpu < 0)
		return 0;

	esg_policy = *per_cpu_ptr(esgov_policy, cpu);
	if (!esg_policy)
		return 0;

	raw_spin_lock_irqsave(&esg_policy->update_lock, flags);
	freq = esg_policy->target_freq;
	raw_spin_unlock_irqrestore(&esg_policy->update_lock, flags);

	return freq;
}

struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &energy_step_gov;
}
#endif

static int esgov_register(void)
{
	cpu_pm_register_notifier(&esg_cpu_pm_notifier);
	emstune_register_notifier(&esg_mode_update_notifier);

	return cpufreq_register_governor(&energy_step_gov);
}

#define DEFAULT_ESG_STEP	(4)
#define DEFAULT_PATIENT_MODE	(0)
#define DEFAULT_PELT_MARGIN	(25)
#define DEFAULT_PELT_BOOST	(0)
int esgov_pre_init(struct kobject *ems_kobj)
{
	struct device_node *dn, *child;
	struct esgov_param *param;
	int ret = 0, cpu;

	esgov_policy = alloc_percpu(struct esgov_policy *);
	esgov_cpu = alloc_percpu(struct esgov_cpu);
	esgov_param = alloc_percpu(struct esgov_param *);
	esgov_timer = alloc_percpu(struct esgov_slack_timer);

	esgov_register();

	dn = of_find_node_by_path("/ems/esg");
	for_each_child_of_node(dn, child) {
		const char *buf;

		param = kzalloc(sizeof(struct esgov_param), GFP_KERNEL);
		if (!param) {
			pr_err("%s: failed to alloc esgov_param\n", __func__);
			ret = -ENOMEM;
			goto fail;
		}

		if (of_property_read_string(child, "shared-cpus", &buf)) {
			pr_err("%s: shared-cpus property is omitted\n", __func__);
			ret = -ENODATA;
			goto fail;
		} else
			cpulist_parse(buf, &param->cpus);

		for_each_cpu(cpu, &param->cpus)
			*per_cpu_ptr(esgov_param, cpu) = param;
	}

	esg_kobj = kobject_create_and_add("energy_step", ems_kobj);
	if (!esg_kobj)
		return -EINVAL;

	return ret;

fail:
	for_each_possible_cpu(cpu)
		kfree(*per_cpu_ptr(esgov_param, cpu));

	return ret;
}
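/*
 * Usage note (not from the original source; the absolute sysfs path depends
 * on where the parent EMS kobject is created): the per-cluster tunables
 * registered above are exposed as
 *   .../energy_step/coregroup<N>/slack_expired_time_ms
 *   .../energy_step/coregroup<N>/no_work_press_ratio
 * and accept plain integers, e.g. "echo 40 > slack_expired_time_ms" lets an
 * idle CPU keep a non-minimum frequency for up to 40ms before the slack
 * timer wakes it for re-evaluation.
 */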