/*
 * EGO(Energy-Aware CPUFreq Governor) on Energy and Scheduler-Event.
 * Copyright (C) 2021, Samsung Electronic Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/sched/cpufreq.h>
#include <linux/cpu_pm.h>
#include <linux/cpufreq.h>
#include <trace/hooks/cpuidle.h>

#include "../sched.h"
#include "ems.h"

#include <dt-bindings/soc/samsung/ems.h>
#include <trace/events/ems.h>
#include <trace/events/ems_debug.h>

#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)
#define HIST_SIZE		40
#define RATIO_UNIT		1000

struct ego_idle {
	int avg_ratio[CSTATE_MAX];
	int last_ratio[CSTATE_MAX];
	u32 prev_idx;
};

struct ego_policy {
	struct cpufreq_policy *policy;

	raw_spinlock_t update_lock;	/* For shared policies */
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;		/* final target freq */
	unsigned int cached_raw_freq;	/* util based raw freq */
	unsigned int org_freq;		/* util based freq in table */
	unsigned int eng_freq;		/* lowest energy freq */

	/* The next fields are only needed if fast switch cannot be used: */
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
	bool work_in_progress;

	bool limits_changed;
	bool need_freq_update;

	/* EGO specific */
	struct cpumask cpus;
	struct cpumask thread_allowed_cpus;
	int heaviest_cpu;

	/* EGO tunables */
	unsigned int ratio;
	int dis_buck_share;	/* ignore buck-share when computing energy */
	int pelt_boost;		/* dynamically changed boost */
	int htask_boost;	/* tunable boost */
	int pelt_margin;
	int split_pelt_margin;
	unsigned int split_pelt_margin_freq;
	s64 up_rate_limit_ns;
	s64 split_up_rate_limit_ns;
	unsigned int split_up_rate_limit_freq;
	s64 down_rate_limit_ns;

	bool build_somac_wall;
	unsigned int somac_wall;

	struct kobject kobj;
};

struct ego_cpu {
	struct update_util_data update_util;
	struct ego_policy *egp;
	unsigned int cpu;

	bool iowait_boost_pending;
	unsigned int iowait_boost;
	u64 last_update;

	unsigned long bw_dl;
	unsigned long max;

	unsigned long util;		/* current pelt util */
	unsigned long boosted_util;	/* current boosted util */

	unsigned long min_cap;

	/* idle state */
	struct ego_idle idle;
};

struct kobject *ego_kobj;
static DEFINE_PER_CPU(struct ego_cpu, ego_cpu);

/*********************************************************************/
/*		     EGO Specific Implementation		     */
/*********************************************************************/
/* returns whether the cpufreq governor is EGO or not */
static inline bool ego_is_working(struct ego_policy *egp)
{
	return ((likely(egp)) && (likely(egp->policy))
		&& (egp->policy->governor_data == egp));
}

/* compute the freq level diff between the current freq and a given freq */
static unsigned int
get_diff_num_levels(struct cpufreq_policy *policy, unsigned int freq)
{
	int index1, index2;

	index1 = cpufreq_frequency_table_get_index(policy, policy->cur);
	index2 = cpufreq_frequency_table_get_index(policy, freq);

	return abs(index1 - index2);
}

#define ESG_MAX_DELAY_PERIODS	5
/*
 * Return true if we can delay the frequency update because the requested
 * frequency change is not large enough, and false if it is large enough.
 * "Large enough" is decided by comparing the number of frequency levels to
 * change against the time elapsed since the last frequency update. For
 * example, an ESG_MAX_DELAY_PERIODS of 5 means an immediate frequency change
 * is allowed only if the change spans at least 5 frequency levels; it also
 * means a change of a single frequency level has to wait 5 ticks before it
 * takes effect.
 */
static bool ego_postpone_freq_update(struct ego_policy *egp,
				u64 time, unsigned int target_freq)
{
	unsigned int diff_num_levels, num_periods, elapsed, margin;

	if (egp->need_freq_update)
		return false;

	elapsed = time - egp->last_freq_update_time;

	if (egp->policy->cur < target_freq)
		return elapsed < egp->up_rate_limit_ns;

	margin = egp->freq_update_delay_ns >> 2;
	num_periods = (elapsed + margin) / egp->freq_update_delay_ns;
	if (num_periods > ESG_MAX_DELAY_PERIODS)
		return false;

	diff_num_levels = get_diff_num_levels(egp->policy, target_freq);
	if (diff_num_levels > ESG_MAX_DELAY_PERIODS - num_periods)
		return false;
	else
		return true;
}
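/*
 * Illustrative example (not part of the original source): assume
 * freq_update_delay_ns is 4ms and a request that does not raise the
 * frequency arrives ~8ms after the last update. Then margin = 1ms and
 * num_periods = (8 + 1) / 4 = 2, so the change is applied immediately
 * only if it spans more than ESG_MAX_DELAY_PERIODS - 2 = 3 frequency
 * levels; smaller steps keep being postponed until enough periods pass.
 */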

/*********************************************************************/
/*		     To support power estimation		     */
/*********************************************************************/
static inline
unsigned long ego_compute_energy(struct ego_policy *egp, unsigned long freq)
{
	struct energy_state states[VENDOR_NR_CPUS] = { 0, };
	unsigned long time[CSTATE_MAX] = { 0 };
	unsigned long active_eng, idle_eng, capacity;
	int cpu, policy_cpu = egp->policy->cpu;

	capacity = max(et_freq_to_cap(policy_cpu, freq), (unsigned long)1);
	et_fill_energy_state(NULL, &egp->cpus, states, capacity, -1);

	/* compute normalized time */
	for_each_cpu(cpu, &egp->cpus) {
		struct ego_cpu *egc = &per_cpu(ego_cpu, cpu);
		struct ego_idle *egi = &egc->idle;
		unsigned long idle_util, idle_ratio_sum;

		states[cpu].util = egc->util;

		/* We just guess the normalized value from the clkoff/pwroff ratio */
		idle_util = max((long)(capacity - egc->util), (long) 0);
		idle_ratio_sum = egi->avg_ratio[CLKOFF] + egi->avg_ratio[PWROFF];
		if (!idle_ratio_sum)
			continue;
		time[CLKOFF] += (idle_util * egi->avg_ratio[CLKOFF] / idle_ratio_sum);
		time[PWROFF] += (idle_util * egi->avg_ratio[PWROFF] / idle_ratio_sum);
	}

	/* compute active energy */
	active_eng = et_compute_cpu_energy(&egp->cpus, states);

	/* compute idle energy */
	idle_eng = (states[policy_cpu].static_power * (time[CLKOFF] * RATIO_UNIT)) / capacity;

	trace_ego_cpu_eng(policy_cpu, capacity,
			states[policy_cpu].dynamic_power, states[policy_cpu].static_power,
			time[CLKOFF], active_eng, idle_eng);

	return active_eng + idle_eng;
}
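/*
 * Rough shape of the cost model above (illustrative sketch, not an exact
 * formula): for a candidate frequency f, the active part scales with the
 * dynamic power and per-CPU utilization, while the idle part charges
 * static power for the estimated clock-off time, both normalized by the
 * capacity that f provides. The precise scaling lives in
 * et_compute_cpu_energy() and the RATIO_UNIT arithmetic above and is
 * platform specific.
 */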

static void ego_compute_cpu_idle_ratio(struct ego_cpu *egc, int hist_size)
{
	int avg_ratio[CSTATE_MAX] = { 0 };
	struct ego_idle *egi = &egc->idle;
	int cpu = egc->cpu;
	int state, idx, cur_idx = mlt_cur_period(cpu);
	int update = abs(cur_idx - egi->prev_idx);
	int last_ratio = 0, cur_ratio = 0;
	int last_idx = mlt_period_with_delta(cur_idx, 1);

	if (!update)
		return;

	/* compute only the last/current window to keep the update fast */
	if (update == 1) {
		for (state = 0; state < CSTATE_MAX; state++) {
			last_ratio = egi->last_ratio[state];
			cur_ratio = mlt_cst_value(cpu, cur_idx, state);

			/* 1. compute the ratio sum */
			avg_ratio[state] = egi->avg_ratio[state] * hist_size;
			/* 2. subtract the last window ratio */
			avg_ratio[state] = max((avg_ratio[state] - last_ratio), 0);
			/* 3. add the current window ratio */
			avg_ratio[state] += cur_ratio;
		}
	} else {
		/* recompute all ratios over the whole history */
		int cursor = cur_idx;
		for (idx = 0; idx < hist_size; idx++) {
			for (state = 0; state < CSTATE_MAX; state++)
				avg_ratio[state] += mlt_cst_value(cpu, cursor, state);
			cursor = mlt_prev_period(cursor);
		}
	}

	/* compute avg ratio */
	for (state = 0; state < CSTATE_MAX; state++)
		egi->avg_ratio[state] = avg_ratio[state] / hist_size;

	/* update last index */
	egi->prev_idx = cur_idx;

	/* save the last ratio for the fast path above */
	for (state = 0; state < CSTATE_MAX; state++)
		egi->last_ratio[state] = mlt_cst_value(cpu, last_idx, state);

	trace_ego_cpu_idle_ratio(cpu, update,
			cur_idx, egi->avg_ratio[CLKOFF], egi->avg_ratio[PWROFF],
			last_ratio, cur_ratio, last_idx);
}
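/*
 * Update strategy in numbers (illustrative, assuming hist_size = 8):
 * if only one MLT window has passed, the average is refreshed
 * incrementally; with an old average of 100, a last-window ratio of 60
 * and a current-window ratio of 80 the new average becomes
 * (100 * 8 - 60 + 80) / 8 = 102 (integer math). Only when several
 * windows were missed is the full history re-summed.
 */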

/* refresh the idle-ratio snapshot for every CPU in this policy */
static inline void ego_compute_idle_ratio(struct ego_policy *egp)
{
	int cpu;

	for_each_cpu(cpu, &egp->cpus) {
		struct ego_cpu *egc = &per_cpu(ego_cpu, cpu);
		ego_compute_cpu_idle_ratio(egc, MLT_PERIOD_COUNT);
	}
}

static unsigned int ego_apply_eng_boost(unsigned int min_freq,
				unsigned int eng_freq, struct ego_policy *egp)
{
	int delta = eng_freq - min_freq;
	if (delta <= 0)
		return min_freq;
	return min_freq + (delta * egp->ratio) / RATIO_UNIT;
}
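/*
 * Example of the interpolation above (illustrative): with ratio = 500
 * (half of RATIO_UNIT), min_freq = 1000000 kHz and eng_freq = 1400000 kHz,
 * the result is 1000000 + (400000 * 500) / 1000 = 1200000 kHz, i.e. the
 * tunable decides how far toward the lowest-energy frequency the
 * util-based frequency may be raised.
 */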

#define khz_to_mhz(x)	((x) / 1000)
static unsigned int ego_find_energy_freq(struct ego_policy *egp, unsigned int org_freq)
{
	struct cpufreq_frequency_table *pos;
	int min_energy = INT_MAX, eng_freq = -1;

	cpufreq_for_each_entry(pos, egp->policy->freq_table) {
		unsigned long energy;

		if (pos->frequency < org_freq)
			continue;

		energy = ego_compute_energy(egp, pos->frequency);
		if (energy < min_energy) {
			min_energy = energy;
			eng_freq = pos->frequency;
		}
	}

	if (eng_freq < 0)
		return org_freq;

	eng_freq = ego_apply_eng_boost(org_freq, eng_freq, egp);

	return clamp_val(eng_freq, egp->policy->min, egp->policy->max);
}

/*********************************************************************/
/*		    Sysbusy state change notifier		     */
/*********************************************************************/
static int ego_sysbusy_notifier_call(struct notifier_block *nb,
				unsigned long val, void *v)
{
	int cpu;
	enum sysbusy_state state = *(enum sysbusy_state *)v;

	if (val != SYSBUSY_STATE_CHANGE)
		return NOTIFY_OK;

	for_each_possible_cpu(cpu) {
		struct ego_policy *egp;
		if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
			continue;

		egp = per_cpu(ego_cpu, cpu).egp;
		if (!ego_is_working(egp))
			continue;

		egp->build_somac_wall = (state == SYSBUSY_SOMAC);
	}

	return NOTIFY_OK;
}

static struct notifier_block ego_sysbusy_notifier = {
	.notifier_call = ego_sysbusy_notifier_call,
};

/*********************************************************************/
/*		       EGO mode change notifier			     */
/*********************************************************************/
#define DEFAULT_PELT_MARGIN	(25)	/* 25% by default */
static int ego_mode_update_callback(struct notifier_block *nb,
				unsigned long val, void *v)
{
	struct emstune_set *cur_set = (struct emstune_set *)v;
	struct ego_policy *egp;
	int cpu;

	for_each_possible_cpu(cpu) {
		if (cpu != cpumask_first(cpu_coregroup_mask(cpu)))
			continue;

		egp = per_cpu(ego_cpu, cpu).egp;
		if (!egp)
			continue;

		egp->pelt_boost = cur_set->cpufreq_gov.pelt_boost[cpu];
		egp->htask_boost = cur_set->cpufreq_gov.htask_boost[cpu];
		egp->pelt_margin = DEFAULT_PELT_MARGIN;
		egp->split_pelt_margin = cur_set->cpufreq_gov.split_pelt_margin[cpu];
		egp->split_pelt_margin_freq = cur_set->cpufreq_gov.split_pelt_margin_freq[cpu];
		egp->up_rate_limit_ns = 4 * NSEC_PER_MSEC;	/* 4 ms by default */
		egp->split_up_rate_limit_ns =
			cur_set->cpufreq_gov.split_up_rate_limit[cpu] * NSEC_PER_MSEC;
		egp->split_up_rate_limit_freq =
			cur_set->cpufreq_gov.split_up_rate_limit_freq[cpu];
		egp->down_rate_limit_ns = cur_set->cpufreq_gov.down_rate_limit * NSEC_PER_MSEC;
		egp->dis_buck_share = cur_set->cpufreq_gov.dis_buck_share[cpu];
	}

	return NOTIFY_OK;
}

static struct notifier_block ego_mode_update_notifier = {
	.notifier_call = ego_mode_update_callback,
};

/*********************************************************************/
/*			    SLACK TIMER				     */
/*********************************************************************/
static void ego_update_min_cap(struct cpufreq_policy *policy)
{
	unsigned int cpu;
	unsigned long max_cap, min_cap;

	max_cap = capacity_cpu_orig(policy->cpu);

	/* min_cap is the minimum capacity that makes a frequency higher than policy->min */
	min_cap = (max_cap * policy->min) / policy->max;
	min_cap -= 1;

	for_each_cpu(cpu, policy->cpus)
		per_cpu(ego_cpu, cpu).min_cap = min_cap;
}
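/*
 * Worked example (illustrative): with capacity_cpu_orig() = 1024,
 * policy->min = 500000 kHz and policy->max = 2000000 kHz,
 * min_cap = (1024 * 500000) / 2000000 - 1 = 255; a CPU whose boosted
 * utilization stays at or below this value cannot ask for more than
 * policy->min, which is what ego_need_slack_timer() checks below.
 */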

static int ego_need_slack_timer(void)
{
	unsigned int cpu = raw_smp_processor_id();
	struct ego_cpu *egc = &per_cpu(ego_cpu, cpu);
	struct ego_policy *egp = egc->egp;
	int need = 0;

	if (!ego_is_working(egp))
		return 0;

	if (egc->boosted_util > egc->min_cap) {
		need = 1;
		goto out;
	}

	/* add the slack timer only on the heaviest cpu in this domain */
	if (egp->heaviest_cpu == cpu) {
		/* add the timer when the freq is raised by the energy freq, not by a min lock */
		if (egp->policy->cur > egp->policy->cpuinfo.min_freq &&
				egp->eng_freq > egp->org_freq)
			need = 1;
	}

out:
	trace_ego_need_slack_timer(cpu, egc->boosted_util, egc->min_cap,
				egp->heaviest_cpu, egp->policy->cur,
				egp->policy->cpuinfo.min_freq,
				egp->eng_freq, egp->org_freq, need);

	return need;
}

/************************ Governor internals ***********************/

static unsigned int ego_resolve_freq_wo_clamp(struct cpufreq_policy *policy,
					unsigned int target_freq)
{
	int index;

	index = cpufreq_table_find_index_al(policy, target_freq);
	if (index < 0) {
		pr_err("target frequency(%u) out of range\n", target_freq);
		return 0;
	}

	return policy->freq_table[index].frequency;
}

static bool ego_should_update_freq(struct ego_policy *egp, u64 time)
{
	s64 delta_ns, rate_limit_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-CPU data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU
	 * requests, so while get_next_freq() will work, our
	 * ego_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * This is needed on the slow switching platforms too to prevent CPUs
	 * going offline from leaving stale IRQ work items behind.
	 */
	if (!cpufreq_this_cpu_can_update(egp->policy))
		return false;

	if (unlikely(egp->limits_changed)) {
		egp->limits_changed = false;
		egp->need_freq_update = true;
		return true;
	}

	delta_ns = time - egp->last_freq_update_time;

	/*
	 * EGO doesn't know the target frequency at this point, so consider
	 * the minimum of the up/down rate limits to cover all cases.
	 * The exact rate limit will be considered in ego_postpone_freq_update().
	 */
	rate_limit_ns = min(egp->up_rate_limit_ns, egp->down_rate_limit_ns);

	return delta_ns >= rate_limit_ns;
}

static void ego_update_pelt_margin(struct ego_policy *egp, u64 time,
				unsigned int next_freq)
{
	if (next_freq < egp->split_pelt_margin_freq)
		egp->pelt_margin = DEFAULT_PELT_MARGIN;
	else
		egp->pelt_margin = egp->split_pelt_margin;
}

static void ego_update_up_rate_limit(struct ego_policy *egp, u64 time,
				unsigned int next_freq)
{
	if (next_freq < egp->split_up_rate_limit_freq)
		egp->up_rate_limit_ns = 4 * NSEC_PER_MSEC;	/* 4 ms by default */
	else
		egp->up_rate_limit_ns = egp->split_up_rate_limit_ns;
}

static void ego_update_freq_variant_param(struct ego_policy *egp, u64 time,
				unsigned int next_freq)
{
	ego_update_pelt_margin(egp, time, next_freq);
	ego_update_up_rate_limit(egp, time, next_freq);
}

static bool ego_request_freq_change(struct ego_policy *egp, u64 time,
				unsigned int next_freq)
{
	if (!egp->need_freq_update) {
		if (egp->policy->cur == next_freq)
			return false;
	} else {
		egp->need_freq_update = false;
	}

	return true;
}

/* update next freq and the time of the last frequency change request */
static void ego_update_next_freq(struct ego_policy *egp, u64 time,
				unsigned int next_freq)
{
	ego_update_freq_variant_param(egp, time, next_freq);

	if (egp->next_freq > next_freq)
		next_freq = (egp->next_freq + next_freq) >> 1;

	egp->next_freq = next_freq;
	egp->last_freq_update_time = time;
}
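/*
 * Note on the averaging above (illustrative numbers): when the new request
 * is lower than the previously committed frequency, the governor only steps
 * half way down, e.g. a previous next_freq of 1800000 kHz and a request of
 * 1000000 kHz commit (1800000 + 1000000) / 2 = 1400000 kHz, smoothing
 * downward transitions.
 */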

static void ego_fast_switch(struct ego_policy *egp, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = egp->policy;

	if (!ego_request_freq_change(egp, time, next_freq))
		return;

	ego_update_next_freq(egp, time, next_freq);
	cpufreq_driver_fast_switch(policy, next_freq);
}

static void ego_deferred_update(struct ego_policy *egp, u64 time,
				unsigned int next_freq)
{
	if (!ego_request_freq_change(egp, time, next_freq))
		return;

	ego_update_next_freq(egp, time, next_freq);

	if (!egp->work_in_progress) {
		egp->work_in_progress = true;
		irq_work_queue(&egp->irq_work);
	}
}

static inline unsigned long
ego_map_util_freq(struct ego_policy *egp, unsigned long util,
			unsigned long freq, unsigned long cap)
{
	return ((freq * (100 + egp->pelt_margin)) / 100) * util / cap;
}
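/*
 * Worked example (illustrative): with pelt_margin = 25,
 * cpuinfo.max_freq = 2000000 kHz, util = 512 and cap = 1024 the raw
 * request is (2000000 * 125 / 100) * 512 / 1024 = 1250000 kHz; the
 * margin keeps headroom above the measured utilization, matching the
 * classic "1.25 * max_freq * util / max" schedutil formula.
 */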

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @egp: ego policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal or greater than the raw
 * next_freq (as calculated above) is returned, subject to policy min/max and
 * cpufreq driver limitations.
 */

/*
 * use_energy_freq - return whether the energy freq should be used.
 * At least one CPU must be busy to use the energy freq.
 */
static bool use_energy_freq(struct cpufreq_policy *policy)
{
	int cpu;

	for_each_cpu(cpu, policy->cpus) {
		if (profile_get_cpu_wratio_busy(cpu))
			return true;
	}
	return false;
}

static unsigned int get_next_freq(struct ego_policy *egp,
				unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = egp->policy;
	unsigned int freq, org_freq, eng_freq = 0;

	/* compute the pure frequency based on util */
	org_freq = ego_map_util_freq(egp, util, policy->cpuinfo.max_freq, max);
	if ((org_freq == egp->cached_raw_freq || egp->work_in_progress)
			&& !egp->need_freq_update) {
		freq = max(egp->org_freq, egp->next_freq);
		goto skip_find_next_freq;
	}
	egp->cached_raw_freq = org_freq;

	/* find freq from table */
	org_freq = ego_resolve_freq_wo_clamp(policy, org_freq);
	if (egp->org_freq != org_freq) {
		egp->org_freq = org_freq;
		/* inform et of the new freq */
		et_update_freq(policy->cpu, org_freq);
	}

	/* compute lowest energy freq */
	if (use_energy_freq(policy)) {
		ego_compute_idle_ratio(egp);
		egp->eng_freq = eng_freq = ego_find_energy_freq(egp, org_freq);
	} else {
		egp->eng_freq = 0;
	}
	freq = max(org_freq, eng_freq);

skip_find_next_freq:

	/* Apply fclamp */
	freq = fclamp_apply(policy, freq);
	freq = clamp_val(freq, policy->min, policy->max);

	freq = egp->build_somac_wall ? min(freq, egp->somac_wall) : freq;

	trace_ego_req_freq(policy->cpu, freq, policy->min, policy->max,
				org_freq, eng_freq, util, max);

	return freq;
}

/*
 * This function computes an effective utilization for the given CPU, to be
 * used for frequency selection given the linear relation: f = u * f_max.
 *
 * The scheduler tracks the following metrics:
 *
 *   cpu_util_{cfs,rt,dl,irq}()
 *   cpu_bw_dl()
 *
 * Where the cfs,rt and dl util numbers are tracked with the same metric and
 * synchronized windows and are thus directly comparable.
 *
 * The cfs,rt,dl utilization are the running times measured with rq->clock_task
 * which excludes things like IRQ and steal-time. These latter are then accrued
 * in the irq utilization.
 *
 * The DL bandwidth number otoh is not a measured metric but a value computed
 * based on the task model parameters and gives the minimal utilization
 * required to meet deadlines.
 */
unsigned long ego_cpu_util(int cpu, unsigned long util_cfs,
				unsigned long max, enum schedutil_type type,
				struct task_struct *p)
{
	unsigned long dl_util, util, irq;
	struct rq *rq = cpu_rq(cpu);

	/*
	 * Early check to see if IRQ/steal time saturates the CPU, can be
	 * because of inaccuracies in how we track these -- see
	 * update_irq_load_avg().
	 */

	irq = cpu_util_irq(rq);
	if (unlikely(irq >= max)) {
		util = irq;
		goto out;
	}

	/*
	 * Because the time spent on RT/DL tasks is visible as 'lost' time to
	 * CFS tasks and we use the same metric to track the effective
	 * utilization (PELT windows are synchronized) we can directly add them
	 * to obtain the CPU's actual utilization.
	 *
	 * CFS and RT utilization can be boosted or capped, depending on
	 * utilization clamp constraints requested by currently RUNNABLE
	 * tasks.
	 * When there are no CFS RUNNABLE tasks, clamps are released and
	 * frequency will be gracefully reduced with the utilization decay.
	 */
	util = util_cfs + cpu_util_rt(rq);
	if (type == FREQUENCY_UTIL)
		util = uclamp_rq_util_with(rq, util, p);
	dl_util = cpu_util_dl(rq);

	/*
	 * For frequency selection we do not make cpu_util_dl() a permanent part
	 * of this sum because we want to use cpu_bw_dl() later on, but we need
	 * to check if the CFS+RT+DL sum is saturated (ie. no idle time) such
	 * that we select f_max when there is no idle time.
	 *
	 * NOTE: numerical errors or stop class might cause us to not quite hit
	 * saturation when we should -- something for later.
	 */
	if (util + dl_util >= max) {
		util = util + dl_util;
		goto out;
	}

	/*
	 * OTOH, for energy computation we need the estimated running time, so
	 * include util_dl and ignore dl_bw.
	 */
	if (type == ENERGY_UTIL)
		util += dl_util;

	/*
	 * There is still idle time; further improve the number by using the
	 * irq metric. Because IRQ/steal time is hidden from the task clock we
	 * need to scale the task numbers:
	 *
	 *              max - irq
	 *   U' = irq + --------- * U
	 *                 max
	 */
	util = scale_irq_capacity(util, irq, max);
	util += irq;

	/*
	 * Bandwidth required by DEADLINE must always be granted while, for
	 * FAIR and RT, we use blocked utilization of IDLE CPUs as a mechanism
	 * to gracefully reduce the frequency when no tasks show up for longer
	 * periods of time.
	 *
	 * Ideally we would like to set bw_dl as min/guaranteed freq and util +
	 * bw_dl as requested freq. However, cpufreq is not yet ready for such
	 * an interface. So, we only do the latter for now.
	 */
	if (type == FREQUENCY_UTIL)
		util += cpu_bw_dl(rq);

out:
	trace_ego_sched_util(cpu, util, util_cfs, cpu_util_rt(rq),
			cpu_util_dl(rq), cpu_bw_dl(rq), cpu_util_irq(rq));

	return min(max, util);
}

static unsigned long ego_get_util(struct ego_cpu *egc)
{
	struct rq *rq = cpu_rq(egc->cpu);
	unsigned long util = ml_cpu_util(egc->cpu);
	unsigned long max = arch_scale_cpu_capacity(egc->cpu);

	egc->max = max;
	egc->bw_dl = cpu_bw_dl(rq);

	return ego_cpu_util(egc->cpu, util, max, FREQUENCY_UTIL, NULL);
}

/**
 * ego_iowait_reset() - Reset the IO boost status of a CPU.
 * @egc: the ego data for the CPU to boost
 * @time: the update time from the caller
 * @set_iowait_boost: true if an IO boost has been requested
 *
 * The IO wait boost of a task is disabled after a tick since the last update
 * of a CPU. If a new IO wait boost is requested after more than a tick, then
 * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
 * efficiency by ignoring sporadic wakeups from IO.
 */
static bool ego_iowait_reset(struct ego_cpu *egc, u64 time,
				bool set_iowait_boost)
{
	s64 delta_ns = time - egc->last_update;

	/* Reset boost only if a tick has elapsed since last request */
	if (delta_ns <= TICK_NSEC)
		return false;

	egc->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
	egc->iowait_boost_pending = set_iowait_boost;

	return true;
}

/**
 * ego_iowait_boost() - Updates the IO boost status of a CPU.
 * @egc: the ego data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
 * of the maximum OPP.
 *
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void ego_iowait_boost(struct ego_cpu *egc, u64 time,
				unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (egc->iowait_boost &&
	    ego_iowait_reset(egc, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure boost doubles only one time at each request */
	if (egc->iowait_boost_pending)
		return;
	egc->iowait_boost_pending = true;

	/* Double the boost at each request */
	if (egc->iowait_boost) {
		egc->iowait_boost =
			min_t(unsigned int, egc->iowait_boost << 1, SCHED_CAPACITY_SCALE);
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	egc->iowait_boost = IOWAIT_BOOST_MIN;
}
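/*
 * Example (illustrative): with SCHED_CAPACITY_SCALE = 1024,
 * IOWAIT_BOOST_MIN is 128; back-to-back IO wakeups within a tick ramp
 * the boost 128 -> 256 -> 512 -> 1024, and it stays clamped at the
 * maximum capacity until the wakeups stop.
 */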

/**
 * ego_iowait_apply() - Apply the IO boost to a CPU.
 * @egc: the ego data for the cpu to boost
 * @time: the update time from the caller
 * @util: the utilization to (eventually) boost
 * @max: the maximum value the utilization can be boosted to
 *
 * A CPU running a task which has woken up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * ego_iowait_boost(), and it is instead decreased by this function,
 * each time an increase has not been requested (!iowait_boost_pending).
 *
 * A CPU which appears to have been idle for at least one tick also has its
 * IO boost utilization reset.
 *
 * This mechanism is designed to boost frequently IO-waiting tasks, while
 * being more conservative on tasks which do only sporadic IO operations.
 */
static unsigned long ego_iowait_apply(struct ego_cpu *egc, u64 time,
				unsigned long util, unsigned long max)
{
	unsigned long boost;

	/* No boost currently required */
	if (!egc->iowait_boost)
		return 0;

	/* Reset boost if the CPU appears to have been idle enough */
	if (ego_iowait_reset(egc, time, false))
		return 0;

	if (!egc->iowait_boost_pending) {
		/*
		 * No boost pending; reduce the boost value.
		 */
		egc->iowait_boost >>= 1;
		if (egc->iowait_boost < IOWAIT_BOOST_MIN) {
			egc->iowait_boost = 0;
			return 0;
		}
	}

	egc->iowait_boost_pending = false;

	/*
	 * @util is already in capacity scale; convert iowait_boost
	 * into the same scale so we can compare.
	 */
	boost = (egc->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
	boost = max(boost, util);
	boost = uclamp_rq_util_with(cpu_rq(egc->cpu), boost, NULL);
	return boost;
}
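/*
 * Conversely (illustrative): once no new IO boost is pending, each
 * subsequent update halves the boost (1024 -> 512 -> 256 -> 128) and
 * drops it to zero as soon as it falls below IOWAIT_BOOST_MIN.
 */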

/*
 * Make ego_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
static inline void ignore_dl_rate_limit(struct ego_cpu *egc, struct ego_policy *egp)
{
	if (cpu_bw_dl(cpu_rq(egc->cpu)) > egc->bw_dl)
		egp->limits_changed = true;
}

static int get_boost_pelt_util(int capacity, int util, int boost)
{
	long long margin;

#if AMIGO_BUILD_VER >= 4
	margin = util * boost / 100;
#else
	if (!boost)
		return util;

	if (boost > 0) {
		margin = max(capacity - util, 0) * boost;
	} else {
		margin = util * boost;
	}
	margin /= 100;
#endif
	return util + margin;
}
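/*
 * Example for the legacy (pre-AMIGO v4) path above (illustrative):
 * with capacity = 1024, util = 400 and boost = 20, the margin is
 * (1024 - 400) * 20 / 100 = 124, giving a boosted utilization of 524;
 * a negative boost of -20 instead subtracts 400 * 20 / 100 = 80.
 */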

static unsigned int ego_next_freq_shared(struct ego_cpu *egc, u64 time)
{
	struct ego_policy *egp = egc->egp;
	struct cpufreq_policy *policy = egp->policy;
	unsigned long util = 0, io_util = 0, max = 1;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus) {
		struct ego_cpu *egc = &per_cpu(ego_cpu, cpu);
		unsigned long cpu_util, cpu_io_util, cpu_max;
		unsigned long cpu_boosted_util;

		egc->util = cpu_util = ego_get_util(egc);
		cpu_boosted_util = freqboost_cpu_boost(cpu, cpu_util);
		cpu_boosted_util = max(cpu_boosted_util,
				heavytask_cpu_boost(cpu, cpu_util, egp->htask_boost));
		cpu_boosted_util = get_boost_pelt_util(capacity_cpu(cpu),
				cpu_boosted_util, egp->pelt_boost);
		egc->boosted_util = cpu_boosted_util;
		cpu_max = egc->max;

		cpu_io_util = ego_iowait_apply(egc, time, cpu_util, cpu_max);

		/* find heaviest util and cpu */
		if (util < cpu_boosted_util) {
			util = cpu_boosted_util;
			egp->heaviest_cpu = cpu;
		}
		/* find heaviest io util */
		io_util = max(io_util, cpu_io_util);
		/* find heaviest max */
		max = max(max, cpu_max);

		trace_ego_cpu_util(cpu, egp->pelt_boost, cpu_util, io_util, cpu_boosted_util);
	}

	util = max(util, io_util);
	return get_next_freq(egp, util, max);
}

static void
ego_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct ego_cpu *egc = container_of(hook, struct ego_cpu, update_util);
	struct ego_policy *egp = egc->egp;
	unsigned int next_f;

	ego_iowait_boost(egc, time, flags);
	egc->last_update = time;
	ignore_dl_rate_limit(egc, egp);

	if (egc->iowait_boost || egp->limits_changed)
		raw_spin_lock(&egp->update_lock);
	else if (!raw_spin_trylock(&egp->update_lock))
		return;

	if (ego_should_update_freq(egp, time)) {
		next_f = ego_next_freq_shared(egc, time);

		if (ego_postpone_freq_update(egp, time, next_f))
			goto out;

		if (egp->policy->fast_switch_enabled)
			ego_fast_switch(egp, time, next_f);
		else
			ego_deferred_update(egp, time, next_f);
	}

out:
	raw_spin_unlock(&egp->update_lock);
}

static void ego_work(struct kthread_work *work)
{
	struct ego_policy *egp = container_of(work, struct ego_policy, work);
	unsigned int freq;
	unsigned long flags;

	/*
	 * Hold egp->update_lock briefly to handle the case where:
	 * if egp->next_freq is read here, and then updated by
	 * ego_deferred_update() just before work_in_progress is set to false
	 * here, we may miss queueing the new update.
	 *
	 * Note: If work was queued after the update_lock was released,
	 * ego_work() will just be called again by the kthread_work code; the
	 * request will be processed before the ego thread sleeps.
	 */
	raw_spin_lock_irqsave(&egp->update_lock, flags);
	freq = egp->next_freq;
	egp->work_in_progress = false;
	raw_spin_unlock_irqrestore(&egp->update_lock, flags);

	mutex_lock(&egp->work_lock);
	__cpufreq_driver_target(egp->policy, freq, CPUFREQ_RELATION_L);
	mutex_unlock(&egp->work_lock);
}

static void ego_irq_work(struct irq_work *irq_work)
{
	struct ego_policy *egp;

	egp = container_of(irq_work, struct ego_policy, irq_work);

	kthread_queue_work(&egp->worker, &egp->work);
}

/************************** sysfs interface ************************/
struct ego_attr {
	struct attribute attr;
	ssize_t (*show)(struct kobject *, char *);
	ssize_t (*store)(struct kobject *, const char *, size_t count);
};

#define ego_attr_rw(name)						\
static struct ego_attr name##_attr =					\
__ATTR(name, 0644, show_##name, store_##name)

#define ego_show(name)							\
static ssize_t show_##name(struct kobject *k, char *buf)		\
{									\
	struct ego_policy *egp =					\
			container_of(k, struct ego_policy, kobj);	\
									\
	return sprintf(buf, "%d\n", egp->name);				\
}									\

#define ego_store(name)							\
static ssize_t store_##name(struct kobject *k, const char *buf, size_t count)	\
{									\
	struct ego_policy *egp =					\
			container_of(k, struct ego_policy, kobj);	\
	int data;							\
									\
	if (!sscanf(buf, "%d", &data))					\
		return -EINVAL;						\
									\
	egp->name = data;						\
	return count;							\
}

ego_show(ratio);
ego_store(ratio);
ego_attr_rw(ratio);
ego_show(dis_buck_share);
ego_store(dis_buck_share);
ego_attr_rw(dis_buck_share);

ego_show(somac_wall);
ego_store(somac_wall);
ego_attr_rw(somac_wall);

static ssize_t show(struct kobject *kobj, struct attribute *at, char *buf)
{
	struct ego_attr *fvattr = container_of(at, struct ego_attr, attr);
	return fvattr->show(kobj, buf);
}

static ssize_t store(struct kobject *kobj, struct attribute *at,
			const char *buf, size_t count)
{
	struct ego_attr *fvattr = container_of(at, struct ego_attr, attr);
	return fvattr->store(kobj, buf, count);
}

static const struct sysfs_ops ego_sysfs_ops = {
	.show = show,
	.store = store,
};

static struct attribute *ego_attrs[] = {
	&ratio_attr.attr,
	&somac_wall_attr.attr,
	&dis_buck_share_attr.attr,
	NULL
};

static struct kobj_type ktype_ego = {
	.sysfs_ops = &ego_sysfs_ops,
	.default_attrs = ego_attrs,
};
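/*
 * Usage sketch (the path is illustrative; the exact prefix depends on where
 * ems_kobj is registered):
 *   echo 500 > /sys/.../ems/ego/coregroup<N>/ratio
 * scales how far the energy-based frequency may raise the util-based one
 * (see ego_apply_eng_boost()), while somac_wall and dis_buck_share map to
 * the tunables of the same names above.
 */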

/********************** cpufreq governor interface *********************/
struct cpufreq_governor energy_aware_gov;

static int ego_kthread_create(struct ego_policy *egp)
{
	struct task_struct *thread;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };
	struct cpufreq_policy *policy = egp->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&egp->work, ego_work);
	kthread_init_worker(&egp->worker);
	thread = kthread_create(kthread_worker_fn, &egp->worker,
			"ego:%d", cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create ego thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
		return ret;
	}

	set_cpus_allowed_ptr(thread, &egp->thread_allowed_cpus);
	thread->flags |= PF_NO_SETAFFINITY;
	egp->thread = thread;
	init_irq_work(&egp->irq_work, ego_irq_work);
	mutex_init(&egp->work_lock);

	pr_info("%s: cpus=%#x, allowed-cpu=%#x\n", __func__,
		*(unsigned int *)cpumask_bits(&egp->cpus),
		*(unsigned int *)cpumask_bits(&egp->thread_allowed_cpus));

	return 0;
}

static int ego_init(struct cpufreq_policy *policy)
{
	struct ego_policy *egp = NULL;
	int cpu;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	egp = per_cpu(ego_cpu, policy->cpu).egp;
	if (!egp) {
		pr_info("%s: ego_policy is not ready\n", __func__);
		goto fail_ego_init;
	}

	if (egp->policy) {
		egp->policy = policy;
		pr_info("%s: ego_policy was already initialized\n", __func__);
		goto complete_ego_init;
	}
	egp->policy = policy;

	if (ego_kthread_create(egp)) {
		pr_info("%s: failed to create kthread\n", __func__);
		goto fail_ego_init;
	}

complete_ego_init:
	if (!policy->fast_switch_enabled)
		wake_up_process(egp->thread);

	policy->governor_data = egp;

	for_each_cpu(cpu, policy->related_cpus)
		cpufreq_register_hook(cpu, NULL, ego_need_slack_timer);

	pr_info("%s: ego init complete: cpus=%#x, allowed-cpu=%#x\n", __func__,
		*(unsigned int *)cpumask_bits(&egp->cpus),
		*(unsigned int *)cpumask_bits(&egp->thread_allowed_cpus));
	return 0;

fail_ego_init:
	cpufreq_disable_fast_switch(policy);
	pr_err("initialization failed\n");
	return -1;
}

static void ego_exit(struct cpufreq_policy *policy)
{
	int cpu;

	policy->governor_data = NULL;
	cpufreq_disable_fast_switch(policy);

	for_each_cpu(cpu, policy->related_cpus)
		cpufreq_unregister_hook(cpu);
}

static int ego_start(struct cpufreq_policy *policy)
{
	struct ego_policy *egp = policy->governor_data;
	unsigned int cpu;

	egp->pelt_margin = DEFAULT_PELT_MARGIN;
	egp->freq_update_delay_ns = 4 * NSEC_PER_MSEC;
	egp->up_rate_limit_ns = 500 * NSEC_PER_MSEC;
	egp->down_rate_limit_ns = 1000 * NSEC_PER_MSEC;
	egp->last_freq_update_time = 0;
	egp->next_freq = 0;
	egp->work_in_progress = false;
	egp->limits_changed = false;
	egp->need_freq_update = false;
	egp->cached_raw_freq = 0;

	for_each_cpu(cpu, policy->cpus) {
		struct ego_cpu *egc = &per_cpu(ego_cpu, cpu);
		egc->iowait_boost_pending = false;
		egc->iowait_boost = 0;
		egc->last_update = 0;
		egc->bw_dl = 0;
		egc->max = 0;
		egc->util = 0;
		egc->boosted_util = 0;
		egc->egp = egp;
		egc->cpu = cpu;
		egc->min_cap = ULONG_MAX;
	}

	for_each_cpu(cpu, policy->cpus) {
		struct ego_cpu *egc = &per_cpu(ego_cpu, cpu);
		cpufreq_add_update_util_hook(cpu,
				&egc->update_util, ego_update_shared);
	}
	return 0;
}

static void ego_stop(struct cpufreq_policy *policy)
{
	struct ego_policy *egp = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_rcu();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&egp->irq_work);
		kthread_cancel_work_sync(&egp->work);
	}
}

static void ego_limits(struct cpufreq_policy *policy)
{
	struct ego_policy *egp = policy->governor_data;
	unsigned int target_freq;
	unsigned long flags;

	target_freq = max(egp->org_freq, egp->eng_freq);
	target_freq = clamp_val(target_freq, policy->min, policy->max);

	raw_spin_lock_irqsave(&egp->update_lock, flags);
	ego_update_min_cap(policy);
	ego_update_next_freq(egp, egp->last_freq_update_time, target_freq);
	raw_spin_unlock_irqrestore(&egp->update_lock, flags);

	if (!policy->fast_switch_enabled) {
		mutex_lock(&egp->work_lock);
		__cpufreq_driver_target(policy, target_freq, CPUFREQ_RELATION_H);
		mutex_unlock(&egp->work_lock);
	} else
		cpufreq_driver_fast_switch(policy, target_freq);
}

struct cpufreq_governor energy_aware_gov = {
	.name			= "energy_aware",
	.owner			= THIS_MODULE,
	.flags			= CPUFREQ_GOV_DYNAMIC_SWITCHING,
	.init			= ego_init,
	.exit			= ego_exit,
	.start			= ego_start,
	.stop			= ego_stop,
	.limits			= ego_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ENERGYAWARE
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &energy_aware_gov;
}
#endif

static int ego_register(struct kobject *ems_kobj)
{
	ego_kobj = kobject_create_and_add("ego", ems_kobj);
	if (!ego_kobj)
		return -EINVAL;

	sysbusy_register_notifier(&ego_sysbusy_notifier);
	emstune_register_notifier(&ego_mode_update_notifier);

	return cpufreq_register_governor(&energy_aware_gov);
}

static struct ego_policy *ego_policy_alloc(void)
{
	return kzalloc(sizeof(struct ego_policy), GFP_KERNEL);
}

static int ego_parse_dt(struct device_node *dn, struct ego_policy *egp)
{
	struct cpumask mask;
	const char *buf;

	if (of_property_read_string(dn, "cpus", &buf)) {
		pr_err("%s: cpus property is omitted\n", __func__);
		return -1;
	} else
		cpulist_parse(buf, &egp->cpus);

	if (!of_property_read_string(dn, "thread-run-on", &buf))
		cpulist_parse(buf, &mask);
	else
		cpumask_copy(&mask, cpu_possible_mask);
	cpumask_copy(&egp->thread_allowed_cpus, &mask);

	if (of_property_read_u32(dn, "ratio", &egp->ratio))
		egp->ratio = RATIO_UNIT;

	if (of_property_read_u32(dn, "dis-buck-share", &egp->dis_buck_share))
		egp->dis_buck_share = 0;

	if (of_property_read_u32(dn, "somac_wall", &egp->somac_wall))
		egp->somac_wall = UINT_MAX;

	return 0;
}

int ego_pre_init(struct kobject *ems_kobj)
{
	struct device_node *dn, *child;
	int cpu;
	dn = of_find_node_by_path("/ems/ego");
	if (!dn)
		goto fail;

	ego_register(ems_kobj);

	for_each_child_of_node(dn, child) {
		struct ego_policy *egp;

		egp = ego_policy_alloc();
		if (!egp) {
			pr_err("%s: failed to alloc ego_policy\n", __func__);
			goto fail;
		}

		/* Parse device tree */
		if (ego_parse_dt(child, egp))
			goto fail;

		/* Init Sysfs */
		if (kobject_init_and_add(&egp->kobj, &ktype_ego, ego_kobj,
				"coregroup%d", cpumask_first(&egp->cpus)))
			goto fail;

		/* init policy spin lock */
		raw_spin_lock_init(&egp->update_lock);

		for_each_cpu(cpu, &egp->cpus) {
			struct ego_cpu *egc = &per_cpu(ego_cpu, cpu);
			egc->egp = egp;
		}
	}

	return 0;

fail:
	for_each_possible_cpu(cpu) {
		if (per_cpu(ego_cpu, cpu).egp)
			kfree(per_cpu(ego_cpu, cpu).egp);
		per_cpu(ego_cpu, cpu).egp = NULL;
	}

	return -1;
}