kernel_samsung_a53x/kernel/sched/ems/profile.c
/*
* Scheduling status profiler for Exynos Mobile Scheduler
*
* Copyright (C) 2021 Samsung Electronics Co., Ltd
* Park Choonghoon <choong.park@samsung.com>
*/
#include "../sched.h"
#include "ems.h"
#include <trace/events/ems.h>
#include <trace/events/ems_debug.h>
static struct system_profile_data *system_profile_data;
static DEFINE_RWLOCK(profile_sched_lock);
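/*
 * Each cpu carries an array of profile slots (indexed by CPU_HTSK,
 * CPU_WRATIO, CPU_UTIL, ...) in system_profile_data; this helper returns a
 * pointer to the requested slot.
 */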
#define get_cpu_profile(cpu, profile) (&system_profile_data->cp[cpu][profile])
char *fair_causes_name[END_OF_FAIR_CAUSES] = {
"no-candidate",
"only-one-candidate",
"no-fit-cpu",
"only-one-fit-cpu",
"sysbusy",
"task-express",
"lowest-energy",
"performance",
"sync",
"fast-track",
"na"
};
char *rt_causes_name[END_OF_RT_CAUSES] = {
"only-one-candidate",
"idle",
"recessive",
"na"
};
struct sched_stat {
unsigned int fair_sum;
unsigned int rt_sum;
unsigned int fair[END_OF_FAIR_CAUSES];
unsigned int rt[END_OF_RT_CAUSES];
};
static DEFINE_PER_CPU(struct sched_stat, stats);
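/*
 * Per-cpu counters of why the fair/rt core-selection path ended up on a
 * given CPU. update_fair_stat()/update_rt_stat() are presumably called from
 * the EMS task-placement code with the selection cause; the counters are
 * exported through the sched_stat sysfs node below.
 */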
void update_fair_stat(int cpu, enum fair_causes i)
{
struct sched_stat *stat = per_cpu_ptr(&stats, cpu);
if (!stat)
return;
stat->fair_sum++;
stat->fair[i]++;
}
void update_rt_stat(int cpu, enum rt_causes i)
{
struct sched_stat *stat = per_cpu_ptr(&stats, cpu);
if (!stat)
return;
stat->rt_sum++;
stat->rt[i]++;
}
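/* Writing any value to sched_stat resets all per-cpu statistics. */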
static ssize_t sched_stat_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
int value, cpu;
if (sscanf(buf, "%d", &value) != 1)
return -EINVAL;
for_each_possible_cpu(cpu) {
struct sched_stat *stat = per_cpu_ptr(&stats, cpu);
memset(stat, 0, sizeof(struct sched_stat));
}
return count;
}
static ssize_t sched_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct sched_stat *stat;
int ret = 0;
int cpu, i;
char *line = "--------------------------------------------------"
"--------------------------------------------------"
"---------------------";
ret += sprintf(buf + ret, "%s\n", line);
ret += sprintf(buf + ret, "%20s | ", "FAIR schedule");
for_each_possible_cpu(cpu)
ret += sprintf(buf + ret, " CPU%d ", cpu);
ret += sprintf(buf + ret, "\n%s\n", line);
for (i = 0; i < END_OF_FAIR_CAUSES; i++) {
ret += sprintf(buf + ret, "%20s | ", fair_causes_name[i]);
for_each_possible_cpu(cpu) {
stat = per_cpu_ptr(&stats, cpu);
ret += sprintf(buf + ret, "%10u ", stat->fair[i]);
}
ret += sprintf(buf + ret, "\n");
}
ret += sprintf(buf + ret, "%s\n", line);
ret += sprintf(buf + ret, "%20s | ", "total");
for_each_possible_cpu(cpu) {
stat = per_cpu_ptr(&stats, cpu);
ret += sprintf(buf + ret, "%10u ", stat->fair_sum);
}
ret += sprintf(buf + ret, "\n%s\n", line);
ret += sprintf(buf + ret, "%s\n", line);
ret += sprintf(buf + ret, "%20s | ", "RT schedule");
for_each_possible_cpu(cpu)
ret += sprintf(buf + ret, " CPU%d ", cpu);
ret += sprintf(buf + ret, "\n%s\n", line);
for (i = 0; i < END_OF_RT_CAUSES; i++) {
ret += sprintf(buf + ret, "%20s | ", rt_causes_name[i]);
for_each_possible_cpu(cpu) {
stat = per_cpu_ptr(&stats, cpu);
ret += sprintf(buf + ret, "%10u ", stat->rt[i]);
}
ret += sprintf(buf + ret, "\n");
}
ret += sprintf(buf + ret, "%s\n", line);
ret += sprintf(buf + ret, "%20s | ", "total");
for_each_possible_cpu(cpu) {
stat = per_cpu_ptr(&stats, cpu);
ret += sprintf(buf + ret, "%10u ", stat->rt_sum);
}
ret += sprintf(buf + ret, "\n%s\n", line);
return ret;
}
DEVICE_ATTR_RW(sched_stat);
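/*
 * Example usage from userspace (illustrative; the exact sysfs path depends
 * on where ems_kobj is created, /sys/kernel/ems is assumed here):
 *
 *	cat /sys/kernel/ems/sched_stat		# dump the per-cpu cause table
 *	echo 0 > /sys/kernel/ems/sched_stat	# reset all counters
 */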
/****************************************************************
* HEAVY TASK based on Active Ratio *
****************************************************************/
static int profile_heavy_task_ratio = 900; /* 90% */
/* a heavy task must belong to the TOP-APP or FOREGROUND group */
static inline bool htask_enabled_grp(struct task_struct *p)
{
int grp = cpuctl_task_group_idx(p);
if (grp != CGROUP_TOPAPP && grp != CGROUP_FOREGROUND)
return false;
return true;
}
/* get_htask_ratio - if this task is a heavy task, return its heaviness ratio */
static int get_htask_ratio(struct task_struct *p)
{
int ratio, cur_ratio = 0, prev_ratio = 0, recent_ratio = 0;
int prev_idx, cur_idx;
if (!htask_enabled_grp(p))
return 0;
cur_idx = mlt_task_cur_period(p);
prev_idx = mlt_period_with_delta(cur_idx, -1);
recent_ratio = mlt_task_recent(p);
cur_ratio = mlt_task_value(p, cur_idx);
prev_ratio = mlt_task_value(p, prev_idx);
/* the 2-period average active ratio should be over 90% */
if (prev_ratio < recent_ratio)
ratio = recent_ratio + cur_ratio;
else
ratio = prev_ratio + cur_ratio;
ratio = ratio >> 1;
if (ratio < profile_heavy_task_ratio)
return 0;
return mlt_task_avg(p);
}
static void profile_update_cpu_htask(int cpu, unsigned long hratio, unsigned long pid)
{
struct cpu_profile *cs = get_cpu_profile(cpu, CPU_HTSK);
cs->value = hratio;
cs->data = pid;
}
/*
* profile_enqueue_task
* Check whether the enqueued task is heavy; if it is, apply its htask
* ratio to this rq immediately to reduce the latency of raising frequency.
*/
void profile_enqueue_task(struct rq *rq, struct task_struct *p)
{
struct cpu_profile *cp;
int hratio;
if (!htask_enabled_grp(p))
return;
cp = get_cpu_profile(cpu_of(rq), CPU_HTSK);
hratio = get_htask_ratio(p);
if (cp->value < hratio) {
cp->value = hratio;
cp->data = p->pid;
}
}
int profile_get_htask_ratio(int cpu)
{
struct cpu_profile *cp = get_cpu_profile(cpu, CPU_HTSK);
return cp->value;
}
/****************************************************************
* CPU WEIGHTED ACTIVE RATIO *
****************************************************************/
#define IDLE_HYSTERESIS_SHIFT 1
#define WEIGHTED_RATIO 900
#define B2I_THR_RATIO 250
#define I2B_THR_RATIO (B2I_THR_RATIO >> IDLE_HYSTERESIS_SHIFT)
#define I2B_MONITOR_CNT 4
#define B2I_MONITOR_CNT (I2B_MONITOR_CNT << IDLE_HYSTERESIS_SHIFT)
#define I2B_WSUM 4095
#define B2I_WSUM 5695
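/*
 * Weighted active ratio: for each of the last monitor_cnt periods the
 * recurrence
 *
 *	wratio = ratio + ((wratio * WEIGHTED_RATIO) >> SCHED_CAPACITY_SHIFT)
 *
 * is applied oldest period first, so older periods decay geometrically by
 * roughly 900/1024 (~0.88, with SCHED_CAPACITY_SHIFT == 10) per step. The
 * result is normalized against I2B_WSUM or B2I_WSUM and compared with a
 * state-dependent threshold: a busy cpu is monitored over B2I_MONITOR_CNT
 * periods and turns idle when the normalized ratio drops below
 * B2I_THR_RATIO; an idle cpu is monitored over the shorter I2B_MONITOR_CNT
 * window and turns busy when it exceeds I2B_THR_RATIO.
 */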
static int profile_update_cpu_wratio(int cpu)
{
int cnt, monitor_cnt, wratio_thr, wratio_sum, ar_sum = 0;
int cpu_ar_monitor_cnt = I2B_MONITOR_CNT + 1;
int idx, wratio = 0;
u64 now = sched_clock();
struct cpu_profile *cs = get_cpu_profile(cpu, CPU_WRATIO);
/* Update for tickless core */
if ((mlt_art_last_update_time(cpu) + MLT_IDLE_THR_TIME) < now) {
cs->value = 0;
cs->data = PROFILE_CPU_IDLE;
return 0;
}
if (cs->data == PROFILE_CPU_BUSY) {
wratio_sum = B2I_WSUM;
monitor_cnt = B2I_MONITOR_CNT;
wratio_thr = B2I_THR_RATIO;
} else {
wratio_sum = I2B_WSUM;
monitor_cnt = I2B_MONITOR_CNT;
wratio_thr = I2B_THR_RATIO;
}
/* computing weighted active ratio */
idx = mlt_period_with_delta(mlt_cur_period(cpu), -(monitor_cnt - 1));
for (cnt = 0; cnt < monitor_cnt; cnt++) {
int ratio = mlt_art_value(cpu, idx);
wratio = (ratio + ((wratio * WEIGHTED_RATIO) >> SCHED_CAPACITY_SHIFT));
idx = mlt_period_with_delta(idx, 1);
if (cnt < cpu_ar_monitor_cnt)
ar_sum += ratio;
}
cs->value = (wratio << SCHED_CAPACITY_SHIFT) / wratio_sum;
if (cs->data == PROFILE_CPU_BUSY) {
if (cs->value < wratio_thr)
cs->data = PROFILE_CPU_IDLE;
} else {
if (cs->value > wratio_thr)
cs->data = PROFILE_CPU_BUSY;
}
return ar_sum ? (ar_sum / cpu_ar_monitor_cnt) : 0;
}
u64 profile_get_cpu_wratio_busy(int cpu)
{
struct cpu_profile *cp = get_cpu_profile(cpu, CPU_WRATIO);
return cp->data;
}
/****************************************************************
* CPU UTIL *
****************************************************************/
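/*
 * Classify a cpu as busy or idle from its utilization (CFS + RT) and
 * accumulate the busy cpu count and the utilization sum for the caller.
 */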
static void profile_update_cpu_util(int cpu, int *busy_cnt, unsigned long *util_sum)
{
int cpu_util;
struct cpu_profile *cs = get_cpu_profile(cpu, CPU_UTIL);
cpu_util = ml_cpu_util(cpu) + cpu_util_rt(cpu_rq(cpu));
(*util_sum) += cpu_util;
if (check_busy(cpu_util, capacity_cpu(cpu))) {
cs->data = PROFILE_CPU_BUSY;
(*busy_cnt) += 1;
} else {
cs->data = PROFILE_CPU_IDLE;
}
cs->value = cpu_util;
}
/****************************************************************
* External APIs *
****************************************************************/
static u64 last_profile_time;
static int profile_interval = 1; /* 1 tick = 4ms */
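/*
 * profile_sched_data - periodic profiling entry point.
 * Rate-limited to once per profile_interval ticks. Walks the active cpus,
 * refreshes the per-cpu util / weighted active ratio / heavy task profiles
 * and aggregates the system-wide figures into system_profile_data under the
 * profile write lock. Returns -EBUSY if the lock is already taken.
 */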
int profile_sched_data(void)
{
unsigned long flags;
unsigned long now = jiffies;
unsigned long cpu_util_sum = 0;
unsigned long heavy_task_util_sum = 0;
unsigned long misfit_task_util_sum = 0;
unsigned long heaviest_task_util = 0;
int busy_cpu_count = 0;
int heavy_task_count = 0;
int misfit_task_count = 0;
int pd_nr_running = 0;
int ed_ar_avg_sum = 0;
int pd_ar_avg_sum = 0;
int perf_cpu_nr = (VENDOR_NR_CPUS - cpumask_weight(cpu_slowest_mask()));
int perf_cap_scale = (SCHED_CAPACITY_SCALE * perf_cpu_nr);
int slowest_cap_scale = (SCHED_CAPACITY_SCALE * cpumask_weight(cpu_slowest_mask()));
int cpu;
if (!write_trylock_irqsave(&profile_sched_lock, flags))
return -EBUSY;
if (now < last_profile_time + profile_interval)
goto unlock;
last_profile_time = now;
for_each_cpu(cpu, cpu_active_mask) {
struct rq *rq = cpu_rq(cpu);
struct task_struct *p;
unsigned long task_util;
u64 max_hratio = 0, max_hratio_pid = 0;
int track_count;
int cpu_ar_avg = 0;
/* update cpu util */
profile_update_cpu_util(cpu, &busy_cpu_count, &cpu_util_sum);
/* update weighted cpu active ratio */
cpu_ar_avg = profile_update_cpu_wratio(cpu);
raw_spin_rq_lock(rq);
if (!rq->cfs.curr)
goto rq_unlock;
/* Explicitly clear the count */
track_count = 0;
list_for_each_entry(p, &rq->cfs_tasks, se.group_node) {
/* update heavy task based on active ratio */
u64 task_hratio = get_htask_ratio(p);
if (task_hratio > max_hratio) {
max_hratio_pid = p->pid;
max_hratio = task_hratio;
}
/* update heavy task based on cpu util */
task_util = ml_task_util(p);
if (is_heavy_task_util(task_util)) {
heavy_task_count++;
heavy_task_util_sum += task_util;
}
if (is_misfit_task_util(task_util)) {
misfit_task_count++;
misfit_task_util_sum += task_util;
}
if (heaviest_task_util < task_util)
heaviest_task_util = task_util;
if (++track_count >= TRACK_TASK_COUNT)
break;
}
rq_unlock:
if (!cpumask_test_cpu(cpu, cpu_slowest_mask())) {
pd_ar_avg_sum += cpu_ar_avg;
pd_nr_running += rq->cfs.h_nr_running;
} else {
ed_ar_avg_sum += cpu_ar_avg;
}
/* save heaviest task data */
profile_update_cpu_htask(cpu, max_hratio, max_hratio_pid);
trace_ems_cpu_profile(cpu, &system_profile_data->cp[cpu][0],
cpu_ar_avg, rq->cfs.h_nr_running);
raw_spin_rq_unlock(rq);
}
trace_ems_profile_tasks(busy_cpu_count, cpu_util_sum,
heavy_task_count, heavy_task_util_sum,
misfit_task_count, misfit_task_util_sum,
pd_nr_running);
/* Fill profile data */
system_profile_data->busy_cpu_count = busy_cpu_count;
system_profile_data->heavy_task_count = heavy_task_count;
system_profile_data->misfit_task_count = misfit_task_count;
system_profile_data->cpu_util_sum = cpu_util_sum;
system_profile_data->heavy_task_util_sum = heavy_task_util_sum;
system_profile_data->misfit_task_util_sum = misfit_task_util_sum;
system_profile_data->heaviest_task_util = heaviest_task_util;
system_profile_data->ed_ar_avg = (ed_ar_avg_sum * SCHED_CAPACITY_SCALE) / slowest_cap_scale;
system_profile_data->pd_ar_avg = (pd_ar_avg_sum * SCHED_CAPACITY_SCALE) / perf_cap_scale;
system_profile_data->pd_nr_running = pd_nr_running;
unlock:
write_unlock_irqrestore(&profile_sched_lock, flags);
return 0;
}
/* Caller MUST disable irq before calling this function. */
void get_system_sched_data(struct system_profile_data *data)
{
read_lock(&profile_sched_lock);
memcpy(data, system_profile_data, sizeof(struct system_profile_data));
read_unlock(&profile_sched_lock);
}
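/*
 * Illustrative consumer sketch (hypothetical, not part of this file); as
 * noted above, the caller must run with interrupts disabled:
 *
 *	struct system_profile_data data;
 *	unsigned long flags;
 *
 *	local_irq_save(flags);
 *	get_system_sched_data(&data);
 *	local_irq_restore(flags);
 *
 *	if (data.busy_cpu_count)
 *		...;
 */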
/****************************************************************
* sysbusy state change notifier *
****************************************************************/
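/* On a sysbusy state change, adopt that state's monitoring interval. */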
static int profile_sysbusy_notifier_call(struct notifier_block *nb,
unsigned long val, void *v)
{
enum sysbusy_state state = *(enum sysbusy_state *)v;
if (val != SYSBUSY_STATE_CHANGE)
return NOTIFY_OK;
profile_interval = sysbusy_params[state].monitor_interval;
return NOTIFY_OK;
}
static struct notifier_block profile_sysbusy_notifier = {
.notifier_call = profile_sysbusy_notifier_call,
};
/****************************************************************
* Initialization *
****************************************************************/
int profile_sched_init(struct kobject *ems_kobj)
{
system_profile_data =
kzalloc(sizeof(struct system_profile_data), GFP_KERNEL);
if (!system_profile_data) {
pr_err("Failed to allocate system_profile_data\n");
return -ENOMEM;
}
sysbusy_register_notifier(&profile_sysbusy_notifier);
if (sysfs_create_file(ems_kobj, &dev_attr_sched_stat.attr))
pr_warn("failed to create sched_stat\n");
return 0;
}