kernel_samsung_a53x/drivers/gpu/arm/exynos/backend/gpexbe_utilization.c

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * (C) COPYRIGHT 2021 Samsung Electronics Inc. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 */

#include <mali_kbase.h>
#include <gpexbe_utilization.h>
#include <gpex_gts.h>

/* Mali pm metrics uses 256ns as a unit */
#define KBASE_PM_TIME_SHIFT 8

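/*
 * Shifting a nanosecond delta right by KBASE_PM_TIME_SHIFT (8) divides it
 * by 256, so the accumulators below count time in ~256 ns units; 1 ms of
 * job time, for example, accumulates as 1000000 >> 8 = 3906 units.
 */
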
struct _util_info {
	struct kbase_device *kbdev;
	int pure_compute_time_rate;
	atomic_t time_compute_jobs;
	atomic_t time_vertex_jobs;
	atomic_t time_fragment_jobs;
	atomic_t cnt_compute_jobs;
	atomic_t cnt_fragment_jobs;
	atomic_t cnt_vertex_jobs;
	int cur_utilization;
};

static struct _util_info util_info;

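/*
 * Accumulate a nanosecond delta into one of the per-job-type counters,
 * converting it to 256 ns units first. atomic_add() takes an int, so the
 * shifted value is truncated to 32 bits on the way in.
 */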
static inline void atomic_add_shifted(u64 val, atomic_t *res)
{
	atomic_add(val >> KBASE_PM_TIME_SHIFT, res);
}

static inline void update_compute_job_load(u64 ns_elapsed)
{
	atomic_add_shifted(ns_elapsed, &util_info.time_compute_jobs);
}

static inline void update_fragment_job_load(u64 ns_elapsed)
{
	atomic_add_shifted(ns_elapsed, &util_info.time_fragment_jobs);
}

static inline void update_vertex_job_load(u64 ns_elapsed)
{
	atomic_add_shifted(ns_elapsed, &util_info.time_vertex_jobs);
}

static inline void increment_compute_job_cnt(void)
{
	atomic_inc(&util_info.cnt_compute_jobs);
}

static inline void increment_fragment_job_cnt(void)
{
	atomic_inc(&util_info.cnt_fragment_jobs);
}

static inline void increment_vertex_job_cnt(void)
{
	atomic_inc(&util_info.cnt_vertex_jobs);
}

static inline bool is_pure_compute_job(struct kbase_jd_atom *katom)
{
	return katom->core_req & BASE_JD_REQ_ONLY_COMPUTE;
}

static inline bool is_fragment_job(struct kbase_jd_atom *katom)
{
	return katom->core_req & BASE_JD_REQ_FS;
}

static inline bool is_compute_job(struct kbase_jd_atom *katom)
{
	/* Includes vertex shader, geometry shader and actual compute shader jobs */
	return katom->core_req & BASE_JD_REQ_CS;
}

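/*
 * The classification below is an else-if chain, so each atom is counted
 * exactly once: atoms flagged BASE_JD_REQ_ONLY_COMPUTE go to the
 * pure-compute bucket, fragment (BASE_JD_REQ_FS) atoms to the fragment
 * bucket, and any remaining BASE_JD_REQ_CS work (vertex/geometry/compute
 * tied to graphics) is accounted as vertex time.
 */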
/* Precondition: katom and end_timestamp are not NULL */
void gpexbe_utilization_update_job_load(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
{
	u64 ns_spent = ktime_to_ns(ktime_sub(*end_timestamp, katom->start_timestamp));

	if (is_pure_compute_job(katom)) {
		update_compute_job_load(ns_spent);
		increment_compute_job_cnt();
	} else if (is_fragment_job(katom)) {
		update_fragment_job_load(ns_spent);
		increment_fragment_job_cnt();
	} else if (is_compute_job(katom)) {
		update_vertex_job_load(ns_spent);
		increment_vertex_job_cnt();
	}
}

int gpexbe_utilization_get_compute_job_time(void)
{
	return atomic_read(&util_info.time_compute_jobs);
}

int gpexbe_utilization_get_vertex_job_time(void)
{
	return atomic_read(&util_info.time_vertex_jobs);
}

int gpexbe_utilization_get_fragment_job_time(void)
{
	return atomic_read(&util_info.time_fragment_jobs);
}

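/*
 * The *_cnt getters below read the counter and then reset it to zero.
 * The read/clear pair is not atomic, so increments landing in between are
 * lost; presumably acceptable for the coarse per-period statistics these
 * feed.
 */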
int gpexbe_utilization_get_compute_job_cnt(void)
{
	int ret = atomic_read(&util_info.cnt_compute_jobs);

	atomic_set(&util_info.cnt_compute_jobs, 0);
	return ret;
}

int gpexbe_utilization_get_vertex_job_cnt(void)
{
	int ret = atomic_read(&util_info.cnt_vertex_jobs);

	atomic_set(&util_info.cnt_vertex_jobs, 0);
	return ret;
}

int gpexbe_utilization_get_fragment_job_cnt(void)
{
	int ret = atomic_read(&util_info.cnt_fragment_jobs);

	atomic_set(&util_info.cnt_fragment_jobs, 0);
	return ret;
}

int gpexbe_utilization_get_utilization(void)
{
	return util_info.cur_utilization;
}

int gpexbe_utilization_get_pure_compute_time_rate(void)
{
	return util_info.pure_compute_time_rate;
}

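/*
 * Turn the accumulated per-job-type times into a pure-compute percentage
 * of the total busy time seen since the last call, then restart the
 * accumulation window by clearing the three time counters.
 */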
void gpexbe_utilization_calculate_compute_ratio(void)
{
	int compute_time = atomic_read(&util_info.time_compute_jobs);
	int vertex_time = atomic_read(&util_info.time_vertex_jobs);
	int fragment_time = atomic_read(&util_info.time_fragment_jobs);
	int total_time = compute_time + vertex_time + fragment_time;

	if (compute_time > 0 && total_time > 0)
		util_info.pure_compute_time_rate = (100 * compute_time) / total_time;
	else
		util_info.pure_compute_time_rate = 0;

	atomic_set(&util_info.time_compute_jobs, 0);
	atomic_set(&util_info.time_vertex_jobs, 0);
	atomic_set(&util_info.time_fragment_jobs, 0);
}

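/*
 * Sample the kbase PM metrics: fold the time since time_period_start into
 * the busy or idle bucket depending on gpu_active, compute utilisation as
 * 100 * busy / (busy + idle) for the elapsed window, then zero the window
 * so the next call starts fresh. Returns -1 when no time has been
 * accounted yet.
 */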
/* TODO: Refactor this function */
int gpexbe_utilization_calc_utilization(void)
{
	unsigned long flags;
	int utilisation = 0;
	struct kbase_device *kbdev = util_info.kbdev;
	ktime_t now = ktime_get();
	ktime_t diff;
	u32 ns_time;

	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);

	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
	ns_time = (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);

	if (kbdev->pm.backend.metrics.gpu_active) {
		kbdev->pm.backend.metrics.values.time_busy += ns_time;
		/* TODO: busy_cl can be a static global here */
		kbdev->pm.backend.metrics.values.busy_cl[0] +=
			ns_time * kbdev->pm.backend.metrics.active_cl_ctx[0];
		kbdev->pm.backend.metrics.values.busy_cl[1] +=
			ns_time * kbdev->pm.backend.metrics.active_cl_ctx[1];
		kbdev->pm.backend.metrics.time_period_start = now;
	} else {
		kbdev->pm.backend.metrics.values.time_idle += ns_time;
		kbdev->pm.backend.metrics.time_period_start = now;
	}

	gpex_gts_update_jobslot_util(kbdev->pm.backend.metrics.gpu_active, ns_time);

	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);

	if (kbdev->pm.backend.metrics.values.time_idle +
		    kbdev->pm.backend.metrics.values.time_busy == 0) {
		/* No data - so we return NOP */
		utilisation = -1;
		goto out;
	}

	utilisation = (100 * kbdev->pm.backend.metrics.values.time_busy) /
		      (kbdev->pm.backend.metrics.values.time_idle +
		       kbdev->pm.backend.metrics.values.time_busy);

	gpex_gts_update_gpu_data();

out:
	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);

	kbdev->pm.backend.metrics.values.time_idle = 0;
	kbdev->pm.backend.metrics.values.time_busy = 0;
	kbdev->pm.backend.metrics.values.busy_cl[0] = 0;
	kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
	kbdev->pm.backend.metrics.values.busy_gl = 0;

	gpex_gts_clear();

	util_info.cur_utilization = utilisation;

	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);

	return utilisation;
}

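/*
 * dev is expected to point at the dev member embedded in the owning
 * struct kbase_device; container_of() below walks back from it to the
 * device instance this backend will track.
 */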
int gpexbe_utilization_init(struct device **dev)
{
	util_info.kbdev = container_of(dev, struct kbase_device, dev);

	atomic_set(&util_info.time_compute_jobs, 0);
	atomic_set(&util_info.time_vertex_jobs, 0);
	atomic_set(&util_info.time_fragment_jobs, 0);
	atomic_set(&util_info.cnt_compute_jobs, 0);
	atomic_set(&util_info.cnt_fragment_jobs, 0);
	atomic_set(&util_info.cnt_vertex_jobs, 0);

	util_info.pure_compute_time_rate = 0;

	return 0;
}

void gpexbe_utilization_term(void)
{
	util_info.kbdev = NULL;

	atomic_set(&util_info.time_compute_jobs, 0);
	atomic_set(&util_info.time_vertex_jobs, 0);
	atomic_set(&util_info.time_fragment_jobs, 0);
	atomic_set(&util_info.cnt_compute_jobs, 0);
	atomic_set(&util_info.cnt_fragment_jobs, 0);
	atomic_set(&util_info.cnt_vertex_jobs, 0);

	util_info.pure_compute_time_rate = 0;
}