/* * @file sgpu_utilization.c * @copyright 2020 Samsung Electronics */ #include #include "amdgpu.h" #include "amdgpu_trace.h" #include "sgpu_governor.h" #include "sgpu_utilization.h" static int sgpu_dvfs_get_utilization(struct devfreq_dev_status *stat, bool interval) { struct utilization_data *data = stat->private_data; struct utilization_timeinfo *timeinfo = &data->timeinfo[SGPU_TIMEINFO_SW]; stat->total_time = timeinfo->total_time; stat->busy_time = timeinfo->busy_time; return 0; } #define MSB_BIT_SHIFT 31 #define UPPER_BIT_SHIFT 16 #define NORMALIZE_SHIFT 9 #define NORMALIZE_FACT (1<<(NORMALIZE_SHIFT)) #define RANGE_SHIFT_MAX ((NORMALIZE_SHIFT) - 1) static int sgpu_dvfs_get_fake_random_utilization(struct devfreq_dev_status *stat, bool interval) { static int utilization = 0; struct utilization_data *data = stat->private_data; struct utilization_timeinfo *timeinfo = &data->timeinfo[SGPU_TIMEINFO_SW]; struct devfreq_dev_profile *dp = data->devfreq->profile; const uint32_t random_value = get_random_u32(); int range; int freq_util; const uint64_t max_range = NORMALIZE_FACT * (dp->freq_table[0] / HZ_PER_KHZ); if (!interval) { return 0; } range = max_range >> ((random_value >> UPPER_BIT_SHIFT) % RANGE_SHIFT_MAX); if (random_value & (0x1 << MSB_BIT_SHIFT)) utilization += random_value % ((max_range - utilization < range ? max_range - utilization : range) + 1); else utilization -= random_value % ((utilization < range ? utilization : range) + 1); freq_util = utilization / (stat->current_frequency / HZ_PER_KHZ); stat->total_time = timeinfo->total_time; stat->busy_time = freq_util >= NORMALIZE_FACT ? stat->total_time : stat->total_time * freq_util / NORMALIZE_FACT; timeinfo->busy_time = stat->busy_time; return 0; } #define DURATION_MIN 1 #define DURATION_MAX 40 #define DURATION_DIFF DURATION_MAX - DURATION_MIN static int sgpu_dvfs_get_fake_swing_utilization(struct devfreq_dev_status *stat, bool interval) { struct utilization_data *data = stat->private_data; struct utilization_timeinfo *timeinfo = &data->timeinfo[SGPU_TIMEINFO_SW]; static int duration = 0; static int cnt = 0; static bool up = false; uint32_t random_value = get_random_u32(); if (!interval) { return 0; } if (++cnt > duration) { cnt = 0; duration = random_value % DURATION_DIFF; duration += DURATION_MIN; up = up ? false : true; } stat->total_time = timeinfo->total_time; if (up) stat->busy_time = stat->total_time; else stat->busy_time = 0; return 0; } static int sgpu_dvfs_get_fake_hw_utilization(struct devfreq_dev_status *stat, bool interval) { static int utilization = 0; struct utilization_data *data = stat->private_data; struct devfreq_dev_profile *dp = data->devfreq->profile; const uint32_t random_value = get_random_u32(); uint32_t hw_random_value = get_random_u32(); int range; int freq_util; const uint64_t max_range = NORMALIZE_FACT * (dp->freq_table[0] / HZ_PER_KHZ); if (!interval) return 0; range = max_range >> ((random_value >> UPPER_BIT_SHIFT) % RANGE_SHIFT_MAX); if (random_value & (0x1 << MSB_BIT_SHIFT)) utilization += random_value % ((max_range - utilization < range ? max_range - utilization : range) + 1); else utilization -= random_value % ((utilization < range ? utilization : range) + 1); hw_random_value %= 101; freq_util = utilization / (stat->current_frequency / HZ_PER_KHZ); data->timeinfo[SGPU_TIMEINFO_SW].busy_time = freq_util >= NORMALIZE_FACT ? data->timeinfo[SGPU_TIMEINFO_SW].total_time : data->timeinfo[SGPU_TIMEINFO_SW].total_time * freq_util / NORMALIZE_FACT; data->timeinfo[SGPU_TIMEINFO_HW].total_time = data->timeinfo[SGPU_TIMEINFO_SW].total_time; data->timeinfo[SGPU_TIMEINFO_HW].busy_time = data->timeinfo[SGPU_TIMEINFO_SW].busy_time * hw_random_value / 100; stat->total_time = data->timeinfo[SGPU_TIMEINFO_SW].total_time; stat->busy_time = data->timeinfo[SGPU_TIMEINFO_SW].busy_time; return 0; } struct sgpu_utilization_info utilization_src_info[SGPU_MAX_SRC_NUM] = { { .id = SGPU_DVFS_SRC_JIFFIES, .name = "jiffies", .hw_source_valid = false, .sgpu_get_status = sgpu_dvfs_get_utilization, }, { .id = SGPU_DVFS_SRC_HW_COUNTER, .name = "hw_counter", .hw_source_valid = false, .sgpu_get_status = sgpu_dvfs_get_utilization, }, { .id = SGPU_DVFS_SRC_FAKE_RANDOM, .name = "random", .hw_source_valid = false, .sgpu_get_status = sgpu_dvfs_get_fake_random_utilization, }, { .id = SGPU_DVFS_SRC_FAKE_SWING, .name = "swing", .hw_source_valid = false, .sgpu_get_status = sgpu_dvfs_get_fake_swing_utilization, }, { .id = SGPU_DVFS_SRC_FAKE_HW_COUNTER, .name = "fake_hw_counter", .hw_source_valid = false, .sgpu_get_status = sgpu_dvfs_get_fake_hw_utilization, }, }; int sgpu_utilization_job_start(struct devfreq *df, uint32_t job_count, bool cu_job) { struct devfreq_dev_status *stat = &df->last_status; struct utilization_data *data = stat->private_data; struct utilization_timeinfo *sw_info = &data->timeinfo[SGPU_TIMEINFO_SW]; struct amdgpu_device *adev = data->adev; int ret = 0; uint64_t current_time; unsigned long flags; if (!job_count) return 0; spin_lock_irqsave(&data->lock, flags); if (data->active == 0) { /* starting point */ current_time = sched_clock(); sw_info->total_time += current_time - data->last_time; data->last_time = current_time; if (cu_job) data->cu_last_time = current_time; } else if (cu_job && data->cu_active == 0) { /* compute(CL) job starting point (other jobs are still running) */ current_time = sched_clock(); data->cu_last_time = current_time; } data->active += job_count; if (cu_job) data->cu_active += job_count; SGPU_LOG(adev, DMSG_INFO, DMSG_POWER, "amdgpu_ib_schedule active_cnt %d, cu_active_cnt %d, usage_cnt %d", data->active, data->cu_active, data->adev->dev->power.usage_count); spin_unlock_irqrestore(&data->lock, flags); if (data->active < 0) dev_err(df->dev.parent, "%s: active count %d\n", __func__, data->active); return ret; } int sgpu_utilization_job_end(struct devfreq *df, uint32_t job_count, bool cu_job) { struct devfreq_dev_status *stat = &df->last_status; struct utilization_data *data = stat->private_data; struct utilization_timeinfo *sw_info = &data->timeinfo[SGPU_TIMEINFO_SW]; struct sgpu_governor_data *gdata = df->data; struct amdgpu_device *adev = gdata->adev; uint64_t current_time; int ret = 0; unsigned long flags; if (!job_count) return 0; spin_lock_irqsave(&data->lock, flags); data->active -= job_count; if (cu_job) data->cu_active -= job_count; if (data->active == 0) { /* end point */ current_time = sched_clock(); sw_info->busy_time += current_time - data->last_time; sw_info->total_time += current_time - data->last_time; data->last_time = current_time; if (cu_job) { sw_info->cu_busy_time += current_time - data->cu_last_time; data->cu_last_time = current_time; } } else if (cu_job && data->cu_active == 0) { /* compute(CL) job end point (other jobs are still running) */ current_time = sched_clock(); sw_info->cu_busy_time += current_time - data->cu_last_time; data->cu_last_time = current_time; } SGPU_LOG(adev, DMSG_INFO, DMSG_ETC, "amdgpu_fence_process active_cnt %d cu active cnt %d, usage_cnt %d", data->active, data->cu_active, data->adev->dev->power.usage_count); spin_unlock_irqrestore(&data->lock, flags); if (data->active < 0) dev_err(df->dev.parent, "%s: active count %d\n", __func__, data->active); return ret; } int sgpu_utilization_capture(struct devfreq_dev_status *stat) { struct utilization_data *data = stat->private_data; struct utilization_timeinfo *sw_info = &data->timeinfo[SGPU_TIMEINFO_SW]; struct sgpu_governor_data *governor_data = data->devfreq->data; uint64_t current_time; int ret = 0; unsigned long flags; current_time = sched_clock(); spin_lock_irqsave(&data->lock, flags); sw_info->total_time += current_time - data->last_time; if (data->active > 0) { sw_info->busy_time += current_time - data->last_time; if (data->cu_active > 0) sw_info->cu_busy_time += current_time - data->cu_last_time; } data->last_time = current_time; data->cu_last_time = current_time; if (sw_info->total_time - sw_info->prev_total_time >= governor_data->valid_time * NSEC_PER_MSEC) { sw_info->total_time -= sw_info->prev_total_time; sw_info->prev_total_time = sw_info->total_time; sw_info->busy_time -= sw_info->prev_busy_time; sw_info->prev_busy_time = sw_info->busy_time; sw_info->cu_busy_time -= sw_info->cu_prev_busy_time; sw_info->cu_prev_busy_time = sw_info->cu_busy_time; data->utilization_src->sgpu_get_status(stat, true); } else if (sw_info->prev_total_time) { data->utilization_src->sgpu_get_status(stat, false); } trace_sgpu_devfreq_utilization(sw_info, stat->current_frequency); spin_unlock_irqrestore(&data->lock, flags); return ret; } static void sgpu_utilization_reset(struct devfreq_dev_status *stat) { struct utilization_data *data = stat->private_data; int i = 0; for (i = SGPU_TIMEINFO_SW; i < SGPU_TIMEINFO_NUM; i++) { data->timeinfo[i].prev_total_time = 0; data->timeinfo[i].prev_busy_time = 0; data->timeinfo[i].cu_prev_busy_time = 0; data->timeinfo[i].total_time = 0; data->timeinfo[i].busy_time = 0; data->timeinfo[i].cu_busy_time = 0; } } void sgpu_utilization_trace_start(struct devfreq_dev_status *stat) { struct utilization_data *data = stat->private_data; unsigned long flags; spin_lock_irqsave(&data->lock, flags); sgpu_utilization_reset(stat); data->trace_time = data->last_time = data->cu_last_time = sched_clock(); spin_unlock_irqrestore(&data->lock, flags); } void sgpu_utilization_trace_stop(struct devfreq_dev_status *stat) { struct utilization_data *data = stat->private_data; unsigned long flags; spin_lock_irqsave(&data->lock, flags); sgpu_utilization_reset(stat); stat->total_time = 0; stat->busy_time = 0; spin_unlock_irqrestore(&data->lock, flags); } void sgpu_utilization_trace_before(struct devfreq_dev_status *stat, unsigned long freq) { struct utilization_data *data = stat->private_data; struct sgpu_governor_data *governor_data = data->devfreq->data; uint64_t current_time; unsigned long flags; current_time = sched_clock(); spin_lock_irqsave(&data->lock, flags); trace_sgpu_devfreq_monitor(data->devfreq, governor_data->min_freq, governor_data->max_freq, current_time - data->trace_time); data->trace_time = current_time; spin_unlock_irqrestore(&data->lock, flags); } void sgpu_utilization_trace_after(struct devfreq_dev_status *stat, unsigned long freq) { struct utilization_data *data = stat->private_data; struct sgpu_governor_data *governor_data = data->devfreq->data; struct amdgpu_device *adev = data->adev; uint64_t current_time; unsigned long flags; if (stat->current_frequency == freq) return; stat->current_frequency = freq; current_time = sched_clock(); spin_lock_irqsave(&data->lock, flags); SGPU_LOG(adev, DMSG_INFO, DMSG_DVFS, "min_freq=%8lu, max_freq=%8lu, cur_freq=%8lu", governor_data->min_freq, governor_data->max_freq, stat->current_frequency); trace_sgpu_devfreq_monitor(data->devfreq, governor_data->min_freq, governor_data->max_freq, current_time - data->trace_time); data->trace_time = data->last_time = data->cu_last_time = current_time; sgpu_utilization_reset(stat); spin_unlock_irqrestore(&data->lock, flags); } int sgpu_utilization_src_change(struct devfreq *df, char *buf) { int i; struct devfreq_dev_status *stat = &df->last_status; struct utilization_data *data = stat->private_data; int ret = -ENODEV; unsigned long flags; spin_lock_irqsave(&data->lock, flags); for (i = 0; i < SGPU_MAX_SRC_NUM; i++) { if (!strncmp(utilization_src_info[i].name, buf, DEVFREQ_NAME_LEN)) { data->utilization_src = &utilization_src_info[i]; ret = 0; break; } } sgpu_utilization_reset(stat); spin_unlock_irqrestore(&data->lock, flags); return ret; } ssize_t sgpu_utilization_current_src_show(struct devfreq *df, char *buf) { struct devfreq_dev_status *stat = &df->last_status; struct utilization_data *data = stat->private_data; ssize_t count; unsigned long flags; spin_lock_irqsave(&data->lock, flags); count = scnprintf(buf, PAGE_SIZE, "%s", data->utilization_src->name); spin_unlock_irqrestore(&data->lock, flags); return count; } ssize_t sgpu_utilization_all_src_show(struct devfreq *df, char *buf) { int i; ssize_t count = 0; for (i = 0; i < SGPU_MAX_SRC_NUM; i++) { struct sgpu_utilization_info *src = &utilization_src_info[i]; count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), "%s ", src->name); } /* Truncate the trailing space */ if (count) count--; count += sprintf(&buf[count], "\n"); return count; } int sgpu_utilization_init(struct amdgpu_device *adev, struct devfreq *df) { struct devfreq_dev_status *stat = &df->last_status; struct utilization_data *data = kzalloc(sizeof(struct utilization_data), GFP_KERNEL); if (!data) return -ENOMEM; spin_lock_init(&data->lock); data->adev = adev; data->active = 0; data->cu_active = 0; data->devfreq = df; data->utilization_src = &utilization_src_info[SGPU_DVFS_SRC_JIFFIES]; stat->private_data = data; return 0; } void sgpu_utilization_deinit(struct devfreq *df) { struct devfreq_dev_status *stat = &df->last_status; struct utilization_data *data = stat->private_data; kfree(data); stat->private_data = NULL; }