kernel_samsung_a53x/drivers/gpu/drm/samsung/sgpu/amdgpu_cwsr.c

/*
* Copyright 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_cwsr.h"
#include "amdgpu_vm.h"
#include "amdgpu_gmc.h"
#include "amdgpu_ring.h"
#include "cwsr_trap_handler.h"
static int amdgpu_cwsr_static_map(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo *bo,
struct amdgpu_bo_va **bo_va,
u64 addr,
u32 size)
{
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
struct ttm_validate_buffer tv;
u64 pte_flag = 0;
int r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&tv.head);
tv.bo = &bo->tbo;
tv.num_shared = 1;
list_add(&tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
DRM_DEBUG_DRIVER("map addr 0x%llx, size 0x%x\n", addr, size);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve cwsr BOs: err=%d\n", r);
return r;
}
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
if (!*bo_va) {
r = -ENOMEM;
DRM_ERROR("failed to create va for static cwsr map\n");
goto err1;
}
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r) {
DRM_ERROR("failed to clear bo table, err=%d\n", r);
goto err1;
}
	/* make sure the mapped size is aligned to a 4 KiB GPU page */
	size = round_up(size, 1 << 12);
pte_flag = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
AMDGPU_PTE_EXECUTABLE |
amdgpu_gmc_map_mtype(adev, AMDGPU_VM_MTYPE_UC);
r = amdgpu_vm_bo_map(adev, *bo_va, addr, 0, size, pte_flag);
if (r) {
DRM_ERROR("failed to do cwsr bo map, err=%d\n", r);
goto err2;
}
r = amdgpu_vm_bo_update(adev, *bo_va, false);
if (r) {
DRM_ERROR("failed to update cwsr bo table, err=%d\n", r);
goto err3;
}
	if ((*bo_va)->last_pt_update) {
		r = dma_fence_wait((*bo_va)->last_pt_update, true);
		if (r) {
			DRM_ERROR("failed to wait for pt update fence, err=%d\n",
				  r);
			goto err3;
		}
	}
r = amdgpu_vm_update_pdes(adev, vm, false);
if (r) {
DRM_ERROR("failed to update pde, err=%d\n", r);
goto err3;
}
if (vm->last_update) {
r = dma_fence_wait(vm->last_update, true);
if (r) {
DRM_ERROR("failed to get pde update fence, err=%d\n",
r);
goto err3;
}
}
ttm_eu_backoff_reservation(&ticket, &list);
return 0;
err3:
amdgpu_vm_bo_unmap(adev, *bo_va, addr);
err2:
amdgpu_vm_bo_rmv(adev, *bo_va);
err1:
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
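
/*
 * amdgpu_cwsr_static_unmap() - undo amdgpu_cwsr_static_map(): unmap @addr,
 * clear the freed mapping, update the page directories and remove the
 * bo_va, all under the same BO + page-directory reservation.
 */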
static int amdgpu_cwsr_static_unmap(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo *bo,
struct amdgpu_bo_va *bo_va,
uint64_t addr)
{
struct list_head list;
struct amdgpu_bo_list_entry pd;
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
int r;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&tv.head);
tv.bo = &bo->tbo;
tv.num_shared = 1;
list_add(&tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
DRM_DEBUG_DRIVER("unmap addr 0x%llx\n", addr);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
if (r) {
DRM_ERROR("failed to reserve cwsr BOs: err=%d\n", r);
return r;
}
	r = amdgpu_vm_bo_unmap(adev, bo_va, addr);
	if (r) {
		DRM_ERROR("failed to unmap cwsr BOs: err=%d\n", r);
		goto out;
	}
	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
	if (bo_va->last_pt_update) {
		r = dma_fence_wait(bo_va->last_pt_update, true);
		if (r)
			DRM_ERROR("failed to get pde clear fence, err=%d\n",
				  r);
	}
	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r) {
		DRM_ERROR("failed to update pde, err=%d\n", r);
		goto out;
	}
	if (vm->last_update) {
		r = dma_fence_wait(vm->last_update, true);
		if (r) {
			DRM_ERROR("failed to get pde update fence, err=%d\n",
				  r);
			goto out;
		}
	}
	amdgpu_vm_bo_rmv(adev, bo_va);
out:
	/* always drop the reservation, even on error, so the ticket taken
	 * above is not leaked
	 */
	ttm_eu_backoff_reservation(&ticket, &list);
	return r;
}
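
/*
 * Per-process writeback (WB) slot allocator. Each slot is 256 bits wide;
 * callers get and return offsets in dwords, hence the "<< 3" / ">> 3"
 * conversions against the allocation bitmap.
 */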
void amdgpu_cwsr_wb_free(struct amdgpu_fpriv *fpriv, u32 wb)
{
wb >>= 3;
if (wb < fpriv->cwsr_wb.num_wb)
__clear_bit(wb, fpriv->cwsr_wb.used);
}
int amdgpu_cwsr_wb_get(struct amdgpu_fpriv *fpriv, u32 *wb)
{
unsigned long offset;
offset = find_first_zero_bit(fpriv->cwsr_wb.used,
fpriv->cwsr_wb.num_wb);
if (offset < fpriv->cwsr_wb.num_wb) {
__set_bit(offset, fpriv->cwsr_wb.used);
*wb = offset << 3; /* convert to dw offset */
return 0;
}
return -EINVAL;
}
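
/*
 * Allocate the per-process writeback buffer, clear it and map it at the
 * fixed AMDGPU_CWSR_WB_OFFSET GPU VA.
 */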
static int amdgpu_cwsr_init_wb(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
int r;
u32 wb_size;
if (fpriv->cwsr_wb.wb_obj)
return 0;
	/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8
	 * = AMDGPU_MAX_WB 256-bit slots
	 */
	wb_size = AMDGPU_MAX_WB * sizeof(uint32_t) * 8;
r = amdgpu_bo_create_kernel(adev, wb_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
&fpriv->cwsr_wb.wb_obj,
NULL,
(void **)&fpriv->cwsr_wb.wb);
if (r) {
DRM_ERROR("create WB bo failed(%d)\n", r);
return r;
}
fpriv->cwsr_wb.num_wb = AMDGPU_MAX_WB;
memset(&fpriv->cwsr_wb.used, 0, sizeof(fpriv->cwsr_wb.used));
/* clear wb memory */
memset((char *)fpriv->cwsr_wb.wb, 0, wb_size);
r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
fpriv->cwsr_wb.wb_obj,
&fpriv->cwsr_wb_va,
AMDGPU_CWSR_WB_OFFSET,
wb_size);
if (r) {
DRM_ERROR("map cwsr wb failed(%d)\n", r);
amdgpu_bo_free_kernel(&fpriv->cwsr_wb.wb_obj,
&fpriv->cwsr_wb.gpu_addr,
(void **)&fpriv->cwsr_wb.wb);
return r;
}
fpriv->cwsr_wb.gpu_addr = AMDGPU_CWSR_WB_OFFSET;
return 0;
}
static void amdgpu_cwsr_deinit_wb(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
if (fpriv->cwsr_wb.wb_obj) {
amdgpu_cwsr_static_unmap(adev,
&fpriv->vm,
fpriv->cwsr_wb.wb_obj,
fpriv->cwsr_wb_va,
fpriv->cwsr_wb.gpu_addr);
fpriv->cwsr_wb_va = NULL;
amdgpu_bo_free_kernel(&fpriv->cwsr_wb.wb_obj,
&fpriv->cwsr_wb.gpu_addr,
(void **)&fpriv->cwsr_wb.wb);
}
}
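
/*
 * Allocate and map the MEC HQD EOP buffer. Each CWSR context uses one
 * AMDGPU_CWSR_MEC_HQD_EOP_SIZE slot, selected by its cwsr_slot_idx.
 */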
static int amdgpu_cwsr_init_hqd_eop(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
int r;
size_t mec_hqd_size;
if (fpriv->cwsr_hqd_eop_obj)
return 0;
mec_hqd_size = AMDGPU_CWSR_MEC_HQD_EOP_SIZE * 8;
r = amdgpu_bo_create_kernel(adev, mec_hqd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&fpriv->cwsr_hqd_eop_obj,
NULL, &fpriv->cwsr_hqd_cpu_addr);
if (r)
return r;
memset(fpriv->cwsr_hqd_cpu_addr, 0, mec_hqd_size);
r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
fpriv->cwsr_hqd_eop_obj,
&fpriv->cwsr_hqd_eop_va,
AMDGPU_CWSR_HQD_EOP_OFFSET,
mec_hqd_size);
if (r) {
DRM_ERROR("map cwsr hqd failed(%d)\n", r);
amdgpu_bo_free_kernel(&fpriv->cwsr_hqd_eop_obj, NULL, NULL);
}
return r;
}
static void amdgpu_cwsr_deinit_hqd_eop(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
if (fpriv->cwsr_hqd_eop_obj) {
amdgpu_cwsr_static_unmap(adev,
&fpriv->vm,
fpriv->cwsr_hqd_eop_obj,
fpriv->cwsr_hqd_eop_va,
AMDGPU_CWSR_HQD_EOP_OFFSET);
fpriv->cwsr_hqd_eop_va = NULL;
amdgpu_bo_free_kernel(&fpriv->cwsr_hqd_eop_obj, NULL,
&fpriv->cwsr_hqd_cpu_addr);
}
}
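
/*
 * Allocate and map the MQD (memory queue descriptor) for a CWSR ring at
 * its fixed per-slot offset from AMDGPU_CWSR_MQD_OFFSET.
 */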
static int amdgpu_cwsr_init_mqd(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct amdgpu_ring *ring)
{
int r;
if (ring->mqd_obj)
return 0;
r = amdgpu_bo_create_kernel(adev,
AMDGPU_CWSR_MQD_SIZE,
PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->mqd_obj,
NULL, &ring->mqd_ptr);
if (r) {
DRM_ERROR("failed to create ring mqd bo (%d)", r);
return r;
}
memset(ring->mqd_ptr, 0, AMDGPU_CWSR_MQD_SIZE);
r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
ring->mqd_obj,
&ring->cwsr_mqd_va,
AMDGPU_CWSR_MQD_OFFSET +
ring->cwsr_slot_idx *
AMDGPU_CWSR_MQD_SIZE,
AMDGPU_CWSR_MQD_SIZE);
if (r) {
DRM_ERROR("failed to map cwsr ring mqd bo (%d)", r);
amdgpu_bo_free_kernel(&ring->mqd_obj,
NULL,
&ring->mqd_ptr);
}
return r;
}
static void amdgpu_cwsr_deinit_mqd(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct amdgpu_ring *ring)
{
if (ring->mqd_obj) {
amdgpu_cwsr_static_unmap(adev,
&fpriv->vm,
ring->mqd_obj,
ring->cwsr_mqd_va,
AMDGPU_CWSR_MQD_OFFSET +
ring->cwsr_slot_idx *
AMDGPU_CWSR_MQD_SIZE);
ring->cwsr_mqd_va = NULL;
amdgpu_bo_free_kernel(&ring->mqd_obj,
NULL,
&ring->mqd_ptr);
}
}
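
/*
 * Allocate the ring buffer BO and map it at its fixed per-slot GPU VA,
 * AMDGPU_CWSR_RING_BUF_OFFSET plus the slot offset.
 */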
static int amdgpu_cwsr_init_ring_bo(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct amdgpu_ring *ring)
{
int r;
if (ring->ring_obj)
return 0;
r = amdgpu_bo_create_kernel(adev,
AMDGPU_CWSR_RING_BUF_SIZE,
PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
NULL,
(void **)&ring->ring);
if (r) {
DRM_ERROR("ring create failed(%d)\n", r);
return r;
}
r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
ring->ring_obj,
&ring->cwsr_ring_va,
AMDGPU_CWSR_RING_BUF_OFFSET +
ring->cwsr_slot_idx *
AMDGPU_CWSR_RING_BUF_SIZE,
AMDGPU_CWSR_RING_BUF_SIZE);
if (r) {
dev_err(adev->dev, "map cwsr ring bo failed(%d)\n", r);
amdgpu_bo_free_kernel(&ring->mqd_obj,
NULL,
(void **)&ring->ring);
}
return r;
}
static void amdgpu_cwsr_deinit_ring_bo(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct amdgpu_ring *ring)
{
if (ring->ring_obj) {
amdgpu_cwsr_static_unmap(adev,
&fpriv->vm,
ring->ring_obj,
ring->cwsr_ring_va,
AMDGPU_CWSR_RING_BUF_OFFSET +
ring->cwsr_slot_idx *
AMDGPU_CWSR_RING_BUF_SIZE);
ring->cwsr_ring_va = NULL;
amdgpu_bo_free_kernel(&ring->ring_obj,
NULL,
(void **)&ring->ring);
}
}
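
/*
 * Reserve the writeback slots a CWSR ring needs (rptr, wptr, fence,
 * trailing fence and cond_exec) and record their GPU and CPU addresses.
 */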
static int amdgpu_cwsr_init_ring_wb(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_fpriv *fpriv)
{
int r;
r = amdgpu_cwsr_wb_get(fpriv, &ring->rptr_offs);
if (r) {
DRM_ERROR("(%d) failed to get rptr_offs\n", r);
return r;
}
ring->cwsr_rptr_gpu_addr =
fpriv->cwsr_wb.gpu_addr + (ring->rptr_offs * 4);
ring->cwsr_rptr_cpu_addr = &fpriv->cwsr_wb.wb[ring->rptr_offs];
*ring->cwsr_rptr_cpu_addr = 0;
r = amdgpu_cwsr_wb_get(fpriv, &ring->wptr_offs);
if (r) {
DRM_ERROR("(%d) ring wptr_offs wb alloc failed\n", r);
goto err1;
}
ring->cwsr_wptr_gpu_addr =
fpriv->cwsr_wb.gpu_addr + (ring->wptr_offs * 4);
ring->cwsr_wptr_cpu_addr = &fpriv->cwsr_wb.wb[ring->wptr_offs];
*ring->cwsr_wptr_cpu_addr = 0;
r = amdgpu_cwsr_wb_get(fpriv, &ring->fence_offs);
if (r) {
dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
goto err2;
}
ring->cwsr_fence_gpu_addr =
fpriv->cwsr_wb.gpu_addr + (ring->fence_offs * 4);
ring->cwsr_fence_cpu_addr = &fpriv->cwsr_wb.wb[ring->fence_offs];
*ring->cwsr_fence_cpu_addr = 0;
r = amdgpu_cwsr_wb_get(fpriv, &ring->trail_fence_offs);
if (r) {
dev_err(adev->dev,
"(%d) ring trail_fence_offs wb alloc failed\n", r);
goto err3;
}
ring->trail_fence_gpu_addr =
fpriv->cwsr_wb.gpu_addr + (ring->trail_fence_offs * 4);
ring->trail_fence_cpu_addr = &fpriv->cwsr_wb.wb[ring->trail_fence_offs];
*ring->trail_fence_cpu_addr = 0;
r = amdgpu_cwsr_wb_get(fpriv, &ring->cond_exe_offs);
if (r) {
dev_err(adev->dev, "(%d) ring cond_exec_offs wb alloc failed\n",
r);
goto err4;
}
ring->cond_exe_gpu_addr = fpriv->cwsr_wb.gpu_addr
+ (ring->cond_exe_offs * 4);
ring->cond_exe_cpu_addr = &fpriv->cwsr_wb.wb[ring->cond_exe_offs];
*ring->cond_exe_cpu_addr = 1;
return 0;
err4:
amdgpu_cwsr_wb_free(fpriv, ring->trail_fence_offs);
err3:
amdgpu_cwsr_wb_free(fpriv, ring->fence_offs);
err2:
amdgpu_cwsr_wb_free(fpriv, ring->wptr_offs);
err1:
amdgpu_cwsr_wb_free(fpriv, ring->rptr_offs);
return r;
}
static void amdgpu_cwsr_deinit_ring_wb(struct amdgpu_ring *ring,
struct amdgpu_fpriv *fpriv)
{
amdgpu_cwsr_wb_free(fpriv, ring->cond_exe_offs);
amdgpu_cwsr_wb_free(fpriv, ring->trail_fence_offs);
amdgpu_cwsr_wb_free(fpriv, ring->fence_offs);
amdgpu_cwsr_wb_free(fpriv, ring->wptr_offs);
amdgpu_cwsr_wb_free(fpriv, ring->rptr_offs);
}
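
/*
 * Software setup of a CWSR ring: EOP address, writeback slots, ring
 * buffer BO, pointer masks and the fixed trap handler (TBA/TMA) GPU
 * addresses.
 */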
static int amdgpu_cwsr_init_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_ctx *ctx,
struct amdgpu_fpriv *fpriv)
{
int r;
if (ring->cwsr)
return 0;
ring->cwsr = true;
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->use_pollfence = amdgpu_poll_eop;
ring->eop_gpu_addr = AMDGPU_CWSR_HQD_EOP_OFFSET +
ctx->cwsr_slot_idx * AMDGPU_CWSR_MEC_HQD_EOP_SIZE;
memset(fpriv->cwsr_hqd_cpu_addr +
ctx->cwsr_slot_idx * AMDGPU_CWSR_MEC_HQD_EOP_SIZE,
0, AMDGPU_CWSR_MEC_HQD_EOP_SIZE);
ring->adev = adev;
ring->cwsr_slot_idx = ctx->cwsr_slot_idx;
r = amdgpu_cwsr_init_ring_wb(adev, ring, fpriv);
if (r) {
DRM_ERROR("(%d) failed to init cwsr wb\n", r);
goto err1;
}
// get ring buffer object
ring->ring_size = AMDGPU_CWSR_RING_BUF_SIZE;
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
0xffffffffffffffff : ring->buf_mask;
r = amdgpu_cwsr_init_ring_bo(adev, fpriv, ring);
if (r) {
DRM_ERROR("failed to get ring buffer object(%d).\n", r);
goto err2;
}
amdgpu_ring_clear_ring(ring);
ring->gpu_addr = AMDGPU_CWSR_RING_BUF_OFFSET +
ring->cwsr_slot_idx *
AMDGPU_CWSR_RING_BUF_SIZE;
ring->max_dw = AMDGPU_CWSR_RING_MAX_DW;
ring->priority = DRM_SCHED_PRIORITY_NORMAL;
mutex_init(&ring->priority_mutex);
ring->cwsr_tba_gpu_addr = AMDGPU_CWSR_TBA_OFFSET;
ring->cwsr_tma_gpu_addr = AMDGPU_CWSR_TMA_OFFSET;
return 0;
err2:
amdgpu_cwsr_deinit_ring_wb(ring, fpriv);
err1:
ring->cwsr = false;
ring->adev = NULL;
return r;
}
static void amdgpu_cwsr_deinit_ring(struct amdgpu_ring *ring,
struct amdgpu_fpriv *fpriv)
{
struct amdgpu_device *adev;
if (!ring->cwsr)
return;
adev = ring->adev;
/* Don't deinit a ring which is not initialized */
if (!adev)
return;
amdgpu_cwsr_deinit_ring_bo(adev, fpriv, ring);
amdgpu_cwsr_deinit_ring_wb(ring, fpriv);
ring->vmid_wait = NULL;
ring->me = 0;
ring->cwsr = false;
ring->adev = NULL;
}
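
/*
 * Allocate the TBA/TMA buffer, copy in the CWSR trap handler binary that
 * matches the ASIC and map it at the fixed AMDGPU_CWSR_TBA_OFFSET.
 */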
static int amdgpu_cwsr_init_trap(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
int r;
if (fpriv->cwsr_trap_obj)
return 0;
r = amdgpu_bo_create_kernel(adev, AMDGPU_CWSR_TBA_TMA_SIZE,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
&fpriv->cwsr_trap_obj,
NULL,
&fpriv->cwsr_trap_cpu_addr);
if (r) {
DRM_ERROR("(%d) failed to create cwsr trap bo\n", r);
return r;
}
/* clear memory */
memset((char *)fpriv->cwsr_trap_cpu_addr, 0, AMDGPU_CWSR_TBA_TMA_SIZE);
if (adev->asic_type == CHIP_VANGOGH_LITE)
memcpy(fpriv->cwsr_trap_cpu_addr,
cwsr_trap_m0_hex, sizeof(cwsr_trap_m0_hex));
else
memcpy(fpriv->cwsr_trap_cpu_addr,
cwsr_trap_nv14_hex, sizeof(cwsr_trap_nv14_hex));
r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
fpriv->cwsr_trap_obj,
&fpriv->cwsr_trap_va,
AMDGPU_CWSR_TBA_OFFSET,
AMDGPU_CWSR_TBA_TMA_SIZE);
if (r) {
DRM_ERROR("map cwsr trap failed(%d)\n", r);
amdgpu_bo_free_kernel(&fpriv->cwsr_trap_obj,
NULL,
(void **)&fpriv->cwsr_trap_cpu_addr);
return r;
}
return 0;
}
static void amdgpu_cwsr_deinit_trap(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
if (fpriv->cwsr_trap_obj) {
amdgpu_cwsr_static_unmap(adev,
&fpriv->vm,
fpriv->cwsr_trap_obj,
fpriv->cwsr_trap_va,
AMDGPU_CWSR_TBA_OFFSET);
fpriv->cwsr_trap_va = NULL;
amdgpu_bo_free_kernel(&fpriv->cwsr_trap_obj,
NULL,
&fpriv->cwsr_trap_cpu_addr);
}
}
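
/*
 * Allocate the per-ring save/restore area: a control (relaunch) stack
 * sized from the CU count and waves per CU, plus per-CU workgroup context
 * data, each rounded up to 4 KiB pages.
 */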
static int amdgpu_cwsr_init_sr_res(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_ring *ring)
{
int r;
u32 ctl_stack_size, wg_data_size;
u32 cu_num;
if (ring->cwsr_sr_obj)
return 0;
cu_num = adev->gfx.config.max_cu_per_sh *
adev->gfx.config.max_sh_per_se *
adev->gfx.config.max_shader_engines;
	/* the extra 8 + 16 bytes hold the control-stack header */
	ctl_stack_size = cu_num * AMDGPU_CWSR_WAVES_PER_CU *
			 AMDGPU_CWSR_CNTL_STACK_BYTES_PER_WAVE + 8 + 16;
ctl_stack_size = round_up(ctl_stack_size, 1 << 12);
ring->cwsr_sr_ctl_size = ctl_stack_size;
wg_data_size = cu_num *
AMDGPU_CWSR_WG_CONTEXT_DATA_SIZE_PER_CU(adev->asic_type);
wg_data_size = round_up(wg_data_size, 1 << 12);
ring->cwsr_sr_size = wg_data_size + ctl_stack_size;
r = amdgpu_bo_create_kernel(adev, ring->cwsr_sr_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
&ring->cwsr_sr_obj,
NULL,
(void **)&ring->cwsr_sr_cpu_addr);
if (r) {
DRM_ERROR("(%d) failed to create cwsr sr bo\n", r);
return r;
}
/* clear memory */
memset((char *)ring->cwsr_sr_cpu_addr, 0, ring->cwsr_sr_size);
	/* the control stack is also known as the relaunch stack */
r = amdgpu_cwsr_static_map(adev, vm,
ring->cwsr_sr_obj,
&ring->cwsr_sr_va,
AMDGPU_CWSR_SR_OFFSET +
ring->cwsr_slot_idx *
ring->cwsr_sr_size,
ring->cwsr_sr_size);
if (r) {
DRM_ERROR("map cwsr sr failed(%d)\n", r);
amdgpu_bo_free_kernel(&ring->cwsr_sr_obj,
NULL,
(void **)&ring->cwsr_sr_cpu_addr);
return r;
}
ring->cwsr_sr_gpu_addr = AMDGPU_CWSR_SR_OFFSET +
ring->cwsr_slot_idx * ring->cwsr_sr_size;
return 0;
}
static void amdgpu_cwsr_deinit_sr_res(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_ring *ring)
{
if (ring->cwsr_sr_obj) {
amdgpu_cwsr_static_unmap(adev,
vm,
ring->cwsr_sr_obj,
ring->cwsr_sr_va,
AMDGPU_CWSR_SR_OFFSET +
ring->cwsr_slot_idx *
ring->cwsr_sr_size);
ring->cwsr_sr_va = NULL;
amdgpu_bo_free_kernel(&ring->cwsr_sr_obj,
NULL,
(void **)&ring->cwsr_sr_cpu_addr);
}
}
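
/*
 * Per-VM CWSR resources shared by all contexts of an fpriv: writeback
 * buffer, HQD EOP buffer and trap handler. Their lifetime is tracked by
 * the cwsr_ctx_ref reference count.
 */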
static int amdgpu_cwsr_init_res(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
int r;
if (fpriv->cwsr_ready) {
atomic_inc(&fpriv->cwsr_ctx_ref);
return 0;
}
//init wb bo
r = amdgpu_cwsr_init_wb(adev, fpriv);
if (r)
return r;
//allocate eop buffer
r = amdgpu_cwsr_init_hqd_eop(adev, fpriv);
if (r)
goto err1;
r = amdgpu_cwsr_init_trap(adev, fpriv);
if (r)
goto err2;
atomic_set(&fpriv->cwsr_ctx_ref, 1);
fpriv->cwsr_ready = true;
return 0;
err2:
amdgpu_cwsr_deinit_hqd_eop(adev, fpriv);
err1:
amdgpu_cwsr_deinit_wb(adev, fpriv);
return r;
}
static void amdgpu_cwsr_deinit_res(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv)
{
if (!fpriv->cwsr_ready)
return;
if (atomic_dec_return(&fpriv->cwsr_ctx_ref) > 0)
return;
fpriv->cwsr_ready = false;
amdgpu_cwsr_deinit_trap(adev, fpriv);
amdgpu_cwsr_deinit_hqd_eop(adev, fpriv);
amdgpu_cwsr_deinit_wb(adev, fpriv);
DRM_DEBUG_DRIVER("deinit cwsr per VM global resource\n");
}
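
/*
 * Point the ring's fence driver at its per-process writeback slot and at
 * the compute EOP interrupt source for its MEC pipe.
 */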
static void amdgpu_cwsr_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_fpriv *fpriv)
{
u32 seq;
u32 irq_type;
struct amdgpu_device *adev;
struct amdgpu_irq_src *irq_src;
adev = ring->adev;
irq_src = &adev->gfx.eop_irq;
irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
ring->fence_drv.cpu_addr = &fpriv->cwsr_wb.wb[ring->fence_offs];
ring->fence_drv.gpu_addr = fpriv->cwsr_wb.gpu_addr
+ (ring->fence_offs * 4);
seq = atomic_read(&ring->fence_drv.last_seq);
*ring->fence_drv.cpu_addr = cpu_to_le32(seq);
ring->fence_drv.irq_src = irq_src;
ring->fence_drv.irq_type = irq_type;
ring->fence_drv.initialized = true;
}
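
/*
 * Build the CWSR ring for a context: take a resource slot, initialize the
 * ring, MQD, save/restore area, fence driver and scheduler entity, then
 * register the queue with the software scheduler (SWS). A return value of
 * AMDGPU_SWS_HW_RES_BUSY means the context was set up but hardware queue
 * resources were busy.
 */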
static int amdgpu_cwsr_init_ctx(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct amdgpu_ctx *ctx)
{
int r;
int sched_hw_submission = amdgpu_sched_hw_submission;
struct drm_gpu_scheduler *sched = NULL;
struct drm_sched_entity *entity;
struct amdgpu_ring *ring;
if (!fpriv->cwsr_ready)
return 0;
r = ida_simple_get(&fpriv->cwsr_res_slots, 0,
AMDGPU_CWSR_MAX_RING, GFP_KERNEL);
if (r < 0) {
DRM_DEBUG_DRIVER("no valid solt for CWSR\n");
return -EINVAL;
}
ctx->cwsr_slot_idx = r;
DRM_DEBUG_DRIVER("get cwsr slot idx:%u\n", r);
r = amdgpu_sws_early_init_ctx(ctx);
if (r) {
ida_simple_remove(&fpriv->cwsr_res_slots, ctx->cwsr_slot_idx);
DRM_WARN("failed to do early ring init\n");
return r;
}
ring = ctx->cwsr_ring;
r = amdgpu_cwsr_init_ring(adev, ring, ctx, fpriv);
if (r) {
DRM_ERROR("failed to init cwsr ring\n");
goto err1;
}
r = amdgpu_cwsr_init_mqd(adev, fpriv, ring);
if (r) {
DRM_ERROR("failed to get mqd for cwsr\n");
goto err2;
}
r = amdgpu_cwsr_init_sr_res(adev, &fpriv->vm, ring);
if (r) {
DRM_ERROR("failed to get sr for cwsr\n");
goto err3;
}
r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
if (r) {
DRM_ERROR("(%d) failed to init cwsr fence drv\n", r);
goto err4;
}
entity = &ctx->cwsr_entities->entity;
sched = &ring->sched;
r = drm_sched_entity_init(entity,
ctx->init_priority,
&sched,
1, &ctx->guilty);
if (r) {
DRM_ERROR("(%d) failed to init entity\n", r);
goto err5;
}
amdgpu_cwsr_fence_driver_start_ring(ring, fpriv);
r = amdgpu_sws_init_ctx(ctx, fpriv);
if (r == AMDGPU_SWS_HW_RES_BUSY) {
if (amdgpu_debugfs_ring_init(adev, ring))
DRM_WARN("failed to init debugfs for ring:%s!\n",
ring->name);
ctx->cwsr = true;
return r;
} else if (r < 0) {
DRM_DEBUG_DRIVER("(%d) failed to init queue\n", r);
goto err6;
}
if (amdgpu_debugfs_ring_init(adev, ring))
DRM_WARN("failed to init debugfs for ring:%s!\n",
ring->name);
ctx->cwsr = true;
return 0;
err6:
drm_sched_entity_destroy(entity);
err5:
amdgpu_fence_driver_deinit_ring(ring);
err4:
amdgpu_cwsr_deinit_sr_res(adev, &fpriv->vm, ring);
err3:
amdgpu_cwsr_deinit_mqd(adev, fpriv, ring);
err2:
amdgpu_cwsr_deinit_ring(ring, fpriv);
err1:
amdgpu_sws_late_deinit_ctx(ctx);
ida_simple_remove(&fpriv->cwsr_res_slots, ctx->cwsr_slot_idx);
return r;
}
static void amdgpu_cwsr_deinit_ctx(struct amdgpu_device *adev,
struct amdgpu_ctx *ctx)
{
struct amdgpu_ring *ring;
struct amdgpu_fpriv *fpriv;
if (!ctx->cwsr)
return;
fpriv = ctx->fpriv;
ctx->cwsr = false;
ring = ctx->cwsr_ring;
amdgpu_debugfs_ring_fini(ring);
amdgpu_sws_deinit_ctx(ctx, fpriv);
amdgpu_fence_driver_deinit_ring(ring);
amdgpu_cwsr_deinit_sr_res(adev, &fpriv->vm, ring);
amdgpu_cwsr_deinit_mqd(adev, fpriv, ring);
amdgpu_cwsr_deinit_ring(ring, fpriv);
amdgpu_sws_late_deinit_ctx(ctx);
ida_simple_remove(&fpriv->cwsr_res_slots, ctx->cwsr_slot_idx);
}
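
/*
 * Initialize the MQD contents and map the queue on the MEC so the CWSR
 * ring can execute; marks the queue broken on failure.
 */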
int amdgpu_cwsr_init_queue(struct amdgpu_ring *ring)
{
int r;
struct amdgpu_device *adev;
if (!ring->cwsr ||
ring->sws_ctx.queue_state != AMDGPU_SWS_QUEUE_DISABLED)
return 0;
adev = ring->adev;
ring->mqd_gpu_addr = AMDGPU_CWSR_MQD_OFFSET +
ring->cwsr_slot_idx *
AMDGPU_CWSR_MQD_SIZE;
//init mqd
r = amdgpu_ring_compute_mqd_init(ring);
if (r) {
DRM_ERROR("failed to init mqd for cwsr\n");
goto err1;
}
//map queue
r = adev->gfx.mec.map_cwsr_queue(ring);
if (r) {
DRM_ERROR("failed to map queue for cwsr\n");
goto err1;
}
ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_ENABLED;
return 0;
err1:
ring->cwsr_queue_broken = true;
return r;
}
void amdgpu_cwsr_deinit_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
if (!ring->cwsr ||
ring->sws_ctx.queue_state != AMDGPU_SWS_QUEUE_ENABLED)
return;
//unmap queue
adev->gfx.mec.unmap_cwsr_queue(ring, AMDGPU_CP_HQD_DEQUEUE_MODE_STD);
ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_DISABLED;
}
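
/*
 * Per-context CWSR entry point: set up the shared per-VM resources and
 * the context's CWSR ring. Does nothing when cwsr_enable is 0.
 */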
int amdgpu_cwsr_init(struct amdgpu_ctx *ctx)
{
struct amdgpu_fpriv *fpriv;
struct amdgpu_device *adev;
int r = 0;
adev = ctx->adev;
fpriv = ctx->fpriv;
if (cwsr_enable == 0)
return 0;
mutex_lock(&fpriv->cwsr_lock);
ctx->cwsr_init = true;
r = amdgpu_cwsr_init_res(adev, fpriv);
if (r) {
DRM_WARN("failed to init cwsr res\n");
mutex_unlock(&fpriv->cwsr_lock);
return r;
}
r = amdgpu_cwsr_init_ctx(adev, fpriv, ctx);
if (r == AMDGPU_SWS_HW_RES_BUSY) {
mutex_unlock(&fpriv->cwsr_lock);
return AMDGPU_SWS_HW_RES_BUSY;
} else if (r < 0) {
DRM_DEBUG_DRIVER("failed to init cwsr ctx\n");
goto err1;
}
mutex_unlock(&fpriv->cwsr_lock);
return r;
err1:
amdgpu_cwsr_deinit_res(adev, fpriv);
mutex_unlock(&fpriv->cwsr_lock);
return r;
}
void amdgpu_cwsr_deinit(struct amdgpu_ctx *ctx)
{
struct amdgpu_fpriv *fpriv;
struct amdgpu_device *adev;
adev = ctx->adev;
fpriv = ctx->fpriv;
if (cwsr_enable == 0)
return;
mutex_lock(&fpriv->cwsr_lock);
if (ctx->cwsr) {
amdgpu_cwsr_deinit_ctx(adev, ctx);
amdgpu_cwsr_deinit_res(adev, fpriv);
}
mutex_unlock(&fpriv->cwsr_lock);
}
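
/* Preempt an enabled CWSR queue by unmapping it with the SSSD dequeue mode. */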
int amdgpu_cwsr_dequeue(struct amdgpu_ring *ring)
{
int r;
struct amdgpu_device *adev = ring->adev;
if (ring->sws_ctx.queue_state != AMDGPU_SWS_QUEUE_ENABLED)
return 0;
r = adev->gfx.mec.unmap_cwsr_queue(ring,
AMDGPU_CP_HQD_DEQUEUE_MODE_SSSD);
if (r)
ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_DISABLED;
else
ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_DEQUEUED;
return r;
}
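
/* Re-map a previously dequeued CWSR queue on the MEC so it can run again. */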
int amdgpu_cwsr_relaunch(struct amdgpu_ring *ring)
{
int r;
struct amdgpu_device *adev = ring->adev;
if (ring->sws_ctx.queue_state != AMDGPU_SWS_QUEUE_DEQUEUED)
return 0;
//relaunch queue
r = adev->gfx.mec.map_cwsr_queue(ring);
if (r) {
DRM_ERROR("failed to map queue for cwsr\n");
return r;
}
ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_ENABLED;
return r;
}