/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_cwsr.h"
#include "amdgpu_vm.h"
#include "amdgpu_gmc.h"
#include "amdgpu_ring.h"
#include "cwsr_trap_handler.h"

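/*
 * Map a kernel-owned BO at a fixed virtual address inside a process VM.
 * The BO and the VM page directory are reserved together, a bo_va is
 * added, the (page-aligned) range is mapped read/write/execute with the
 * UC memory type, and the function blocks on the page-table update
 * fences so the mapping is in place by the time it returns.
 */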
static int amdgpu_cwsr_static_map(struct amdgpu_device *adev,
                struct amdgpu_vm *vm,
                struct amdgpu_bo *bo,
                struct amdgpu_bo_va **bo_va,
                u64 addr,
                u32 size)
{
        struct ww_acquire_ctx ticket;
        struct list_head list;
        struct amdgpu_bo_list_entry pd;
        struct ttm_validate_buffer tv;
        u64 pte_flag = 0;
        int r;

        INIT_LIST_HEAD(&list);
        INIT_LIST_HEAD(&tv.head);
        tv.bo = &bo->tbo;
        tv.num_shared = 1;

        list_add(&tv.head, &list);
        amdgpu_vm_get_pd_bo(vm, &list, &pd);

        DRM_DEBUG_DRIVER("map addr 0x%llx, size 0x%x\n", addr, size);

        r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
        if (r) {
                DRM_ERROR("failed to reserve cwsr BOs: err=%d\n", r);
                return r;
        }

        *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
        if (!*bo_va) {
                r = -ENOMEM;
                DRM_ERROR("failed to create va for static cwsr map\n");
                goto err1;
        }

        r = amdgpu_vm_clear_freed(adev, vm, NULL);
        if (r) {
                DRM_ERROR("failed to clear bo table, err=%d\n", r);
                goto err1;
        }

        // make sure size is PAGE_SIZE aligned
        size = round_up(size, 1 << 12);
        pte_flag = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
                   AMDGPU_PTE_EXECUTABLE |
                   amdgpu_gmc_map_mtype(adev, AMDGPU_VM_MTYPE_UC);
        r = amdgpu_vm_bo_map(adev, *bo_va, addr, 0, size, pte_flag);
        if (r) {
                DRM_ERROR("failed to do cwsr bo map, err=%d\n", r);
                goto err2;
        }

        r = amdgpu_vm_bo_update(adev, *bo_va, false);
        if (r) {
                DRM_ERROR("failed to update cwsr bo table, err=%d\n", r);
                goto err3;
        }

        if ((*bo_va)->last_pt_update) {
                r = dma_fence_wait((*bo_va)->last_pt_update, true);
                if (r) {
                        DRM_ERROR("failed to get pde update fence, err=%d\n",
                                  r);
                        goto err3;
                }
        }

        r = amdgpu_vm_update_pdes(adev, vm, false);
        if (r) {
                DRM_ERROR("failed to update pde, err=%d\n", r);
                goto err3;
        }

        if (vm->last_update) {
                r = dma_fence_wait(vm->last_update, true);
                if (r) {
                        DRM_ERROR("failed to get pde update fence, err=%d\n",
                                  r);
                        goto err3;
                }
        }

        ttm_eu_backoff_reservation(&ticket, &list);

        return 0;

err3:
        amdgpu_vm_bo_unmap(adev, *bo_va, addr);
err2:
        amdgpu_vm_bo_rmv(adev, *bo_va);
err1:
        ttm_eu_backoff_reservation(&ticket, &list);
        return r;
}

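/*
 * Undo amdgpu_cwsr_static_map(): unmap the range, wait for the
 * page-table clear/update fences and drop the bo_va again.
 */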
static int amdgpu_cwsr_static_unmap(struct amdgpu_device *adev,
                struct amdgpu_vm *vm,
                struct amdgpu_bo *bo,
                struct amdgpu_bo_va *bo_va,
                uint64_t addr)
{
        struct list_head list;
        struct amdgpu_bo_list_entry pd;
        struct ttm_validate_buffer tv;
        struct ww_acquire_ctx ticket;
        int r;

        INIT_LIST_HEAD(&list);
        INIT_LIST_HEAD(&tv.head);
        tv.bo = &bo->tbo;
        tv.num_shared = 1;

        list_add(&tv.head, &list);
        amdgpu_vm_get_pd_bo(vm, &list, &pd);

        DRM_DEBUG_DRIVER("unmap addr 0x%llx\n", addr);
        r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
        if (r) {
                DRM_ERROR("failed to reserve cwsr BOs: err=%d\n", r);
                return r;
        }

        r = amdgpu_vm_bo_unmap(adev, bo_va, addr);
        if (r) {
                DRM_ERROR("failed to unmap cwsr BOs: err=%d\n", r);
                return r;
        }

        amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
        if (bo_va->last_pt_update) {
                r = dma_fence_wait(bo_va->last_pt_update, true);
                if (r) {
                        DRM_ERROR("failed to get pde clear fence, err=%d\n",
                                  r);
                }
        }

        r = amdgpu_vm_update_pdes(adev, vm, false);
        if (r) {
                DRM_ERROR("failed to update pde, err=%d\n", r);
                return r;
        }

        if (vm->last_update) {
                r = dma_fence_wait(vm->last_update, true);
                if (r) {
                        DRM_ERROR("failed to get pde update fence, err=%d\n",
                                  r);
                        return r;
                }
        }

        amdgpu_vm_bo_rmv(adev, bo_va);

        ttm_eu_backoff_reservation(&ticket, &list);

        return r;
}

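/*
 * Per-fd writeback slot allocator for CWSR rings. Each slot is eight
 * dwords (256 bits) wide, hence the <<3 / >>3 conversions between slot
 * index and dword offset.
 */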
void amdgpu_cwsr_wb_free(struct amdgpu_fpriv *fpriv, u32 wb)
{
        wb >>= 3;
        if (wb < fpriv->cwsr_wb.num_wb)
                __clear_bit(wb, fpriv->cwsr_wb.used);
}

int amdgpu_cwsr_wb_get(struct amdgpu_fpriv *fpriv, u32 *wb)
{
        unsigned long offset;

        offset = find_first_zero_bit(fpriv->cwsr_wb.used,
                        fpriv->cwsr_wb.num_wb);

        if (offset < fpriv->cwsr_wb.num_wb) {
                __set_bit(offset, fpriv->cwsr_wb.used);
                *wb = offset << 3; /* convert to dw offset */

                return 0;
        }

        return -EINVAL;
}

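/*
 * Allocate the per-fd writeback buffer, clear it, and map it at the
 * fixed AMDGPU_CWSR_WB_OFFSET virtual address.
 */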
static int amdgpu_cwsr_init_wb(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv)
{
        int r;
        u32 wb_size;

        if (fpriv->cwsr_wb.wb_obj)
                return 0;

        /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8
         * = AMDGPU_MAX_WB 256bit slots
         */
        wb_size = AMDGPU_MAX_WB * sizeof(uint32_t) * 8;
        r = amdgpu_bo_create_kernel(adev, wb_size,
                        PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                        &fpriv->cwsr_wb.wb_obj,
                        NULL,
                        (void **)&fpriv->cwsr_wb.wb);
        if (r) {
                DRM_ERROR("create WB bo failed(%d)\n", r);
                return r;
        }

        fpriv->cwsr_wb.num_wb = AMDGPU_MAX_WB;
        memset(&fpriv->cwsr_wb.used, 0, sizeof(fpriv->cwsr_wb.used));

        /* clear wb memory */
        memset((char *)fpriv->cwsr_wb.wb, 0, wb_size);

        r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
                        fpriv->cwsr_wb.wb_obj,
                        &fpriv->cwsr_wb_va,
                        AMDGPU_CWSR_WB_OFFSET,
                        wb_size);
        if (r) {
                DRM_ERROR("map cwsr wb failed(%d)\n", r);
                amdgpu_bo_free_kernel(&fpriv->cwsr_wb.wb_obj,
                                &fpriv->cwsr_wb.gpu_addr,
                                (void **)&fpriv->cwsr_wb.wb);
                return r;
        }

        fpriv->cwsr_wb.gpu_addr = AMDGPU_CWSR_WB_OFFSET;

        return 0;
}

static void amdgpu_cwsr_deinit_wb(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv)
{
        if (fpriv->cwsr_wb.wb_obj) {
                amdgpu_cwsr_static_unmap(adev,
                                &fpriv->vm,
                                fpriv->cwsr_wb.wb_obj,
                                fpriv->cwsr_wb_va,
                                fpriv->cwsr_wb.gpu_addr);

                fpriv->cwsr_wb_va = NULL;

                amdgpu_bo_free_kernel(&fpriv->cwsr_wb.wb_obj,
                                &fpriv->cwsr_wb.gpu_addr,
                                (void **)&fpriv->cwsr_wb.wb);
        }
}

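/*
 * Allocate and map the MEC HQD EOP buffer shared by this fd's CWSR
 * rings: one AMDGPU_CWSR_MEC_HQD_EOP_SIZE slot per CWSR slot index,
 * eight slots in total.
 */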
static int amdgpu_cwsr_init_hqd_eop(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv)
{
        int r;
        size_t mec_hqd_size;

        if (fpriv->cwsr_hqd_eop_obj)
                return 0;

        mec_hqd_size = AMDGPU_CWSR_MEC_HQD_EOP_SIZE * 8;

        r = amdgpu_bo_create_kernel(adev, mec_hqd_size, PAGE_SIZE,
                        AMDGPU_GEM_DOMAIN_GTT,
                        &fpriv->cwsr_hqd_eop_obj,
                        NULL, &fpriv->cwsr_hqd_cpu_addr);
        if (r)
                return r;

        memset(fpriv->cwsr_hqd_cpu_addr, 0, mec_hqd_size);

        r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
                        fpriv->cwsr_hqd_eop_obj,
                        &fpriv->cwsr_hqd_eop_va,
                        AMDGPU_CWSR_HQD_EOP_OFFSET,
                        mec_hqd_size);
        if (r) {
                DRM_ERROR("map cwsr hqd failed(%d)\n", r);
                amdgpu_bo_free_kernel(&fpriv->cwsr_hqd_eop_obj, NULL, NULL);
        }

        return r;
}

static void amdgpu_cwsr_deinit_hqd_eop(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv)
{
        if (fpriv->cwsr_hqd_eop_obj) {
                amdgpu_cwsr_static_unmap(adev,
                                &fpriv->vm,
                                fpriv->cwsr_hqd_eop_obj,
                                fpriv->cwsr_hqd_eop_va,
                                AMDGPU_CWSR_HQD_EOP_OFFSET);

                fpriv->cwsr_hqd_eop_va = NULL;

                amdgpu_bo_free_kernel(&fpriv->cwsr_hqd_eop_obj, NULL,
                                &fpriv->cwsr_hqd_cpu_addr);
        }
}

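/*
 * Create the MQD (memory queue descriptor) BO for a CWSR ring and map
 * it at AMDGPU_CWSR_MQD_OFFSET plus the ring's slot offset.
 */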
static int amdgpu_cwsr_init_mqd(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv,
                struct amdgpu_ring *ring)
{
        int r;

        if (ring->mqd_obj)
                return 0;

        r = amdgpu_bo_create_kernel(adev,
                        AMDGPU_CWSR_MQD_SIZE,
                        PAGE_SIZE,
                        AMDGPU_GEM_DOMAIN_GTT,
                        &ring->mqd_obj,
                        NULL, &ring->mqd_ptr);
        if (r) {
                DRM_ERROR("failed to create ring mqd bo (%d)", r);
                return r;
        }

        memset(ring->mqd_ptr, 0, AMDGPU_CWSR_MQD_SIZE);

        r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
                        ring->mqd_obj,
                        &ring->cwsr_mqd_va,
                        AMDGPU_CWSR_MQD_OFFSET +
                        ring->cwsr_slot_idx *
                        AMDGPU_CWSR_MQD_SIZE,
                        AMDGPU_CWSR_MQD_SIZE);
        if (r) {
                DRM_ERROR("failed to map cwsr ring mqd bo (%d)", r);
                amdgpu_bo_free_kernel(&ring->mqd_obj,
                                NULL,
                                &ring->mqd_ptr);
        }

        return r;
}

static void amdgpu_cwsr_deinit_mqd(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv,
                struct amdgpu_ring *ring)
{
        if (ring->mqd_obj) {
                amdgpu_cwsr_static_unmap(adev,
                                &fpriv->vm,
                                ring->mqd_obj,
                                ring->cwsr_mqd_va,
                                AMDGPU_CWSR_MQD_OFFSET +
                                ring->cwsr_slot_idx *
                                AMDGPU_CWSR_MQD_SIZE);

                ring->cwsr_mqd_va = NULL;

                amdgpu_bo_free_kernel(&ring->mqd_obj,
                                NULL,
                                &ring->mqd_ptr);
        }
}

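/*
 * Create the ring buffer BO and map it at AMDGPU_CWSR_RING_BUF_OFFSET
 * plus the ring's slot offset.
 */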
static int amdgpu_cwsr_init_ring_bo(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv,
                struct amdgpu_ring *ring)
{
        int r;

        if (ring->ring_obj)
                return 0;

        r = amdgpu_bo_create_kernel(adev,
                        AMDGPU_CWSR_RING_BUF_SIZE,
                        PAGE_SIZE,
                        AMDGPU_GEM_DOMAIN_GTT,
                        &ring->ring_obj,
                        NULL,
                        (void **)&ring->ring);
        if (r) {
                DRM_ERROR("ring create failed(%d)\n", r);
                return r;
        }

        r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
                        ring->ring_obj,
                        &ring->cwsr_ring_va,
                        AMDGPU_CWSR_RING_BUF_OFFSET +
                        ring->cwsr_slot_idx *
                        AMDGPU_CWSR_RING_BUF_SIZE,
                        AMDGPU_CWSR_RING_BUF_SIZE);
        if (r) {
                dev_err(adev->dev, "map cwsr ring bo failed(%d)\n", r);
                amdgpu_bo_free_kernel(&ring->ring_obj,
                                NULL,
                                (void **)&ring->ring);
        }

        return r;
}

static void amdgpu_cwsr_deinit_ring_bo(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv,
                struct amdgpu_ring *ring)
{
        if (ring->ring_obj) {
                amdgpu_cwsr_static_unmap(adev,
                                &fpriv->vm,
                                ring->ring_obj,
                                ring->cwsr_ring_va,
                                AMDGPU_CWSR_RING_BUF_OFFSET +
                                ring->cwsr_slot_idx *
                                AMDGPU_CWSR_RING_BUF_SIZE);

                ring->cwsr_ring_va = NULL;

                amdgpu_bo_free_kernel(&ring->ring_obj,
                                NULL,
                                (void **)&ring->ring);
        }
}

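/*
 * Reserve writeback slots for the ring's rptr, wptr, fence, trailing
 * fence and cond_exec words and publish their CPU/GPU addresses on the
 * ring. The cond_exec word is initialised to 1, the others to 0.
 */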
static int amdgpu_cwsr_init_ring_wb(struct amdgpu_device *adev,
                struct amdgpu_ring *ring,
                struct amdgpu_fpriv *fpriv)
{
        int r;

        r = amdgpu_cwsr_wb_get(fpriv, &ring->rptr_offs);
        if (r) {
                DRM_ERROR("(%d) failed to get rptr_offs\n", r);
                return r;
        }

        ring->cwsr_rptr_gpu_addr =
                fpriv->cwsr_wb.gpu_addr + (ring->rptr_offs * 4);
        ring->cwsr_rptr_cpu_addr = &fpriv->cwsr_wb.wb[ring->rptr_offs];
        *ring->cwsr_rptr_cpu_addr = 0;

        r = amdgpu_cwsr_wb_get(fpriv, &ring->wptr_offs);
        if (r) {
                DRM_ERROR("(%d) ring wptr_offs wb alloc failed\n", r);
                goto err1;
        }

        ring->cwsr_wptr_gpu_addr =
                fpriv->cwsr_wb.gpu_addr + (ring->wptr_offs * 4);
        ring->cwsr_wptr_cpu_addr = &fpriv->cwsr_wb.wb[ring->wptr_offs];
        *ring->cwsr_wptr_cpu_addr = 0;

        r = amdgpu_cwsr_wb_get(fpriv, &ring->fence_offs);
        if (r) {
                dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
                goto err2;
        }
        ring->cwsr_fence_gpu_addr =
                fpriv->cwsr_wb.gpu_addr + (ring->fence_offs * 4);
        ring->cwsr_fence_cpu_addr = &fpriv->cwsr_wb.wb[ring->fence_offs];
        *ring->cwsr_fence_cpu_addr = 0;

        r = amdgpu_cwsr_wb_get(fpriv, &ring->trail_fence_offs);
        if (r) {
                dev_err(adev->dev,
                        "(%d) ring trail_fence_offs wb alloc failed\n", r);
                goto err3;
        }
        ring->trail_fence_gpu_addr =
                fpriv->cwsr_wb.gpu_addr + (ring->trail_fence_offs * 4);
        ring->trail_fence_cpu_addr = &fpriv->cwsr_wb.wb[ring->trail_fence_offs];
        *ring->trail_fence_cpu_addr = 0;

        r = amdgpu_cwsr_wb_get(fpriv, &ring->cond_exe_offs);
        if (r) {
                dev_err(adev->dev, "(%d) ring cond_exec_offs wb alloc failed\n",
                        r);
                goto err4;
        }
        ring->cond_exe_gpu_addr = fpriv->cwsr_wb.gpu_addr
                + (ring->cond_exe_offs * 4);
        ring->cond_exe_cpu_addr = &fpriv->cwsr_wb.wb[ring->cond_exe_offs];
        *ring->cond_exe_cpu_addr = 1;

        return 0;

err4:
        amdgpu_cwsr_wb_free(fpriv, ring->trail_fence_offs);
err3:
        amdgpu_cwsr_wb_free(fpriv, ring->fence_offs);
err2:
        amdgpu_cwsr_wb_free(fpriv, ring->wptr_offs);
err1:
        amdgpu_cwsr_wb_free(fpriv, ring->rptr_offs);

        return r;
}

static void amdgpu_cwsr_deinit_ring_wb(struct amdgpu_ring *ring,
                struct amdgpu_fpriv *fpriv)
{
        amdgpu_cwsr_wb_free(fpriv, ring->cond_exe_offs);
        amdgpu_cwsr_wb_free(fpriv, ring->trail_fence_offs);
        amdgpu_cwsr_wb_free(fpriv, ring->fence_offs);
        amdgpu_cwsr_wb_free(fpriv, ring->wptr_offs);
        amdgpu_cwsr_wb_free(fpriv, ring->rptr_offs);
}

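/*
 * Software setup of a CWSR compute ring: doorbell mode, EOP address,
 * writeback slots, ring buffer, and the fixed TBA/TMA (trap handler)
 * virtual addresses. The MQD and save/restore area are created
 * separately in amdgpu_cwsr_init_ctx().
 */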
static int amdgpu_cwsr_init_ring(struct amdgpu_device *adev,
                struct amdgpu_ring *ring,
                struct amdgpu_ctx *ctx,
                struct amdgpu_fpriv *fpriv)
{
        int r;

        if (ring->cwsr)
                return 0;

        ring->cwsr = true;

        ring->ring_obj = NULL;
        ring->use_doorbell = true;
        ring->use_pollfence = amdgpu_poll_eop;

        ring->eop_gpu_addr = AMDGPU_CWSR_HQD_EOP_OFFSET +
                ctx->cwsr_slot_idx * AMDGPU_CWSR_MEC_HQD_EOP_SIZE;
        memset(fpriv->cwsr_hqd_cpu_addr +
               ctx->cwsr_slot_idx * AMDGPU_CWSR_MEC_HQD_EOP_SIZE,
               0, AMDGPU_CWSR_MEC_HQD_EOP_SIZE);

        ring->adev = adev;

        ring->cwsr_slot_idx = ctx->cwsr_slot_idx;

        r = amdgpu_cwsr_init_ring_wb(adev, ring, fpriv);
        if (r) {
                DRM_ERROR("(%d) failed to init cwsr wb\n", r);
                goto err1;
        }

        // get ring buffer object
        ring->ring_size = AMDGPU_CWSR_RING_BUF_SIZE;
        ring->buf_mask = (ring->ring_size / 4) - 1;
        ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
                0xffffffffffffffff : ring->buf_mask;
        r = amdgpu_cwsr_init_ring_bo(adev, fpriv, ring);
        if (r) {
                DRM_ERROR("failed to get ring buffer object(%d).\n", r);
                goto err2;
        }
        amdgpu_ring_clear_ring(ring);

        ring->gpu_addr = AMDGPU_CWSR_RING_BUF_OFFSET +
                ring->cwsr_slot_idx *
                AMDGPU_CWSR_RING_BUF_SIZE;

        ring->max_dw = AMDGPU_CWSR_RING_MAX_DW;
        ring->priority = DRM_SCHED_PRIORITY_NORMAL;
        mutex_init(&ring->priority_mutex);

        ring->cwsr_tba_gpu_addr = AMDGPU_CWSR_TBA_OFFSET;
        ring->cwsr_tma_gpu_addr = AMDGPU_CWSR_TMA_OFFSET;

        return 0;

err2:
        amdgpu_cwsr_deinit_ring_wb(ring, fpriv);
err1:
        ring->cwsr = false;
        ring->adev = NULL;

        return r;
}

static void amdgpu_cwsr_deinit_ring(struct amdgpu_ring *ring,
                struct amdgpu_fpriv *fpriv)
{
        struct amdgpu_device *adev;

        if (!ring->cwsr)
                return;

        adev = ring->adev;
        /* Don't deinit a ring which is not initialized */
        if (!adev)
                return;

        amdgpu_cwsr_deinit_ring_bo(adev, fpriv, ring);

        amdgpu_cwsr_deinit_ring_wb(ring, fpriv);

        ring->vmid_wait = NULL;
        ring->me = 0;
        ring->cwsr = false;
        ring->adev = NULL;
}

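/*
 * Allocate the TBA/TMA buffer, copy in the CWSR trap handler image that
 * matches the ASIC and map it at the fixed AMDGPU_CWSR_TBA_OFFSET.
 */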
static int amdgpu_cwsr_init_trap(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv)
{
        int r;

        if (fpriv->cwsr_trap_obj)
                return 0;

        r = amdgpu_bo_create_kernel(adev, AMDGPU_CWSR_TBA_TMA_SIZE,
                        PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                        &fpriv->cwsr_trap_obj,
                        NULL,
                        &fpriv->cwsr_trap_cpu_addr);
        if (r) {
                DRM_ERROR("(%d) failed to create cwsr trap bo\n", r);
                return r;
        }

        /* clear memory */
        memset((char *)fpriv->cwsr_trap_cpu_addr, 0, AMDGPU_CWSR_TBA_TMA_SIZE);

        if (adev->asic_type == CHIP_VANGOGH_LITE)
                memcpy(fpriv->cwsr_trap_cpu_addr,
                       cwsr_trap_m0_hex, sizeof(cwsr_trap_m0_hex));
        else
                memcpy(fpriv->cwsr_trap_cpu_addr,
                       cwsr_trap_nv14_hex, sizeof(cwsr_trap_nv14_hex));

        r = amdgpu_cwsr_static_map(adev, &fpriv->vm,
                        fpriv->cwsr_trap_obj,
                        &fpriv->cwsr_trap_va,
                        AMDGPU_CWSR_TBA_OFFSET,
                        AMDGPU_CWSR_TBA_TMA_SIZE);
        if (r) {
                DRM_ERROR("map cwsr trap failed(%d)\n", r);
                amdgpu_bo_free_kernel(&fpriv->cwsr_trap_obj,
                                NULL,
                                (void **)&fpriv->cwsr_trap_cpu_addr);
                return r;
        }

        return 0;
}

static void amdgpu_cwsr_deinit_trap(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv)
{
        if (fpriv->cwsr_trap_obj) {
                amdgpu_cwsr_static_unmap(adev,
                                &fpriv->vm,
                                fpriv->cwsr_trap_obj,
                                fpriv->cwsr_trap_va,
                                AMDGPU_CWSR_TBA_OFFSET);

                fpriv->cwsr_trap_va = NULL;
                amdgpu_bo_free_kernel(&fpriv->cwsr_trap_obj,
                                NULL,
                                &fpriv->cwsr_trap_cpu_addr);
        }
}

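/*
 * Size and map the per-ring save/restore area: a control (relaunch)
 * stack plus work-group context data, both scaled by the number of CUs
 * and rounded up to page granularity.
 */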
static int amdgpu_cwsr_init_sr_res(struct amdgpu_device *adev,
                struct amdgpu_vm *vm,
                struct amdgpu_ring *ring)
{
        int r;
        u32 ctl_stack_size, wg_data_size;
        u32 cu_num;

        if (ring->cwsr_sr_obj)
                return 0;

        cu_num = adev->gfx.config.max_cu_per_sh *
                adev->gfx.config.max_sh_per_se *
                adev->gfx.config.max_shader_engines;

        // 8 + 16 bytes are for the ctl stack header
        ctl_stack_size = cu_num * AMDGPU_CWSR_WAVES_PER_CU *
                AMDGPU_CWSR_CNTL_STACK_BYTES_PER_WAVE + 8 + 16;
        ctl_stack_size = round_up(ctl_stack_size, 1 << 12);
        ring->cwsr_sr_ctl_size = ctl_stack_size;

        wg_data_size = cu_num *
                AMDGPU_CWSR_WG_CONTEXT_DATA_SIZE_PER_CU(adev->asic_type);
        wg_data_size = round_up(wg_data_size, 1 << 12);
        ring->cwsr_sr_size = wg_data_size + ctl_stack_size;

        r = amdgpu_bo_create_kernel(adev, ring->cwsr_sr_size,
                        PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
                        &ring->cwsr_sr_obj,
                        NULL,
                        (void **)&ring->cwsr_sr_cpu_addr);
        if (r) {
                DRM_ERROR("(%d) failed to create cwsr sr bo\n", r);
                return r;
        }
        /* clear memory */
        memset((char *)ring->cwsr_sr_cpu_addr, 0, ring->cwsr_sr_size);

        // the ctl stack is also known as the relaunch stack
        r = amdgpu_cwsr_static_map(adev, vm,
                        ring->cwsr_sr_obj,
                        &ring->cwsr_sr_va,
                        AMDGPU_CWSR_SR_OFFSET +
                        ring->cwsr_slot_idx *
                        ring->cwsr_sr_size,
                        ring->cwsr_sr_size);
        if (r) {
                DRM_ERROR("map cwsr sr failed(%d)\n", r);
                amdgpu_bo_free_kernel(&ring->cwsr_sr_obj,
                                NULL,
                                (void **)&ring->cwsr_sr_cpu_addr);
                return r;
        }

        ring->cwsr_sr_gpu_addr = AMDGPU_CWSR_SR_OFFSET +
                ring->cwsr_slot_idx * ring->cwsr_sr_size;

        return 0;
}

static void amdgpu_cwsr_deinit_sr_res(struct amdgpu_device *adev,
                struct amdgpu_vm *vm,
                struct amdgpu_ring *ring)
{
        if (ring->cwsr_sr_obj) {
                amdgpu_cwsr_static_unmap(adev,
                                vm,
                                ring->cwsr_sr_obj,
                                ring->cwsr_sr_va,
                                AMDGPU_CWSR_SR_OFFSET +
                                ring->cwsr_slot_idx *
                                ring->cwsr_sr_size);

                ring->cwsr_sr_va = NULL;
                amdgpu_bo_free_kernel(&ring->cwsr_sr_obj,
                                NULL,
                                (void **)&ring->cwsr_sr_cpu_addr);
        }
}

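/*
 * Per-fd CWSR resources (writeback buffer, HQD EOP buffer, trap
 * handler) are shared by all contexts of the file and refcounted
 * through cwsr_ctx_ref.
 */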
static int amdgpu_cwsr_init_res(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv)
{
        int r;

        if (fpriv->cwsr_ready) {
                atomic_inc(&fpriv->cwsr_ctx_ref);
                return 0;
        }

        // init wb bo
        r = amdgpu_cwsr_init_wb(adev, fpriv);
        if (r)
                return r;

        // allocate eop buffer
        r = amdgpu_cwsr_init_hqd_eop(adev, fpriv);
        if (r)
                goto err1;

        r = amdgpu_cwsr_init_trap(adev, fpriv);
        if (r)
                goto err2;

        atomic_set(&fpriv->cwsr_ctx_ref, 1);

        fpriv->cwsr_ready = true;
        return 0;

err2:
        amdgpu_cwsr_deinit_hqd_eop(adev, fpriv);
err1:
        amdgpu_cwsr_deinit_wb(adev, fpriv);

        return r;
}

static void amdgpu_cwsr_deinit_res(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv)
{
        if (!fpriv->cwsr_ready)
                return;

        if (atomic_dec_return(&fpriv->cwsr_ctx_ref) > 0)
                return;

        fpriv->cwsr_ready = false;
        amdgpu_cwsr_deinit_trap(adev, fpriv);
        amdgpu_cwsr_deinit_hqd_eop(adev, fpriv);
        amdgpu_cwsr_deinit_wb(adev, fpriv);
        DRM_DEBUG_DRIVER("deinit cwsr per VM global resource\n");
}

static void amdgpu_cwsr_fence_driver_start_ring(struct amdgpu_ring *ring,
                struct amdgpu_fpriv *fpriv)
{
        u32 seq;
        u32 irq_type;
        struct amdgpu_device *adev;
        struct amdgpu_irq_src *irq_src;

        adev = ring->adev;
        irq_src = &adev->gfx.eop_irq;
        irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
                + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
                + ring->pipe;

        ring->fence_drv.cpu_addr = &fpriv->cwsr_wb.wb[ring->fence_offs];
        ring->fence_drv.gpu_addr = fpriv->cwsr_wb.gpu_addr
                + (ring->fence_offs * 4);
        seq = atomic_read(&ring->fence_drv.last_seq);
        *ring->fence_drv.cpu_addr = cpu_to_le32(seq);

        ring->fence_drv.irq_src = irq_src;
        ring->fence_drv.irq_type = irq_type;
        ring->fence_drv.initialized = true;
}

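/*
 * Per-context CWSR setup: reserve a resource slot, create the ring, MQD
 * and save/restore area, bring up the fence driver and scheduler
 * entity, then register the queue with SWS. AMDGPU_SWS_HW_RES_BUSY from
 * amdgpu_sws_init_ctx() is not treated as a failure and is passed back
 * to the caller.
 */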
static int amdgpu_cwsr_init_ctx(struct amdgpu_device *adev,
                struct amdgpu_fpriv *fpriv,
                struct amdgpu_ctx *ctx)
{
        int r;
        int sched_hw_submission = amdgpu_sched_hw_submission;
        struct drm_gpu_scheduler *sched = NULL;
        struct drm_sched_entity *entity;
        struct amdgpu_ring *ring;

        if (!fpriv->cwsr_ready)
                return 0;

        r = ida_simple_get(&fpriv->cwsr_res_slots, 0,
                        AMDGPU_CWSR_MAX_RING, GFP_KERNEL);
        if (r < 0) {
                DRM_DEBUG_DRIVER("no valid slot for CWSR\n");
                return -EINVAL;
        }
        ctx->cwsr_slot_idx = r;
        DRM_DEBUG_DRIVER("get cwsr slot idx:%u\n", r);

        r = amdgpu_sws_early_init_ctx(ctx);
        if (r) {
                ida_simple_remove(&fpriv->cwsr_res_slots, ctx->cwsr_slot_idx);
                DRM_WARN("failed to do early ring init\n");
                return r;
        }

        ring = ctx->cwsr_ring;
        r = amdgpu_cwsr_init_ring(adev, ring, ctx, fpriv);
        if (r) {
                DRM_ERROR("failed to init cwsr ring\n");
                goto err1;
        }

        r = amdgpu_cwsr_init_mqd(adev, fpriv, ring);
        if (r) {
                DRM_ERROR("failed to get mqd for cwsr\n");
                goto err2;
        }

        r = amdgpu_cwsr_init_sr_res(adev, &fpriv->vm, ring);
        if (r) {
                DRM_ERROR("failed to get sr for cwsr\n");
                goto err3;
        }

        r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission);
        if (r) {
                DRM_ERROR("(%d) failed to init cwsr fence drv\n", r);
                goto err4;
        }

        entity = &ctx->cwsr_entities->entity;
        sched = &ring->sched;
        r = drm_sched_entity_init(entity,
                        ctx->init_priority,
                        &sched,
                        1, &ctx->guilty);
        if (r) {
                DRM_ERROR("(%d) failed to init entity\n", r);
                goto err5;
        }

        amdgpu_cwsr_fence_driver_start_ring(ring, fpriv);

        r = amdgpu_sws_init_ctx(ctx, fpriv);
        if (r == AMDGPU_SWS_HW_RES_BUSY) {
                if (amdgpu_debugfs_ring_init(adev, ring))
                        DRM_WARN("failed to init debugfs for ring:%s!\n",
                                 ring->name);
                ctx->cwsr = true;
                return r;
        } else if (r < 0) {
                DRM_DEBUG_DRIVER("(%d) failed to init queue\n", r);
                goto err6;
        }

        if (amdgpu_debugfs_ring_init(adev, ring))
                DRM_WARN("failed to init debugfs for ring:%s!\n",
                         ring->name);
        ctx->cwsr = true;

        return 0;

err6:
        drm_sched_entity_destroy(entity);
err5:
        amdgpu_fence_driver_deinit_ring(ring);
err4:
        amdgpu_cwsr_deinit_sr_res(adev, &fpriv->vm, ring);
err3:
        amdgpu_cwsr_deinit_mqd(adev, fpriv, ring);
err2:
        amdgpu_cwsr_deinit_ring(ring, fpriv);
err1:
        amdgpu_sws_late_deinit_ctx(ctx);
        ida_simple_remove(&fpriv->cwsr_res_slots, ctx->cwsr_slot_idx);
        return r;
}

static void amdgpu_cwsr_deinit_ctx(struct amdgpu_device *adev,
                struct amdgpu_ctx *ctx)
{
        struct amdgpu_ring *ring;
        struct amdgpu_fpriv *fpriv;

        if (!ctx->cwsr)
                return;

        fpriv = ctx->fpriv;
        ctx->cwsr = false;
        ring = ctx->cwsr_ring;

        amdgpu_debugfs_ring_fini(ring);

        amdgpu_sws_deinit_ctx(ctx, fpriv);

        amdgpu_fence_driver_deinit_ring(ring);

        amdgpu_cwsr_deinit_sr_res(adev, &fpriv->vm, ring);
        amdgpu_cwsr_deinit_mqd(adev, fpriv, ring);
        amdgpu_cwsr_deinit_ring(ring, fpriv);
        amdgpu_sws_late_deinit_ctx(ctx);
        ida_simple_remove(&fpriv->cwsr_res_slots, ctx->cwsr_slot_idx);
}

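/*
 * Initialise the compute MQD and map the queue onto the MEC. On failure
 * the ring is flagged through cwsr_queue_broken.
 */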
int amdgpu_cwsr_init_queue(struct amdgpu_ring *ring)
{
        int r;
        struct amdgpu_device *adev;

        if (!ring->cwsr ||
            ring->sws_ctx.queue_state != AMDGPU_SWS_QUEUE_DISABLED)
                return 0;

        adev = ring->adev;

        ring->mqd_gpu_addr = AMDGPU_CWSR_MQD_OFFSET +
                ring->cwsr_slot_idx *
                AMDGPU_CWSR_MQD_SIZE;

        // init mqd
        r = amdgpu_ring_compute_mqd_init(ring);
        if (r) {
                DRM_ERROR("failed to init mqd for cwsr\n");
                goto err1;
        }

        // map queue
        r = adev->gfx.mec.map_cwsr_queue(ring);
        if (r) {
                DRM_ERROR("failed to map queue for cwsr\n");
                goto err1;
        }

        ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_ENABLED;

        return 0;
err1:
        ring->cwsr_queue_broken = true;

        return r;
}

void amdgpu_cwsr_deinit_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;

        if (!ring->cwsr ||
            ring->sws_ctx.queue_state != AMDGPU_SWS_QUEUE_ENABLED)
                return;

        // unmap queue
        adev->gfx.mec.unmap_cwsr_queue(ring, AMDGPU_CP_HQD_DEQUEUE_MODE_STD);

        ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_DISABLED;
}

int amdgpu_cwsr_init(struct amdgpu_ctx *ctx)
{
        struct amdgpu_fpriv *fpriv;
        struct amdgpu_device *adev;
        int r = 0;

        adev = ctx->adev;
        fpriv = ctx->fpriv;

        if (cwsr_enable == 0)
                return 0;

        mutex_lock(&fpriv->cwsr_lock);
        ctx->cwsr_init = true;

        r = amdgpu_cwsr_init_res(adev, fpriv);
        if (r) {
                DRM_WARN("failed to init cwsr res\n");
                mutex_unlock(&fpriv->cwsr_lock);
                return r;
        }

        r = amdgpu_cwsr_init_ctx(adev, fpriv, ctx);
        if (r == AMDGPU_SWS_HW_RES_BUSY) {
                mutex_unlock(&fpriv->cwsr_lock);
                return AMDGPU_SWS_HW_RES_BUSY;
        } else if (r < 0) {
                DRM_DEBUG_DRIVER("failed to init cwsr ctx\n");
                goto err1;
        }

        mutex_unlock(&fpriv->cwsr_lock);
        return r;

err1:
        amdgpu_cwsr_deinit_res(adev, fpriv);
        mutex_unlock(&fpriv->cwsr_lock);
        return r;
}

void amdgpu_cwsr_deinit(struct amdgpu_ctx *ctx)
{
        struct amdgpu_fpriv *fpriv;
        struct amdgpu_device *adev;

        adev = ctx->adev;
        fpriv = ctx->fpriv;

        if (cwsr_enable == 0)
                return;

        mutex_lock(&fpriv->cwsr_lock);
        if (ctx->cwsr) {
                amdgpu_cwsr_deinit_ctx(adev, ctx);
                amdgpu_cwsr_deinit_res(adev, fpriv);
        }
        mutex_unlock(&fpriv->cwsr_lock);
}

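/*
 * amdgpu_cwsr_dequeue()/amdgpu_cwsr_relaunch() take a CWSR queue off
 * the hardware and put it back: dequeue unmaps the HQD with
 * AMDGPU_CP_HQD_DEQUEUE_MODE_SSSD, relaunch maps it onto the MEC again.
 */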
int amdgpu_cwsr_dequeue(struct amdgpu_ring *ring)
{
        int r;
        struct amdgpu_device *adev = ring->adev;

        if (ring->sws_ctx.queue_state != AMDGPU_SWS_QUEUE_ENABLED)
                return 0;

        r = adev->gfx.mec.unmap_cwsr_queue(ring,
                        AMDGPU_CP_HQD_DEQUEUE_MODE_SSSD);
        if (r)
                ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_DISABLED;
        else
                ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_DEQUEUED;

        return r;
}

int amdgpu_cwsr_relaunch(struct amdgpu_ring *ring)
{
        int r;
        struct amdgpu_device *adev = ring->adev;

        if (ring->sws_ctx.queue_state != AMDGPU_SWS_QUEUE_DEQUEUED)
                return 0;

        // relaunch queue
        r = adev->gfx.mec.map_cwsr_queue(ring);
        if (r) {
                DRM_ERROR("failed to map queue for cwsr\n");
                return r;
        }

        ring->sws_ctx.queue_state = AMDGPU_SWS_QUEUE_ENABLED;

        return r;
}