drm/amdgpu: Modify indirect register access for amdkfd_gfx_v9 sriov

Modify GC register access from MMIO to RLCG if the indirect
flag is set

Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>
Reviewed-by: David Nieto <david.nieto@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Victor Skvortsov 2021-12-13 22:43:06 +00:00 committed by Alex Deucher
parent 92f153bb5a
commit 4aa325ae54
1 changed files with 13 additions and 14 deletions

View File

@ -166,7 +166,7 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
lock_srbm(adev, mec, pipe, 0, 0); lock_srbm(adev, mec, pipe, 0, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
@ -279,7 +279,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
lower_32_bits((uintptr_t)wptr)); lower_32_bits((uintptr_t)wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr)); upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
(uint32_t)get_queue_mask(adev, pipe_id, queue_id)); (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
} }
@ -488,13 +488,13 @@ bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
uint32_t low, high; uint32_t low, high;
acquire_queue(adev, pipe_id, queue_id); acquire_queue(adev, pipe_id, queue_id);
act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (act) { if (act) {
low = lower_32_bits(queue_address >> 8); low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8); high = upper_32_bits(queue_address >> 8);
if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
retval = true; retval = true;
} }
release_queue(adev); release_queue(adev);
@ -556,7 +556,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
end_jiffies = (utimeout * HZ / 1000) + jiffies; end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) { while (true) {
temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break; break;
if (time_after(jiffies, end_jiffies)) { if (time_after(jiffies, end_jiffies)) {
@ -645,7 +645,7 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1); INSTANCE_BROADCAST_WRITES, 1);
@ -722,7 +722,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0); soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0);
reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
queue_slot); queue_slot);
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (*wave_cnt != 0) if (*wave_cnt != 0)
@ -809,8 +809,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
queue_map = RREG32(SOC15_REG_OFFSET(GC, 0, queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS);
mmSPI_CSQ_WF_ACTIVE_STATUS));
/* /*
* Assumption: queue map encodes following schema: four * Assumption: queue map encodes following schema: four
@ -860,17 +859,17 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
/* /*
* Program TBA registers * Program TBA registers
*/ */
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO,
lower_32_bits(tba_addr >> 8)); lower_32_bits(tba_addr >> 8));
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI,
upper_32_bits(tba_addr >> 8)); upper_32_bits(tba_addr >> 8));
/* /*
* Program TMA registers * Program TMA registers
*/ */
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO,
lower_32_bits(tma_addr >> 8)); lower_32_bits(tma_addr >> 8));
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI,
upper_32_bits(tma_addr >> 8)); upper_32_bits(tma_addr >> 8));
unlock_srbm(adev); unlock_srbm(adev);