drm/amdgpu: move static CSA address to top of address space v2
Move the CSA area to the top of the VA space to avoid clashing with
HMM/ATC in the lower range on GFX9.

v2: wrong sign noticed by Roger, rebase on CSA_VADDR cleanup,
    handle VA hole on GFX9 as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Monk Liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 6f05c4e9d1
parent c1f2fb6b63
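For orientation, here is a minimal standalone sketch of the arithmetic the new amdgpu_csa_vaddr() helper performs; the 48-bit VA size, hole boundaries and reserved-area size below are illustrative assumptions, not values taken from this patch:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel constants (assumed values). */
#define VA_HOLE_START  0x0000800000000000ULL  /* assumed start of the GFX9 VA hole */
#define VA_HOLE_END    0xffff800000000000ULL  /* assumed end of the GFX9 VA hole   */
#define VA_HOLE_MASK   0x0000ffffffffffffULL  /* assumed low-48-bit address mask   */
#define RESERVED_SIZE  (1ULL << 20)           /* assumed size of the reserved area */

int main(void)
{
        /* Top of an assumed 48-bit VA space: max_pfn << GPU page shift. */
        uint64_t addr = 1ULL << 48;

        /* Step back by the reserved area, as the helper does. */
        addr -= RESERVED_SIZE;

        /* Sign-extend into the upper half if the result lies above the hole. */
        if (addr >= VA_HOLE_START)
                addr |= VA_HOLE_END;

        printf("CSA address (canonical form):  0x%016llx\n",
               (unsigned long long)addr);
        printf("CSA address (page-table form): 0x%016llx\n",
               (unsigned long long)(addr & VA_HOLE_MASK));
        return 0;
}

The "& AMDGPU_VA_HOLE_MASK" added to amdgpu_map_static_csa() below is the other half of that round trip: it strips the sign-extension again before the address is handed to the page-table code.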
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -24,6 +24,18 @@
 #include "amdgpu.h"
 
 #define MAX_KIQ_REG_WAIT        100000000 /* in usecs */
 
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
+{
+        uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+
+        addr -= AMDGPU_VA_RESERVED_SIZE;
+
+        if (addr >= AMDGPU_VA_HOLE_START)
+                addr |= AMDGPU_VA_HOLE_END;
+
+        return addr;
+}
+
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
 {
         /* By now all MMIO pages except mailbox are blocked */
@@ -55,14 +67,14 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
 
 /*
  * amdgpu_map_static_csa should be called during amdgpu_vm_init
- * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE"
- * to this VM, and each command submission of GFX should use this virtual
- * address within META_DATA init package to support SRIOV gfx preemption.
+ * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
+ * submission of GFX should use this virtual address within META_DATA init
+ * package to support SRIOV gfx preemption.
  */
 
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                           struct amdgpu_bo_va **bo_va)
 {
+        uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
         struct ww_acquire_ctx ticket;
         struct list_head list;
         struct amdgpu_bo_list_entry pd;
@@ -90,7 +102,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                 return -ENOMEM;
         }
 
-        r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
+        r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
                                 AMDGPU_CSA_SIZE);
         if (r) {
                 DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
@@ -99,7 +111,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                 return r;
         }
 
-        r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
+        r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
                              AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
                              AMDGPU_PTE_EXECUTABLE);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -251,8 +251,7 @@ struct amdgpu_virt {
         uint32_t gim_feature;
 };
 
 #define AMDGPU_CSA_SIZE    (8 * 1024)
-#define AMDGPU_CSA_VADDR   (AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE)
 
 #define amdgpu_sriov_enabled(adev) \
 ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@@ -279,6 +278,8 @@ static inline bool is_virtual_machine(void)
 }
 
 struct amdgpu_vm;
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7132,11 +7132,11 @@ static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
         } ce_payload = {};
 
         if (ring->adev->virt.chained_ib_support) {
-                ce_payload_addr = AMDGPU_CSA_VADDR +
+                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
         } else {
-                ce_payload_addr = AMDGPU_CSA_VADDR +
+                ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
                         offsetof(struct vi_gfx_meta_data, ce_payload);
                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
         }
@@ -7160,7 +7160,7 @@ static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
                 struct vi_de_ib_state_chained_ib chained;
         } de_payload = {};
 
-        csa_addr = AMDGPU_CSA_VADDR;
+        csa_addr = amdgpu_csa_vaddr(ring->adev);
         gds_addr = csa_addr + 4096;
         if (ring->adev->virt.chained_ib_support) {
                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3865,7 +3865,7 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
         int cnt;
 
         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-        csa_addr = AMDGPU_CSA_VADDR;
+        csa_addr = amdgpu_csa_vaddr(ring->adev);
 
         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -3883,7 +3883,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
         uint64_t csa_addr, gds_addr;
         int cnt;
 
-        csa_addr = AMDGPU_CSA_VADDR;
+        csa_addr = amdgpu_csa_vaddr(ring->adev);
         gds_addr = csa_addr + 4096;
         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);