drm/radeon: use one VMID for each ring
Use multiple VMIDs for each VM, one for each ring. That allows us to execute flushes separately on each ring. This is still not ideal, because in a lot of cases rings can share IDs. Signed-off-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
ad1a58a45a
commit
7c42bc1aa2
|
@ -4066,6 +4066,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
|
|||
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
|
||||
{
|
||||
struct radeon_ring *ring = &rdev->ring[ib->ring];
|
||||
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
|
||||
u32 header, control = INDIRECT_BUFFER_VALID;
|
||||
|
||||
if (ib->is_const_ib) {
|
||||
|
@ -4094,8 +4095,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
|
|||
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
|
||||
}
|
||||
|
||||
control |= ib->length_dw |
|
||||
(ib->vm ? (ib->vm->id << 24) : 0);
|
||||
control |= ib->length_dw | (vm_id << 24);
|
||||
|
||||
radeon_ring_write(ring, header);
|
||||
radeon_ring_write(ring,
|
||||
|
|
|
@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
|
|||
struct radeon_ib *ib)
|
||||
{
|
||||
struct radeon_ring *ring = &rdev->ring[ib->ring];
|
||||
u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
|
||||
u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
|
||||
|
||||
if (rdev->wb.enabled) {
|
||||
u32 next_rptr = ring->wptr + 5;
|
||||
|
|
|
@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
|
|||
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
|
||||
{
|
||||
struct radeon_ring *ring = &rdev->ring[ib->ring];
|
||||
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
|
||||
u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
|
||||
PACKET3_SH_ACTION_ENA;
|
||||
|
||||
|
@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
|
|||
#endif
|
||||
(ib->gpu_addr & 0xFFFFFFFC));
|
||||
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
|
||||
radeon_ring_write(ring, ib->length_dw |
|
||||
(ib->vm ? (ib->vm->id << 24) : 0));
|
||||
radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
|
||||
|
||||
/* flush read cache over gart for this vmid */
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
|
||||
radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
|
||||
radeon_ring_write(ring, 0xFFFFFFFF);
|
||||
radeon_ring_write(ring, 0);
|
||||
radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */
|
||||
radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
|
||||
}
|
||||
|
||||
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
|
||||
|
|
|
@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
|
|||
struct radeon_ib *ib)
|
||||
{
|
||||
struct radeon_ring *ring = &rdev->ring[ib->ring];
|
||||
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
|
||||
|
||||
if (rdev->wb.enabled) {
|
||||
u32 next_rptr = ring->wptr + 4;
|
||||
|
@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
|
|||
*/
|
||||
while ((ring->wptr & 7) != 5)
|
||||
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
|
||||
radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
|
||||
radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
|
||||
radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
|
||||
radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
|
||||
|
||||
|
|
|
@ -905,33 +905,39 @@ struct radeon_vm_pt {
|
|||
uint64_t addr;
|
||||
};
|
||||
|
||||
struct radeon_vm_id {
|
||||
unsigned id;
|
||||
uint64_t pd_gpu_addr;
|
||||
/* last flushed PD/PT update */
|
||||
struct radeon_fence *flushed_updates;
|
||||
/* last use of vmid */
|
||||
struct radeon_fence *last_id_use;
|
||||
};
|
||||
|
||||
struct radeon_vm {
|
||||
struct rb_root va;
|
||||
unsigned id;
|
||||
struct rb_root va;
|
||||
|
||||
/* BOs moved, but not yet updated in the PT */
|
||||
struct list_head invalidated;
|
||||
struct list_head invalidated;
|
||||
|
||||
/* BOs freed, but not yet updated in the PT */
|
||||
struct list_head freed;
|
||||
struct list_head freed;
|
||||
|
||||
/* contains the page directory */
|
||||
struct radeon_bo *page_directory;
|
||||
uint64_t pd_gpu_addr;
|
||||
unsigned max_pde_used;
|
||||
struct radeon_bo *page_directory;
|
||||
unsigned max_pde_used;
|
||||
|
||||
/* array of page tables, one for each page directory entry */
|
||||
struct radeon_vm_pt *page_tables;
|
||||
struct radeon_vm_pt *page_tables;
|
||||
|
||||
struct radeon_bo_va *ib_bo_va;
|
||||
struct radeon_bo_va *ib_bo_va;
|
||||
|
||||
struct mutex mutex;
|
||||
struct mutex mutex;
|
||||
/* last fence for cs using this vm */
|
||||
struct radeon_fence *fence;
|
||||
/* last flushed PD/PT update */
|
||||
struct radeon_fence *flushed_updates;
|
||||
/* last use of vmid */
|
||||
struct radeon_fence *last_id_use;
|
||||
struct radeon_fence *fence;
|
||||
|
||||
/* for id and flush management per ring */
|
||||
struct radeon_vm_id ids[RADEON_NUM_RINGS];
|
||||
};
|
||||
|
||||
struct radeon_vm_manager {
|
||||
|
|
|
@ -182,15 +182,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
|
|||
struct radeon_vm *vm, int ring)
|
||||
{
|
||||
struct radeon_fence *best[RADEON_NUM_RINGS] = {};
|
||||
struct radeon_vm_id *vm_id = &vm->ids[ring];
|
||||
|
||||
unsigned choices[2] = {};
|
||||
unsigned i;
|
||||
|
||||
/* check if the id is still valid */
|
||||
if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
|
||||
if (vm_id->id && vm_id->last_id_use &&
|
||||
vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
|
||||
return NULL;
|
||||
|
||||
/* we definitely need to flush */
|
||||
vm->pd_gpu_addr = ~0ll;
|
||||
vm_id->pd_gpu_addr = ~0ll;
|
||||
|
||||
/* skip over VMID 0, since it is the system VM */
|
||||
for (i = 1; i < rdev->vm_manager.nvm; ++i) {
|
||||
|
@ -198,8 +201,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
|
|||
|
||||
if (fence == NULL) {
|
||||
/* found a free one */
|
||||
vm->id = i;
|
||||
trace_radeon_vm_grab_id(vm->id, ring);
|
||||
vm_id->id = i;
|
||||
trace_radeon_vm_grab_id(i, ring);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -211,8 +214,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
|
|||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (choices[i]) {
|
||||
vm->id = choices[i];
|
||||
trace_radeon_vm_grab_id(vm->id, ring);
|
||||
vm_id->id = choices[i];
|
||||
trace_radeon_vm_grab_id(choices[i], ring);
|
||||
return rdev->vm_manager.active[choices[i]];
|
||||
}
|
||||
}
|
||||
|
@ -239,16 +242,18 @@ void radeon_vm_flush(struct radeon_device *rdev,
|
|||
int ring, struct radeon_fence *updates)
|
||||
{
|
||||
uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
|
||||
struct radeon_vm_id *vm_id = &vm->ids[ring];
|
||||
|
||||
if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates ||
|
||||
radeon_fence_is_earlier(vm->flushed_updates, updates)) {
|
||||
if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
|
||||
radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
|
||||
|
||||
trace_radeon_vm_flush(pd_addr, ring, vm->id);
|
||||
radeon_fence_unref(&vm->flushed_updates);
|
||||
vm->flushed_updates = radeon_fence_ref(updates);
|
||||
vm->pd_gpu_addr = pd_addr;
|
||||
trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
|
||||
radeon_fence_unref(&vm_id->flushed_updates);
|
||||
vm_id->flushed_updates = radeon_fence_ref(updates);
|
||||
vm_id->pd_gpu_addr = pd_addr;
|
||||
radeon_ring_vm_flush(rdev, &rdev->ring[ring],
|
||||
vm->id, vm->pd_gpu_addr);
|
||||
vm_id->id, vm_id->pd_gpu_addr);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -268,14 +273,16 @@ void radeon_vm_fence(struct radeon_device *rdev,
|
|||
struct radeon_vm *vm,
|
||||
struct radeon_fence *fence)
|
||||
{
|
||||
unsigned vm_id = vm->ids[fence->ring].id;
|
||||
|
||||
radeon_fence_unref(&vm->fence);
|
||||
vm->fence = radeon_fence_ref(fence);
|
||||
|
||||
radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
|
||||
rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
|
||||
radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
|
||||
rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
|
||||
|
||||
radeon_fence_unref(&vm->last_id_use);
|
||||
vm->last_id_use = radeon_fence_ref(fence);
|
||||
radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
|
||||
vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1120,13 +1127,16 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
|
|||
const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
|
||||
RADEON_VM_PTE_COUNT * 8);
|
||||
unsigned pd_size, pd_entries, pts_size;
|
||||
int r;
|
||||
int i, r;
|
||||
|
||||
vm->id = 0;
|
||||
vm->ib_bo_va = NULL;
|
||||
vm->fence = NULL;
|
||||
vm->flushed_updates = NULL;
|
||||
vm->last_id_use = NULL;
|
||||
|
||||
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
|
||||
vm->ids[i].id = 0;
|
||||
vm->ids[i].flushed_updates = NULL;
|
||||
vm->ids[i].last_id_use = NULL;
|
||||
}
|
||||
mutex_init(&vm->mutex);
|
||||
vm->va = RB_ROOT;
|
||||
INIT_LIST_HEAD(&vm->invalidated);
|
||||
|
@ -1197,8 +1207,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
|
|||
radeon_bo_unref(&vm->page_directory);
|
||||
|
||||
radeon_fence_unref(&vm->fence);
|
||||
radeon_fence_unref(&vm->flushed_updates);
|
||||
radeon_fence_unref(&vm->last_id_use);
|
||||
|
||||
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
|
||||
radeon_fence_unref(&vm->ids[i].flushed_updates);
|
||||
radeon_fence_unref(&vm->ids[i].last_id_use);
|
||||
}
|
||||
|
||||
mutex_destroy(&vm->mutex);
|
||||
}
|
||||
|
|
|
@ -3362,6 +3362,7 @@ void si_fence_ring_emit(struct radeon_device *rdev,
|
|||
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
|
||||
{
|
||||
struct radeon_ring *ring = &rdev->ring[ib->ring];
|
||||
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
|
||||
u32 header;
|
||||
|
||||
if (ib->is_const_ib) {
|
||||
|
@ -3397,14 +3398,13 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
|
|||
#endif
|
||||
(ib->gpu_addr & 0xFFFFFFFC));
|
||||
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
|
||||
radeon_ring_write(ring, ib->length_dw |
|
||||
(ib->vm ? (ib->vm->id << 24) : 0));
|
||||
radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
|
||||
|
||||
if (!ib->is_const_ib) {
|
||||
/* flush read cache over gart for this vmid */
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
|
||||
radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
|
||||
radeon_ring_write(ring, vm_id);
|
||||
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
|
||||
radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
|
||||
PACKET3_TC_ACTION_ENA |
|
||||
|
|
Loading…
Reference in New Issue