drm/radeon: use one VMID for each ring

Use multiple VMIDs for each VM, one for each ring. That allows
us to execute flushes separately on each ring. This is still not
ideal, because in a lot of cases rings could share IDs.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Christian König 2014-11-19 14:01:25 +01:00 committed by Alex Deucher
parent ad1a58a45a
commit 7c42bc1aa2
7 changed files with 68 additions and 48 deletions

View File

@ -4066,6 +4066,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{ {
struct radeon_ring *ring = &rdev->ring[ib->ring]; struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 header, control = INDIRECT_BUFFER_VALID; u32 header, control = INDIRECT_BUFFER_VALID;
if (ib->is_const_ib) { if (ib->is_const_ib) {
@ -4094,8 +4095,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
} }
control |= ib->length_dw | control |= ib->length_dw | (vm_id << 24);
(ib->vm ? (ib->vm->id << 24) : 0);
radeon_ring_write(ring, header); radeon_ring_write(ring, header);
radeon_ring_write(ring, radeon_ring_write(ring,

View File

@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib) struct radeon_ib *ib)
{ {
struct radeon_ring *ring = &rdev->ring[ib->ring]; struct radeon_ring *ring = &rdev->ring[ib->ring];
u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
if (rdev->wb.enabled) { if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 5; u32 next_rptr = ring->wptr + 5;

View File

@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{ {
struct radeon_ring *ring = &rdev->ring[ib->ring]; struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA; PACKET3_SH_ACTION_ENA;
@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
#endif #endif
(ib->gpu_addr & 0xFFFFFFFC)); (ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
radeon_ring_write(ring, ib->length_dw | radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
(ib->vm ? (ib->vm->id << 24) : 0));
/* flush read cache over gart for this vmid */ /* flush read cache over gart for this vmid */
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0xFFFFFFFF);
radeon_ring_write(ring, 0); radeon_ring_write(ring, 0);
radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */ radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
} }
static void cayman_cp_enable(struct radeon_device *rdev, bool enable) static void cayman_cp_enable(struct radeon_device *rdev, bool enable)

View File

@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib) struct radeon_ib *ib)
{ {
struct radeon_ring *ring = &rdev->ring[ib->ring]; struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
if (rdev->wb.enabled) { if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 4; u32 next_rptr = ring->wptr + 4;
@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
*/ */
while ((ring->wptr & 7) != 5) while ((ring->wptr & 7) != 5)
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

View File

@ -905,9 +905,17 @@ struct radeon_vm_pt {
uint64_t addr; uint64_t addr;
}; };
/*
 * Per-ring VMID bookkeeping for a VM. struct radeon_vm keeps one entry
 * per ring (ids[RADEON_NUM_RINGS]).
 */
struct radeon_vm_id {
/* VMID chosen by radeon_vm_grab_id(); initialised to 0 (VMID 0 is the system VM) */
unsigned id;
/* page directory address last flushed on this ring; ~0ll forces the next flush */
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
struct radeon_fence *flushed_updates;
/* last use of vmid */
struct radeon_fence *last_id_use;
};
struct radeon_vm { struct radeon_vm {
struct rb_root va; struct rb_root va;
unsigned id;
/* BOs moved, but not yet updated in the PT */ /* BOs moved, but not yet updated in the PT */
struct list_head invalidated; struct list_head invalidated;
@ -917,7 +925,6 @@ struct radeon_vm {
/* contains the page directory */ /* contains the page directory */
struct radeon_bo *page_directory; struct radeon_bo *page_directory;
uint64_t pd_gpu_addr;
unsigned max_pde_used; unsigned max_pde_used;
/* array of page tables, one for each page directory entry */ /* array of page tables, one for each page directory entry */
@ -928,10 +935,9 @@ struct radeon_vm {
struct mutex mutex; struct mutex mutex;
/* last fence for cs using this vm */ /* last fence for cs using this vm */
struct radeon_fence *fence; struct radeon_fence *fence;
/* last flushed PD/PT update */
struct radeon_fence *flushed_updates; /* for id and flush management per ring */
/* last use of vmid */ struct radeon_vm_id ids[RADEON_NUM_RINGS];
struct radeon_fence *last_id_use;
}; };
struct radeon_vm_manager { struct radeon_vm_manager {

View File

@ -182,15 +182,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring) struct radeon_vm *vm, int ring)
{ {
struct radeon_fence *best[RADEON_NUM_RINGS] = {}; struct radeon_fence *best[RADEON_NUM_RINGS] = {};
struct radeon_vm_id *vm_id = &vm->ids[ring];
unsigned choices[2] = {}; unsigned choices[2] = {};
unsigned i; unsigned i;
/* check if the id is still valid */ /* check if the id is still valid */
if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) if (vm_id->id && vm_id->last_id_use &&
vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
return NULL; return NULL;
/* we definately need to flush */ /* we definately need to flush */
vm->pd_gpu_addr = ~0ll; vm_id->pd_gpu_addr = ~0ll;
/* skip over VMID 0, since it is the system VM */ /* skip over VMID 0, since it is the system VM */
for (i = 1; i < rdev->vm_manager.nvm; ++i) { for (i = 1; i < rdev->vm_manager.nvm; ++i) {
@ -198,8 +201,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
if (fence == NULL) { if (fence == NULL) {
/* found a free one */ /* found a free one */
vm->id = i; vm_id->id = i;
trace_radeon_vm_grab_id(vm->id, ring); trace_radeon_vm_grab_id(i, ring);
return NULL; return NULL;
} }
@ -211,8 +214,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
for (i = 0; i < 2; ++i) { for (i = 0; i < 2; ++i) {
if (choices[i]) { if (choices[i]) {
vm->id = choices[i]; vm_id->id = choices[i];
trace_radeon_vm_grab_id(vm->id, ring); trace_radeon_vm_grab_id(choices[i], ring);
return rdev->vm_manager.active[choices[i]]; return rdev->vm_manager.active[choices[i]];
} }
} }
@ -239,16 +242,18 @@ void radeon_vm_flush(struct radeon_device *rdev,
int ring, struct radeon_fence *updates) int ring, struct radeon_fence *updates)
{ {
uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
struct radeon_vm_id *vm_id = &vm->ids[ring];
if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates || if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
radeon_fence_is_earlier(vm->flushed_updates, updates)) { radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
trace_radeon_vm_flush(pd_addr, ring, vm->id); trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
radeon_fence_unref(&vm->flushed_updates); radeon_fence_unref(&vm_id->flushed_updates);
vm->flushed_updates = radeon_fence_ref(updates); vm_id->flushed_updates = radeon_fence_ref(updates);
vm->pd_gpu_addr = pd_addr; vm_id->pd_gpu_addr = pd_addr;
radeon_ring_vm_flush(rdev, &rdev->ring[ring], radeon_ring_vm_flush(rdev, &rdev->ring[ring],
vm->id, vm->pd_gpu_addr); vm_id->id, vm_id->pd_gpu_addr);
} }
} }
@ -268,14 +273,16 @@ void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm, struct radeon_vm *vm,
struct radeon_fence *fence) struct radeon_fence *fence)
{ {
unsigned vm_id = vm->ids[fence->ring].id;
radeon_fence_unref(&vm->fence); radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(fence); vm->fence = radeon_fence_ref(fence);
radeon_fence_unref(&rdev->vm_manager.active[vm->id]); radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
radeon_fence_unref(&vm->last_id_use); radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
vm->last_id_use = radeon_fence_ref(fence); vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
} }
/** /**
@ -1120,13 +1127,16 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
RADEON_VM_PTE_COUNT * 8); RADEON_VM_PTE_COUNT * 8);
unsigned pd_size, pd_entries, pts_size; unsigned pd_size, pd_entries, pts_size;
int r; int i, r;
vm->id = 0;
vm->ib_bo_va = NULL; vm->ib_bo_va = NULL;
vm->fence = NULL; vm->fence = NULL;
vm->flushed_updates = NULL;
vm->last_id_use = NULL; for (i = 0; i < RADEON_NUM_RINGS; ++i) {
vm->ids[i].id = 0;
vm->ids[i].flushed_updates = NULL;
vm->ids[i].last_id_use = NULL;
}
mutex_init(&vm->mutex); mutex_init(&vm->mutex);
vm->va = RB_ROOT; vm->va = RB_ROOT;
INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->invalidated);
@ -1197,8 +1207,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
radeon_bo_unref(&vm->page_directory); radeon_bo_unref(&vm->page_directory);
radeon_fence_unref(&vm->fence); radeon_fence_unref(&vm->fence);
radeon_fence_unref(&vm->flushed_updates);
radeon_fence_unref(&vm->last_id_use); for (i = 0; i < RADEON_NUM_RINGS; ++i) {
radeon_fence_unref(&vm->ids[i].flushed_updates);
radeon_fence_unref(&vm->ids[i].last_id_use);
}
mutex_destroy(&vm->mutex); mutex_destroy(&vm->mutex);
} }

View File

@ -3362,6 +3362,7 @@ void si_fence_ring_emit(struct radeon_device *rdev,
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{ {
struct radeon_ring *ring = &rdev->ring[ib->ring]; struct radeon_ring *ring = &rdev->ring[ib->ring];
unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
u32 header; u32 header;
if (ib->is_const_ib) { if (ib->is_const_ib) {
@ -3397,14 +3398,13 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
#endif #endif
(ib->gpu_addr & 0xFFFFFFFC)); (ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
radeon_ring_write(ring, ib->length_dw | radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
(ib->vm ? (ib->vm->id << 24) : 0));
if (!ib->is_const_ib) { if (!ib->is_const_ib) {
/* flush read cache over gart for this vmid */ /* flush read cache over gart for this vmid */
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
radeon_ring_write(ring, ib->vm ? ib->vm->id : 0); radeon_ring_write(ring, vm_id);
radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA | radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
PACKET3_TC_ACTION_ENA | PACKET3_TC_ACTION_ENA |