drm/amdgpu: fix VM faults caused by vm_grab_id() v4
The owner must be per ring as long as we don't support sharing VMIDs per process. Also move the assigned VMID and page directory address into the IB structure. v3: assign the VMID to all IBs, not just the first one. v4: use correct pointer for owner Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
ce22c4bfdf
commit
4ff37a83f1
|
@ -769,8 +769,9 @@ struct amdgpu_ib {
|
|||
uint32_t *ptr;
|
||||
struct amdgpu_fence *fence;
|
||||
struct amdgpu_user_fence *user;
|
||||
bool grabbed_vmid;
|
||||
struct amdgpu_vm *vm;
|
||||
unsigned vm_id;
|
||||
uint64_t vm_pd_addr;
|
||||
struct amdgpu_ctx *ctx;
|
||||
uint32_t gds_base, gds_size;
|
||||
uint32_t gws_base, gws_size;
|
||||
|
@ -877,10 +878,10 @@ struct amdgpu_vm_pt {
|
|||
};
|
||||
|
||||
struct amdgpu_vm_id {
|
||||
unsigned id;
|
||||
uint64_t pd_gpu_addr;
|
||||
struct amdgpu_vm_manager_id *mgr_id;
|
||||
uint64_t pd_gpu_addr;
|
||||
/* last flushed PD/PT update */
|
||||
struct fence *flushed_updates;
|
||||
struct fence *flushed_updates;
|
||||
};
|
||||
|
||||
struct amdgpu_vm {
|
||||
|
@ -954,10 +955,11 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
|
|||
void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm);
|
||||
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
||||
struct amdgpu_sync *sync, struct fence *fence);
|
||||
struct amdgpu_sync *sync, struct fence *fence,
|
||||
unsigned *vm_id, uint64_t *vm_pd_addr);
|
||||
void amdgpu_vm_flush(struct amdgpu_ring *ring,
|
||||
struct amdgpu_vm *vm,
|
||||
struct fence *updates);
|
||||
unsigned vmid,
|
||||
uint64_t pd_addr);
|
||||
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
|
||||
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm);
|
||||
|
|
|
@ -75,6 +75,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
}
|
||||
|
||||
ib->vm = vm;
|
||||
ib->vm_id = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -139,7 +140,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (vm && !ibs->grabbed_vmid) {
|
||||
if (vm && !ibs->vm_id) {
|
||||
dev_err(adev->dev, "VM IB without ID\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -152,10 +153,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||
|
||||
if (vm) {
|
||||
/* do context switch */
|
||||
amdgpu_vm_flush(ring, vm, last_vm_update);
|
||||
amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr);
|
||||
|
||||
if (ring->funcs->emit_gds_switch)
|
||||
amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
|
||||
amdgpu_ring_emit_gds_switch(ring, ib->vm_id,
|
||||
ib->gds_base, ib->gds_size,
|
||||
ib->gws_base, ib->gws_size,
|
||||
ib->oa_base, ib->oa_size);
|
||||
|
|
|
@ -105,16 +105,23 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
|
|||
|
||||
struct fence *fence = amdgpu_sync_get_fence(&job->sync);
|
||||
|
||||
if (fence == NULL && vm && !job->ibs->grabbed_vmid) {
|
||||
if (fence == NULL && vm && !job->ibs->vm_id) {
|
||||
struct amdgpu_ring *ring = job->ring;
|
||||
unsigned i, vm_id;
|
||||
uint64_t vm_pd_addr;
|
||||
int r;
|
||||
|
||||
r = amdgpu_vm_grab_id(vm, ring, &job->sync,
|
||||
&job->base.s_fence->base);
|
||||
&job->base.s_fence->base,
|
||||
&vm_id, &vm_pd_addr);
|
||||
if (r)
|
||||
DRM_ERROR("Error getting VM ID (%d)\n", r);
|
||||
else
|
||||
job->ibs->grabbed_vmid = true;
|
||||
else {
|
||||
for (i = 0; i < job->num_ibs; ++i) {
|
||||
job->ibs[i].vm_id = vm_id;
|
||||
job->ibs[i].vm_pd_addr = vm_pd_addr;
|
||||
}
|
||||
}
|
||||
|
||||
fence = amdgpu_sync_get_fence(&job->sync);
|
||||
}
|
||||
|
|
|
@ -50,6 +50,9 @@
|
|||
* SI supports 16.
|
||||
*/
|
||||
|
||||
/* Special value that no flush is necessary */
|
||||
#define AMDGPU_VM_NO_FLUSH (~0ll)
|
||||
|
||||
/**
|
||||
* amdgpu_vm_num_pde - return the number of page directory entries
|
||||
*
|
||||
|
@ -157,50 +160,69 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
|
|||
* Allocate an id for the vm, adding fences to the sync obj as necessary.
|
||||
*/
|
||||
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
||||
struct amdgpu_sync *sync, struct fence *fence)
|
||||
struct amdgpu_sync *sync, struct fence *fence,
|
||||
unsigned *vm_id, uint64_t *vm_pd_addr)
|
||||
{
|
||||
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
|
||||
uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
struct amdgpu_vm_manager_id *id;
|
||||
struct amdgpu_vm_id *id = &vm->ids[ring->idx];
|
||||
struct fence *updates = sync->last_vm_update;
|
||||
int r;
|
||||
|
||||
mutex_lock(&adev->vm_manager.lock);
|
||||
|
||||
/* check if the id is still valid */
|
||||
if (vm_id->id) {
|
||||
if (id->mgr_id) {
|
||||
struct fence *flushed = id->flushed_updates;
|
||||
bool is_later;
|
||||
long owner;
|
||||
|
||||
id = &adev->vm_manager.ids[vm_id->id];
|
||||
owner = atomic_long_read(&id->owner);
|
||||
if (owner == (long)vm) {
|
||||
list_move_tail(&id->list, &adev->vm_manager.ids_lru);
|
||||
trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);
|
||||
if (!flushed)
|
||||
is_later = true;
|
||||
else if (!updates)
|
||||
is_later = false;
|
||||
else
|
||||
is_later = fence_is_later(updates, flushed);
|
||||
|
||||
fence_put(id->active);
|
||||
id->active = fence_get(fence);
|
||||
owner = atomic_long_read(&id->mgr_id->owner);
|
||||
if (!is_later && owner == (long)id &&
|
||||
pd_addr == id->pd_gpu_addr) {
|
||||
|
||||
fence_put(id->mgr_id->active);
|
||||
id->mgr_id->active = fence_get(fence);
|
||||
|
||||
list_move_tail(&id->mgr_id->list,
|
||||
&adev->vm_manager.ids_lru);
|
||||
|
||||
*vm_id = id->mgr_id - adev->vm_manager.ids;
|
||||
*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
|
||||
trace_amdgpu_vm_grab_id(vm, *vm_id, ring->idx);
|
||||
|
||||
mutex_unlock(&adev->vm_manager.lock);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* we definately need to flush */
|
||||
vm_id->pd_gpu_addr = ~0ll;
|
||||
|
||||
id = list_first_entry(&adev->vm_manager.ids_lru,
|
||||
struct amdgpu_vm_manager_id,
|
||||
list);
|
||||
list_move_tail(&id->list, &adev->vm_manager.ids_lru);
|
||||
atomic_long_set(&id->owner, (long)vm);
|
||||
|
||||
vm_id->id = id - adev->vm_manager.ids;
|
||||
trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);
|
||||
|
||||
r = amdgpu_sync_fence(ring->adev, sync, id->active);
|
||||
id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
|
||||
struct amdgpu_vm_manager_id,
|
||||
list);
|
||||
|
||||
r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
|
||||
if (!r) {
|
||||
fence_put(id->active);
|
||||
id->active = fence_get(fence);
|
||||
fence_put(id->mgr_id->active);
|
||||
id->mgr_id->active = fence_get(fence);
|
||||
|
||||
fence_put(id->flushed_updates);
|
||||
id->flushed_updates = fence_get(updates);
|
||||
|
||||
id->pd_gpu_addr = pd_addr;
|
||||
|
||||
list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
|
||||
atomic_long_set(&id->mgr_id->owner, (long)id);
|
||||
|
||||
*vm_id = id->mgr_id - adev->vm_manager.ids;
|
||||
*vm_pd_addr = pd_addr;
|
||||
trace_amdgpu_vm_grab_id(vm, *vm_id, ring->idx);
|
||||
}
|
||||
|
||||
mutex_unlock(&adev->vm_manager.lock);
|
||||
|
@ -211,35 +233,18 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
|||
* amdgpu_vm_flush - hardware flush the vm
|
||||
*
|
||||
* @ring: ring to use for flush
|
||||
* @vm: vm we want to flush
|
||||
* @updates: last vm update that we waited for
|
||||
* @vmid: vmid number to use
|
||||
* @pd_addr: address of the page directory
|
||||
*
|
||||
* Flush the vm.
|
||||
* Emit a VM flush when it is necessary.
|
||||
*/
|
||||
void amdgpu_vm_flush(struct amdgpu_ring *ring,
|
||||
struct amdgpu_vm *vm,
|
||||
struct fence *updates)
|
||||
unsigned vmid,
|
||||
uint64_t pd_addr)
|
||||
{
|
||||
uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
|
||||
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
|
||||
struct fence *flushed_updates = vm_id->flushed_updates;
|
||||
bool is_later;
|
||||
|
||||
if (!flushed_updates)
|
||||
is_later = true;
|
||||
else if (!updates)
|
||||
is_later = false;
|
||||
else
|
||||
is_later = fence_is_later(updates, flushed_updates);
|
||||
|
||||
if (pd_addr != vm_id->pd_gpu_addr || is_later) {
|
||||
trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
|
||||
if (is_later) {
|
||||
vm_id->flushed_updates = fence_get(updates);
|
||||
fence_put(flushed_updates);
|
||||
}
|
||||
vm_id->pd_gpu_addr = pd_addr;
|
||||
amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
|
||||
if (pd_addr != AMDGPU_VM_NO_FLUSH) {
|
||||
trace_amdgpu_vm_flush(pd_addr, ring->idx, vmid);
|
||||
amdgpu_ring_emit_vm_flush(ring, vmid, pd_addr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1284,7 +1289,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
|||
int i, r;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
vm->ids[i].id = 0;
|
||||
vm->ids[i].mgr_id = NULL;
|
||||
vm->ids[i].flushed_updates = NULL;
|
||||
}
|
||||
vm->va = RB_ROOT;
|
||||
|
@ -1381,13 +1386,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
|||
amdgpu_bo_unref(&vm->page_directory);
|
||||
fence_put(vm->page_directory_fence);
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
unsigned id = vm->ids[i].id;
|
||||
struct amdgpu_vm_id *id = &vm->ids[i];
|
||||
|
||||
atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
|
||||
(long)vm, 0);
|
||||
fence_put(vm->ids[i].flushed_updates);
|
||||
if (id->mgr_id)
|
||||
atomic_long_cmpxchg(&id->mgr_id->owner,
|
||||
(long)id, 0);
|
||||
fence_put(id->flushed_updates);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -212,7 +212,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
|||
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib)
|
||||
{
|
||||
u32 extra_bits = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
|
||||
u32 extra_bits = ib->vm_id & 0xf;
|
||||
u32 next_rptr = ring->wptr + 5;
|
||||
|
||||
while ((next_rptr & 7) != 4)
|
||||
|
|
|
@ -2043,8 +2043,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
|||
else
|
||||
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
|
||||
|
||||
control |= ib->length_dw |
|
||||
(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
|
||||
control |= ib->length_dw | (ib->vm_id << 24);
|
||||
|
||||
amdgpu_ring_write(ring, header);
|
||||
amdgpu_ring_write(ring,
|
||||
|
@ -2072,8 +2071,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
|||
|
||||
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
|
||||
|
||||
control |= ib->length_dw |
|
||||
(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
|
||||
control |= ib->length_dw | (ib->vm_id << 24);
|
||||
|
||||
amdgpu_ring_write(ring, header);
|
||||
amdgpu_ring_write(ring,
|
||||
|
|
|
@ -4619,8 +4619,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
|
|||
else
|
||||
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
|
||||
|
||||
control |= ib->length_dw |
|
||||
(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
|
||||
control |= ib->length_dw | (ib->vm_id << 24);
|
||||
|
||||
amdgpu_ring_write(ring, header);
|
||||
amdgpu_ring_write(ring,
|
||||
|
@ -4649,8 +4648,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
|
|||
|
||||
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
|
||||
|
||||
control |= ib->length_dw |
|
||||
(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
|
||||
control |= ib->length_dw | (ib->vm_id << 24);
|
||||
|
||||
amdgpu_ring_write(ring, header);
|
||||
amdgpu_ring_write(ring,
|
||||
|
|
|
@ -244,7 +244,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
|||
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib)
|
||||
{
|
||||
u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
|
||||
u32 vmid = ib->vm_id & 0xf;
|
||||
u32 next_rptr = ring->wptr + 5;
|
||||
|
||||
while ((next_rptr & 7) != 2)
|
||||
|
|
|
@ -355,7 +355,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
|||
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_ib *ib)
|
||||
{
|
||||
u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
|
||||
u32 vmid = ib->vm_id & 0xf;
|
||||
u32 next_rptr = ring->wptr + 5;
|
||||
|
||||
while ((next_rptr & 7) != 2)
|
||||
|
|
Loading…
Reference in New Issue