drm/amdkfd: Add TC flush on VMID deallocation for Hawaii
On GFX7 the CP does not perform a TC flush when queues are unmapped. To
avoid TC eviction from accessing an invalid VMID, flush it explicitly
before releasing a VMID.

v2: Fix unnecessary list_for_each_entry_safe
v3: Moved allocation to kfd_process_device_init_vm

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
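In short, the fix changes the VMID teardown order on Hawaii. A condensed sketch of the deallocate_vmid() path from the first hunk below (not a literal copy; the surrounding VMID bookkeeping is elided):

	/* On GFX v7 the CP doesn't flush TC at dequeue, so do it by hand:
	 * build and submit a RELEASE_MEM IB that invalidates TC/TCL1,
	 * then flush the TLB, then release the VMID mapping. */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* ... release the VMID mapping (unchanged) ... */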
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	return 0;
 }
 
+static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
+				struct qcm_process_device *qpd)
+{
+	uint32_t len;
+
+	if (!qpd->ib_kaddr)
+		return -ENOMEM;
+
+	len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+
+	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+				qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+}
+
 static void deallocate_vmid(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
 	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
 
+	/* On GFX v7, CP doesn't flush TC at dequeue */
+	if (q->device->device_info->asic_family == CHIP_HAWAII)
+		if (flush_texture_cache_nocpsch(q->device, qpd))
+			pr_err("Failed to flush TC\n");
+
 	kfd_flush_tlb(qpd_to_pdd(qpd));
 
 	/* Release the vmid mapping */
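Two details worth noting in flush_texture_cache_nocpsch(): the -ENOMEM guard fires when the per-process IB page was never mapped (qpd->ib_kaddr is only set by kfd_process_device_reserve_ib_mem(), added further down), and len is a dword count, matching what pm_create_release_mem() returns.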
@@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm)
 static int start_nocpsch(struct device_queue_manager *dqm)
 {
 	init_interrupts(dqm);
-	return 0;
+	return pm_init(&dqm->packets, dqm);
 }
 
 static int stop_nocpsch(struct device_queue_manager *dqm)
 {
+	pm_uninit(&dqm->packets);
 	return 0;
 }
 
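The nocpsch (no hardware-scheduler) path previously never touched the packet manager. It now initializes it in start_nocpsch() and tears it down in stop_nocpsch(), so the packet-manager code backing the TC flush above is available in this mode as well.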
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 	return retval;
 }
 
+/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
+ * of this packet
+ * @gpu_addr - GPU address of the packet. It's a virtual address.
+ * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
+ * Return - length of the packet
+ */
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
+{
+	struct pm4_mec_release_mem *packet;
+
+	WARN_ON(!buffer);
+
+	packet = (struct pm4_mec_release_mem *)buffer;
+	memset(buffer, 0, sizeof(*packet));
+
+	packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
+						 sizeof(*packet));
+
+	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+	packet->bitfields2.tcl1_action_ena = 1;
+	packet->bitfields2.tc_action_ena = 1;
+	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+	packet->bitfields2.atc = 0;
+
+	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+	packet->bitfields3.int_sel =
+		int_sel___release_mem__send_interrupt_after_write_confirm;
+
+	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+	packet->address_hi = upper_32_bits(gpu_addr);
+
+	packet->data_lo = 0;
+
+	return sizeof(*packet) / sizeof(unsigned int);
+}
+
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 {
 	pm->dqm = dqm;
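Two encoding details in pm_create_release_mem(): RELEASE_MEM carries a dword-aligned write-back address, so the low 32 bits are shifted right by two before being packed into address_lo_32b, and the return value is the packet size in dwords (sizeof(*packet) / sizeof(unsigned int)). A worked example with a hypothetical GPU VA:

	/* Hypothetical address, for illustration only */
	uint64_t gpu_addr = 0x100000001000ULL;

	uint32_t lo = (gpu_addr & 0xffffffff) >> 2;	/* 0x00000400: bits [31:2] */
	uint32_t hi = upper_32_bits(gpu_addr);		/* 0x00001000 */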
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -494,6 +494,7 @@ struct qcm_process_device {
 
 	/* IB memory */
 	uint64_t ib_base;
+	void *ib_kaddr;
 };
 
 /* KFD Memory Eviction */
@@ -834,6 +835,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
 void pm_release_ib(struct packet_manager *pm);
 
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 
 /* Events */
drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -149,6 +149,36 @@ err_alloc_mem:
 	return err;
 }
 
+/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
+ * process for IB usage. The memory reserved is for KFD to submit
+ * IB to AMDGPU from kernel. If the memory is reserved
+ * successfully, ib_kaddr will have the CPU/kernel
+ * address. Check ib_kaddr before accessing the memory.
+ */
+static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
+{
+	struct qcm_process_device *qpd = &pdd->qpd;
+	uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+			 ALLOC_MEM_FLAGS_WRITABLE |
+			 ALLOC_MEM_FLAGS_EXECUTABLE;
+	void *kaddr;
+	int ret;
+
+	if (qpd->ib_kaddr || !qpd->ib_base)
+		return 0;
+
+	/* ib_base is only set for dGPU */
+	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
+				      &kaddr);
+	if (ret)
+		return ret;
+
+	qpd->ib_kaddr = kaddr;
+
+	return 0;
+}
+
 struct kfd_process *kfd_create_process(struct file *filep)
 {
 	struct kfd_process *process;
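The reservation is a single GTT page mapped at qpd->ib_base (set only for dGPUs, per the comment), writable so the kernel can fill in packets through ib_kaddr and executable so the CP can fetch the IB from it. One page is ample for the single RELEASE_MEM packet built today; an illustrative compile-time check (not part of the patch) could sit in flush_texture_cache_nocpsch():

	/* Illustrative only: the reserved IB page must hold the largest
	 * packet written into it, currently one RELEASE_MEM. */
	BUILD_BUG_ON(sizeof(struct pm4_mec_release_mem) > PAGE_SIZE);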
@@ -610,6 +640,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 		return ret;
 	}
 
+	ret = kfd_process_device_reserve_ib_mem(pdd);
+	if (ret)
+		goto err_reserve_ib_mem;
 	ret = kfd_process_device_init_cwsr_dgpu(pdd);
 	if (ret)
 		goto err_init_cwsr;
@@ -619,6 +652,7 @@
 	return 0;
 
 err_init_cwsr:
+err_reserve_ib_mem:
 	kfd_process_device_free_bos(pdd);
 	if (!drm_file)
 		dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
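Note that err_reserve_ib_mem shares the existing cleanup with err_init_cwsr: kfd_process_device_free_bos() tears down all BOs attached to the device, which covers the IB page if it was reserved before the failure.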