drm/amdkfd: Add TC flush on VMID deallocation for Hawaii

On GFX7 the CP does not perform a TC flush when queues are unmapped.
To avoid TC eviction from accessing an invalid VMID, flush it
explicitly before releasing a VMID.

v2: Fix unnecessary list_for_each_entry_safe
v3: Moved allocation to kfd_process_device_init_vm

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
This commit is contained in:
Felix Kuehling 2018-03-15 17:27:50 -04:00 committed by Oded Gabbay
parent f35751b870
commit 552764b680
4 changed files with 95 additions and 1 deletions

View File

@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
return 0;
}
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
struct qcm_process_device *qpd)
{
uint32_t len;
if (!qpd->ib_kaddr)
return -ENOMEM;
len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
}
static void deallocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
/* On GFX v7, CP doesn't flush TC at dequeue */
if (q->device->device_info->asic_family == CHIP_HAWAII)
if (flush_texture_cache_nocpsch(q->device, qpd))
pr_err("Failed to flush TC\n");
kfd_flush_tlb(qpd_to_pdd(qpd));
/* Release the vmid mapping */
@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
init_interrupts(dqm);
return 0;
return pm_init(&dqm->packets, dqm);
}
static int stop_nocpsch(struct device_queue_manager *dqm)
{
pm_uninit(&dqm->packets);
return 0;
}

View File

@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
}
/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
* of this packet
* @gpu_addr - GPU address of the packet. It's a virtual address.
* @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
* Return - length of the packet
*/
uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
{
struct pm4_mec_release_mem *packet;
WARN_ON(!buffer);
packet = (struct pm4_mec_release_mem *)buffer;
memset(buffer, 0, sizeof(*packet));
packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
sizeof(*packet));
packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
packet->bitfields2.tcl1_action_ena = 1;
packet->bitfields2.tc_action_ena = 1;
packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
packet->bitfields2.atc = 0;
packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
packet->bitfields3.int_sel =
int_sel___release_mem__send_interrupt_after_write_confirm;
packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
packet->address_hi = upper_32_bits(gpu_addr);
packet->data_lo = 0;
return sizeof(*packet) / sizeof(unsigned int);
}
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
pm->dqm = dqm;

View File

@ -494,6 +494,7 @@ struct qcm_process_device {
/* IB memory */
uint64_t ib_base;
void *ib_kaddr;
};
/* KFD Memory Eviction */
@ -834,6 +835,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
void pm_release_ib(struct packet_manager *pm);
uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */

View File

@ -149,6 +149,36 @@ err_alloc_mem:
return err;
}
/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
* process for IB usage The memory reserved is for KFD to submit
* IB to AMDGPU from kernel. If the memory is reserved
* successfully, ib_kaddr will have the CPU/kernel
* address. Check ib_kaddr before accessing the memory.
*/
static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
{
struct qcm_process_device *qpd = &pdd->qpd;
uint32_t flags = ALLOC_MEM_FLAGS_GTT |
ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
ALLOC_MEM_FLAGS_WRITABLE |
ALLOC_MEM_FLAGS_EXECUTABLE;
void *kaddr;
int ret;
if (qpd->ib_kaddr || !qpd->ib_base)
return 0;
/* ib_base is only set for dGPU */
ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
&kaddr);
if (ret)
return ret;
qpd->ib_kaddr = kaddr;
return 0;
}
struct kfd_process *kfd_create_process(struct file *filep)
{
struct kfd_process *process;
@ -610,6 +640,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
return ret;
}
ret = kfd_process_device_reserve_ib_mem(pdd);
if (ret)
goto err_reserve_ib_mem;
ret = kfd_process_device_init_cwsr_dgpu(pdd);
if (ret)
goto err_init_cwsr;
@ -619,6 +652,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
return 0;
err_init_cwsr:
err_reserve_ib_mem:
kfd_process_device_free_bos(pdd);
if (!drm_file)
dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);