drm/amdgpu: enable huge page handling in the VM v5

The hardware can use huge pages to map 2MB of address space with only one PDE.

v2: few cleanups and rebased
v3: skip PT updates if we are using the PDE
v4: rebased, added support for CPU based updates
v5: fix CPU based updates once more
v6: fix ndw estimation

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-and-tested-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit cf2f0a3720
parent 6be7adb37d
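For orientation before the diff: with 4 KiB GPU pages and the default 9-bit block size, one page directory entry covers 512 PTEs, i.e. exactly 2 MiB. Below is a minimal standalone C sketch of that qualification arithmetic; can_use_huge_page is a hypothetical helper, and the driver's actual condition (in amdgpu_vm_handle_huge_pages further down) also checks the ASIC type, the update function and the PTE flags.

    #include <stdbool.h>
    #include <stdint.h>

    #define GPU_PAGE_SIZE   4096ULL                         /* AMDGPU_GPU_PAGE_SIZE */
    #define PTES_PER_PT     512ULL                          /* 1 << block_size (9) */
    #define HUGE_PAGE_SIZE  (PTES_PER_PT * GPU_PAGE_SIZE)   /* 2 MiB */

    /* Illustrative only: a PDE can map the range directly when the update
     * covers one full page table worth of PTEs and the backing memory is
     * one contiguous, huge-page-aligned allocation. */
    static bool can_use_huge_page(uint64_t va, uint64_t dst, unsigned nptes)
    {
        return nptes == PTES_PER_PT &&
               (va  % HUGE_PAGE_SIZE) == 0 &&
               (dst % HUGE_PAGE_SIZE) == 0;
    }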
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -352,6 +352,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			entry->bo = pt;
 			entry->addr = 0;
+			entry->huge_page = false;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1117,7 +1118,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		if (parent->entries[pt_idx].addr == pt)
+		if (parent->entries[pt_idx].addr == pt ||
+		    parent->entries[pt_idx].huge_page)
 			continue;
 
 		parent->entries[pt_idx].addr = pt;
@@ -1258,29 +1260,95 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_find_pt - find the page table for an address
+ * amdgpu_vm_find_entry - find the entry for an address
  *
  * @p: see amdgpu_pte_update_params definition
  * @addr: virtual address in question
+ * @entry: resulting entry or NULL
+ * @parent: parent entry
  *
- * Find the page table BO for a virtual address, return NULL when none found.
+ * Find the vm_pt entry and its parent for the given address.
  */
-static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
-					  uint64_t addr)
+void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
+			 struct amdgpu_vm_pt **entry,
+			 struct amdgpu_vm_pt **parent)
 {
-	struct amdgpu_vm_pt *entry = &p->vm->root;
 	unsigned idx, level = p->adev->vm_manager.num_level;
 
-	while (entry->entries) {
+	*parent = NULL;
+	*entry = &p->vm->root;
+	while ((*entry)->entries) {
 		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size(entry->bo) / 8;
-		entry = &entry->entries[idx];
+		idx %= amdgpu_bo_size((*entry)->bo) / 8;
+		*parent = *entry;
+		*entry = &(*entry)->entries[idx];
 	}
 
 	if (level)
-		return NULL;
+		*entry = NULL;
+}
 
-	return entry->bo;
+/**
+ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
+ *
+ * @p: see amdgpu_pte_update_params definition
+ * @entry: vm_pt entry to check
+ * @parent: parent entry
+ * @nptes: number of PTEs updated with this operation
+ * @dst: destination address where the PTEs should point to
+ * @flags: access flags for the PTEs
+ *
+ * Check if we can update the PD with a huge page.
+ */
+static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+				       struct amdgpu_vm_pt *entry,
+				       struct amdgpu_vm_pt *parent,
+				       unsigned nptes, uint64_t dst,
+				       uint64_t flags)
+{
+	bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes);
+	uint64_t pd_addr, pde;
+	int r;
+
+	/* In the case of a mixed PT the PDE must point to it */
+	if (p->adev->asic_type < CHIP_VEGA10 ||
+	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+	    p->func == amdgpu_vm_do_copy_ptes ||
+	    !(flags & AMDGPU_PTE_VALID)) {
+
+		dst = amdgpu_bo_gpu_offset(entry->bo);
+		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
+		flags = AMDGPU_PTE_VALID;
+	} else {
+		flags |= AMDGPU_PDE_PTE;
+	}
+
+	if (entry->addr == dst &&
+	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+		return 0;
+
+	entry->addr = dst;
+	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+	if (use_cpu_update) {
+		r = amdgpu_bo_kmap(parent->bo, (void *)&pd_addr);
+		if (r)
+			return r;
+
+		pde = pd_addr + (entry - parent->entries) * 8;
+		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
+	} else {
+		if (parent->bo->shadow) {
+			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+			pde = pd_addr + (entry - parent->entries) * 8;
+			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+		}
+		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pde = pd_addr + (entry - parent->entries) * 8;
+		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+	}
+
+	return 0;
+}
 
 /**
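The index arithmetic in amdgpu_vm_get_entry above is easy to misread, so here is the same walk as a standalone sketch with simplified stand-in types: block_size is the number of address bits consumed per level (9 on VEGA10), and entries is NULL once a leaf page table is reached.

    #include <stddef.h>
    #include <stdint.h>

    struct vm_pt {
        struct vm_pt *entries;     /* NULL for a leaf page table */
        size_t        num_entries; /* amdgpu_bo_size(bo) / 8 in the driver */
    };

    /* Mirrors the loop in amdgpu_vm_get_entry: walk from the root towards
     * the leaf, recording the entry and its parent for @addr. */
    static void vm_get_entry(struct vm_pt *root, unsigned num_level,
                             unsigned block_size, uint64_t addr,
                             struct vm_pt **entry, struct vm_pt **parent)
    {
        unsigned level = num_level;

        *parent = NULL;
        *entry = root;
        while ((*entry)->entries) {
            unsigned idx = addr >> (block_size * level--);

            idx %= (*entry)->num_entries;
            *parent = *entry;
            *entry = &(*entry)->entries[idx];
        }

        if (level)  /* walk stopped above the leaf level: not mapped */
            *entry = NULL;
    }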
@@ -1307,21 +1375,31 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	struct amdgpu_bo *pt;
 	unsigned nptes;
 	bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
+	int r;
 
 	/* walk over the address space and update the page tables */
-	for (addr = start; addr < end; addr += nptes) {
-		pt = amdgpu_vm_get_pt(params, addr);
-		if (!pt) {
-			pr_err("PT not found, aborting update_ptes\n");
-			return -EINVAL;
-		}
+	for (addr = start; addr < end; addr += nptes,
+	     dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
+		struct amdgpu_vm_pt *entry, *parent;
+
+		amdgpu_vm_get_entry(params, addr, &entry, &parent);
+		if (!entry)
+			return -ENOENT;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
+		r = amdgpu_vm_handle_huge_pages(params, entry, parent,
+						nptes, dst, flags);
+		if (r)
+			return r;
+
+		if (entry->huge_page)
+			continue;
+
+		pt = entry->bo;
 		if (use_cpu_update) {
 			pe_start = (unsigned long)pt->kptr;
 		} else {
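Taken together, the rewritten loop handles the address range block by block: each block either becomes a huge-page PDE (and its per-4K page table is skipped via entry->huge_page), or keeps a normal PDE pointing at the page table. A condensed sketch of that choice, using stand-in constants and folding the driver's several conditions (ASIC type, full block, copy-based update, valid flags) into one boolean:

    #include <stdbool.h>
    #include <stdint.h>

    #define PTE_VALID  (1ULL << 0)   /* stands in for AMDGPU_PTE_VALID */
    #define PDE_PTE    (1ULL << 54)  /* stands in for AMDGPU_PDE_PTE */

    /* Either turn the PDE into a 2 MiB leaf PTE (dst is contiguous and the
     * whole block is updated at once), or make it point at the per-4K page
     * table for a mixed mapping. */
    static uint64_t pde_value(bool full_block_valid, uint64_t dst,
                              uint64_t pt_addr, uint64_t flags)
    {
        if (!full_block_valid)
            return pt_addr | PTE_VALID;  /* mixed PT: PDE points to it */

        return dst | flags | PDE_PTE;    /* PDE maps 2 MiB directly */
    }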
@@ -1337,8 +1415,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		pe_start += (addr & mask) * 8;
 		params->func(params, pe_start, dst, nptes,
 			     AMDGPU_GPU_PAGE_SIZE, flags);
-
-		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
 	return 0;
@@ -1490,6 +1566,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
+	/* one PDE write for each huge page */
+	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
+
 	if (src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
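The new ndw term (this is what v6 fixed) is worth a worked example: an update of nptes PTEs can touch at most (nptes >> block_size) + 1 page directory entries, and each PDE write is budgeted at 6 dwords in this estimate. Assuming the default 9-bit block size:

    #include <stdio.h>

    int main(void)
    {
        unsigned block_size = 9;   /* 512 PTEs per page table */
        unsigned nptes = 2048;     /* an 8 MiB update with 4 KiB pages */

        /* worst-case number of PDEs touched, as in the patch */
        unsigned pdes = (nptes >> block_size) + 1;  /* 4 + 1 = 5 */
        unsigned ndw  = pdes * 6;                   /* 30 extra dwords */

        printf("%u PDE writes, %u dwords reserved\n", pdes, ndw);
        return 0;
    }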
@@ -1569,6 +1648,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
+	amdgpu_vm_invalidate_level(&vm->root);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
 /* TILED for VEGA10, reserved for older ASICs */
 #define AMDGPU_PTE_PRT		(1ULL << 51)
 
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE		(1ULL << 54)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)	((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
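Bit 54 is the crux on VEGA10: when set in a page directory entry, the page walker treats that entry as a leaf PTE instead of a pointer to a page table. A minimal illustration of composing such an entry (AMDGPU_PTE_VALID being bit 0 matches the driver; the rest of the flag layout is omitted here):

    #include <stdint.h>

    #define AMDGPU_PTE_VALID  (1ULL << 0)
    #define AMDGPU_PDE_PTE    (1ULL << 54)  /* PDE is handled as PTE for VEGA10 */

    /* A PDE that maps a 2 MiB huge page directly: the address of the
     * contiguous allocation plus VALID plus the PDE-as-PTE bit. */
    static uint64_t huge_page_pde(uint64_t dst_2mb_aligned)
    {
        return dst_2mb_aligned | AMDGPU_PTE_VALID | AMDGPU_PDE_PTE;
    }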
@@ -100,6 +103,7 @@ struct amdgpu_bo_list_entry;
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	uint64_t		addr;
+	bool			huge_page;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt	*entries;