drm/amdgpu: Override MTYPE per page on GFXv9.4.3 APUs
On GFXv9.4.3 NUMA APUs, system memory locality must be determined per page to choose the correct MTYPE. This patch adds a GMC callback that can provide this per-page override and implements it for native mode. Carve-out mode is not yet supported and will use the safe default (remote) MTYPE for system memory.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
1e4a00334a
commit
352b919c1e
|
@ -148,6 +148,10 @@ struct amdgpu_gmc_funcs {
|
|||
void (*get_vm_pte)(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo_va_mapping *mapping,
|
||||
uint64_t *flags);
|
||||
/* override per-page pte flags */
|
||||
void (*override_vm_pte_flags)(struct amdgpu_device *dev,
|
||||
struct amdgpu_vm *vm,
|
||||
uint64_t addr, uint64_t *flags);
|
||||
/* get the amount of memory used by the vbios for pre-OS console */
|
||||
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
|
||||
|
||||
|
@ -336,6 +340,9 @@ struct amdgpu_gmc {
|
|||
#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
|
||||
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
|
||||
#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
|
||||
#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
|
||||
(adev)->gmc.gmc_funcs->override_vm_pte_flags \
|
||||
((adev), (vm), (addr), (pte_flags))
|
||||
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
|
||||
|
||||
/**
|
||||
|
|
|
@ -786,13 +786,14 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
|
|||
uint64_t pe, uint64_t addr,
|
||||
unsigned int count, uint32_t incr,
|
||||
uint64_t flags)
|
||||
|
||||
{
|
||||
struct amdgpu_device *adev = params->adev;
|
||||
|
||||
if (level != AMDGPU_VM_PTB) {
|
||||
flags |= AMDGPU_PDE_PTE;
|
||||
amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
|
||||
amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
|
||||
|
||||
} else if (params->adev->asic_type >= CHIP_VEGA10 &&
|
||||
} else if (adev->asic_type >= CHIP_VEGA10 &&
|
||||
!(flags & AMDGPU_PTE_VALID) &&
|
||||
!(flags & AMDGPU_PTE_PRT)) {
|
||||
|
||||
|
@ -800,6 +801,21 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
|
|||
flags |= AMDGPU_PTE_EXECUTABLE;
|
||||
}
|
||||
|
||||
/* APUs mapping system memory may need different MTYPEs on different
|
||||
* NUMA nodes. Only do this for contiguous ranges that can be assumed
|
||||
* to be on the same NUMA node.
|
||||
*/
|
||||
if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
|
||||
adev->gmc.gmc_funcs->override_vm_pte_flags &&
|
||||
num_possible_nodes() > 1) {
|
||||
if (!params->pages_addr)
|
||||
amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
|
||||
addr, &flags);
|
||||
else
|
||||
dev_dbg(adev->dev,
|
||||
"override_vm_pte_flags skipped: non-contiguous\n");
|
||||
}
|
||||
|
||||
params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
|
||||
flags);
|
||||
}
|
||||
|
|
|
@ -1297,6 +1297,69 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
|
|||
mapping, flags);
|
||||
}
|
||||
|
||||
static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
|
||||
struct amdgpu_vm *vm,
|
||||
uint64_t addr, uint64_t *flags)
|
||||
{
|
||||
int local_node, nid;
|
||||
|
||||
/* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
|
||||
* memory can use more efficient MTYPEs.
|
||||
*/
|
||||
if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
|
||||
return;
|
||||
|
||||
/* Only direct-mapped memory allows us to determine the NUMA node from
|
||||
* the DMA address.
|
||||
*/
|
||||
if (!adev->ram_is_direct_mapped) {
|
||||
dev_dbg(adev->dev, "RAM is not direct mapped\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Only override mappings with MTYPE_NC, which is the safe default for
|
||||
* cacheable memory.
|
||||
*/
|
||||
if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
|
||||
AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
|
||||
dev_dbg(adev->dev, "MTYPE is not NC\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* TODO: memory partitions. mem_id is hard-coded to 0 for now.
|
||||
* FIXME: Only supported on native mode for now. For carve-out, the
|
||||
* NUMA affinity of the GPU/VM needs to come from the PCI info because
|
||||
* memory partitions are not associated with different NUMA nodes.
|
||||
*/
|
||||
if (adev->gmc.is_app_apu) {
|
||||
local_node = adev->gmc.mem_partitions[/*vm->mem_id*/0].numa.node;
|
||||
} else {
|
||||
dev_dbg(adev->dev, "Only native mode APU is supported.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Only handle real RAM. Mappings of PCIe resources don't have struct
|
||||
* page or NUMA nodes.
|
||||
*/
|
||||
if (!page_is_ram(addr >> PAGE_SHIFT)) {
|
||||
dev_dbg(adev->dev, "Page is not RAM.\n");
|
||||
return;
|
||||
}
|
||||
nid = pfn_to_nid(addr >> PAGE_SHIFT);
|
||||
dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
|
||||
/*vm->mem_id*/0, local_node, nid);
|
||||
if (nid == local_node) {
|
||||
unsigned int mtype_local =
|
||||
amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
|
||||
uint64_t old_flags = *flags;
|
||||
|
||||
*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
|
||||
AMDGPU_PTE_MTYPE_VG10(mtype_local);
|
||||
dev_dbg(adev->dev, "flags updated from %llx to %llx\n",
|
||||
old_flags, *flags);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
|
||||
|
@ -1368,6 +1431,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
|
|||
.map_mtype = gmc_v9_0_map_mtype,
|
||||
.get_vm_pde = gmc_v9_0_get_vm_pde,
|
||||
.get_vm_pte = gmc_v9_0_get_vm_pte,
|
||||
.override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
|
||||
.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
|
||||
.query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue