drm/amdgpu: Fix per-BO MTYPE selection for GFXv9.4.3
Treat system memory on NUMA systems as remote by default. Overriding with a more efficient MTYPE per page will be implemented in the next patch.

No need for a special case for APP APUs. System memory is handled the same for carve-out and native mode. And VRAM doesn't exist in native mode.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
895797d919
commit
1e4a00334a
|
@ -1186,9 +1186,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
|
|||
bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
|
||||
bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
|
||||
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
|
||||
unsigned int mtype;
|
||||
unsigned int mtype_default;
|
||||
/* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/
|
||||
unsigned int mtype_local, mtype;
|
||||
bool snoop = false;
|
||||
bool is_local;
|
||||
|
||||
switch (adev->ip_versions[GC_HWIP][0]) {
|
||||
case IP_VERSION(9, 4, 1):
|
||||
|
@ -1228,35 +1229,26 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
|
|||
}
|
||||
break;
|
||||
case IP_VERSION(9, 4, 3):
|
||||
/* FIXME: Needs more work for handling multiple memory
|
||||
* partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU
|
||||
* modes.
|
||||
* FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
|
||||
* To force use of MTYPE_RW, set use_mtype_cc_wa=0
|
||||
/* Only local VRAM BOs or system memory on non-NUMA APUs
|
||||
* can be assumed to be local in their entirety. Choose
|
||||
* MTYPE_NC as safe fallback for all system memory BOs on
|
||||
* NUMA systems. Their MTYPE can be overridden per-page in
|
||||
* gmc_v9_0_override_vm_pte_flags.
|
||||
*/
|
||||
mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
|
||||
mtype_local = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
|
||||
is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
|
||||
num_possible_nodes() <= 1) ||
|
||||
(is_vram && adev == bo_adev /* TODO: memory partitions &&
|
||||
bo->mem_id == vm->mem_id*/);
|
||||
snoop = true;
|
||||
if (uncached) {
|
||||
mtype = MTYPE_UC;
|
||||
} else if (adev->gmc.is_app_apu) {
|
||||
/* FIXME: APU in native mode, NPS1 single socket only
|
||||
*
|
||||
* For supporting NUMA partitioned APU e.g. in NPS4 mode,
|
||||
* this needs to look at the NUMA node on which the
|
||||
* system memory allocation was done.
|
||||
*
|
||||
* Memory access by a different partition within same
|
||||
* socket should be treated as remote access so MTYPE_RW
|
||||
* cannot be used always.
|
||||
*/
|
||||
mtype = mtype_default;
|
||||
} else if (adev->flags & AMD_IS_APU) {
|
||||
/* APU on carve out mode */
|
||||
mtype = mtype_default;
|
||||
mtype = is_local ? mtype_local : MTYPE_NC;
|
||||
} else {
|
||||
/* dGPU */
|
||||
if (is_vram && bo_adev == adev)
|
||||
mtype = mtype_default;
|
||||
if (is_local)
|
||||
mtype = mtype_local;
|
||||
else if (is_vram)
|
||||
mtype = MTYPE_NC;
|
||||
else
|
||||
|
|
|
@ -1151,6 +1151,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
|
|||
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
|
||||
bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
|
||||
bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED;
|
||||
unsigned int mtype_local;
|
||||
|
||||
if (domain == SVM_RANGE_VRAM_DOMAIN)
|
||||
bo_node = prange->svm_bo->node;
|
||||
|
@ -1191,19 +1192,16 @@ svm_range_get_pte_flags(struct kfd_node *node,
|
|||
}
|
||||
break;
|
||||
case IP_VERSION(9, 4, 3):
|
||||
//TODO: Need more work for handling multiple memory partitions
|
||||
//e.g. NPS4. Current approach is only applicable without memory
|
||||
//partitions.
|
||||
mtype_local = amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC :
|
||||
AMDGPU_VM_MTYPE_RW;
|
||||
snoop = true;
|
||||
if (uncached) {
|
||||
mapping_flags |= AMDGPU_VM_MTYPE_UC;
|
||||
} else if (domain == SVM_RANGE_VRAM_DOMAIN) {
|
||||
/* local HBM region close to partition
|
||||
* FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
|
||||
* To force use of MTYPE_RW, set use_mtype_cc_wa=0
|
||||
*/
|
||||
if (bo_node == node)
|
||||
mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
|
||||
/* local HBM region close to partition */
|
||||
if (bo_node->adev == node->adev /* TODO: memory partitions &&
|
||||
bo_node->mem_id == node->mem_id*/)
|
||||
mapping_flags |= mtype_local;
|
||||
/* local HBM region far from partition or remote XGMI GPU */
|
||||
else if (svm_nodes_in_same_hive(bo_node, node))
|
||||
mapping_flags |= AMDGPU_VM_MTYPE_NC;
|
||||
|
@ -1212,7 +1210,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
|
|||
mapping_flags |= AMDGPU_VM_MTYPE_UC;
|
||||
/* system memory accessed by the APU */
|
||||
} else if (node->adev->flags & AMD_IS_APU) {
|
||||
mapping_flags |= AMDGPU_VM_MTYPE_NC;
|
||||
/* On NUMA systems, locality is determined per-page
|
||||
* in amdgpu_gmc_override_vm_pte_flags
|
||||
*/
|
||||
if (num_possible_nodes() <= 1)
|
||||
mapping_flags |= mtype_local;
|
||||
else
|
||||
mapping_flags |= AMDGPU_VM_MTYPE_NC;
|
||||
/* system memory accessed by the dGPU */
|
||||
} else {
|
||||
mapping_flags |= AMDGPU_VM_MTYPE_UC;
|
||||
|
|
Loading…
Reference in New Issue