From 3447d220155bd9f4b5435ea6e9d58b536c7e94dd Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 13 Jul 2017 20:21:53 -0500 Subject: [PATCH 1/4] drm/amdgpu: Fix KFD oversubscription by tracking queues correctly The number of compute queues available to the KFD was erroneously calculated as 64. Only the first MEC can execute compute queues and it has 32 queue slots. This caused the oversubscription limit to be calculated incorrectly, leading to a missing chained runlist command at the end of an oversubscribed runlist. v2: Remove unused num_mec field to avoid duplicate logic v3: Separate num_mec removal into separate patches Change-Id: I9e7bba2cc1928b624e3eeb1edb06fdb602e5294f Signed-off-by: Jay Cornwall Reviewed-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5f8ada1d872b..76ddedcd1e65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -122,7 +122,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant */ - last_valid_bit = adev->gfx.mec.num_mec + last_valid_bit = 1 /* only first MEC can have compute queues */ * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) From 13c4a2c78e7ed932cff019f54cdd4f6976dad140 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 13 Jul 2017 20:21:54 -0500 Subject: [PATCH 2/4] drm/amdkfd: Remove unused references to shared_resources.num_mec Dead code. Change-Id: Ic0bb1bcca87e96bc5e8fa9894727b0de152e8818 Signed-off-by: Jay Cornwall Reviewed-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ---- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 ------- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 1 - 3 files changed, 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 88187bfc5ea3..3f95f7cb4019 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -226,10 +226,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->shared_resources = *gpu_resources; - /* We only use the first MEC */ - if (kfd->shared_resources.num_mec > 1) - kfd->shared_resources.num_mec = 1; - /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 955aa304ff48..602769ced3bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -77,13 +77,6 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) return false; } -unsigned int get_mec_num(struct device_queue_manager *dqm) -{ - BUG_ON(!dqm || !dqm->dev); - - return dqm->dev->shared_resources.num_mec; -} - unsigned int get_queues_num(struct device_queue_manager *dqm) { BUG_ON(!dqm || !dqm->dev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 66b9615bc3c1..faf820a06400 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -180,7 +180,6 @@ void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops); void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); -unsigned int get_mec_num(struct device_queue_manager *dqm); unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); From 1ff036dc391b169ff5a74ef0f0215c174c07166f Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 13 Jul 2017 20:21:55 -0500 Subject: [PATCH 3/4] drm/radeon: Remove initialization of shared_resources.num_mec Dead code. Change-Id: I2383e0b541ed55288570b6a0ec8a0d49cdd4df89 Signed-off-by: Jay Cornwall Reviewed-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/radeon/radeon_kfd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 699fe7f9b8bf..a2ab6dcdf4a2 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -184,7 +184,6 @@ void radeon_kfd_device_init(struct radeon_device *rdev) if (rdev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, - .num_mec = 1, .num_pipe_per_mec = 4, .num_queue_per_pipe = 8 }; From f835edf9aec13bc8abb3dd4836151773cc359f42 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 13 Jul 2017 20:21:56 -0500 Subject: [PATCH 4/4] drm/amdgpu: Remove unused field kgd2kfd_shared_resources.num_mec Dead code. Change-Id: I9575aa73b5741b80dc340f953cc773385c92b2be Signed-off-by: Jay Cornwall Reviewed-by: Alex Deucher Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 1 - drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 3 --- 2 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 76ddedcd1e65..37971d9402e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -101,7 +101,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, - .num_mec = adev->gfx.mec.num_mec, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe }; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 91ef1484b3bb..36f376677a53 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -63,9 +63,6 @@ struct kgd2kfd_shared_resources { /* Bit n == 1 means VMID n is available for KFD. */ unsigned int compute_vmid_bitmap; - /* number of mec available from the hardware */ - uint32_t num_mec; - /* number of pipes per mec */ uint32_t num_pipe_per_mec;