drm/amdkfd: add reset queue function for RAS poison (v2)

The new interface unmaps, in reset mode, the queues of the process that
consumed RAS poison; it applies to compute queues only.

v2: rename the function to reset_queues.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Tao Zhou 2021-12-16 14:34:10 +08:00 committed by Alex Deucher
parent f6b80c04aa
commit dec6344338
2 changed files with 21 additions and 0 deletions

View File

@ -1476,6 +1476,21 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
return retval; return retval;
} }
/*
 * reset_queues_cpsch - unmap the queues selected by a PASID.
 *
 * Only for compute queues.
 *
 * @dqm:   device queue manager whose lock is held around the unmap call
 * @pasid: PASID passed to unmap_queues_cpsch() together with the
 *         KFD_UNMAP_QUEUES_FILTER_BY_PASID filter
 *
 * The final 'true' argument presumably asks unmap_queues_cpsch() to unmap
 * in reset mode -- TODO confirm against that function's signature.
 *
 * Returns the value returned by unmap_queues_cpsch().
 */
static int reset_queues_cpsch(struct device_queue_manager *dqm,
			uint16_t pasid)
{
	int ret;

	/* Acquire the dqm lock for the duration of the unmap call. */
	dqm_lock(dqm);
	ret = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
				 pasid, true);
	dqm_unlock(dqm);

	return ret;
}
/* dqm->lock mutex has to be locked before calling this function */ /* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm, static int execute_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter, enum kfd_unmap_queues_filter filter,
@ -1896,6 +1911,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.evict_process_queues = evict_process_queues_cpsch; dqm->ops.evict_process_queues = evict_process_queues_cpsch;
dqm->ops.restore_process_queues = restore_process_queues_cpsch; dqm->ops.restore_process_queues = restore_process_queues_cpsch;
dqm->ops.get_wave_state = get_wave_state; dqm->ops.get_wave_state = get_wave_state;
dqm->ops.reset_queues = reset_queues_cpsch;
break; break;
case KFD_SCHED_POLICY_NO_HWS: case KFD_SCHED_POLICY_NO_HWS:
/* initialize dqm for no cp scheduling */ /* initialize dqm for no cp scheduling */

View File

@ -81,6 +81,8 @@ struct device_process_node {
* *
* @get_wave_state: Retrieves context save state and optionally copies the * @get_wave_state: Retrieves context save state and optionally copies the
* control stack, if kept in the MQD, to the given userspace address. * control stack, if kept in the MQD, to the given userspace address.
*
* @reset_queues: Resets the queues of the process that consumed RAS poison.
*/ */
struct device_queue_manager_ops { struct device_queue_manager_ops {
@ -134,6 +136,9 @@ struct device_queue_manager_ops {
void __user *ctl_stack, void __user *ctl_stack,
u32 *ctl_stack_used_size, u32 *ctl_stack_used_size,
u32 *save_area_used_size); u32 *save_area_used_size);
int (*reset_queues)(struct device_queue_manager *dqm,
uint16_t pasid);
}; };
struct device_queue_manager_asic_ops { struct device_queue_manager_asic_ops {