drm/amdkfd: Use xcc mask for identifying xcc

Instead of the start xcc id and the number of xccs per node, use the xcc
mask, which is the mask of logical ids of the xccs belonging to a
partition.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Author: Lijo Lazar, 2023-02-09 16:30:53 +05:30; committed by Alex Deucher
parent a75f2271a4, commit c4050ff1a4
8 changed files with 95 additions and 95 deletions
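The change is mechanical at each call site: instead of looping a count and
adding a start offset, the code walks the set bits of xcc_mask, so a
partition's XCCs no longer have to be a contiguous range starting at a known
id. Below is a minimal user-space sketch of the two iteration styles; the
for_each_set_bit_u32 macro and program_xcc() are illustrative stand-ins, not
amdgpu's for_each_inst() macro (defined in amdgpu.h) or NUM_XCC() (a
population count of the mask):

    #include <stdio.h>
    #include <stdint.h>

    /* Toy stand-in for amdgpu's for_each_inst(): visit the logical id of
     * each set bit in a mask. Illustrative only, not the kernel macro. */
    #define for_each_set_bit_u32(i, tmp, mask) \
            for ((tmp) = (mask); (tmp) && ((i) = __builtin_ctz(tmp), 1); \
                 (tmp) &= (tmp) - 1)

    static void program_xcc(uint32_t inst)
    {
            printf("programming XCC instance %u\n", inst);
    }

    int main(void)
    {
            /* Old scheme: a dense range [start_xcc_id, start_xcc_id + num). */
            uint32_t start_xcc_id = 4, num_xcc_per_node = 2;
            for (uint32_t xcc = 0; xcc < num_xcc_per_node; xcc++)
                    program_xcc(start_xcc_id + xcc);

            /* New scheme: an explicit mask of logical ids; this also works
             * if the partition's XCCs are not contiguous. */
            uint32_t xcc_mask = 0x30; /* bits 4 and 5 -> instances 4, 5 */
            uint32_t tmp, xcc_id;
            for_each_set_bit_u32(xcc_id, tmp, xcc_mask)
                    program_xcc(xcc_id);

            /* NUM_XCC(mask) is the population count of the mask. */
            printf("NUM_XCC = %d\n", __builtin_popcount(xcc_mask));
            return 0;
    }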

@@ -745,15 +745,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
node->vm_info.vmid_num_kfd = vmid_num_kfd;
node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
/* TODO : Check if error handling is needed */
if (node->xcp)
if (node->xcp) {
amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX,
&node->xcc_mask);
else
++xcp_idx;
} else {
node->xcc_mask =
(1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
node->num_xcc_per_node = max(1U, kfd->adev->gfx.num_xcc_per_xcp);
node->start_xcc_id = node->num_xcc_per_node * i;
}
if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
partition_mode == AMDGPU_CPX_PARTITION_MODE &&
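In the fallback path above (no XCP attached to the node), the node claims
every XCC on the device: (1U << n) - 1 produces a mask with the low n bits
set. A quick self-contained check, with an assumed count of four XCCs:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            unsigned int n = 4;            /* assumed NUM_XCC() result */
            uint32_t mask = (1U << n) - 1; /* 0b1111: logical ids 0..3 */

            assert(mask == 0xF);
            assert(__builtin_popcount(mask) == (int)n);
            return 0;
    }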

@@ -136,16 +136,14 @@ static void init_sdma_bitmaps(struct device_queue_manager *dqm)
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
int xcc = 0;
uint32_t xcc_mask = dqm->dev->xcc_mask;
int xcc_id;
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->program_sh_mem_settings(
dqm->dev->adev, qpd->vmid,
qpd->sh_mem_config,
qpd->sh_mem_ape1_base,
qpd->sh_mem_ape1_limit,
qpd->sh_mem_bases,
dqm->dev->start_xcc_id + xcc);
dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
qpd->sh_mem_bases, xcc_id);
}
static void kfd_hws_hang(struct device_queue_manager *dqm)
@@ -427,14 +425,14 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
static void program_trap_handler_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
int xcc = 0;
uint32_t xcc_mask = dqm->dev->xcc_mask;
int xcc_id;
if (dqm->dev->kfd2kgd->program_trap_handler_settings)
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->program_trap_handler_settings(
dqm->dev->adev, qpd->vmid,
qpd->tba_addr, qpd->tma_addr,
dqm->dev->start_xcc_id + xcc);
dqm->dev->adev, qpd->vmid, qpd->tba_addr,
qpd->tma_addr, xcc_id);
}
static int allocate_vmid(struct device_queue_manager *dqm,
@@ -697,7 +695,8 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
struct kfd_process_device *pdd;
int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
int xcc = 0;
uint32_t xcc_mask = dev->xcc_mask;
int xcc_id;
reg_sq_cmd.u32All = 0;
reg_gfx_index.u32All = 0;
@@ -742,11 +741,10 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
reg_sq_cmd.bits.vm_id = vmid;
for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++)
dev->kfd2kgd->wave_control_execute(dev->adev,
reg_gfx_index.u32All,
reg_sq_cmd.u32All,
dev->start_xcc_id + xcc);
for_each_inst(xcc_id, xcc_mask)
dev->kfd2kgd->wave_control_execute(
dev->adev, reg_gfx_index.u32All,
reg_sq_cmd.u32All, xcc_id);
return 0;
}
@@ -1258,12 +1256,12 @@ static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
unsigned int vmid)
{
int xcc = 0, ret;
uint32_t xcc_mask = dqm->dev->xcc_mask;
int xcc_id, ret;
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
for_each_inst(xcc_id, xcc_mask) {
ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
dqm->dev->adev, pasid, vmid,
dqm->dev->start_xcc_id + xcc);
dqm->dev->adev, pasid, vmid, xcc_id);
if (ret)
break;
}
@@ -1273,15 +1271,14 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
static void init_interrupts(struct device_queue_manager *dqm)
{
unsigned int i, xcc;
uint32_t xcc_mask = dqm->dev->xcc_mask;
unsigned int i, xcc_id;
for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
if (is_pipe_enabled(dqm, 0, i)) {
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->init_interrupts(
dqm->dev->adev, i,
dqm->dev->start_xcc_id +
xcc);
dqm->dev->adev, i, xcc_id);
}
}
}
@@ -2283,7 +2280,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
get_num_all_sdma_engines(dqm) *
dev->kfd->device_info.num_sdma_queues_per_engine +
(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
dqm->dev->num_xcc_per_node);
NUM_XCC(dqm->dev->xcc_mask));
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
@@ -2489,10 +2486,10 @@ static void seq_reg_dump(struct seq_file *m,
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
struct device_queue_manager *dqm = data;
uint32_t xcc_mask = dqm->dev->xcc_mask;
uint32_t (*dump)[2], n_regs;
int pipe, queue;
int r = 0, xcc;
uint32_t inst;
int r = 0, xcc_id;
uint32_t sdma_engine_start;
if (!dqm->sched_running) {
@@ -2500,16 +2497,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
return 0;
}
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
inst = dqm->dev->start_xcc_id + xcc;
for_each_inst(xcc_id, xcc_mask) {
r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
&dump, &n_regs, inst);
KFD_CIK_HIQ_PIPE,
KFD_CIK_HIQ_QUEUE, &dump,
&n_regs, xcc_id);
if (!r) {
seq_printf(m,
seq_printf(
m,
" Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
inst, KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
xcc_id,
KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1,
KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm),
KFD_CIK_HIQ_QUEUE);
seq_reg_dump(m, dump, n_regs);
@@ -2524,13 +2523,16 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
dqm->dev->kfd->shared_resources.cp_queue_bitmap))
continue;
r = dqm->dev->kfd2kgd->hqd_dump(
dqm->dev->adev, pipe, queue, &dump, &n_regs, inst);
r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
pipe, queue,
&dump, &n_regs,
xcc_id);
if (r)
break;
seq_printf(m, " Inst %d, CP Pipe %d, Queue %d\n",
inst, pipe, queue);
seq_printf(m,
" Inst %d, CP Pipe %d, Queue %d\n",
xcc_id, pipe, queue);
seq_reg_dump(m, dump, n_regs);
kfree(dump);

@@ -77,7 +77,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
dev->num_xcc_per_node;
NUM_XCC(dev->xcc_mask);
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ offset);
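Allocation and offset arithmetic now derive the XCC count from the mask with
NUM_XCC() (in the kernel, a popcount of xcc_mask) instead of the cached
num_xcc_per_node, since the per-XCC HIQ MQDs are packed back to back in one
GTT buffer. A hedged sketch of the offset computation, with made-up sizes:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* Illustrative numbers only, not real MQD sizes. */
            uint64_t hiq_mqd_size = 512, sdma_mqd_size = 512;
            uint32_t xcc_mask = 0x3; /* two XCCs */
            uint32_t sdma_id = 5;    /* hypothetical SDMA queue slot */

            /* The SDMA MQD region starts after one HIQ MQD per XCC, so
             * the offset scales with the popcount of the mask. */
            uint64_t offset = sdma_id * sdma_mqd_size +
                              hiq_mqd_size * __builtin_popcount(xcc_mask);
            printf("SDMA MQD offset: %llu\n", (unsigned long long)offset);
            return 0;
    }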

@@ -128,7 +128,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev,
(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
node->num_xcc_per_node,
NUM_XCC(node->xcc_mask),
&(mqd_mem_obj->gtt_mem),
&(mqd_mem_obj->gpu_addr),
(void *)&(mqd_mem_obj->cpu_ptr), true);
@@ -482,7 +482,7 @@ static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);
init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
@@ -506,21 +506,21 @@ static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
int xcc, err;
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + hiq_mqd_size * xcc;
for_each_inst(xcc_id, xcc_mask) {
xcc_mqd = mqd + hiq_mqd_size * inst;
err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
pipe_id, queue_id,
p->doorbell_off,
start_inst+xcc);
p->doorbell_off, xcc_id);
if (err) {
pr_debug("Failed to load HIQ MQD for XCC: %d\n", xcc);
pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
break;
}
++inst;
}
return err;
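These MQD loops carry two indices on purpose: xcc_id is the logical instance
id taken from the mask and handed to the kfd2kgd hooks, while inst is a dense
0-based counter that strides through the contiguously packed per-XCC MQDs
(the mask's lowest set bit need not be 0). A self-contained sketch of that
dual-index pattern; load_one_mqd() and MQD_STRIDE are hypothetical:

    #include <stdio.h>
    #include <stdint.h>

    #define MQD_STRIDE 64 /* hypothetical per-XCC MQD stride */

    /* Hypothetical stand-in for a kfd2kgd load hook. */
    static int load_one_mqd(uint64_t offset, uint32_t xcc_id)
    {
            printf("load MQD at offset %llu on XCC instance %u\n",
                   (unsigned long long)offset, xcc_id);
            return 0;
    }

    int main(void)
    {
            uint32_t xcc_mask = 0xC; /* logical instances 2 and 3 */
            uint32_t tmp = xcc_mask;
            int inst = 0, err = 0;

            /* xcc_id walks the mask's set bits (hardware instance id);
             * inst walks 0, 1, 2, ... (index into the packed MQD array). */
            while (tmp) {
                    uint32_t xcc_id = __builtin_ctz(tmp);

                    err = load_one_mqd((uint64_t)MQD_STRIDE * inst, xcc_id);
                    if (err)
                            break;
                    ++inst;
                    tmp &= tmp - 1; /* clear lowest set bit */
            }
            return err;
    }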
@@ -530,20 +530,21 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
{
int xcc = 0, err;
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + hiq_mqd_size * xcc;
for_each_inst(xcc_id, xcc_mask) {
xcc_mqd = mqd + hiq_mqd_size * inst;
err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
type, timeout, pipe_id,
queue_id, start_inst+xcc);
queue_id, xcc_id);
if (err) {
pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
pr_debug("Destroy MQD failed for xcc: %d\n", inst);
break;
}
++inst;
}
return err;
@@ -573,7 +574,7 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;
memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
@@ -600,7 +601,7 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
m->compute_tg_chunk_size = 1;
m->compute_current_logic_xcc_id =
(local_xcc_start + xcc) %
mm->dev->num_xcc_per_node;
NUM_XCC(mm->dev->xcc_mask);
switch (xcc) {
case 0:
@@ -633,7 +634,7 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
int xcc = 0;
uint64_t size = mm->mqd_stride(mm, q);
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
@@ -661,24 +662,25 @@ static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
{
int xcc = 0, err;
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
void *xcc_mqd;
struct v9_mqd *m;
uint64_t mqd_offset;
uint32_t start_inst = mm->dev->start_xcc_id;
m = get_mqd(mqd);
mqd_offset = m->cp_mqd_stride_size;
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + mqd_offset * xcc;
for_each_inst(xcc_id, xcc_mask) {
xcc_mqd = mqd + mqd_offset * inst;
err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
type, timeout, pipe_id,
queue_id, start_inst+xcc);
queue_id, xcc_id);
if (err) {
pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
pr_debug("Destroy MQD failed for xcc: %d\n", inst);
break;
}
++inst;
}
return err;
@@ -690,21 +692,22 @@ static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
{
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
int xcc = 0, err;
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t mqd_stride_size = mm->mqd_stride(mm, p);
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + mqd_stride_size * xcc;
err = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, xcc_mqd,
pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms, start_inst+xcc);
for_each_inst(xcc_id, xcc_mask) {
xcc_mqd = mqd + mqd_stride_size * inst;
err = mm->dev->kfd2kgd->hqd_load(
mm->dev->adev, xcc_mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
xcc_id);
if (err) {
pr_debug("Load MQD failed for xcc: %d\n", xcc);
pr_debug("Load MQD failed for xcc: %d\n", inst);
break;
}
++inst;
}
return err;
@@ -722,7 +725,7 @@ static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
xcc_mqd = mqd + mqd_stride_size * xcc;
xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
q->ctx_save_restore_area_size * xcc);

@@ -274,10 +274,6 @@ struct kfd_node {
*/
struct kfd_vmid_info vm_info;
unsigned int id; /* topology stub index */
unsigned int num_xcc_per_node;
unsigned int start_xcc_id; /* Starting XCC instance
* number for the node
*/
uint32_t xcc_mask; /* Instance mask of XCCs present */
struct amdgpu_xcp *xcp;

@@ -2058,6 +2058,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
struct kfd_node *dev = pdd->dev;
uint32_t xcc_mask = dev->xcc_mask;
int xcc = 0;
/*
@@ -2076,10 +2077,9 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
pdd->qpd.vmid);
} else {
for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++)
amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
pdd->process->pasid, type,
dev->start_xcc_id + xcc);
for_each_inst(xcc, xcc_mask)
amdgpu_amdkfd_flush_gpu_tlb_pasid(
dev->adev, pdd->process->pasid, type, xcc);
}
}

@@ -946,7 +946,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
seq_printf(m, " Compute queue on device %x\n",
q->device->id);
mqd_type = KFD_MQD_TYPE_CP;
num_xccs = q->device->num_xcc_per_node;
num_xccs = NUM_XCC(q->device->xcc_mask);
break;
default:
seq_printf(m,

@@ -469,7 +469,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.cpu_cores_count);
sysfs_show_32bit_prop(buffer, offs, "simd_count",
dev->gpu ? (dev->node_props.simd_count *
dev->gpu->num_xcc_per_node) : 0);
NUM_XCC(dev->gpu->xcc_mask)) : 0);
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, offs, "caches_count",
@@ -494,7 +494,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.wave_front_size);
sysfs_show_32bit_prop(buffer, offs, "array_count",
dev->gpu ? (dev->node_props.array_count *
dev->gpu->num_xcc_per_node) : 0);
NUM_XCC(dev->gpu->xcc_mask)) : 0);
sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
dev->node_props.simd_arrays_per_engine);
sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
@@ -558,7 +558,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
sysfs_show_64bit_prop(buffer, offs, "unique_id",
dev->gpu->adev->unique_id);
sysfs_show_32bit_prop(buffer, offs, "num_xcc",
dev->gpu->num_xcc_per_node);
NUM_XCC(dev->gpu->xcc_mask));
}
return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
@@ -1180,7 +1180,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
buf[4] = gpu->adev->pdev->bus->number;
buf[5] = lower_32_bits(local_mem_size);
buf[6] = upper_32_bits(local_mem_size);
buf[7] = gpu->start_xcc_id | (gpu->num_xcc_per_node << 16);
buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16);
for (i = 0, hashout = 0; i < 8; i++)
hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
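The gpu_id hash keeps its old inputs: ffs() is 1-based, so ffs(xcc_mask) - 1
recovers the lowest logical id (what start_xcc_id used to hold), and NUM_XCC()
recovers the count. A small check of the packed word, using an assumed
partition that owns instances 4..7:

    #include <assert.h>
    #include <stdint.h>
    #include <strings.h> /* ffs() */

    int main(void)
    {
            uint32_t xcc_mask = 0xF0; /* assumed: logical instances 4..7 */

            uint32_t start = ffs(xcc_mask) - 1;          /* lowest set bit: 4 */
            uint32_t num = __builtin_popcount(xcc_mask); /* 4 XCCs */
            uint32_t buf7 = start | (num << 16);

            assert(buf7 == (4U | (4U << 16)));
            return 0;
    }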