Merge tag 'drm-amdkfd-next-2017-12-24' of git://people.freedesktop.org/~gabbayo/linux into drm-next
- Add CWSR (compute wave save restore) support for GFX8 (Carrizo)
- Fix SDMA user-mode queues support for GFX7 (Kaveri)
- Add SDMA user-mode queues support for GFX8 (Carrizo)
- Allow HWS (hardware scheduling) to schedule multiple processes
  concurrently
- Add debugfs support
- Simplify process locking and lock dependencies
- Refactor topology code to prepare for dGPU support, plus fixes to
  that code
- Add an option to generate a dummy/virtual CRAT table when it is
  missing or malformed
- Recognize CPUs other than APUs as compute entities
- Various cleanups and bug fixes

I have not yet sent the dGPU topology code because it depends on a
patch for the PCI subsystem that adds PCIe atomics support. Once that
patch is upstreamed we can continue with the rest of the dGPU code.

* tag 'drm-amdkfd-next-2017-12-24' of git://people.freedesktop.org/~gabbayo/linux: (53 commits)
  drm/amdgpu: Add support for reporting VRAM usage
  drm/amdkfd: Ignore ACPI CRAT for non-APU systems
  drm/amdkfd: Module option to disable CRAT table
  drm/amdkfd: Add AQL Queue Memory flag on topology
  drm/amdkfd: Fixup incorrect info in the CZ CRAT table
  drm/amdkfd: Add perf counters to topology
  drm/amdkfd: Add topology support for dGPUs
  drm/amdkfd: Add topology support for CPUs
  drm/amdkfd: Fix sibling_map[] size
  drm/amdkfd: Simplify counting of memory banks
  drm/amdkfd: Turn verbose topology messages into pr_debug
  drm/amdkfd: sync IOLINK defines to thunk spec
  drm/amdkfd: Support enumerating non-GPU devices
  drm/amdkfd: Decouple CRAT parsing from device list update
  drm/amdkfd: Reorganize CRAT fetching from ACPI
  drm/amdkfd: Group up CRAT related functions
  drm/amdkfd: Fix memory leaks in kfd topology
  drm/amdkfd: Topology: Fix location_id
  drm/amdkfd: Update number of compute unit from KGD
  drm/amd: Remove get_vmem_size from KGD-KFD interface
  ...
commit a9742b794a
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -959,6 +959,7 @@ struct amdgpu_gfx_config {
 };
 
 struct amdgpu_cu_info {
+	uint32_t simd_per_cu;
 	uint32_t max_waves_per_simd;
 	uint32_t wave_front_size;
 	uint32_t max_scratch_slots_per_cu;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -275,14 +275,34 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 	kfree(mem);
 }
 
-uint64_t get_vmem_size(struct kgd_dev *kgd)
+void get_local_mem_info(struct kgd_dev *kgd,
+			struct kfd_local_mem_info *mem_info)
 {
-	struct amdgpu_device *adev =
-		(struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
+					~((1ULL << 32) - 1);
+	resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size;
 
-	BUG_ON(kgd == NULL);
+	memset(mem_info, 0, sizeof(*mem_info));
+	if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) {
+		mem_info->local_mem_size_public = adev->mc.visible_vram_size;
+		mem_info->local_mem_size_private = adev->mc.real_vram_size -
+				adev->mc.visible_vram_size;
+	} else {
+		mem_info->local_mem_size_public = 0;
+		mem_info->local_mem_size_private = adev->mc.real_vram_size;
+	}
+	mem_info->vram_width = adev->mc.vram_width;
 
-	return adev->mc.real_vram_size;
+	pr_debug("Address base: 0x%llx limit 0x%llx public 0x%llx private 0x%llx\n",
+			adev->mc.aper_base, aper_limit,
+			mem_info->local_mem_size_public,
+			mem_info->local_mem_size_private);
+
+	if (amdgpu_sriov_vf(adev))
+		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
+	else
+		mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
 }
 
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
@@ -298,6 +318,39 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	/* The sclk is in quantas of 10kHz */
-	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+	/* the sclk is in quantas of 10kHz */
+	if (amdgpu_sriov_vf(adev))
+		return adev->clock.default_sclk / 100;
+
+	return amdgpu_dpm_get_sclk(adev, false) / 100;
+}
+
+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
+
+	memset(cu_info, 0, sizeof(*cu_info));
+	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
+		return;
+
+	cu_info->cu_active_number = acu_info.number;
+	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
+	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
+			sizeof(acu_info.bitmap));
+	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+	cu_info->simd_per_cu = acu_info.simd_per_cu;
+	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
+	cu_info->wave_front_size = acu_info.wave_front_size;
+	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
+	cu_info->lds_size = acu_info.lds_size;
+}
+
+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 }
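
A note on the aperture check in get_local_mem_info() above: visible VRAM
only counts as "public" (host-accessible) memory when both the base and
the limit of the CPU-visible aperture lie inside the device's DMA mask;
otherwise all of VRAM is reported as private. A minimal standalone
sketch of that predicate, with illustrative names (not kernel API):

#include <stdbool.h>
#include <stdint.h>

/* Both the aperture base and its limit must be addressable under the
 * DMA mask for the aperture to be reported as public. */
static bool aperture_is_dma_reachable(uint64_t aper_base, uint64_t aper_size,
				      uint64_t dma_mask)
{
	uint64_t address_mask = ~dma_mask;	/* bits the device cannot drive */
	uint64_t aper_limit = aper_base + aper_size;

	return !(aper_base & address_mask) && !(aper_limit & address_mask);
}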
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -56,10 +56,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **cpu_ptr);
 void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
-uint64_t get_vmem_size(struct kgd_dev *kgd);
+void get_local_mem_info(struct kgd_dev *kgd,
+			struct kfd_local_mem_info *mem_info);
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 
 #define read_user_wptr(mmptr, wptr, dst) \
 	({ \
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -105,7 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
 
@@ -166,7 +173,7 @@ static int get_tile_config(struct kgd_dev *kgd,
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
 	.free_gtt_mem = free_gtt_mem,
-	.get_vmem_size = get_vmem_size,
+	.get_local_mem_info = get_local_mem_info,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
 	.alloc_pasid = amdgpu_vm_alloc_pasid,
@@ -177,6 +184,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
 	.hqd_is_occupied = kgd_hqd_is_occupied,
 	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
 	.hqd_destroy = kgd_hqd_destroy,
@@ -191,6 +200,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
+	.get_cu_info = get_cu_info,
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -375,7 +386,44 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t i = 0, reg;
+#define HQD_N_REGS (35+4)
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+		DUMP_REG(reg);
+
+	release_queue(kgd);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct cik_sdma_rlc_registers *m;
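
The `(addr) << 2` in DUMP_REG above converts a dword register index
(which is what the mm* register constants encode) into the byte offset
that debugging tools usually expect. The same conversion in isolation,
using a made-up index value rather than a real register:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t dword_index = 0x2000;		/* hypothetical index */
	uint32_t byte_offset = dword_index << 2; /* 4 bytes per register */

	printf("index 0x%x -> byte offset 0x%x\n", dword_index, byte_offset);
	return 0;
}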
@@ -410,10 +458,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
 	}
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
-			m->sdma_rlc_doorbell);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+	data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
+			     ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
+
+	if (read_user_wptr(mm, wptr, data))
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+	else
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       m->sdma_rlc_rb_rptr);
+
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
 			m->sdma_rlc_virtual_addr);
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
@@ -423,8 +478,37 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
 			m->sdma_rlc_rb_rptr_addr_lo);
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 			m->sdma_rlc_rb_rptr_addr_hi);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-			m->sdma_rlc_rb_cntl);
+
+	data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
+			     RB_ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+		queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+	uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
 
 	return 0;
 }
@@ -575,7 +659,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 	struct cik_sdma_rlc_registers *m;
 	uint32_t sdma_base_addr;
 	uint32_t temp;
-	int timeout = utimeout;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
 	sdma_base_addr = get_sdma_base_addr(m);
@@ -588,10 +672,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
 			break;
-		if (timeout <= 0)
+		if (time_after(jiffies, end_jiffies))
 			return -ETIME;
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
@@ -599,6 +682,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
 		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
 		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
+	m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+
 	return 0;
 }
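
The destroy-path change above replaces a hand-counted millisecond
budget (msleep(20) plus timeout -= 20) with a jiffies deadline, which
stays correct even when a sleep overshoots its nominal duration. Note
that (utimeout * HZ / 1000) is the open-coded equivalent of
msecs_to_jiffies(utimeout). The pattern in isolation, a kernel-style
sketch with poll_hw_idle() standing in for the real CONTEXT_STATUS
read:

static int wait_for_sdma_idle(unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (!poll_hw_idle()) {		/* placeholder predicate */
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);	/* sleep, don't busy-wait */
	}
	return 0;
}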
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -45,7 +45,7 @@ enum hqd_dequeue_request_type {
 	RESET_WAVES
 };
 
-struct cik_sdma_rlc_registers;
+struct vi_sdma_mqd;
 
 /*
  * Register access functions
@@ -64,7 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -125,7 +132,7 @@ static int get_tile_config(struct kgd_dev *kgd,
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
 	.free_gtt_mem = free_gtt_mem,
-	.get_vmem_size = get_vmem_size,
+	.get_local_mem_info = get_local_mem_info,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
 	.alloc_pasid = amdgpu_vm_alloc_pasid,
@@ -136,6 +143,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
 	.hqd_is_occupied = kgd_hqd_is_occupied,
 	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
 	.hqd_destroy = kgd_hqd_destroy,
@@ -152,6 +161,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
+	.get_cu_info = get_cu_info,
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -268,9 +279,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	return 0;
 }
 
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
 {
-	return 0;
+	uint32_t retval;
+
+	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+	pr_debug("kfd: sdma base address: 0x%x\n", retval);
+
+	return retval;
 }
 
 static inline struct vi_mqd *get_mqd(void *mqd)
@@ -278,9 +295,9 @@ static inline struct vi_mqd *get_mqd(void *mqd)
 	return (struct vi_mqd *)mqd;
 }
 
-static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
 {
-	return (struct cik_sdma_rlc_registers *)mqd;
+	return (struct vi_sdma_mqd *)mqd;
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
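
get_sdma_base_addr() above stops returning 0 and instead composes the
per-queue register block address from two strides. With the constants
this series uses (SDMA1_REGISTER_OFFSET = 0x200 and
KFD_VI_SDMA_QUEUE_OFFSET = 0x80, both dword strides), a worked example
outside the kernel:

#include <stdint.h>
#include <stdio.h>

#define SDMA1_REGISTER_OFFSET	 0x200	/* per-engine stride (dwords) */
#define KFD_VI_SDMA_QUEUE_OFFSET 0x80	/* per-queue stride (dwords) */

/* Same arithmetic as get_sdma_base_addr(), as a standalone function. */
static uint32_t sdma_rlc_base(uint32_t engine_id, uint32_t queue_id)
{
	return engine_id * SDMA1_REGISTER_OFFSET +
	       queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
}

int main(void)
{
	/* engine 1, queue 2 -> 0x200 + 0x100 = 0x300 dwords */
	printf("base = 0x%x\n", sdma_rlc_base(1, 2));
	return 0;
}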
@@ -358,8 +375,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs)
 {
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t i = 0, reg;
+#define HQD_N_REGS (54+4)
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
+		DUMP_REG(reg);
+
+	release_queue(kgd);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct vi_sdma_mqd *m;
+	unsigned long end_jiffies;
+	uint32_t sdma_base_addr;
+	uint32_t data;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(m);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+	end_jiffies = msecs_to_jiffies(2000) + jiffies;
+	while (true) {
+		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies))
+			return -ETIME;
+		usleep_range(500, 1000);
+	}
+	if (m->sdma_engine_id) {
+		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
+		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
+				RESUME_CTX, 0);
+		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
+	} else {
+		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
+		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+				RESUME_CTX, 0);
+		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
+	}
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+			     ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+
+	if (read_user_wptr(mm, wptr, data))
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+	else
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       m->sdmax_rlcx_rb_rptr);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+				m->sdmax_rlcx_virtual_addr);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+			m->sdmax_rlcx_rb_base_hi);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+			m->sdmax_rlcx_rb_rptr_addr_lo);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+			m->sdmax_rlcx_rb_rptr_addr_hi);
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+			     RB_ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+		queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+	uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4+2+3+7)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
 
 	return 0;
 }
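
One subtlety in kgd_hqd_sdma_load() above: on restore, read_user_wptr()
can fail because the saved write pointer lives in user memory that may
no longer be mapped. The fallback writes the saved rptr into RB_WPTR,
so the restored ring comes up empty instead of replaying stale
commands. The decision reduced to a sketch (hypothetical helper, not
part of the series):

#include <stdbool.h>
#include <stdint.h>

/* If the user-mode wptr is unreadable, resume with wptr == rptr,
 * i.e. an empty ring, rather than guessing a write pointer. */
static uint32_t choose_restore_wptr(bool user_wptr_ok, uint32_t user_wptr,
				    uint32_t saved_rptr)
{
	return user_wptr_ok ? user_wptr : saved_rptr;
}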
@@ -388,7 +535,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct cik_sdma_rlc_registers *m;
+	struct vi_sdma_mqd *m;
 	uint32_t sdma_base_addr;
 	uint32_t sdma_rlc_rb_cntl;
 
@@ -509,10 +656,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 				unsigned int utimeout)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct cik_sdma_rlc_registers *m;
+	struct vi_sdma_mqd *m;
 	uint32_t sdma_base_addr;
 	uint32_t temp;
-	int timeout = utimeout;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
 	sdma_base_addr = get_sdma_base_addr(m);
@@ -523,18 +670,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 
 	while (true) {
 		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (timeout <= 0)
+		if (time_after(jiffies, end_jiffies))
 			return -ETIME;
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
 
 	return 0;
 }
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -562,7 +562,7 @@
 #define	PRIVATE_BASE(x)	((x) << 0) /* scratch */
 #define	SHARED_BASE(x)	((x) << 16) /* LDS */
 
-#define KFD_CIK_SDMA_QUEUE_OFFSET	0x200
+#define KFD_CIK_SDMA_QUEUE_OFFSET (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL)
 
 /* valid for both DEFAULT_MTYPE and APE1_MTYPE */
 enum {
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -48,6 +48,8 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
+#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */
+
 #define GFX7_NUM_GFX_RINGS     1
 #define GFX7_MEC_HPD_SIZE      2048
 
@@ -5277,6 +5279,11 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
+	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+	cu_info->max_waves_per_simd = 10;
+	cu_info->max_scratch_slots_per_cu = 32;
+	cu_info->wave_front_size = 64;
+	cu_info->lds_size = 64;
 }
 
 const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
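
The constants gfx_v7_0_get_cu_info() now reports describe per-CU
capacity, which the topology code can turn into occupancy figures:
4 SIMDs x 10 waves gives 40 wave slots per CU, and 64-lane wavefronts
bound the resident work-items per CU at 2560. Assuming only the
reported numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t simd_per_cu = 4;	/* NUM_SIMD_PER_CU */
	uint32_t max_waves_per_simd = 10;
	uint32_t wave_front_size = 64;

	uint32_t waves_per_cu = simd_per_cu * max_waves_per_simd;  /* 40 */
	uint32_t items_per_cu = waves_per_cu * wave_front_size;    /* 2560 */

	printf("%u waves, %u work-items per CU\n", waves_per_cu, items_per_cu);
	return 0;
}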
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7116,6 +7116,11 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
+	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+	cu_info->max_waves_per_simd = 10;
+	cu_info->max_scratch_slots_per_cu = 32;
+	cu_info->wave_front_size = 64;
+	cu_info->lds_size = 64;
 }
 
 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -27,6 +27,8 @@
 #define SDMA1_REGISTER_OFFSET 0x200 /* not a register */
 #define SDMA_MAX_INSTANCE 2
 
+#define KFD_VI_SDMA_QUEUE_OFFSET 0x80 /* not a register */
+
 /* crtc instance offsets */
 #define CRTC0_REGISTER_OFFSET                 (0x1b9c - 0x1b9c)
 #define CRTC1_REGISTER_OFFSET                 (0x1d9c - 0x1b9c)
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -35,6 +35,8 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
 		kfd_process_queue_manager.o kfd_device_queue_manager.o \
 		kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
 		kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
-		kfd_dbgdev.o kfd_dbgmgr.o
+		kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+
+amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
 
 obj-$(CONFIG_HSA_AMD) += amdkfd.o
[File diff suppressed because it is too large]
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep)
 		return -EPERM;
 	}
 
-	process = kfd_create_process(current);
+	process = kfd_create_process(filep);
 	if (IS_ERR(process))
 		return PTR_ERR(process);
 
@@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	q_properties->ctx_save_restore_area_address =
 			args->ctx_save_restore_address;
 	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
+	q_properties->ctl_stack_size = args->ctl_stack_size;
 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -431,6 +432,38 @@ out:
 	return err;
 }
 
+static int kfd_ioctl_set_trap_handler(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_trap_handler_args *args = data;
+	struct kfd_dev *dev;
+	int err = 0;
+	struct kfd_process_device *pdd;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	if (dev->dqm->ops.set_trap_handler(dev->dqm,
+					&pdd->qpd,
+					args->tba_addr,
+					args->tma_addr))
+		err = -EINVAL;
+
+out:
+	mutex_unlock(&p->mutex);
+
+	return err;
+}
+
 static int kfd_ioctl_dbg_register(struct file *filep,
 				struct kfd_process *p, void *data)
 {
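
A minimal user-space sketch of invoking the new ioctl, assuming a
kernel with this series applied so <linux/kfd_ioctl.h> exports the args
struct and AMDKFD_IOC_SET_TRAP_HANDLER; kfd_fd is a descriptor opened
on /dev/kfd, and error handling is elided:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int set_trap_handler(int kfd_fd, uint32_t gpu_id,
			    uint64_t tba, uint64_t tma)
{
	struct kfd_ioctl_set_trap_handler_args args = {
		.gpu_id = gpu_id,
		.tba_addr = tba,	/* trap handler entry point */
		.tma_addr = tma,	/* trap handler memory */
	};

	return ioctl(kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
}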
@@ -493,7 +526,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
 	long status;
 
 	dev = kfd_device_by_id(args->gpu_id);
-	if (!dev)
+	if (!dev || !dev->dbgmgr)
 		return -EINVAL;
 
 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -979,7 +1012,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 			kfd_ioctl_set_scratch_backing_va, 0),
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
-			kfd_ioctl_get_tile_config, 0)
+			kfd_ioctl_get_tile_config, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
+			kfd_ioctl_set_trap_handler, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
@@ -1088,6 +1124,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 	    KFD_MMAP_EVENTS_MASK) {
 		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
 		return kfd_event_mmap(process, vma);
+	} else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
+			KFD_MMAP_RESERVED_MEM_MASK) {
+		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
+		return kfd_reserved_mem_mmap(process, vma);
 	}
 
 	return -EFAULT;
[File diff suppressed because it is too large]
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -44,6 +44,10 @@
 
 #define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
 
+/* Compute Unit flags */
+#define COMPUTE_UNIT_CPU	(1 << 0)  /* Create Virtual CRAT for CPU */
+#define COMPUTE_UNIT_GPU	(1 << 1)  /* Create Virtual CRAT for GPU */
+
 struct crat_header {
 	uint32_t	signature;
 	uint32_t	length;
@@ -105,7 +109,7 @@ struct crat_subtype_computeunit {
 	uint8_t		wave_front_size;
 	uint8_t		num_banks;
 	uint16_t	micro_engine_id;
-	uint8_t		num_arrays;
+	uint8_t		array_count;
 	uint8_t		num_cu_per_array;
 	uint8_t		num_simd_per_cu;
 	uint8_t		max_slots_scatch_cu;
@@ -127,13 +131,14 @@ struct crat_subtype_memory {
 	uint8_t		length;
 	uint16_t	reserved;
 	uint32_t	flags;
-	uint32_t	promixity_domain;
+	uint32_t	proximity_domain;
 	uint32_t	base_addr_low;
 	uint32_t	base_addr_high;
 	uint32_t	length_low;
 	uint32_t	length_high;
 	uint32_t	width;
-	uint8_t		reserved2[CRAT_MEMORY_RESERVED_LENGTH];
+	uint8_t		visibility_type; /* for virtual (dGPU) CRAT */
+	uint8_t		reserved2[CRAT_MEMORY_RESERVED_LENGTH - 1];
 };
 
 /*
@@ -222,9 +227,12 @@ struct crat_subtype_ccompute {
 /*
 * HSA IO Link Affinity structure and definitions
 */
-#define CRAT_IOLINK_FLAGS_ENABLED	0x00000001
-#define CRAT_IOLINK_FLAGS_COHERENCY	0x00000002
-#define CRAT_IOLINK_FLAGS_RESERVED	0xfffffffc
+#define CRAT_IOLINK_FLAGS_ENABLED		(1 << 0)
+#define CRAT_IOLINK_FLAGS_NON_COHERENT		(1 << 1)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT	(1 << 2)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT	(1 << 3)
+#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA	(1 << 4)
+#define CRAT_IOLINK_FLAGS_RESERVED_MASK		0xffffffe0
 
 /*
 * IO interface types
@@ -232,10 +240,18 @@ struct crat_subtype_ccompute {
 #define CRAT_IOLINK_TYPE_UNDEFINED	0
 #define CRAT_IOLINK_TYPE_HYPERTRANSPORT	1
 #define CRAT_IOLINK_TYPE_PCIEXPRESS	2
-#define CRAT_IOLINK_TYPE_OTHER		3
+#define CRAT_IOLINK_TYPE_AMBA		3
+#define CRAT_IOLINK_TYPE_MIPI		4
+#define CRAT_IOLINK_TYPE_QPI_1_1	5
+#define CRAT_IOLINK_TYPE_RESERVED1	6
+#define CRAT_IOLINK_TYPE_RESERVED2	7
+#define CRAT_IOLINK_TYPE_RAPID_IO	8
+#define CRAT_IOLINK_TYPE_INFINIBAND	9
+#define CRAT_IOLINK_TYPE_RESERVED3	10
+#define CRAT_IOLINK_TYPE_OTHER		11
 #define CRAT_IOLINK_TYPE_MAX		255
 
 #define CRAT_IOLINK_RESERVED_LENGTH	24
 
 struct crat_subtype_iolink {
 	uint8_t		type;
@@ -291,4 +307,14 @@ struct cdit_header {
 
 #pragma pack()
 
+struct kfd_dev;
+
+int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
+void kfd_destroy_crat_image(void *crat_image);
+int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
+			 uint32_t proximity_domain);
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+				  int flags, struct kfd_dev *kdev,
+				  uint32_t proximity_domain);
+
 #endif /* KFD_CRAT_H_INCLUDED */
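
The new IO-link flag bits follow the thunk spec's negative convention:
capabilities are advertised by *clearing* a NO_* bit. So a topology
consumer checking for the 64-bit PCIe atomics the cover letter mentions
would test the link like this (hypothetical helper, not part of the
series):

#include <stdbool.h>
#include <stdint.h>

static bool iolink_supports_atomics64(uint32_t flags)
{
	return (flags & CRAT_IOLINK_FLAGS_ENABLED) &&
	       !(flags & CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT);
}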
|
@ -95,7 +95,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
|
||||||
ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
|
ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
|
||||||
|
|
||||||
ib_packet->control = (1 << 23) | (1 << 31) |
|
ib_packet->control = (1 << 23) | (1 << 31) |
|
||||||
((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
|
((size_in_bytes / 4) & 0xfffff);
|
||||||
|
|
||||||
ib_packet->bitfields5.pasid = pasid;
|
ib_packet->bitfields5.pasid = pasid;
|
||||||
|
|
||||||
|
@ -126,8 +126,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
|
||||||
|
|
||||||
rm_packet->header.opcode = IT_RELEASE_MEM;
|
rm_packet->header.opcode = IT_RELEASE_MEM;
|
||||||
rm_packet->header.type = PM4_TYPE_3;
|
rm_packet->header.type = PM4_TYPE_3;
|
||||||
rm_packet->header.count = sizeof(struct pm4__release_mem) /
|
rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
|
||||||
sizeof(unsigned int) - 2;
|
|
||||||
|
|
||||||
rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
|
rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||||
rm_packet->bitfields2.event_index =
|
rm_packet->bitfields2.event_index =
|
||||||
|
@ -652,8 +651,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
|
||||||
packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
|
packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
|
||||||
packets_vec[0].header.type = PM4_TYPE_3;
|
packets_vec[0].header.type = PM4_TYPE_3;
|
||||||
packets_vec[0].bitfields2.reg_offset =
|
packets_vec[0].bitfields2.reg_offset =
|
||||||
GRBM_GFX_INDEX / (sizeof(uint32_t)) -
|
GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
|
||||||
USERCONFIG_REG_BASE;
|
|
||||||
|
|
||||||
packets_vec[0].bitfields2.insert_vmid = 0;
|
packets_vec[0].bitfields2.insert_vmid = 0;
|
||||||
packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
|
packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
|
||||||
|
@ -661,8 +659,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
|
||||||
packets_vec[1].header.count = 1;
|
packets_vec[1].header.count = 1;
|
||||||
packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
|
packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
|
||||||
packets_vec[1].header.type = PM4_TYPE_3;
|
packets_vec[1].header.type = PM4_TYPE_3;
|
||||||
packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
|
packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
|
||||||
AMD_CONFIG_REG_BASE;
|
|
||||||
|
|
||||||
packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
|
packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
|
||||||
packets_vec[1].bitfields2.insert_vmid = 1;
|
packets_vec[1].bitfields2.insert_vmid = 1;
|
||||||
|
@ -678,8 +675,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
|
||||||
|
|
||||||
packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
|
packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
|
||||||
packets_vec[2].bitfields2.reg_offset =
|
packets_vec[2].bitfields2.reg_offset =
|
||||||
GRBM_GFX_INDEX / (sizeof(uint32_t)) -
|
GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
|
||||||
USERCONFIG_REG_BASE;
|
|
||||||
|
|
||||||
packets_vec[2].bitfields2.insert_vmid = 0;
|
packets_vec[2].bitfields2.insert_vmid = 0;
|
||||||
packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
|
packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
|
||||||
|
|
|
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2016-2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+#include "kfd_priv.h"
+
+static struct dentry *debugfs_root;
+
+static int kfd_debugfs_open(struct inode *inode, struct file *file)
+{
+	int (*show)(struct seq_file *, void *) = inode->i_private;
+
+	return single_open(file, show, NULL);
+}
+
+static const struct file_operations kfd_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kfd_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kfd_debugfs_init(void)
+{
+	struct dentry *ent;
+
+	debugfs_root = debugfs_create_dir("kfd", NULL);
+	if (!debugfs_root || debugfs_root == ERR_PTR(-ENODEV)) {
+		pr_warn("Failed to create kfd debugfs dir\n");
+		return;
+	}
+
+	ent = debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
+				  kfd_debugfs_mqds_by_process,
+				  &kfd_debugfs_fops);
+	if (!ent)
+		pr_warn("Failed to create mqds in kfd debugfs\n");
+
+	ent = debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root,
+				  kfd_debugfs_hqds_by_device,
+				  &kfd_debugfs_fops);
+	if (!ent)
+		pr_warn("Failed to create hqds in kfd debugfs\n");
+
+	ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
+				  kfd_debugfs_rls_by_device,
+				  &kfd_debugfs_fops);
+	if (!ent)
+		pr_warn("Failed to create rls in kfd debugfs\n");
+}
+
+void kfd_debugfs_fini(void)
+{
+	debugfs_remove_recursive(debugfs_root);
+}
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -27,6 +27,7 @@
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_pm4_headers_vi.h"
+#include "cwsr_trap_handler_gfx8.asm"
 
 #define MQD_SIZE_ALIGNED 768
 
@@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = {
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
-	.mqd_size_aligned = MQD_SIZE_ALIGNED
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = false,
 };
 
 static const struct kfd_device_info carrizo_device_info = {
@@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = {
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
-	.mqd_size_aligned = MQD_SIZE_ALIGNED
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = true,
 };
 
 struct kfd_deviceid {
@@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
 	return AMD_IOMMU_INV_PRI_RSP_INVALID;
 }
 
+static void kfd_cwsr_init(struct kfd_dev *kfd)
+{
+	if (cwsr_enable && kfd->device_info->supports_cwsr) {
+		BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+
+		kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+		kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+		kfd->cwsr_enabled = true;
+	}
+}
+
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
@@ -224,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
 			- kfd->vm_info.first_vmid_kfd + 1;
 
+	/* Verify module parameters regarding mapped process number*/
+	if ((hws_max_conc_proc < 0)
+			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
+		dev_err(kfd_device,
+			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
+			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
+			kfd->vm_info.vmid_num_kfd);
+		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
+	} else
+		kfd->max_proc_per_quantum = hws_max_conc_proc;
+
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
@@ -286,6 +311,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 		goto device_iommu_pasid_error;
 	}
 
+	kfd_cwsr_init(kfd);
+
 	if (kfd_resume(kfd))
 		goto kfd_resume_error;
 
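
The hws_max_conc_proc validation above implements the "multiple
processes per HWS quantum" feature from the cover letter: the module
parameter can never exceed the number of VMIDs carved out for KFD,
since HWS cannot map more concurrent processes than it has VMIDs. The
clamp in isolation, as a sketch:

static unsigned int clamp_max_proc(int hws_max_conc_proc,
				   unsigned int vmid_num_kfd)
{
	if (hws_max_conc_proc < 0 || hws_max_conc_proc > (int)vmid_num_kfd)
		return vmid_num_kfd;	/* out of range: fall back */
	return hws_max_conc_proc;
}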
@ -149,8 +149,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
|
||||||
|
|
||||||
static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
struct queue *q,
|
struct queue *q,
|
||||||
struct qcm_process_device *qpd,
|
struct qcm_process_device *qpd)
|
||||||
int *allocated_vmid)
|
|
||||||
{
|
{
|
||||||
int retval;
|
int retval;
|
||||||
|
|
||||||
|
@ -170,9 +169,11 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
if (retval)
|
if (retval)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
*allocated_vmid = qpd->vmid;
|
|
||||||
q->properties.vmid = qpd->vmid;
|
q->properties.vmid = qpd->vmid;
|
||||||
|
|
||||||
|
q->properties.tba_addr = qpd->tba_addr;
|
||||||
|
q->properties.tma_addr = qpd->tma_addr;
|
||||||
|
|
||||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
|
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
|
||||||
retval = create_compute_queue_nocpsch(dqm, q, qpd);
|
retval = create_compute_queue_nocpsch(dqm, q, qpd);
|
||||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||||
|
@ -181,10 +182,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
||||||
retval = -EINVAL;
|
retval = -EINVAL;
|
||||||
|
|
||||||
if (retval) {
|
if (retval) {
|
||||||
if (list_empty(&qpd->queues_list)) {
|
if (list_empty(&qpd->queues_list))
|
||||||
deallocate_vmid(dqm, qpd, q);
|
deallocate_vmid(dqm, qpd, q);
|
||||||
*allocated_vmid = 0;
|
|
||||||
}
|
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -809,16 +808,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
|
||||||
}
|
}
|
||||||
|
|
||||||
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||||
struct qcm_process_device *qpd, int *allocate_vmid)
|
struct qcm_process_device *qpd)
|
||||||
{
|
{
|
||||||
int retval;
|
int retval;
|
||||||
struct mqd_manager *mqd;
|
struct mqd_manager *mqd;
|
||||||
|
|
||||||
retval = 0;
|
retval = 0;
|
||||||
|
|
||||||
if (allocate_vmid)
|
|
||||||
*allocate_vmid = 0;
|
|
||||||
|
|
||||||
mutex_lock(&dqm->lock);
|
mutex_lock(&dqm->lock);
|
||||||
|
|
||||||
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
||||||
|
@ -846,6 +842,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
||||||
}
|
}
|
||||||
|
|
||||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||||
|
|
||||||
|
q->properties.tba_addr = qpd->tba_addr;
|
||||||
|
q->properties.tma_addr = qpd->tma_addr;
|
||||||
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
|
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
|
||||||
&q->gart_mqd_addr, &q->properties);
|
&q->gart_mqd_addr, &q->properties);
|
||||||
if (retval)
|
if (retval)
|
||||||
|
@ -1110,6 +1109,26 @@ out:
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int set_trap_handler(struct device_queue_manager *dqm,
|
||||||
|
struct qcm_process_device *qpd,
|
||||||
|
uint64_t tba_addr,
|
||||||
|
uint64_t tma_addr)
|
||||||
|
{
|
||||||
|
uint64_t *tma;
|
||||||
|
|
||||||
|
if (dqm->dev->cwsr_enabled) {
|
||||||
|
/* Jump from CWSR trap handler to user trap */
|
||||||
|
tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
|
||||||
|
tma[0] = tba_addr;
|
||||||
|
tma[1] = tma_addr;
|
||||||
|
} else {
|
||||||
|
qpd->tba_addr = tba_addr;
|
||||||
|
qpd->tma_addr = tma_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
||||||
struct qcm_process_device *qpd)
|
struct qcm_process_device *qpd)
|
||||||
{
|
{
|
||||||
|
@@ -1241,6 +1260,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.set_trap_handler = set_trap_handler;
 		dqm->ops.process_termination = process_termination_cpsch;
 		break;
 	case KFD_SCHED_POLICY_NO_HWS:

@@ -1256,6 +1276,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.initialize = initialize_nocpsch;
 		dqm->ops.uninitialize = uninitialize;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.set_trap_handler = set_trap_handler;
 		dqm->ops.process_termination = process_termination_nocpsch;
 		break;
 	default:
@@ -1290,3 +1311,74 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
 	dqm->ops.uninitialize(dqm);
 	kfree(dqm);
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+static void seq_reg_dump(struct seq_file *m,
+			 uint32_t (*dump)[2], uint32_t n_regs)
+{
+	uint32_t i, count;
+
+	for (i = 0, count = 0; i < n_regs; i++) {
+		if (count == 0 ||
+		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
+			seq_printf(m, "%s    %08x: %08x",
+				   i ? "\n" : "",
+				   dump[i][0], dump[i][1]);
+			count = 7;
+		} else {
+			seq_printf(m, " %08x", dump[i][1]);
+			count--;
+		}
+	}
+
+	seq_puts(m, "\n");
+}
+
+int dqm_debugfs_hqds(struct seq_file *m, void *data)
+{
+	struct device_queue_manager *dqm = data;
+	uint32_t (*dump)[2], n_regs;
+	int pipe, queue;
+	int r = 0;
+
+	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+		int pipe_offset = pipe * get_queues_per_pipe(dqm);
+
+		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
+			if (!test_bit(pipe_offset + queue,
+				      dqm->dev->shared_resources.queue_bitmap))
+				continue;
+
+			r = dqm->dev->kfd2kgd->hqd_dump(
+				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+			if (r)
+				break;
+
+			seq_printf(m, "  CP Pipe %d, Queue %d\n",
+				   pipe, queue);
+			seq_reg_dump(m, dump, n_regs);
+
+			kfree(dump);
+		}
+	}
+
+	for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
+		for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
+			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
+				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
+			if (r)
+				break;
+
+			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
+				   pipe, queue);
+			seq_reg_dump(m, dump, n_regs);
+
+			kfree(dump);
+		}
+	}
+
+	return r;
+}
+
+#endif
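The dump format deserves a word: seq_reg_dump() opens a new "offset: value" line whenever the next register is not contiguous with the previous one (offsets differing by sizeof(uint32_t)), and otherwise appends values until eight sit on the line (the one printed at line start plus count = 7 more). A contiguous run of ten dword registers starting at offset 0x2900 would therefore render roughly as (values illustrative only):

            00002900: 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000000
            00002920: 00000000 00000000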
@@ -84,8 +84,7 @@ struct device_process_node {
 struct device_queue_manager_ops {
 	int	(*create_queue)(struct device_queue_manager *dqm,
 				struct queue *q,
-				struct qcm_process_device *qpd,
-				int *allocate_vmid);
+				struct qcm_process_device *qpd);
 
 	int	(*destroy_queue)(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,

@@ -123,6 +122,11 @@ struct device_queue_manager_ops {
 				   void __user *alternate_aperture_base,
 				   uint64_t alternate_aperture_size);
 
+	int	(*set_trap_handler)(struct device_queue_manager *dqm,
+				    struct qcm_process_device *qpd,
+				    uint64_t tba_addr,
+				    uint64_t tma_addr);
+
 	int	(*process_termination)(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd);
 };
@@ -116,8 +116,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
 	pr_debug("doorbell aperture size == 0x%08lX\n",
 			kfd->shared_resources.doorbell_aperture_size);
 
-	pr_debug("doorbell kernel address == 0x%08lX\n",
-			(uintptr_t)kfd->doorbell_kernel_ptr);
+	pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
 
 	return 0;
 }

@@ -194,8 +193,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 
 	pr_debug("Get kernel queue doorbell\n"
 			"     doorbell offset   == 0x%08X\n"
-			"     kernel address    == 0x%08lX\n",
-		*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
+			"     kernel address    == %p\n",
+		*doorbell_off, (kfd->doorbell_kernel_ptr + inx));
 
 	return kfd->doorbell_kernel_ptr + inx;
 }

@@ -215,7 +214,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
 {
 	if (db) {
 		writel(value, db);
-		pr_debug("Writing %d to doorbell address 0x%p\n", value, db);
+		pr_debug("Writing %d to doorbell address %p\n", value, db);
 	}
 }
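The doorbell logging fixes above are not just cosmetic: casting a pointer to uintptr_t and printing it with 0x%08lX implies a 32-bit-wide value where kernel addresses are 64-bit, whereas %p lets printk format the pointer itself (and, on kernels of this vintage, hash it to avoid leaking addresses). A minimal before/after sketch:

        pr_debug("addr == 0x%08lX\n", (uintptr_t)ptr);  /* misleading width on 64-bit */
        pr_debug("addr == %p\n", ptr);                  /* let printk handle pointers */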
@@ -441,7 +441,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
 	 * to process context, kfd_process could attempt to exit while we are
-	 * running so the lookup function returns a locked process.
+	 * running so the lookup function increments the process ref count.
 	 */
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);

@@ -493,7 +493,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 	}
 
 	mutex_unlock(&p->event_mutex);
-	mutex_unlock(&p->mutex);
+	kfd_unref_process(p);
 }
 
 static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)

@@ -847,7 +847,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
 	 * to process context, kfd_process could attempt to exit while we are
-	 * running so the lookup function returns a locked process.
+	 * running so the lookup function increments the process ref count.
 	 */
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
 	struct mm_struct *mm;

@@ -860,7 +860,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 	 */
 	mm = get_task_mm(p->lead_thread);
 	if (!mm) {
-		mutex_unlock(&p->mutex);
+		kfd_unref_process(p);
 		return; /* Process is exiting */
 	}
 

@@ -903,7 +903,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 			&memory_exception_data);
 
 	mutex_unlock(&p->event_mutex);
-	mutex_unlock(&p->mutex);
+	kfd_unref_process(p);
 }
 
 void kfd_signal_hw_exception_event(unsigned int pasid)

@@ -911,7 +911,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
 	 * to process context, kfd_process could attempt to exit while we are
-	 * running so the lookup function returns a locked process.
+	 * running so the lookup function increments the process ref count.
 	 */
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);

@@ -924,5 +924,5 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
 	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
 
 	mutex_unlock(&p->event_mutex);
-	mutex_unlock(&p->mutex);
+	kfd_unref_process(p);
 }
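All of the kfd_events.c hunks above make the same substitution: kfd_lookup_process_by_pasid() now hands back a counted reference rather than a process locked by p->mutex, so these arbitrary-context (workqueue) paths hold only the narrower event_mutex. A sketch of the resulting calling convention, assuming the semantics described in the updated comments:

        struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);

        if (!p)
                return; /* process already gone */

        mutex_lock(&p->event_mutex);
        /* ... deliver the event ... */
        mutex_unlock(&p->event_mutex);

        kfd_unref_process(p);   /* drop the reference taken by the lookup */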
@@ -300,9 +300,14 @@ int kfd_init_apertures(struct kfd_process *process)
 	struct kfd_process_device *pdd;
 
 	/*Iterating over all devices*/
-	while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL &&
+	while (kfd_topology_enum_kfd_devices(id, &dev) == 0 &&
 		id < NUM_OF_SUPPORTED_GPUS) {
 
+		if (!dev) {
+			id++; /* Skip non GPU devices */
+			continue;
+		}
+
 		pdd = kfd_create_process_device_data(dev, process);
 		if (!pdd) {
 			pr_err("Failed to create process device data\n");
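The changed loop reflects the new enumeration contract (the prototype change appears in the kfd_priv.h hunks below): kfd_topology_enum_kfd_devices(idx, &dev) returns 0 for any valid topology index but may set *kdev to NULL for nodes that are not GPUs, now that CPUs are recognized as compute entities. NULL therefore means "skip", not "end of list". A minimal iteration sketch under that assumption:

        uint8_t id = 0;
        struct kfd_dev *dev;

        while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
                if (!dev) {
                        id++;   /* CPU-only topology node, no kfd_dev */
                        continue;
                }
                /* ... use dev ... */
                id++;
        }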
@@ -218,7 +218,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	rptr = *kq->rptr_kernel;
 	wptr = *kq->wptr_kernel;
 	queue_address = (unsigned int *)kq->pq_kernel_addr;
-	queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
+	queue_size_dwords = kq->queue->properties.queue_size / 4;
 
 	pr_debug("rptr: %d\n", rptr);
 	pr_debug("wptr: %d\n", wptr);
@@ -50,6 +50,15 @@ module_param(sched_policy, int, 0444);
 MODULE_PARM_DESC(sched_policy,
 	"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
 
+int hws_max_conc_proc = 8;
+module_param(hws_max_conc_proc, int, 0444);
+MODULE_PARM_DESC(hws_max_conc_proc,
+	"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+
+int cwsr_enable = 1;
+module_param(cwsr_enable, int, 0444);
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+
 int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
 module_param(max_num_of_queues_per_device, int, 0444);
 MODULE_PARM_DESC(max_num_of_queues_per_device,

@@ -60,6 +69,11 @@ module_param(send_sigterm, int, 0444);
 MODULE_PARM_DESC(send_sigterm,
 	"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
 
+int ignore_crat;
+module_param(ignore_crat, int, 0444);
+MODULE_PARM_DESC(ignore_crat,
+	"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+
 static int amdkfd_init_completed;
 
 int kgd2kfd_init(unsigned int interface_version,

@@ -114,6 +128,8 @@ static int __init kfd_module_init(void)
 
 	kfd_process_create_wq();
 
+	kfd_debugfs_init();
+
 	amdkfd_init_completed = 1;
 
 	dev_info(kfd_device, "Initialized module\n");

@@ -130,6 +146,7 @@ static void __exit kfd_module_exit(void)
 {
 	amdkfd_init_completed = 0;
 
+	kfd_debugfs_fini();
 	kfd_process_destroy_wq();
 	kfd_topology_shutdown();
 	kfd_chardev_exit();
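For reference, the three new knobs are ordinary module parameters, so they can be set at load time or on the kernel command line; a hypothetical invocation (values chosen purely for illustration):

        modprobe amdkfd hws_max_conc_proc=4 cwsr_enable=0 ignore_crat=1

or, with the driver built in, amdkfd.cwsr_enable=0 and friends on the kernel command line.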
@@ -85,6 +85,10 @@ struct mqd_manager {
 				uint64_t queue_address, uint32_t pipe_id,
 				uint32_t queue_id);
 
+#if defined(CONFIG_DEBUG_FS)
+	int	(*debugfs_show_mqd)(struct seq_file *m, void *data);
+#endif
+
 	struct mutex	mqd_mutex;
 	struct kfd_dev	*dev;
 };
@@ -36,6 +36,11 @@ static inline struct cik_mqd *get_mqd(void *mqd)
 	return (struct cik_mqd *)mqd;
 }
 
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+	return (struct cik_sdma_rlc_registers *)mqd;
+}
+
 static int init_mqd(struct mqd_manager *mm, void **mqd,
 		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
 		struct queue_properties *q)

@@ -149,7 +154,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
 {
 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
-	uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
 
 	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
 					  (uint32_t __user *)p->write_ptr,

@@ -160,7 +165,9 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
 			uint32_t pipe_id, uint32_t queue_id,
 			struct queue_properties *p, struct mm_struct *mms)
 {
-	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+					       (uint32_t __user *)p->write_ptr,
+					       mms);
 }
 
 static int update_mqd(struct mqd_manager *mm, void *mqd,

@@ -176,8 +183,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
 	 * Calculating queue size which is log base 2 of actual queue size -1
 	 * dwords and another -1 for ffs
 	 */
-	m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
-				- 1 - 1;
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
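The ffs() to order_base_2() conversions above (and in the VI manager below) are behavior-preserving for the power-of-two ring sizes KFD works with, since both encode log2 of the size in dwords, minus one. Worked through for a 4 KiB ring, i.e. q->queue_size / 4 = 1024 dwords:

        ffs(1024) - 1 - 1      = 11 - 1 - 1 = 9
        order_base_2(1024) - 1 = 10 - 1     = 9

order_base_2() states the intent directly and, unlike ffs(), rounds up sanely if a non-power-of-two size ever slips through.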
@@ -202,7 +208,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	struct cik_sdma_rlc_registers *m;
 
 	m = get_sdma_mqd(mqd);
-	m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+	m->sdma_rlc_rb_cntl = order_base_2(q->queue_size / 4)
 			<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
 			q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
 			1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |

@@ -343,8 +349,7 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 	 * Calculating queue size which is log base 2 of actual queue
 	 * size -1 dwords
 	 */
-	m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
-				- 1 - 1;
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);

@@ -360,15 +365,25 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 	return 0;
 }
 
-struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
-{
-	struct cik_sdma_rlc_registers *m;
-
-	m = (struct cik_sdma_rlc_registers *)mqd;
-
-	return m;
-}
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct cik_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct cik_sdma_rlc_registers), false);
+	return 0;
+}
+
+#endif
+
 
 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev)
 {
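The debugfs hooks above lean on the kernel's seq_hex_dump() helper; with rowsize 32 and groupsize 4, each output row covers 32 bytes rendered as eight 4-byte words, DUMP_PREFIX_OFFSET labels rows with the byte offset into the structure, and the trailing false suppresses the ASCII column. The result is a raw, offset-addressed image of the whole MQD that can be lined up against the struct cik_mqd definition.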
@@ -392,6 +407,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
 		break;
 	case KFD_MQD_TYPE_HIQ:
 		mqd->init_mqd = init_mqd_hiq;

@@ -400,6 +418,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd_hiq;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
 		break;
 	case KFD_MQD_TYPE_SDMA:
 		mqd->init_mqd = init_mqd_sdma;

@@ -408,6 +429,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd_sdma;
 		mqd->destroy_mqd = destroy_mqd_sdma;
 		mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
 		break;
 	default:
 		kfree(mqd);
@@ -30,7 +30,7 @@
 #include "vi_structs.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
-
+#include "oss/oss_3_0_sh_mask.h"
 #define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
 
 static inline struct vi_mqd *get_mqd(void *mqd)

@@ -38,6 +38,11 @@ static inline struct vi_mqd *get_mqd(void *mqd)
 	return (struct vi_mqd *)mqd;
 }
 
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct vi_sdma_mqd *)mqd;
+}
+
 static int init_mqd(struct mqd_manager *mm, void **mqd,
 			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
 			struct queue_properties *q)

@@ -84,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 	if (q->format == KFD_QUEUE_FORMAT_AQL)
 		m->cp_hqd_iq_rptr = 1;
 
+	if (q->tba_addr) {
+		m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
+		m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
+		m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
+		m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
+		m->compute_pgm_rsrc2 |=
+			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+	}
+
+	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
 	*mqd = m;
 	if (gart_addr)
 		*gart_addr = addr;
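Two details in the init_mqd hunk above are easy to read past. First, the TBA/TMA addresses are programmed shifted right by 8, which suggests the compute_tba/tma registers take addresses in 256-byte units (a handler at 0x7f0012340000 would be programmed as 0x7f00123400), consistent with the trap buffer being page-aligned; second, it is the TRAP_PRESENT bit in compute_pgm_rsrc2 that actually arms second-level trap handling for the queue's waves. The context-save fields then simply mirror the user-allocated CWSR area: base split into lo/hi, total size, and the control-stack carve-out whose size doubles as the offset where wave state begins.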
@@ -98,7 +125,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
 {
 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
-	uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
 
 	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
 					  (uint32_t __user *)p->write_ptr,

@@ -116,8 +143,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
 			atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
 			mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
-	m->cp_hqd_pq_control |=
-			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
 	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
 
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);

@@ -147,7 +173,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 	 * is safe, giving a maximum field value of 0xA.
 	 */
 	m->cp_hqd_eop_control |= min(0xA,
-		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+		order_base_2(q->eop_ring_buffer_size / 4) - 1);
 	m->cp_hqd_eop_base_addr_lo =
 			lower_32_bits(q->eop_ring_buffer_address >> 8);
 	m->cp_hqd_eop_base_addr_hi =

@@ -163,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 			2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
 	}
 
+	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+		m->cp_hqd_ctx_save_control =
+			atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
+			mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
+
 	q->is_active = (q->queue_size > 0 &&
 			q->queue_address != 0 &&
 			q->queue_percent > 0);
@@ -234,6 +265,117 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 	return retval;
 }
 
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	int retval;
+	struct vi_sdma_mqd *m;
+
+	retval = kfd_gtt_sa_allocate(mm->dev,
+			sizeof(struct vi_sdma_mqd),
+			mqd_mem_obj);
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+	memset(m, 0, sizeof(struct vi_sdma_mqd));
+
+	*mqd = m;
+	if (gart_addr != NULL)
+		*gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct kfd_mem_obj *mqd_mem_obj)
+{
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		uint32_t pipe_id, uint32_t queue_id,
+		struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+					       (uint32_t __user *)p->write_ptr,
+					       mms);
+}
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct queue_properties *q)
+{
+	struct vi_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_doorbell =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+	m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+
+	q->is_active = (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0);
+
+	return 0;
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type,
+		unsigned int timeout, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+		uint64_t queue_address, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct vi_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct vi_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+
 struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev)
 {

@@ -257,6 +399,9 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
 		break;
 	case KFD_MQD_TYPE_HIQ:
 		mqd->init_mqd = init_mqd_hiq;

@@ -265,8 +410,20 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd_hiq;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
 		break;
 	case KFD_MQD_TYPE_SDMA:
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->uninit_mqd = uninit_mqd_sdma;
+		mqd->load_mqd = load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = destroy_mqd_sdma;
+		mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
 		break;
 	default:
 		kfree(mqd);
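The large block above is, in essence, the CIK SDMA MQD support ported to the VI register layout: the per-queue registers are the sdmax_rlcx_* fields of struct vi_sdma_mqd rather than CIK's sdma_rlc_*, the ring size is again encoded as log2 dwords in RB_CNTL, and the engine/queue routing now travels inside the MQD (sdma_engine_id, sdma_queue_id) for hqd_sdma_load() to consume. Note that load_mqd_sdma() passes the user write pointer and mm_struct through to KGD, matching the CIK-side signature change earlier in this series.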
@@ -45,7 +45,7 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
 
 	header.u32All = 0;
 	header.opcode = opcode;
-	header.count = packet_size/sizeof(uint32_t) - 2;
+	header.count = packet_size / 4 - 2;
 	header.type = PM4_TYPE_3;
 
 	return header.u32All;

@@ -55,15 +55,27 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 			unsigned int *rlib_size,
 			bool *over_subscription)
 {
-	unsigned int process_count, queue_count;
+	unsigned int process_count, queue_count, compute_queue_count;
 	unsigned int map_queue_size;
+	unsigned int max_proc_per_quantum = 1;
+	struct kfd_dev *dev = pm->dqm->dev;
 
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->queue_count;
+	compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
 
-	/* check if there is over subscription*/
+	/* check if there is over subscription
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
 	*over_subscription = false;
-	if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
+
+	if (dev->max_proc_per_quantum > 1)
+		max_proc_per_quantum = dev->max_proc_per_quantum;
+
+	if ((process_count > max_proc_per_quantum) ||
+	    compute_queue_count > get_queues_num(pm->dqm)) {
 		*over_subscription = true;
 		pr_debug("Over subscribed runlist\n");
 	}
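A worked example of the relaxed over-subscription test: if KFD owns eight VMIDs and hws_max_conc_proc keeps its default of 8, dev->max_proc_per_quantum is 8 (the clamping against available VMIDs having happened at device init, per the comment). A runlist of six processes then no longer over-subscribes on process count, where the old test tripped as soon as process_count > 1; and because compute_queue_count excludes SDMA queues, user-mode SDMA queues no longer inflate the queue-count side of the check either.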
@@ -116,10 +128,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
 			uint64_t ib, size_t ib_size_in_dwords, bool chain)
 {
 	struct pm4_mes_runlist *packet;
+	int concurrent_proc_cnt = 0;
+	struct kfd_dev *kfd = pm->dqm->dev;
 
 	if (WARN_ON(!ib))
 		return -EFAULT;
 
+	/* Determine the number of processes to map together to HW:
+	 * it can not exceed the number of VMIDs available to the
+	 * scheduler, and it is determined by the smaller of the number
+	 * of processes in the runlist and kfd module parameter
+	 * hws_max_conc_proc.
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
+	concurrent_proc_cnt = min(pm->dqm->processes_count,
+			kfd->max_proc_per_quantum);
+
 	packet = (struct pm4_mes_runlist *)buffer;
 
 	memset(buffer, 0, sizeof(struct pm4_mes_runlist));

@@ -130,6 +156,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
 	packet->bitfields4.chain = chain ? 1 : 0;
 	packet->bitfields4.offload_polling = 0;
 	packet->bitfields4.valid = 1;
+	packet->bitfields4.process_cnt = concurrent_proc_cnt;
 	packet->ordinal2 = lower_32_bits(ib);
 	packet->bitfields3.ib_base_hi = upper_32_bits(ib);
 

@@ -251,6 +278,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		return retval;
 
 	*rl_size_bytes = alloc_size_bytes;
+	pm->ib_size_bytes = alloc_size_bytes;
 
 	pr_debug("Building runlist ib process count: %d queues count %d\n",
 		pm->dqm->processes_count, pm->dqm->queue_count);
@@ -564,3 +592,26 @@ void pm_release_ib(struct packet_manager *pm)
 	}
 	mutex_unlock(&pm->lock);
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+int pm_debugfs_runlist(struct seq_file *m, void *data)
+{
+	struct packet_manager *pm = data;
+
+	mutex_lock(&pm->lock);
+
+	if (!pm->allocated) {
+		seq_puts(m, "  No active runlist\n");
+		goto out;
+	}
+
+	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
+		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
+
+out:
+	mutex_unlock(&pm->lock);
+	return 0;
+}
+
+#endif
@@ -59,7 +59,7 @@ unsigned int kfd_pasid_alloc(void)
 	struct kfd_dev *dev = NULL;
 	unsigned int i = 0;
 
-	while ((dev = kfd_topology_enum_kfd_devices(i)) != NULL) {
+	while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) {
 		if (dev && dev->kfd2kgd) {
 			kfd2kgd = dev->kfd2kgd;
 			break;
@@ -33,6 +33,8 @@
 #include <linux/kfd_ioctl.h>
 #include <linux/idr.h>
 #include <linux/kfifo.h>
+#include <linux/seq_file.h>
+#include <linux/kref.h>
 #include <kgd_kfd_interface.h>
 
 #include "amd_shared.h"

@@ -41,6 +43,7 @@
 
 #define KFD_MMAP_DOORBELL_MASK 0x8000000000000
 #define KFD_MMAP_EVENTS_MASK 0x4000000000000
+#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000
 
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
 
@@ -62,6 +65,15 @@
 #define KFD_MAX_NUM_OF_PROCESSES 512
 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
 
+/*
+ * Size of the per-process TBA+TMA buffer: 2 pages
+ *
+ * The first page is the TBA used for the CWSR ISA code. The second
+ * page is used as TMA for daisy-chaining a user-mode trap handler.
+ */
+#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
+#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
+
 /*
  * Kernel module parameter to specify maximum number of supported queues per
  * device
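KFD_MMAP_RESERVED_MEM_MASK joins the existing doorbell and events masks as a third aperture selector encoded in the mmap offset: the high bits say what kind of mapping is being requested, the bits below carry the device id, and the whole thing is expressed in page units. The CWSR setup later in this series builds exactly such an offset, roughly:

        offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
        qpd->tba_addr = (int64_t)vm_mmap(filep, 0, KFD_CWSR_TBA_TMA_SIZE,
                                         PROT_READ | PROT_EXEC, MAP_SHARED, offset);

so the driver's mmap handler can route the request to the per-process reserved (TBA/TMA) pages.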
@@ -78,12 +90,26 @@ extern int max_num_of_queues_per_device;
 /* Kernel module parameter to specify the scheduling policy */
 extern int sched_policy;
 
+/*
+ * Kernel module parameter to specify the maximum process
+ * number per HW scheduler
+ */
+extern int hws_max_conc_proc;
+
+extern int cwsr_enable;
+
 /*
  * Kernel module parameter to specify whether to send sigterm to HSA process on
  * unhandled exception
  */
 extern int send_sigterm;
 
+/*
+ * Ignore CRAT table during KFD initialization, can be used to work around
+ * broken CRAT tables on some AMD systems
+ */
+extern int ignore_crat;
+
 /**
  * enum kfd_sched_policy
  *

@@ -131,6 +157,7 @@ struct kfd_device_info {
 	size_t ih_ring_entry_size;
 	uint8_t num_of_watch_points;
 	uint16_t mqd_size_aligned;
+	bool supports_cwsr;
 };
 
 struct kfd_mem_obj {

@@ -200,6 +227,14 @@ struct kfd_dev {
 
 	/* Debug manager */
 	struct kfd_dbgmgr           *dbgmgr;
+
+	/* Maximum process number mapped to HW scheduler */
+	unsigned int max_proc_per_quantum;
+
+	/* CWSR */
+	bool cwsr_enabled;
+	const void *cwsr_isa;
+	unsigned int cwsr_isa_size;
 };
 
 /* KGD2KFD callbacks */

@@ -332,6 +367,9 @@ struct queue_properties {
 	uint32_t eop_ring_buffer_size;
 	uint64_t ctx_save_restore_area_address;
 	uint32_t ctx_save_restore_area_size;
+	uint32_t ctl_stack_size;
+	uint64_t tba_addr;
+	uint64_t tma_addr;
 };
 
 /**

@@ -439,6 +477,11 @@ struct qcm_process_device {
 	uint32_t num_gws;
 	uint32_t num_oac;
 	uint32_t sh_hidden_private_base;
+
+	/* CWSR memory */
+	void *cwsr_kaddr;
+	uint64_t tba_addr;
+	uint64_t tma_addr;
 };
 
 

@@ -501,6 +544,9 @@ struct kfd_process {
 	 */
 	void *mm;
 
+	struct kref ref;
+	struct work_struct release_work;
+
 	struct mutex mutex;
 
 	/*

@@ -563,9 +609,10 @@ struct amdkfd_ioctl_desc {
 
 void kfd_process_create_wq(void);
 void kfd_process_destroy_wq(void);
-struct kfd_process *kfd_create_process(const struct task_struct *);
+struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *);
 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
+void kfd_unref_process(struct kfd_process *p);
 
 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
 						struct kfd_process *p);

@@ -577,6 +624,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 							struct kfd_process *p);
 
+int kfd_reserved_mem_mmap(struct kfd_process *process,
+			  struct vm_area_struct *vma);
+
 /* Process device data iterator */
 struct kfd_process_device *kfd_get_first_process_device_data(
 						struct kfd_process *p);

@@ -624,9 +674,12 @@ int kfd_topology_init(void);
 void kfd_topology_shutdown(void);
 int kfd_topology_add_device(struct kfd_dev *gpu);
 int kfd_topology_remove_device(struct kfd_dev *gpu);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+						uint32_t proximity_domain);
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
-struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
+int kfd_numa_node_to_apic_id(int numa_node_id);
 
 /* Interrupts */
 int kfd_interrupt_init(struct kfd_dev *dev);

@@ -643,8 +696,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd);
 int kfd_init_apertures(struct kfd_process *process);
 
 /* Queue Context Management */
-struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd);
-
 int init_queue(struct queue **q, const struct queue_properties *properties);
 void uninit_queue(struct queue *q);
 void print_queue_properties(struct queue_properties *q);

@@ -699,6 +750,7 @@ struct packet_manager {
 	struct mutex lock;
 	bool allocated;
 	struct kfd_mem_obj *ib_buffer_obj;
+	unsigned int ib_size_bytes;
 };
 
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);

@@ -745,4 +797,23 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
 
+/* Debugfs */
+#if defined(CONFIG_DEBUG_FS)
+
+void kfd_debugfs_init(void);
+void kfd_debugfs_fini(void);
+int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
+int pqm_debugfs_mqds(struct seq_file *m, void *data);
+int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
+int dqm_debugfs_hqds(struct seq_file *m, void *data);
+int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
+int pm_debugfs_runlist(struct seq_file *m, void *data);
+
+#else
+
+static inline void kfd_debugfs_init(void) {}
+static inline void kfd_debugfs_fini(void) {}
+
+#endif
+
 #endif
@@ -24,10 +24,12 @@
 #include <linux/log2.h>
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/sched/task.h>
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/notifier.h>
 #include <linux/compat.h>
+#include <linux/mman.h>
 
 struct mm_struct;
 

@@ -46,13 +48,12 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu);
 
 static struct workqueue_struct *kfd_process_wq;
 
-struct kfd_process_release_work {
-	struct work_struct kfd_work;
-	struct kfd_process *p;
-};
-
 static struct kfd_process *find_process(const struct task_struct *thread);
-static struct kfd_process *create_process(const struct task_struct *thread);
+static void kfd_process_ref_release(struct kref *ref);
+static struct kfd_process *create_process(const struct task_struct *thread,
+					struct file *filep);
+static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
+
 
 void kfd_process_create_wq(void)
 {

@@ -68,9 +69,10 @@ void kfd_process_destroy_wq(void)
 	}
 }
 
-struct kfd_process *kfd_create_process(const struct task_struct *thread)
+struct kfd_process *kfd_create_process(struct file *filep)
 {
 	struct kfd_process *process;
+	struct task_struct *thread = current;
 
 	if (!thread->mm)
 		return ERR_PTR(-EINVAL);

@@ -79,9 +81,6 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
 	if (thread->group_leader->mm != thread->mm)
 		return ERR_PTR(-EINVAL);
 
-	/* Take mmap_sem because we call __mmu_notifier_register inside */
-	down_write(&thread->mm->mmap_sem);
-
 	/*
 	 * take kfd processes mutex before starting of process creation
 	 * so there won't be a case where two threads of the same process

@@ -93,14 +92,11 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
 	process = find_process(thread);
 	if (process)
 		pr_debug("Process already found\n");
-
-	if (!process)
-		process = create_process(thread);
+	else
+		process = create_process(thread, filep);
 
 	mutex_unlock(&kfd_processes_mutex);
 
-	up_write(&thread->mm->mmap_sem);
-
 	return process;
 }
 
@@ -144,63 +140,75 @@ static struct kfd_process *find_process(const struct task_struct *thread)
 	return p;
 }
 
-static void kfd_process_wq_release(struct work_struct *work)
+void kfd_unref_process(struct kfd_process *p)
+{
+	kref_put(&p->ref, kfd_process_ref_release);
+}
+
+static void kfd_process_destroy_pdds(struct kfd_process *p)
 {
-	struct kfd_process_release_work *my_work;
 	struct kfd_process_device *pdd, *temp;
-	struct kfd_process *p;
-
-	my_work = (struct kfd_process_release_work *) work;
-
-	p = my_work->p;
-
-	pr_debug("Releasing process (pasid %d) in workqueue\n",
-			p->pasid);
-
-	mutex_lock(&p->mutex);
 
 	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
 				 per_device_list) {
-		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
+		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
 				pdd->dev->id, p->pasid);
 
-		if (pdd->bound == PDD_BOUND)
-			amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
-
 		list_del(&pdd->per_device_list);
 
+		if (pdd->qpd.cwsr_kaddr)
+			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
+				get_order(KFD_CWSR_TBA_TMA_SIZE));
+
 		kfree(pdd);
 	}
+}
+
+/* No process locking is needed in this function, because the process
+ * is not findable any more. We must assume that no other thread is
+ * using it any more, otherwise we couldn't safely free the process
+ * structure in the end.
+ */
+static void kfd_process_wq_release(struct work_struct *work)
+{
+	struct kfd_process *p = container_of(work, struct kfd_process,
+					     release_work);
+	struct kfd_process_device *pdd;
+
+	pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid);
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		if (pdd->bound == PDD_BOUND)
+			amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
+	}
+
+	kfd_process_destroy_pdds(p);
 
 	kfd_event_free_process(p);
 
 	kfd_pasid_free(p->pasid);
 	kfd_free_process_doorbells(p);
 
-	mutex_unlock(&p->mutex);
-
 	mutex_destroy(&p->mutex);
 
-	kfree(p);
+	put_task_struct(p->lead_thread);
 
-	kfree(work);
+	kfree(p);
+}
+
+static void kfd_process_ref_release(struct kref *ref)
+{
+	struct kfd_process *p = container_of(ref, struct kfd_process, ref);
+
+	INIT_WORK(&p->release_work, kfd_process_wq_release);
+	queue_work(kfd_process_wq, &p->release_work);
 }
 
 static void kfd_process_destroy_delayed(struct rcu_head *rcu)
 {
-	struct kfd_process_release_work *work;
-	struct kfd_process *p;
-
-	p = container_of(rcu, struct kfd_process, rcu);
-
-	mmdrop(p->mm);
-
-	work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
-
-	if (work) {
-		INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
-		work->p = p;
-		queue_work(kfd_process_wq, (struct work_struct *) work);
-	}
+	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
+
+	kfd_unref_process(p);
 }
 
 static void kfd_process_notifier_release(struct mmu_notifier *mn,
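The restructuring above splits teardown into stages tied together by the new kref. The final kfd_unref_process() triggers kfd_process_ref_release(), which merely queues kfd_process_wq_release() on the dedicated workqueue; presumably the last reference can be dropped from contexts (such as the MMU-notifier SRCU callback) where the heavyweight teardown must not run inline. The work item then frees everything without taking p->mutex, which is safe on the stated assumption that an unreferenced process can no longer be found. The chain, sketched:

        kfd_unref_process(p)
          -> kref_put(&p->ref, kfd_process_ref_release)
               -> INIT_WORK(&p->release_work, kfd_process_wq_release);
                  queue_work(kfd_process_wq, &p->release_work);
                       -> kfd_process_wq_release(): unbind PASIDs,
                          destroy pdds (freeing CWSR pages), free events,
                          PASID and doorbells, put_task_struct(), kfree(p)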
@ -244,15 +252,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
|
||||||
kfd_process_dequeue_from_all_devices(p);
|
kfd_process_dequeue_from_all_devices(p);
|
||||||
pqm_uninit(&p->pqm);
|
pqm_uninit(&p->pqm);
|
||||||
|
|
||||||
|
/* Indicate to other users that MM is no longer valid */
|
||||||
|
p->mm = NULL;
|
||||||
|
|
||||||
mutex_unlock(&p->mutex);
|
mutex_unlock(&p->mutex);
|
||||||
|
|
||||||
/*
|
mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
|
||||||
* Because we drop mm_count inside kfd_process_destroy_delayed
|
|
||||||
* and because the mmu_notifier_unregister function also drop
|
|
||||||
* mm_count we need to take an extra count here.
|
|
||||||
*/
|
|
||||||
mmgrab(p->mm);
|
|
||||||
mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
|
|
||||||
mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
|
mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
|
||||||
}
|
}
|
||||||
|
|
@@ -260,7 +265,44 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 	.release = kfd_process_notifier_release,
 };
 
-static struct kfd_process *create_process(const struct task_struct *thread)
+static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+{
+	unsigned long offset;
+	struct kfd_process_device *pdd = NULL;
+	struct kfd_dev *dev = NULL;
+	struct qcm_process_device *qpd = NULL;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		dev = pdd->dev;
+		qpd = &pdd->qpd;
+		if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
+			continue;
+		offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
+		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
+			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
+			MAP_SHARED, offset);
+
+		if (IS_ERR_VALUE(qpd->tba_addr)) {
+			int err = qpd->tba_addr;
+
+			pr_err("Failure to set tba address. error %d.\n", err);
+			qpd->tba_addr = 0;
+			qpd->cwsr_kaddr = NULL;
+			return err;
+		}
+
+		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+
+		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+	}
+
+	return 0;
+}
+
+static struct kfd_process *create_process(const struct task_struct *thread,
+					struct file *filep)
 {
 	struct kfd_process *process;
 	int err = -ENOMEM;
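kfd_process_init_cwsr() maps the CWSR trap-handler region by encoding the target device into the mmap offset: the device id is OR'ed with KFD_MMAP_RESERVED_MEM_MASK and shifted into page units. A sketch of the round trip (illustrative only; the exact value of KFD_MMAP_RESERVED_MEM_MASK is defined elsewhere and is assumed here to be a high tag bit):

    /* Byte offset handed to vm_mmap(); the kernel stores it in
     * vma->vm_pgoff as offset >> PAGE_SHIFT, so the fault path can
     * recover the device with kfd_device_by_id(vma->vm_pgoff).
     */
    unsigned long cwsr_mmap_offset(unsigned int dev_id)
    {
            return ((unsigned long)dev_id | KFD_MMAP_RESERVED_MEM_MASK)
                    << PAGE_SHIFT;
    }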
@@ -277,13 +319,15 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (kfd_alloc_process_doorbells(process) < 0)
 		goto err_alloc_doorbells;
 
+	kref_init(&process->ref);
+
 	mutex_init(&process->mutex);
 
 	process->mm = thread->mm;
 
 	/* register notifier */
 	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
-	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
+	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
 	if (err)
 		goto err_mmu_notifier;
 
@@ -291,6 +335,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 			(uintptr_t)process->mm);
 
 	process->lead_thread = thread->group_leader;
+	get_task_struct(process->lead_thread);
 
 	INIT_LIST_HEAD(&process->per_device_data);
 
@@ -306,8 +351,14 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (err != 0)
 		goto err_init_apertures;
 
+	err = kfd_process_init_cwsr(process, filep);
+	if (err)
+		goto err_init_cwsr;
+
 	return process;
 
+err_init_cwsr:
+	kfd_process_destroy_pdds(process);
 err_init_apertures:
 	pqm_uninit(&process->pqm);
 err_process_pqm_init:
@@ -343,16 +394,18 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	struct kfd_process_device *pdd = NULL;
 
 	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
-	if (pdd != NULL) {
-		pdd->dev = dev;
-		INIT_LIST_HEAD(&pdd->qpd.queues_list);
-		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
-		pdd->qpd.dqm = dev->dqm;
-		pdd->process = p;
-		pdd->bound = PDD_UNBOUND;
-		pdd->already_dequeued = false;
-		list_add(&pdd->per_device_list, &p->per_device_data);
-	}
+	if (!pdd)
+		return NULL;
+
+	pdd->dev = dev;
+	INIT_LIST_HEAD(&pdd->qpd.queues_list);
+	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+	pdd->qpd.dqm = dev->dqm;
+	pdd->qpd.pqm = &p->pqm;
+	pdd->process = p;
+	pdd->bound = PDD_UNBOUND;
+	pdd->already_dequeued = false;
+	list_add(&pdd->per_device_list, &p->per_device_data);
 
 	return pdd;
 }
@@ -483,6 +536,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
 
 	mutex_unlock(kfd_get_dbgmgr_mutex());
 
+	mutex_lock(&p->mutex);
+
 	pdd = kfd_get_process_device_data(dev, p);
 	if (pdd)
 		/* For GPU relying on IOMMU, we need to dequeue here
@@ -491,6 +546,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
 		kfd_process_dequeue_from_device(pdd);
 
 	mutex_unlock(&p->mutex);
+
+	kfd_unref_process(p);
 }
 
 struct kfd_process_device *kfd_get_first_process_device_data(
@@ -515,22 +572,86 @@ bool kfd_has_process_device_data(struct kfd_process *p)
 	return !(list_empty(&p->per_device_data));
 }
 
-/* This returns with process->mutex locked. */
+/* This increments the process->ref counter. */
 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
 {
-	struct kfd_process *p;
+	struct kfd_process *p, *ret_p = NULL;
 	unsigned int temp;
 
 	int idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
 		if (p->pasid == pasid) {
-			mutex_lock(&p->mutex);
+			kref_get(&p->ref);
+			ret_p = p;
 			break;
 		}
 	}
 
 	srcu_read_unlock(&kfd_processes_srcu, idx);
 
-	return p;
+	return ret_p;
 }
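Since the lookup now takes a reference rather than returning with the mutex held, every caller owns a reference it must drop. A minimal caller sketch (pasid and the work done under the lock are placeholders):

    struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);

    if (!p)
            return;         /* no such process, or it is being torn down */

    mutex_lock(&p->mutex);
    /* ... safe to use p here: the lookup's reference keeps it alive ... */
    mutex_unlock(&p->mutex);

    kfd_unref_process(p);   /* drop the reference taken by the lookup */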
+
+int kfd_reserved_mem_mmap(struct kfd_process *process,
+			  struct vm_area_struct *vma)
+{
+	struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
+	struct kfd_process_device *pdd;
+	struct qcm_process_device *qpd;
+
+	if (!dev)
+		return -EINVAL;
+	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
+		pr_err("Incorrect CWSR mapping size.\n");
+		return -EINVAL;
+	}
+
+	pdd = kfd_get_process_device_data(dev, process);
+	if (!pdd)
+		return -EINVAL;
+	qpd = &pdd->qpd;
+
+	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(KFD_CWSR_TBA_TMA_SIZE));
+	if (!qpd->cwsr_kaddr) {
+		pr_err("Error allocating per process CWSR buffer.\n");
+		return -ENOMEM;
+	}
+
+	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
+	/* Mapping pages to user process */
+	return remap_pfn_range(vma, vma->vm_start,
+			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
+			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
+}
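kfd_reserved_mem_mmap() is reached when the vm_mmap() call in kfd_process_init_cwsr() lands in the driver's mmap handler with a reserved-memory offset. A hedged sketch of that routing (the real dispatch lives in kfd_mmap() in kfd_chardev.c and may test the offset bits differently):

    static int kfd_mmap_sketch(struct file *filp, struct vm_area_struct *vma)
    {
            struct kfd_process *process = kfd_get_process(current);

            if (IS_ERR(process))
                    return PTR_ERR(process);

            if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
                KFD_MMAP_RESERVED_MEM_MASK)
                    return kfd_reserved_mem_mmap(process, vma);

            /* doorbell and event offset ranges omitted from this sketch */
            return -EFAULT;
    }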
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
+{
+	struct kfd_process *p;
+	unsigned int temp;
+	int r = 0;
+
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		seq_printf(m, "Process %d PASID %d:\n",
+			   p->lead_thread->tgid, p->pasid);
+
+		mutex_lock(&p->mutex);
+		r = pqm_debugfs_mqds(m, &p->pqm);
+		mutex_unlock(&p->mutex);
+
+		if (r)
+			break;
+	}
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -178,10 +178,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		return retval;
 
 	if (list_empty(&pdd->qpd.queues_list) &&
-	    list_empty(&pdd->qpd.priv_queue_list)) {
-		pdd->qpd.pqm = pqm;
+	    list_empty(&pdd->qpd.priv_queue_list))
 		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
-	}
 
 	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
 	if (!pqn) {
@@ -203,8 +201,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		pqn->q = q;
 		pqn->kq = NULL;
-		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
-						&q->properties.vmid);
+		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
 		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
@@ -224,8 +221,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		pqn->q = q;
 		pqn->kq = NULL;
-		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
-						&q->properties.vmid);
+		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd);
 		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
@@ -315,6 +311,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 	if (pqn->q) {
 		dqm = pqn->q->device->dqm;
 		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
+		if (retval) {
+			pr_debug("Destroy queue failed, returned %d\n", retval);
+			goto err_destroy_queue;
+		}
 		uninit_queue(pqn->q);
 	}
 
@@ -326,6 +326,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 	    list_empty(&pdd->qpd.priv_queue_list))
 		dqm->ops.unregister_process(dqm, &pdd->qpd);
 
+err_destroy_queue:
 	return retval;
 }
@@ -367,4 +368,67 @@ struct kernel_queue *pqm_get_kernel_queue(
 
 	return NULL;
 }
+
+#if defined(CONFIG_DEBUG_FS)
+
+int pqm_debugfs_mqds(struct seq_file *m, void *data)
+{
+	struct process_queue_manager *pqm = data;
+	struct process_queue_node *pqn;
+	struct queue *q;
+	enum KFD_MQD_TYPE mqd_type;
+	struct mqd_manager *mqd_manager;
+	int r = 0;
+
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		if (pqn->q) {
+			q = pqn->q;
+			switch (q->properties.type) {
+			case KFD_QUEUE_TYPE_SDMA:
+				seq_printf(m, "  SDMA queue on device %x\n",
+					   q->device->id);
+				mqd_type = KFD_MQD_TYPE_SDMA;
+				break;
+			case KFD_QUEUE_TYPE_COMPUTE:
+				seq_printf(m, "  Compute queue on device %x\n",
+					   q->device->id);
+				mqd_type = KFD_MQD_TYPE_CP;
+				break;
+			default:
+				seq_printf(m,
+				"  Bad user queue type %d on device %x\n",
+					   q->properties.type, q->device->id);
+				continue;
+			}
+			mqd_manager = q->device->dqm->ops.get_mqd_manager(
+				q->device->dqm, mqd_type);
+		} else if (pqn->kq) {
+			q = pqn->kq->queue;
+			mqd_manager = pqn->kq->mqd;
+			switch (q->properties.type) {
+			case KFD_QUEUE_TYPE_DIQ:
+				seq_printf(m, "  DIQ on device %x\n",
+					   pqn->kq->dev->id);
+				mqd_type = KFD_MQD_TYPE_HIQ;
+				break;
+			default:
+				seq_printf(m,
+				"  Bad kernel queue type %d on device %x\n",
+					   q->properties.type,
+					   pqn->kq->dev->id);
+				continue;
+			}
+		} else {
+			seq_printf(m,
+			"  Weird: Queue node with neither kernel nor user queue\n");
+			continue;
+		}
+
+		r = mqd_manager->debugfs_show_mqd(m, q->mqd);
+		if (r != 0)
+			break;
+	}
+
+	return r;
+}
+
+#endif
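Taken together with kfd_debugfs_mqds_by_process() above, reading the debugfs file walks every process and every queue. Going only by the seq_printf format strings in the two functions, the output would look roughly like this (PIDs, PASIDs, and device ids invented for illustration):

    Process 1234 PASID 32768:
      Compute queue on device 27b6
        <register dump emitted by the MQD manager's debugfs_show_mqd>
      SDMA queue on device 27b6
        <register dump emitted by the MQD manager's debugfs_show_mqd>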
[File diff suppressed because it is too large]
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -39,8 +39,13 @@
 #define HSA_CAP_WATCH_POINTS_SUPPORTED		0x00000080
 #define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00
 #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
-#define HSA_CAP_RESERVED			0xfffff000
-#define HSA_CAP_DOORBELL_PACKET_TYPE		0x00001000
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK	0x00003000
+#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT	12
+#define HSA_CAP_RESERVED			0xffffc000
+
+#define HSA_CAP_DOORBELL_TYPE_PRE_1_0		0x0
+#define HSA_CAP_DOORBELL_TYPE_1_0		0x1
+#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000
 
 struct kfd_node_properties {
 	uint32_t cpu_cores_count;
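The doorbell type now occupies bits 13:12 of the capability word, with two defined encodings. Decoding it is a mask-and-shift; in the sketch below, caps stands for the capability value a consumer reads from the topology:

    static inline unsigned int hsa_doorbell_type(uint32_t caps)
    {
            return (caps & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK) >>
                   HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT;
    }

    /* hsa_doorbell_type(caps) yields HSA_CAP_DOORBELL_TYPE_PRE_1_0 or
     * HSA_CAP_DOORBELL_TYPE_1_0.
     */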
@@ -91,8 +96,6 @@ struct kfd_mem_properties {
 	struct attribute	attr;
 };
 
-#define KFD_TOPOLOGY_CPU_SIBLINGS 256
-
 #define HSA_CACHE_TYPE_DATA		0x00000001
 #define HSA_CACHE_TYPE_INSTRUCTION	0x00000002
 #define HSA_CACHE_TYPE_CPU		0x00000004
@@ -109,7 +112,7 @@ struct kfd_cache_properties {
 	uint32_t		cache_assoc;
 	uint32_t		cache_latency;
 	uint32_t		cache_type;
-	uint8_t			sibling_map[KFD_TOPOLOGY_CPU_SIBLINGS];
+	uint8_t			sibling_map[CRAT_SIBLINGMAP_SIZE];
 	struct kobject		*kobj;
 	struct attribute	attr;
 };
@@ -132,24 +135,36 @@ struct kfd_iolink_properties {
 	struct attribute	attr;
 };
 
+struct kfd_perf_properties {
+	struct list_head	list;
+	char			block_name[16];
+	uint32_t		max_concurrent;
+	struct attribute_group	*attr_group;
+};
+
 struct kfd_topology_device {
 	struct list_head		list;
 	uint32_t			gpu_id;
+	uint32_t			proximity_domain;
 	struct kfd_node_properties	node_props;
-	uint32_t			mem_bank_count;
 	struct list_head		mem_props;
 	uint32_t			cache_count;
 	struct list_head		cache_props;
 	uint32_t			io_link_count;
 	struct list_head		io_link_props;
+	struct list_head		perf_props;
 	struct kfd_dev			*gpu;
 	struct kobject			*kobj_node;
 	struct kobject			*kobj_mem;
 	struct kobject			*kobj_cache;
 	struct kobject			*kobj_iolink;
+	struct kobject			*kobj_perf;
 	struct attribute		attr_gpuid;
 	struct attribute		attr_name;
 	struct attribute		attr_props;
+	uint8_t				oem_id[CRAT_OEMID_LENGTH];
+	uint8_t				oem_table_id[CRAT_OEMTABLEID_LENGTH];
+	uint32_t			oem_revision;
 };
 
 struct kfd_system_properties {
@@ -164,6 +179,12 @@ struct kfd_system_properties {
 	struct attribute	attr_props;
 };
 
+struct kfd_topology_device *kfd_create_topology_device(
+		struct list_head *device_list);
+void kfd_release_topology_device_list(struct list_head *device_list);
+
+extern bool amd_iommu_pc_supported(void);
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
 #endif /* __KFD_TOPOLOGY_H__ */
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -46,6 +46,28 @@ enum kfd_preempt_type {
 	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 };
 
+struct kfd_cu_info {
+	uint32_t num_shader_engines;
+	uint32_t num_shader_arrays_per_engine;
+	uint32_t num_cu_per_sh;
+	uint32_t cu_active_number;
+	uint32_t cu_ao_mask;
+	uint32_t simd_per_cu;
+	uint32_t max_waves_per_simd;
+	uint32_t wave_front_size;
+	uint32_t max_scratch_slots_per_cu;
+	uint32_t lds_size;
+	uint32_t cu_bitmap[4][4];
+};
+
+/* For getting GPU local memory information from KGD */
+struct kfd_local_mem_info {
+	uint64_t local_mem_size_private;
+	uint64_t local_mem_size_public;
+	uint32_t vram_width;
+	uint32_t mem_clk_max;
+};
+
 enum kgd_memory_pool {
 	KGD_POOL_SYSTEM_CACHEABLE = 1,
 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
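cu_bitmap is a per-shader-engine, per-shader-array mask of active compute units, so cu_active_number can be cross-checked by a population count. A small sketch (hweight32() is the kernel's 32-bit popcount; the 4x4 bounds come from the struct itself):

    static uint32_t count_active_cus(const struct kfd_cu_info *info)
    {
            uint32_t i, j, count = 0;

            for (i = 0; i < 4; i++)
                    for (j = 0; j < 4; j++)
                            count += hweight32(info->cu_bitmap[i][j]);

            return count;   /* expected to equal cu_active_number */
    }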
@@ -106,7 +128,7 @@ struct tile_config {
  *
  * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture
  *
- * @get_vmem_size: Retrieves (physical) size of VRAM
+ * @get_local_mem_info: Retrieves information about GPU local memory
  *
  * @get_gpu_clock_counter: Retrieves GPU clock counter
  *
@@ -131,6 +153,12 @@ struct tile_config {
  * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot.
  *		used only for no HWS mode.
  *
+ * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs.
+ *		Array is allocated with kmalloc, needs to be freed with kfree by caller.
+ *
+ * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs.
+ *		Array is allocated with kmalloc, needs to be freed with kfree by caller.
+ *
  * @hqd_is_occupies: Checks if a hqd slot is occupied.
  *
  * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
@@ -147,6 +175,10 @@ struct tile_config {
  *
  * @get_tile_config: Returns GPU-specific tiling mode information
  *
+ * @get_cu_info: Retrieves activated cu info
+ *
+ * @get_vram_usage: Returns current VRAM usage
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -158,7 +190,8 @@ struct kfd2kgd_calls {
 
 	void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj);
 
-	uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
+	void (*get_local_mem_info)(struct kgd_dev *kgd,
+			struct kfd_local_mem_info *mem_info);
 	uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd);
 
 	uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
@@ -184,7 +217,16 @@ struct kfd2kgd_calls {
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
 
-	int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
+	int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+
+	int (*hqd_dump)(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+
+	int (*hqd_sdma_dump)(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
 
 	bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
@@ -224,6 +266,10 @@ struct kfd2kgd_calls {
 	void (*set_scratch_backing_va)(struct kgd_dev *kgd,
 				uint64_t va, uint32_t vmid);
 	int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
+
+	void (*get_cu_info)(struct kgd_dev *kgd,
+			struct kfd_cu_info *cu_info);
+	uint64_t (*get_vram_usage)(struct kgd_dev *kgd);
 };
 
 /**
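The doc comment spells out the ownership contract for the new dump callbacks: the callee kmallocs the {address, value} array and the caller must kfree it. A hedged caller sketch (kfd2kgd, kgd, pipe_id and queue_id are placeholders):

    uint32_t (*dump)[2];    /* array of {register offset, value} pairs */
    uint32_t n_regs, i;
    int r;

    r = kfd2kgd->hqd_dump(kgd, pipe_id, queue_id, &dump, &n_regs);
    if (r)
            return r;

    for (i = 0; i < n_regs; i++)
            pr_info("reg %08x = %08x\n", dump[i][0], dump[i][1]);

    kfree(dump);    /* per the contract above, the caller frees it */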
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -153,6 +153,8 @@ struct vi_sdma_mqd {
 	uint32_t reserved_125;
 	uint32_t reserved_126;
 	uint32_t reserved_127;
+	uint32_t sdma_engine_id;
+	uint32_t sdma_queue_id;
 };
 
 struct vi_mqd {
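The two new trailing words let a user-mode SDMA queue's MQD carry its own engine and ring routing instead of having it implied by the HQD slot. A hedged sketch of how the VI MQD manager might fill them at queue-init time (the accessor and the queue-properties field names are assumptions; only the struct members above are from the patch):

    struct vi_sdma_mqd *m = get_sdma_mqd(mqd);  /* hypothetical accessor */

    m->sdma_engine_id = q->sdma_engine_id;      /* which SDMA engine */
    m->sdma_queue_id = q->sdma_queue_id;        /* ring slot on that engine */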
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
@@ -58,7 +58,8 @@ struct kfd_ioctl_create_queue_args {
 	__u64 eop_buffer_address;	/* to KFD */
 	__u64 eop_buffer_size;		/* to KFD */
 	__u64 ctx_save_restore_address;	/* to KFD */
-	__u64 ctx_save_restore_size;	/* to KFD */
+	__u32 ctx_save_restore_size;	/* to KFD */
+	__u32 ctl_stack_size;		/* to KFD */
 };
 
 struct kfd_ioctl_destroy_queue_args {
@@ -261,6 +262,13 @@ struct kfd_ioctl_get_tile_config_args {
 	 */
 };
 
+struct kfd_ioctl_set_trap_handler_args {
+	uint64_t tba_addr;	/* to KFD */
+	uint64_t tma_addr;	/* to KFD */
+	uint32_t gpu_id;	/* to KFD */
+	uint32_t pad;
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -321,7 +329,10 @@ struct kfd_ioctl_get_tile_config_args {
 #define AMDKFD_IOC_GET_TILE_CONFIG	\
 		AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
 
+#define AMDKFD_IOC_SET_TRAP_HANDLER	\
+		AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x13
+#define AMDKFD_COMMAND_END		0x14
 
 #endif
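With the new ioctl in place, a runtime installs a per-device trap handler by passing the TBA/TMA addresses it obtained for the GPU. A minimal user-space sketch (the fd comes from opening /dev/kfd; error handling trimmed):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kfd_ioctl.h>

    int set_trap_handler(int kfd_fd, uint32_t gpu_id,
                         uint64_t tba_addr, uint64_t tma_addr)
    {
            struct kfd_ioctl_set_trap_handler_args args = {
                    .tba_addr = tba_addr,   /* trap handler base address */
                    .tma_addr = tma_addr,   /* trap memory address */
                    .gpu_id = gpu_id,
            };

            return ioctl(kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &args);
    }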