Merge tag 'amd-drm-next-6.6-2023-08-11' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amdgpu:
- SDMA 6.1.0 support
- SMU 13.x fixes
- PSP 13.x fixes
- HDP 6.1 support
- SMUIO 14.0 support
- IH 6.1 support
- Coding style cleanups
- Misc display fixes
- Initial Freesync panel replay support
- RAS fixes
- SDMA 5.2 MGCG updates
- SR-IOV fixes
- DCN3+ gamma fix
- Revert zpos properly until IGT regression is fixed
- NBIO 7.9 fixes
- Use TTM to manage the doorbell BAR
- Async flip fix
- DPIA tracing support
- DCN 3.x TMDS HDMI fixes
- FRU fixes

amdkfd:
- Coding style cleanups
- SVM fixes
- Trap handler fixes
- Convert older APUs to use the dGPU path like newer APUs
- Drop the IOMMUv2 path as it is no longer used

radeon:
- Coding style cleanups

drm buddy:
- Fix debugging output

UAPI:
- A new memory pool was added to amdgpu_drm.h as part of converting
  doorbell BAR management to TTM, but userspace is blocked from
  allocating from it at this point, so nothing is really exposed yet

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230811211554.7804-1-alexander.deucher@amd.com
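The UAPI note above refers to the new doorbell memory pool (AMDGPU_GEM_DOMAIN_DOORBELL). As a minimal, self-contained sketch of the userspace-facing guard described there (mirroring the amdgpu_gem.c hunk later in this diff; the struct layout and the domain bit value here are simplified placeholders, not the real amdgpu_drm.h definitions):

```c
#include <errno.h>
#include <stdint.h>

/* Illustrative value only; the real definition comes from amdgpu_drm.h. */
#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40

/* Simplified stand-in for the GEM create ioctl input. */
struct gem_create_in {
	uint64_t bo_size;
	uint32_t domains;
};

/* Reject DOORBELL allocations until userspace code to use them exists,
 * as amdgpu_gem_create_ioctl() does in this series. */
static int gem_create_check(const struct gem_create_in *in)
{
	if (in->domains & AMDGPU_GEM_DOMAIN_DOORBELL)
		return -EINVAL;
	return 0;
}
```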
commit a8b273a8fd
@@ -129,7 +129,8 @@ amdgpu-y += \
	vega10_ih.o \
	vega20_ih.o \
	navi10_ih.o \
	ih_v6_0.o
	ih_v6_0.o \
	ih_v6_1.o

# add PSP block
amdgpu-y += \
@@ -1294,6 +1294,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
bool amdgpu_device_aspm_support_quirk(void);
@@ -226,16 +226,6 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
	kgd2kfd_suspend(adev->kfd.dev, run_pm);
}

int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume_iommu(adev->kfd.dev);

	return r;
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;
@@ -148,7 +148,6 @@ int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev);
int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
			const void *ih_ring_entry);
@@ -399,7 +398,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_resume_iommu(struct kfd_dev *kfd);
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
@@ -439,11 +437,6 @@ static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
}

static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
	return 0;
}

static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	return 0;
@@ -1133,9 +1133,9 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
	 * Program TBA registers
	 */
	WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO,
			lower_32_bits(tba_addr >> 8));
		     lower_32_bits(tba_addr >> 8));
	WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI,
			upper_32_bits(tba_addr >> 8));
		     upper_32_bits(tba_addr >> 8));

	/*
	 * Program TMA registers
@@ -1776,7 +1776,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
	struct amdgpu_device *adev = drm_to_adev(ddev);
	struct atom_context *ctx = adev->mode_info.atom_context;

	return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
	return sysfs_emit(buf, "%s\n", ctx->vbios_ver_str);
}

static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
@@ -89,8 +89,7 @@ struct atom_memory_info {

#define MAX_AC_TIMING_ENTRIES 16

struct atom_memory_clock_range_table
{
struct atom_memory_clock_range_table {
	u8 num_entries;
	u8 rsv[3];
	u32 mclk[MAX_AC_TIMING_ENTRIES];
@@ -118,14 +117,12 @@ struct atom_mc_reg_table {

#define MAX_VOLTAGE_ENTRIES 32

struct atom_voltage_table_entry
{
struct atom_voltage_table_entry {
	u16 value;
	u32 smio_low;
};

struct atom_voltage_table
{
struct atom_voltage_table {
	u32 count;
	u32 mask_low;
	u32 phase_delay;
@ -58,7 +58,7 @@ uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *ade
|
|||
if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
|
||||
index, &size, &frev, &crev, &data_offset)) {
|
||||
/* support firmware_info 3.1 + */
|
||||
if ((frev == 3 && crev >=1) || (frev > 3)) {
|
||||
if ((frev == 3 && crev >= 1) || (frev > 3)) {
|
||||
firmware_info = (union firmware_info *)
|
||||
(mode_info->atom_context->bios + data_offset);
|
||||
fw_cap = le32_to_cpu(firmware_info->v31.firmware_capability);
|
||||
|
@ -597,7 +597,7 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
|
|||
index, &size, &frev, &crev,
|
||||
&data_offset)) {
|
||||
/* support firmware_info 3.4 + */
|
||||
if ((frev == 3 && crev >=4) || (frev > 3)) {
|
||||
if ((frev == 3 && crev >= 4) || (frev > 3)) {
|
||||
firmware_info = (union firmware_info *)
|
||||
(mode_info->atom_context->bios + data_offset);
|
||||
/* The ras_rom_i2c_slave_addr should ideally
|
||||
|
@ -850,7 +850,7 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
|
|||
|
||||
firmware_info = (union firmware_info *)(ctx->bios + data_offset);
|
||||
|
||||
if (frev !=3)
|
||||
if (frev != 3)
|
||||
return -EINVAL;
|
||||
|
||||
switch (crev) {
|
||||
|
@ -909,7 +909,7 @@ int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
|
|||
}
|
||||
|
||||
index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1,
|
||||
asic_init);
|
||||
asic_init);
|
||||
if (amdgpu_atom_parse_cmd_header(mode_info->atom_context, index, &frev, &crev)) {
|
||||
if (frev == 2 && crev >= 1) {
|
||||
memset(&asic_init_ps_v2_1, 0, sizeof(asic_init_ps_v2_1));
|
||||
|
|
|
@ -118,8 +118,6 @@ static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function,
|
|||
union acpi_object atpx_arg_elements[2];
|
||||
struct acpi_object_list atpx_arg;
|
||||
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
|
||||
struct acpi_device *adev = container_of(handle, struct acpi_device, handle);
|
||||
struct device *dev = &adev->dev;
|
||||
|
||||
atpx_arg.count = 2;
|
||||
atpx_arg.pointer = &atpx_arg_elements[0];
|
||||
|
@ -141,8 +139,8 @@ static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function,
|
|||
|
||||
/* Fail only if calling the method fails and ATPX is supported */
|
||||
if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
|
||||
dev_err(dev, "failed to evaluate ATPX got %s\n",
|
||||
acpi_format_exception(status));
|
||||
pr_err("failed to evaluate ATPX got %s\n",
|
||||
acpi_format_exception(status));
|
||||
kfree(buffer.pointer);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -183,8 +181,6 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
|
|||
static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
|
||||
{
|
||||
u32 valid_bits = 0;
|
||||
struct acpi_device *adev = container_of(atpx->handle, struct acpi_device, handle);
|
||||
struct device *dev = &adev->dev;
|
||||
|
||||
if (atpx->functions.px_params) {
|
||||
union acpi_object *info;
|
||||
|
@ -199,7 +195,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
|
|||
|
||||
size = *(u16 *) info->buffer.pointer;
|
||||
if (size < 10) {
|
||||
dev_err(dev, "ATPX buffer is too small: %zu\n", size);
|
||||
pr_err("ATPX buffer is too small: %zu\n", size);
|
||||
kfree(info);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -232,11 +228,11 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
|
|||
atpx->is_hybrid = false;
|
||||
if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
|
||||
if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) {
|
||||
dev_info(dev, "ATPX Hybrid Graphics, forcing to ATPX\n");
|
||||
pr_warn("ATPX Hybrid Graphics, forcing to ATPX\n");
|
||||
atpx->functions.power_cntl = true;
|
||||
atpx->is_hybrid = false;
|
||||
} else {
|
||||
dev_info(dev, "ATPX Hybrid Graphics\n");
|
||||
pr_notice("ATPX Hybrid Graphics\n");
|
||||
/*
|
||||
* Disable legacy PM methods only when pcie port PM is usable,
|
||||
* otherwise the device might fail to power off or power on.
|
||||
|
@ -269,8 +265,6 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
|
|||
struct atpx_verify_interface output;
|
||||
size_t size;
|
||||
int err = 0;
|
||||
struct acpi_device *adev = container_of(atpx->handle, struct acpi_device, handle);
|
||||
struct device *dev = &adev->dev;
|
||||
|
||||
info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_VERIFY_INTERFACE, NULL);
|
||||
if (!info)
|
||||
|
@ -280,7 +274,7 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
|
|||
|
||||
size = *(u16 *) info->buffer.pointer;
|
||||
if (size < 8) {
|
||||
printk("ATPX buffer is too small: %zu\n", size);
|
||||
pr_err("ATPX buffer is too small: %zu\n", size);
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
@ -289,8 +283,8 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
|
|||
memcpy(&output, info->buffer.pointer, size);
|
||||
|
||||
/* TODO: check version? */
|
||||
dev_info(dev, "ATPX version %u, functions 0x%08x\n",
|
||||
output.version, output.function_bits);
|
||||
pr_notice("ATPX version %u, functions 0x%08x\n",
|
||||
output.version, output.function_bits);
|
||||
|
||||
amdgpu_atpx_parse_functions(&atpx->functions, output.function_bits);
|
||||
|
||||
|
|
|
@@ -460,7 +460,7 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
		return false;

success:
	adev->is_atom_fw = (adev->asic_type >= CHIP_VEGA10) ? true : false;
	adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
	return true;
}

@ -41,13 +41,13 @@ struct amdgpu_cgs_device {
|
|||
((struct amdgpu_cgs_device *)cgs_device)->adev
|
||||
|
||||
|
||||
static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
|
||||
static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned int offset)
|
||||
{
|
||||
CGS_FUNC_ADEV;
|
||||
return RREG32(offset);
|
||||
}
|
||||
|
||||
static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned offset,
|
||||
static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned int offset,
|
||||
uint32_t value)
|
||||
{
|
||||
CGS_FUNC_ADEV;
|
||||
|
@ -56,7 +56,7 @@ static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned of
|
|||
|
||||
static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
|
||||
enum cgs_ind_reg space,
|
||||
unsigned index)
|
||||
unsigned int index)
|
||||
{
|
||||
CGS_FUNC_ADEV;
|
||||
switch (space) {
|
||||
|
@ -84,7 +84,7 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
|
|||
|
||||
static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
|
||||
enum cgs_ind_reg space,
|
||||
unsigned index, uint32_t value)
|
||||
unsigned int index, uint32_t value)
|
||||
{
|
||||
CGS_FUNC_ADEV;
|
||||
switch (space) {
|
||||
|
@ -163,38 +163,38 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
|
|||
uint16_t fw_version = 0;
|
||||
|
||||
switch (type) {
|
||||
case CGS_UCODE_ID_SDMA0:
|
||||
fw_version = adev->sdma.instance[0].fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_SDMA1:
|
||||
fw_version = adev->sdma.instance[1].fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_CE:
|
||||
fw_version = adev->gfx.ce_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_PFP:
|
||||
fw_version = adev->gfx.pfp_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_ME:
|
||||
fw_version = adev->gfx.me_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_MEC:
|
||||
fw_version = adev->gfx.mec_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_MEC_JT1:
|
||||
fw_version = adev->gfx.mec_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_MEC_JT2:
|
||||
fw_version = adev->gfx.mec_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_RLC_G:
|
||||
fw_version = adev->gfx.rlc_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_STORAGE:
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("firmware type %d do not have version\n", type);
|
||||
break;
|
||||
case CGS_UCODE_ID_SDMA0:
|
||||
fw_version = adev->sdma.instance[0].fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_SDMA1:
|
||||
fw_version = adev->sdma.instance[1].fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_CE:
|
||||
fw_version = adev->gfx.ce_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_PFP:
|
||||
fw_version = adev->gfx.pfp_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_ME:
|
||||
fw_version = adev->gfx.me_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_MEC:
|
||||
fw_version = adev->gfx.mec_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_MEC_JT1:
|
||||
fw_version = adev->gfx.mec_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_CP_MEC_JT2:
|
||||
fw_version = adev->gfx.mec_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_RLC_G:
|
||||
fw_version = adev->gfx.rlc_fw_version;
|
||||
break;
|
||||
case CGS_UCODE_ID_STORAGE:
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("firmware type %d do not have version\n", type);
|
||||
break;
|
||||
}
|
||||
return fw_version;
|
||||
}
|
||||
|
@ -205,7 +205,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
|
|||
{
|
||||
CGS_FUNC_ADEV;
|
||||
|
||||
if ((CGS_UCODE_ID_SMU != type) && (CGS_UCODE_ID_SMU_SK != type)) {
|
||||
if (type != CGS_UCODE_ID_SMU && type != CGS_UCODE_ID_SMU_SK) {
|
||||
uint64_t gpu_addr;
|
||||
uint32_t data_size;
|
||||
const struct gfx_firmware_header_v1_0 *header;
|
||||
|
@ -232,7 +232,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
|
|||
info->mc_addr = gpu_addr;
|
||||
info->version = (uint16_t)le32_to_cpu(header->header.ucode_version);
|
||||
|
||||
if (CGS_UCODE_ID_CP_MEC == type)
|
||||
if (type == CGS_UCODE_ID_CP_MEC)
|
||||
info->image_size = le32_to_cpu(header->jt_offset) << 2;
|
||||
|
||||
info->fw_version = amdgpu_get_firmware_version(cgs_device, type);
|
||||
|
|
|
@@ -293,7 +293,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,

	if (!p->gang_size) {
		ret = -EINVAL;
		goto free_partial_kdata;
		goto free_all_kdata;
	}

	for (i = 0; i < p->gang_size; ++i) {
@@ -375,7 +375,7 @@ static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
{
	struct amdgpu_debugfs_gprwave_data *rd;

	rd = kzalloc(sizeof *rd, GFP_KERNEL);
	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
	if (!rd)
		return -ENOMEM;
	rd->adev = file_inode(file)->i_private;
@@ -388,6 +388,7 @@ static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file)
{
	struct amdgpu_debugfs_gprwave_data *rd = file->private_data;

	mutex_destroy(&rd->lock);
	kfree(file->private_data);
	return 0;
@ -164,71 +164,6 @@ static DEVICE_ATTR(pcie_replay_count, 0444,
|
|||
|
||||
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
|
||||
|
||||
/**
|
||||
* DOC: product_name
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the product name
|
||||
* for the device
|
||||
* The file product_name is used for this and returns the product name
|
||||
* as returned from the FRU.
|
||||
* NOTE: This is only available for certain server cards
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_device_get_product_name(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
|
||||
return sysfs_emit(buf, "%s\n", adev->product_name);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(product_name, 0444,
|
||||
amdgpu_device_get_product_name, NULL);
|
||||
|
||||
/**
|
||||
* DOC: product_number
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the part number
|
||||
* for the device
|
||||
* The file product_number is used for this and returns the part number
|
||||
* as returned from the FRU.
|
||||
* NOTE: This is only available for certain server cards
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_device_get_product_number(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
|
||||
return sysfs_emit(buf, "%s\n", adev->product_number);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(product_number, 0444,
|
||||
amdgpu_device_get_product_number, NULL);
|
||||
|
||||
/**
|
||||
* DOC: serial_number
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the serial number
|
||||
* for the device
|
||||
* The file serial_number is used for this and returns the serial number
|
||||
* as returned from the FRU.
|
||||
* NOTE: This is only available for certain server cards
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_device_get_serial_number(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
|
||||
return sysfs_emit(buf, "%s\n", adev->serial);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(serial_number, 0444,
|
||||
amdgpu_device_get_serial_number, NULL);
|
||||
|
||||
/**
|
||||
* amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
|
||||
|
@ -1296,6 +1231,32 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* On APUs with >= 64GB white flickering has been observed w/ SG enabled.
|
||||
* Disable S/G on such systems until we have a proper fix.
|
||||
* https://gitlab.freedesktop.org/drm/amd/-/issues/2354
|
||||
* https://gitlab.freedesktop.org/drm/amd/-/issues/2735
|
||||
*/
|
||||
bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
|
||||
{
|
||||
switch (amdgpu_sg_display) {
|
||||
case -1:
|
||||
break;
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
if ((totalram_pages() << (PAGE_SHIFT - 10)) +
|
||||
(adev->gmc.real_vram_size / 1024) >= 64000000) {
|
||||
DRM_WARN("Disabling S/G due to >=64GB RAM\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
|
||||
* speed switching. Until we have confirmation from Intel that a specific host
|
||||
|
@ -3275,12 +3236,6 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
|
|||
{
|
||||
int r;
|
||||
|
||||
if (!adev->in_s0ix) {
|
||||
r = amdgpu_amdkfd_resume_iommu(adev);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_device_ip_resume_phase1(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
@ -3524,9 +3479,6 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
|
|||
}
|
||||
|
||||
static const struct attribute *amdgpu_dev_attributes[] = {
|
||||
&dev_attr_product_name.attr,
|
||||
&dev_attr_product_number.attr,
|
||||
&dev_attr_serial_number.attr,
|
||||
&dev_attr_pcie_replay_count.attr,
|
||||
NULL
|
||||
};
|
||||
|
@ -3941,6 +3893,8 @@ fence_driver_init:
|
|||
if (r)
|
||||
dev_err(adev->dev, "Could not create amdgpu device attr\n");
|
||||
|
||||
amdgpu_fru_sysfs_init(adev);
|
||||
|
||||
if (IS_ENABLED(CONFIG_PERF_EVENTS))
|
||||
r = amdgpu_pmu_init(adev);
|
||||
if (r)
|
||||
|
@ -4060,6 +4014,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
|
|||
if (adev->ucode_sysfs_en)
|
||||
amdgpu_ucode_sysfs_fini(adev);
|
||||
sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
|
||||
amdgpu_fru_sysfs_fini(adev);
|
||||
|
||||
/* disable ras feature must before hw fini */
|
||||
amdgpu_ras_pre_fini(adev);
|
||||
|
@ -4576,6 +4531,7 @@ retry:
|
|||
r = amdgpu_virt_reset_gpu(adev);
|
||||
if (r)
|
||||
return r;
|
||||
amdgpu_irq_gpu_reset_resume_helper(adev);
|
||||
|
||||
/* some sw clean up VF needs to do before recover */
|
||||
amdgpu_virt_post_reset(adev);
|
||||
|
@ -4605,7 +4561,6 @@ retry:
|
|||
amdgpu_put_xgmi_hive(hive);
|
||||
|
||||
if (!r) {
|
||||
amdgpu_irq_gpu_reset_resume_helper(adev);
|
||||
r = amdgpu_ib_ring_tests(adev);
|
||||
|
||||
amdgpu_amdkfd_post_reset(adev);
|
||||
|
@ -4985,9 +4940,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
|
|||
dev_warn(tmp_adev->dev, "asic atom init failed!");
|
||||
} else {
|
||||
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
|
||||
r = amdgpu_amdkfd_resume_iommu(tmp_adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
r = amdgpu_device_ip_resume_phase1(tmp_adev);
|
||||
if (r)
|
||||
|
|
|
@ -65,6 +65,7 @@
|
|||
#include "soc21.h"
|
||||
#include "navi10_ih.h"
|
||||
#include "ih_v6_0.h"
|
||||
#include "ih_v6_1.h"
|
||||
#include "gfx_v10_0.h"
|
||||
#include "gfx_v11_0.h"
|
||||
#include "sdma_v5_0.h"
|
||||
|
@ -1702,6 +1703,9 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
|
|||
case IP_VERSION(6, 0, 2):
|
||||
amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block);
|
||||
break;
|
||||
case IP_VERSION(6, 1, 0):
|
||||
amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block);
|
||||
break;
|
||||
default:
|
||||
dev_err(adev->dev,
|
||||
"Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n",
|
||||
|
@ -1969,6 +1973,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
|
|||
case IP_VERSION(6, 0, 1):
|
||||
case IP_VERSION(6, 0, 2):
|
||||
case IP_VERSION(6, 0, 3):
|
||||
case IP_VERSION(6, 1, 0):
|
||||
amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
|
||||
break;
|
||||
default:
|
||||
|
@ -2448,6 +2453,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
|
|||
break;
|
||||
case IP_VERSION(6, 0, 0):
|
||||
case IP_VERSION(6, 0, 1):
|
||||
case IP_VERSION(6, 1, 0):
|
||||
adev->hdp.funcs = &hdp_v6_0_funcs;
|
||||
break;
|
||||
default:
|
||||
|
@ -2510,6 +2516,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
|
|||
break;
|
||||
case IP_VERSION(13, 0, 6):
|
||||
case IP_VERSION(13, 0, 8):
|
||||
case IP_VERSION(14, 0, 0):
|
||||
adev->smuio.funcs = &smuio_v13_0_6_funcs;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@@ -31,10 +31,15 @@ struct amdgpu_doorbell {
	/* doorbell mmio */
	resource_size_t base;
	resource_size_t size;
	u32 __iomem *ptr;

	/* Number of doorbells reserved for amdgpu kernel driver */
	u32 num_kernel_doorbells;

	/* Kernel doorbells */
	struct amdgpu_bo *kernel_doorbells;

	/* For CPU access of doorbells */
	uint32_t *cpu_addr;
};

/* Reserved doorbells for amdgpu (including multimedia).
@@ -350,6 +355,10 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 */
int amdgpu_doorbell_init(struct amdgpu_device *adev);
void amdgpu_doorbell_fini(struct amdgpu_device *adev);
int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
				      struct amdgpu_bo *db_bo,
				      uint32_t doorbell_index);

#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
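For readers following the doorbell rework, here is a standalone sketch of the index math that amdgpu_doorbell_index_on_bar() performs (as shown in the amdgpu_doorbell_mgr.c hunk below): the doorbell BO's byte offset within the BAR is converted to a dword index, and each doorbell occupies 64 bits, hence two dwords per index. The types and the free function here are simplified assumptions for illustration.

```c
#include <stdint.h>

/* db_bo_offset_bytes stands in for amdgpu_bo_gpu_offset_no_check(db_bo). */
static uint32_t doorbell_index_on_bar(uint64_t db_bo_offset_bytes,
				      uint32_t doorbell_index)
{
	/* doorbell index is 32-bit but each doorbell is 64-bit wide, so *2 */
	return (uint32_t)(db_bo_offset_bytes / sizeof(uint32_t)) +
	       doorbell_index * 2;
}
```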
@ -39,7 +39,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
|
|||
return 0;
|
||||
|
||||
if (index < adev->doorbell.num_kernel_doorbells)
|
||||
return readl(adev->doorbell.ptr + index);
|
||||
return readl(adev->doorbell.cpu_addr + index);
|
||||
|
||||
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
|
||||
return 0;
|
||||
|
@ -61,7 +61,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
|
|||
return;
|
||||
|
||||
if (index < adev->doorbell.num_kernel_doorbells)
|
||||
writel(v, adev->doorbell.ptr + index);
|
||||
writel(v, adev->doorbell.cpu_addr + index);
|
||||
else
|
||||
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
|
|||
return 0;
|
||||
|
||||
if (index < adev->doorbell.num_kernel_doorbells)
|
||||
return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
|
||||
return atomic64_read((atomic64_t *)(adev->doorbell.cpu_addr + index));
|
||||
|
||||
DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
|
||||
return 0;
|
||||
|
@ -103,11 +103,68 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
|
|||
return;
|
||||
|
||||
if (index < adev->doorbell.num_kernel_doorbells)
|
||||
atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
|
||||
atomic64_set((atomic64_t *)(adev->doorbell.cpu_addr + index), v);
|
||||
else
|
||||
DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_doorbell_index_on_bar - Find doorbell's absolute offset in BAR
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @db_bo: doorbell object's bo
|
||||
* @db_index: doorbell relative index in this doorbell object
|
||||
*
|
||||
* returns doorbell's absolute index in BAR
|
||||
*/
|
||||
uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
|
||||
struct amdgpu_bo *db_bo,
|
||||
uint32_t doorbell_index)
|
||||
{
|
||||
int db_bo_offset;
|
||||
|
||||
db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
|
||||
|
||||
/* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */
|
||||
return db_bo_offset / sizeof(u32) + doorbell_index * 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_doorbell_create_kernel_doorbells - Create kernel doorbells for graphics
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Creates doorbells for graphics driver usages.
|
||||
* returns 0 on success, error otherwise.
|
||||
*/
|
||||
int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
int size;
|
||||
|
||||
/* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */
|
||||
size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE);
|
||||
|
||||
/* Allocate an extra page for MES kernel usages (ring test) */
|
||||
adev->mes.db_start_dw_offset = size / sizeof(u32);
|
||||
size += PAGE_SIZE;
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev,
|
||||
size,
|
||||
PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_DOORBELL,
|
||||
&adev->doorbell.kernel_doorbells,
|
||||
NULL,
|
||||
(void **)&adev->doorbell.cpu_addr);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to allocate kernel doorbells, err=%d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
adev->doorbell.num_kernel_doorbells = size / sizeof(u32);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* GPU doorbell aperture helpers function.
|
||||
*/
|
||||
|
@ -127,7 +184,6 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev)
|
|||
adev->doorbell.base = 0;
|
||||
adev->doorbell.size = 0;
|
||||
adev->doorbell.num_kernel_doorbells = 0;
|
||||
adev->doorbell.ptr = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -140,31 +196,21 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev)
|
|||
adev->doorbell.base = pci_resource_start(adev->pdev, 2);
|
||||
adev->doorbell.size = pci_resource_len(adev->pdev, 2);
|
||||
|
||||
if (adev->enable_mes) {
|
||||
adev->doorbell.num_kernel_doorbells =
|
||||
adev->doorbell.size / sizeof(u32);
|
||||
} else {
|
||||
adev->doorbell.num_kernel_doorbells =
|
||||
min_t(u32, adev->doorbell.size / sizeof(u32),
|
||||
adev->doorbell_index.max_assignment+1);
|
||||
if (adev->doorbell.num_kernel_doorbells == 0)
|
||||
return -EINVAL;
|
||||
adev->doorbell.num_kernel_doorbells =
|
||||
min_t(u32, adev->doorbell.size / sizeof(u32),
|
||||
adev->doorbell_index.max_assignment + 1);
|
||||
if (adev->doorbell.num_kernel_doorbells == 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* For Vega, reserve and map two pages on doorbell BAR since SDMA
|
||||
* paging queue doorbell use the second page. The
|
||||
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
|
||||
* doorbells are in the first page. So with paging queue enabled,
|
||||
* the max num_kernel_doorbells should + 1 page (0x400 in dword)
|
||||
*/
|
||||
if (adev->asic_type >= CHIP_VEGA10)
|
||||
adev->doorbell.num_kernel_doorbells += 0x400;
|
||||
}
|
||||
|
||||
adev->doorbell.ptr = ioremap(adev->doorbell.base,
|
||||
adev->doorbell.num_kernel_doorbells *
|
||||
sizeof(u32));
|
||||
if (adev->doorbell.ptr == NULL)
|
||||
return -ENOMEM;
|
||||
/*
|
||||
* For Vega, reserve and map two pages on doorbell BAR since SDMA
|
||||
* paging queue doorbell use the second page. The
|
||||
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
|
||||
* doorbells are in the first page. So with paging queue enabled,
|
||||
* the max num_kernel_doorbells should + 1 page (0x400 in dword)
|
||||
*/
|
||||
if (adev->asic_type >= CHIP_VEGA10)
|
||||
adev->doorbell.num_kernel_doorbells += 0x400;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -178,6 +224,7 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev)
|
|||
*/
|
||||
void amdgpu_doorbell_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
iounmap(adev->doorbell.ptr);
|
||||
adev->doorbell.ptr = NULL;
|
||||
amdgpu_bo_free_kernel(&adev->doorbell.kernel_doorbells,
|
||||
NULL,
|
||||
(void **)&adev->doorbell.cpu_addr);
|
||||
}
|
||||
|
|
|
@ -26,30 +26,30 @@
|
|||
#include <drm/drm_drv.h>
|
||||
#include <drm/drm_fbdev_generic.h>
|
||||
#include <drm/drm_gem.h>
|
||||
#include <drm/drm_vblank.h>
|
||||
#include <drm/drm_managed.h>
|
||||
#include "amdgpu_drv.h"
|
||||
|
||||
#include <drm/drm_pciids.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/vga_switcheroo.h>
|
||||
#include <drm/drm_probe_helper.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <drm/drm_vblank.h>
|
||||
|
||||
#include <linux/cc_platform.h>
|
||||
#include <linux/dynamic_debug.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/vga_switcheroo.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_irq.h"
|
||||
#include "amdgpu_dma_buf.h"
|
||||
#include "amdgpu_sched.h"
|
||||
#include "amdgpu_fdinfo.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
#include "amdgpu_dma_buf.h"
|
||||
#include "amdgpu_drv.h"
|
||||
#include "amdgpu_fdinfo.h"
|
||||
#include "amdgpu_irq.h"
|
||||
#include "amdgpu_psp.h"
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "amdgpu_reset.h"
|
||||
#include "amdgpu_sched.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include "../amdxcp/amdgpu_xcp_drv.h"
|
||||
|
||||
/*
|
||||
|
@ -755,20 +755,6 @@ module_param(debug_largebar, int, 0444);
|
|||
MODULE_PARM_DESC(debug_largebar,
|
||||
"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
|
||||
|
||||
/**
|
||||
* DOC: ignore_crat (int)
|
||||
* Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
|
||||
* table to get information about AMD APUs. This option can serve as a workaround on
|
||||
* systems with a broken CRAT table.
|
||||
*
|
||||
* Default is auto (according to asic type, iommu_v2, and crat table, to decide
|
||||
* whether use CRAT)
|
||||
*/
|
||||
int ignore_crat;
|
||||
module_param(ignore_crat, int, 0444);
|
||||
MODULE_PARM_DESC(ignore_crat,
|
||||
"Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
|
||||
|
||||
/**
|
||||
* DOC: halt_if_hws_hang (int)
|
||||
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
|
||||
|
|
|
@ -212,3 +212,92 @@ Out:
|
|||
kfree(pia);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: product_name
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the product name
|
||||
* for the device
|
||||
* The file product_name is used for this and returns the product name
|
||||
* as returned from the FRU.
|
||||
* NOTE: This is only available for certain server cards
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_fru_product_name_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
|
||||
return sysfs_emit(buf, "%s\n", adev->product_name);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(product_name, 0444, amdgpu_fru_product_name_show, NULL);
|
||||
|
||||
/**
|
||||
* DOC: product_number
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the part number
|
||||
* for the device
|
||||
* The file product_number is used for this and returns the part number
|
||||
* as returned from the FRU.
|
||||
* NOTE: This is only available for certain server cards
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_fru_product_number_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
|
||||
return sysfs_emit(buf, "%s\n", adev->product_number);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(product_number, 0444, amdgpu_fru_product_number_show, NULL);
|
||||
|
||||
/**
|
||||
* DOC: serial_number
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the serial number
|
||||
* for the device
|
||||
* The file serial_number is used for this and returns the serial number
|
||||
* as returned from the FRU.
|
||||
* NOTE: This is only available for certain server cards
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_fru_serial_number_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = drm_to_adev(ddev);
|
||||
|
||||
return sysfs_emit(buf, "%s\n", adev->serial);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(serial_number, 0444, amdgpu_fru_serial_number_show, NULL);
|
||||
|
||||
static const struct attribute *amdgpu_fru_attributes[] = {
|
||||
&dev_attr_product_name.attr,
|
||||
&dev_attr_product_number.attr,
|
||||
&dev_attr_serial_number.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
if (!is_fru_eeprom_supported(adev, NULL))
|
||||
return 0;
|
||||
|
||||
return sysfs_create_files(&adev->dev->kobj, amdgpu_fru_attributes);
|
||||
}
|
||||
|
||||
void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
if (!is_fru_eeprom_supported(adev, NULL))
|
||||
return;
|
||||
|
||||
sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
|
||||
}
|
||||
|
|
|
@@ -25,5 +25,7 @@
#define __AMDGPU_FRU_EEPROM_H__

int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
int amdgpu_fru_sysfs_init(struct amdgpu_device *adev);
void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev);

#endif // __AMDGPU_FRU_EEPROM_H__
@@ -289,6 +289,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
	uint32_t handle, initial_domain;
	int r;

	/* reject DOORBELLs until userspace code to use it is available */
	if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL)
		return -EINVAL;

	/* reject invalid gem flags */
	if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
@ -39,120 +39,70 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
|
|||
PAGE_SIZE);
|
||||
}
|
||||
|
||||
int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev,
|
||||
unsigned int *doorbell_index)
|
||||
{
|
||||
int r = ida_simple_get(&adev->mes.doorbell_ida, 2,
|
||||
adev->mes.max_doorbell_slices,
|
||||
GFP_KERNEL);
|
||||
if (r > 0)
|
||||
*doorbell_index = r;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev,
|
||||
unsigned int doorbell_index)
|
||||
{
|
||||
if (doorbell_index)
|
||||
ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index);
|
||||
}
|
||||
|
||||
unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
|
||||
struct amdgpu_device *adev,
|
||||
uint32_t doorbell_index,
|
||||
unsigned int doorbell_id)
|
||||
{
|
||||
return ((doorbell_index *
|
||||
amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) +
|
||||
doorbell_id * 2);
|
||||
}
|
||||
|
||||
static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev,
|
||||
static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
|
||||
struct amdgpu_mes_process *process,
|
||||
int ip_type, uint64_t *doorbell_index)
|
||||
{
|
||||
unsigned int offset, found;
|
||||
struct amdgpu_mes *mes = &adev->mes;
|
||||
|
||||
if (ip_type == AMDGPU_RING_TYPE_SDMA) {
|
||||
if (ip_type == AMDGPU_RING_TYPE_SDMA)
|
||||
offset = adev->doorbell_index.sdma_engine[0];
|
||||
found = find_next_zero_bit(process->doorbell_bitmap,
|
||||
AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
|
||||
offset);
|
||||
} else {
|
||||
found = find_first_zero_bit(process->doorbell_bitmap,
|
||||
AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS);
|
||||
}
|
||||
else
|
||||
offset = 0;
|
||||
|
||||
if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) {
|
||||
found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
|
||||
if (found >= mes->num_mes_dbs) {
|
||||
DRM_WARN("No doorbell available\n");
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
set_bit(found, process->doorbell_bitmap);
|
||||
|
||||
*doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev,
|
||||
process->doorbell_index, found);
|
||||
set_bit(found, mes->doorbell_bitmap);
|
||||
|
||||
/* Get the absolute doorbell index on BAR */
|
||||
*doorbell_index = mes->db_start_dw_offset + found * 2;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev,
|
||||
static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
|
||||
struct amdgpu_mes_process *process,
|
||||
uint32_t doorbell_index)
|
||||
{
|
||||
unsigned int old, doorbell_id;
|
||||
unsigned int old, rel_index;
|
||||
struct amdgpu_mes *mes = &adev->mes;
|
||||
|
||||
doorbell_id = doorbell_index -
|
||||
(process->doorbell_index *
|
||||
amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32);
|
||||
doorbell_id /= 2;
|
||||
|
||||
old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap);
|
||||
/* Find the relative index of the doorbell in this object */
|
||||
rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
|
||||
old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
|
||||
WARN_ON(!old);
|
||||
}
|
||||
|
||||
static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
|
||||
{
|
||||
size_t doorbell_start_offset;
|
||||
size_t doorbell_aperture_size;
|
||||
size_t doorbell_process_limit;
|
||||
size_t aggregated_doorbell_start;
|
||||
int i;
|
||||
struct amdgpu_mes *mes = &adev->mes;
|
||||
|
||||
aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32);
|
||||
aggregated_doorbell_start =
|
||||
roundup(aggregated_doorbell_start, PAGE_SIZE);
|
||||
/* Bitmap for dynamic allocation of kernel doorbells */
|
||||
mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
|
||||
if (!mes->doorbell_bitmap) {
|
||||
DRM_ERROR("Failed to allocate MES doorbell bitmap\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE;
|
||||
doorbell_start_offset =
|
||||
roundup(doorbell_start_offset,
|
||||
amdgpu_mes_doorbell_process_slice(adev));
|
||||
mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
|
||||
for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
|
||||
adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
|
||||
set_bit(i, mes->doorbell_bitmap);
|
||||
}
|
||||
|
||||
doorbell_aperture_size = adev->doorbell.size;
|
||||
doorbell_aperture_size =
|
||||
rounddown(doorbell_aperture_size,
|
||||
amdgpu_mes_doorbell_process_slice(adev));
|
||||
|
||||
if (doorbell_aperture_size > doorbell_start_offset)
|
||||
doorbell_process_limit =
|
||||
(doorbell_aperture_size - doorbell_start_offset) /
|
||||
amdgpu_mes_doorbell_process_slice(adev);
|
||||
else
|
||||
return -ENOSPC;
|
||||
|
||||
adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32);
|
||||
adev->mes.max_doorbell_slices = doorbell_process_limit;
|
||||
|
||||
/* allocate Qword range for aggregated doorbell */
|
||||
for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
|
||||
adev->mes.aggregated_doorbells[i] =
|
||||
aggregated_doorbell_start / sizeof(u32) + i * 2;
|
||||
|
||||
DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
|
||||
{
|
||||
bitmap_free(adev->mes.doorbell_bitmap);
|
||||
}
|
||||
|
||||
int amdgpu_mes_init(struct amdgpu_device *adev)
|
||||
{
|
||||
int i, r;
|
||||
|
@ -251,6 +201,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
|
|||
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
|
||||
amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
|
||||
amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
|
||||
amdgpu_mes_doorbell_free(adev);
|
||||
|
||||
idr_destroy(&adev->mes.pasid_idr);
|
||||
idr_destroy(&adev->mes.gang_id_idr);
|
||||
|
@ -279,15 +230,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
process->doorbell_bitmap =
|
||||
kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
|
||||
BITS_PER_BYTE), GFP_KERNEL);
|
||||
if (!process->doorbell_bitmap) {
|
||||
DRM_ERROR("failed to allocate doorbell bitmap\n");
|
||||
kfree(process);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* allocate the process context bo and map it */
|
||||
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
|
@ -314,15 +256,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
|
|||
goto clean_up_ctx;
|
||||
}
|
||||
|
||||
/* allocate the starting doorbell index of the process */
|
||||
r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
|
||||
if (r < 0) {
|
||||
DRM_ERROR("failed to allocate doorbell for process\n");
|
||||
goto clean_up_pasid;
|
||||
}
|
||||
|
||||
DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
|
||||
|
||||
INIT_LIST_HEAD(&process->gang_list);
|
||||
process->vm = vm;
|
||||
process->pasid = pasid;
|
||||
|
@ -332,15 +265,12 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
|
|||
amdgpu_mes_unlock(&adev->mes);
|
||||
return 0;
|
||||
|
||||
clean_up_pasid:
|
||||
idr_remove(&adev->mes.pasid_idr, pasid);
|
||||
amdgpu_mes_unlock(&adev->mes);
|
||||
clean_up_ctx:
|
||||
amdgpu_mes_unlock(&adev->mes);
|
||||
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
|
||||
&process->proc_ctx_gpu_addr,
|
||||
&process->proc_ctx_cpu_ptr);
|
||||
clean_up_memory:
|
||||
kfree(process->doorbell_bitmap);
|
||||
kfree(process);
|
||||
return r;
|
||||
}
|
||||
|
@ -386,7 +316,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
|
|||
idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
|
||||
}
|
||||
|
||||
amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
|
||||
idr_remove(&adev->mes.pasid_idr, pasid);
|
||||
amdgpu_mes_unlock(&adev->mes);
|
||||
|
||||
|
@ -408,7 +337,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
|
|||
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
|
||||
&process->proc_ctx_gpu_addr,
|
||||
&process->proc_ctx_cpu_ptr);
|
||||
kfree(process->doorbell_bitmap);
|
||||
kfree(process);
|
||||
}
|
||||
|
||||
|
@ -682,7 +610,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
|
|||
*queue_id = queue->queue_id = r;
|
||||
|
||||
/* allocate a doorbell index for the queue */
|
||||
r = amdgpu_mes_queue_doorbell_get(adev, gang->process,
|
||||
r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,
|
||||
qprops->queue_type,
|
||||
&qprops->doorbell_off);
|
||||
if (r)
|
||||
|
@ -740,7 +668,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
|
|||
return 0;
|
||||
|
||||
clean_up_doorbell:
|
||||
amdgpu_mes_queue_doorbell_free(adev, gang->process,
|
||||
amdgpu_mes_kernel_doorbell_free(adev, gang->process,
|
||||
qprops->doorbell_off);
|
||||
clean_up_queue_id:
|
||||
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
|
||||
|
@ -795,7 +723,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
|
|||
queue_id);
|
||||
|
||||
list_del(&queue->list);
|
||||
amdgpu_mes_queue_doorbell_free(adev, gang->process,
|
||||
amdgpu_mes_kernel_doorbell_free(adev, gang->process,
|
||||
queue->doorbell_off);
|
||||
amdgpu_mes_unlock(&adev->mes);
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "amdgpu_irq.h"
|
||||
#include "kgd_kfd_interface.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "amdgpu_doorbell.h"
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#define AMDGPU_MES_MAX_COMPUTE_PIPES 8
|
||||
|
@ -76,7 +77,6 @@ struct amdgpu_mes {
|
|||
uint32_t kiq_version;
|
||||
|
||||
uint32_t total_max_queue;
|
||||
uint32_t doorbell_id_offset;
|
||||
uint32_t max_doorbell_slices;
|
||||
|
||||
uint64_t default_process_quantum;
|
||||
|
@ -128,6 +128,11 @@ struct amdgpu_mes {
|
|||
int (*kiq_hw_init)(struct amdgpu_device *adev);
|
||||
int (*kiq_hw_fini)(struct amdgpu_device *adev);
|
||||
|
||||
/* MES doorbells */
|
||||
uint32_t db_start_dw_offset;
|
||||
uint32_t num_mes_dbs;
|
||||
unsigned long *doorbell_bitmap;
|
||||
|
||||
/* ip specific functions */
|
||||
const struct amdgpu_mes_funcs *funcs;
|
||||
};
|
||||
|
@ -142,7 +147,6 @@ struct amdgpu_mes_process {
|
|||
uint64_t process_quantum;
|
||||
struct list_head gang_list;
|
||||
uint32_t doorbell_index;
|
||||
unsigned long *doorbell_bitmap;
|
||||
struct mutex doorbell_lock;
|
||||
};
|
||||
|
||||
|
@ -387,14 +391,6 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
|
|||
|
||||
int amdgpu_mes_self_test(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev,
|
||||
unsigned int *doorbell_index);
|
||||
void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev,
|
||||
unsigned int doorbell_index);
|
||||
unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
|
||||
struct amdgpu_device *adev,
|
||||
uint32_t doorbell_index,
|
||||
unsigned int doorbell_id);
|
||||
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
|
||||
|
||||
/*
|
||||
|
|
|
@ -45,6 +45,22 @@ int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->nbio.funcs && adev->nbio.funcs->get_pcie_replay_count)
|
||||
return adev->nbio.funcs->get_pcie_replay_count(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1)
|
||||
{
|
||||
if (adev->nbio.funcs->get_pcie_usage)
|
||||
adev->nbio.funcs->get_pcie_usage(adev, count0, count1);
|
||||
|
||||
}
|
||||
|
||||
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
|
||||
{
|
||||
int r;
|
||||
|
|
|
@ -99,6 +99,9 @@ struct amdgpu_nbio_funcs {
|
|||
int (*get_compute_partition_mode)(struct amdgpu_device *adev);
|
||||
u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
|
||||
u32 *supp_modes);
|
||||
u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
|
||||
void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
|
||||
uint64_t *count1);
|
||||
};
|
||||
|
||||
struct amdgpu_nbio {
|
||||
|
@ -111,5 +114,8 @@ struct amdgpu_nbio {
|
|||
};
|
||||
|
||||
int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
|
||||
void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1);
|
||||
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
|
||||
u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -158,6 +158,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
|
|||
c++;
|
||||
}
|
||||
|
||||
if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
|
||||
places[c].fpfn = 0;
|
||||
places[c].lpfn = 0;
|
||||
places[c].mem_type = AMDGPU_PL_DOORBELL;
|
||||
places[c].flags = 0;
|
||||
c++;
|
||||
}
|
||||
|
||||
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
|
||||
places[c].fpfn = 0;
|
||||
places[c].lpfn = 0;
|
||||
|
@ -477,7 +485,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
|
|||
goto fail;
|
||||
}
|
||||
|
||||
/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
|
||||
/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */
|
||||
return true;
|
||||
|
||||
fail:
|
||||
|
@ -1029,6 +1037,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo)
|
|||
} else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
|
||||
atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static const char * const amdgpu_vram_names[] = {
|
||||
|
|
|
@ -182,6 +182,8 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
|
|||
return AMDGPU_GEM_DOMAIN_GWS;
|
||||
case AMDGPU_PL_OA:
|
||||
return AMDGPU_GEM_DOMAIN_OA;
|
||||
case AMDGPU_PL_DOORBELL:
|
||||
return AMDGPU_GEM_DOMAIN_DOORBELL;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@@ -276,9 +276,8 @@ static void amdgpu_perf_read(struct perf_event *event)
	    (!pe->adev->df.funcs->pmc_get_count))
		return;

	prev = local64_read(&hwc->prev_count);
	do {
		prev = local64_read(&hwc->prev_count);

		switch (hwc->config_base) {
		case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
		case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
@@ -289,7 +288,7 @@ static void amdgpu_perf_read(struct perf_event *event)
			count = 0;
			break;
		}
	} while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev);
	} while (!local64_try_cmpxchg(&hwc->prev_count, &prev, count));

	local64_add(count - prev, &event->count);
}
@ -145,6 +145,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
|
|||
break;
|
||||
case IP_VERSION(13, 0, 6):
|
||||
ret = psp_init_cap_microcode(psp, ucode_prefix);
|
||||
ret &= psp_init_ta_microcode(psp, ucode_prefix);
|
||||
break;
|
||||
case IP_VERSION(13, 0, 10):
|
||||
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
|
||||
|
@ -438,14 +439,15 @@ static int psp_sw_init(void *handle)
|
|||
/* If psp runtime database exists, then
|
||||
* only enable two stage memory training
|
||||
* when TWO_STAGE_DRAM_TRAINING bit is set
|
||||
* in runtime database */
|
||||
* in runtime database
|
||||
*/
|
||||
mem_training_ctx->enable_mem_training = true;
|
||||
}
|
||||
|
||||
} else {
|
||||
/* If psp runtime database doesn't exist or
|
||||
* is invalid, force enable two stage memory
|
||||
* training */
|
||||
/* If psp runtime database doesn't exist or is
|
||||
* invalid, force enable two stage memory training
|
||||
*/
|
||||
mem_training_ctx->enable_mem_training = true;
|
||||
}
|
||||
|
||||
|
@ -797,7 +799,8 @@ static int psp_tmr_init(struct psp_context *psp)
|
|||
tmr_size = PSP_TMR_SIZE(psp->adev);
|
||||
|
||||
/* For ASICs support RLC autoload, psp will parse the toc
|
||||
* and calculate the total size of TMR needed */
|
||||
* and calculate the total size of TMR needed
|
||||
*/
|
||||
if (!amdgpu_sriov_vf(psp->adev) &&
|
||||
psp->toc.start_addr &&
|
||||
psp->toc.size_bytes &&
|
||||
|
@ -1137,9 +1140,9 @@ int psp_ta_init_shared_buf(struct psp_context *psp,
|
|||
struct ta_mem_context *mem_ctx)
|
||||
{
|
||||
/*
|
||||
* Allocate 16k memory aligned to 4k from Frame Buffer (local
|
||||
* physical) for ta to host memory
|
||||
*/
|
||||
* Allocate 16k memory aligned to 4k from Frame Buffer (local
|
||||
* physical) for ta to host memory
|
||||
*/
|
||||
return amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
|
||||
AMDGPU_GEM_DOMAIN_GTT,
|
||||
|
@ -1728,7 +1731,8 @@ int psp_ras_trigger_error(struct psp_context *psp,
|
|||
return -EINVAL;
|
||||
|
||||
/* If err_event_athub occurs error inject was successful, however
|
||||
return status from TA is no long reliable */
|
||||
* return status from TA is no long reliable
|
||||
*/
|
||||
if (amdgpu_ras_intr_triggered())
|
||||
return 0;
|
||||
|
||||
|
@ -2577,7 +2581,8 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
|
|||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
|
||||
ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
|
||||
/* PSP only receive one SDMA fw for sienna_cichlid,
|
||||
* as all four sdma fw are same */
|
||||
* as all four sdma fw are same
|
||||
*/
|
||||
continue;
|
||||
|
||||
psp_print_fw_hdr(psp, ucode);
|
||||
|
@ -2642,8 +2647,8 @@ static int psp_load_fw(struct amdgpu_device *adev)
|
|||
if (adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
ret = psp_xgmi_initialize(psp, false, true);
|
||||
/* Warning the XGMI seesion initialize failure
|
||||
* Instead of stop driver initialization
|
||||
*/
|
||||
* Instead of stop driver initialization
|
||||
*/
|
||||
if (ret)
|
||||
dev_err(psp->adev->dev,
|
||||
"XGMI: Failed to initialize XGMI session\n");
|
||||
|
|
|
@@ -39,6 +39,8 @@
 #define PSP_TMR_ALIGNMENT	0x100000
 #define PSP_FW_NAME_LEN		0x24
 
+extern const struct attribute_group amdgpu_flash_attr_group;
+
 enum psp_shared_mem_size {
 	PSP_ASD_SHARED_MEM_SIZE		= 0x0,
 	PSP_XGMI_SHARED_MEM_SIZE	= 0x4000,
@@ -78,8 +80,7 @@ enum psp_bootloader_cmd {
 	PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
 };
 
-enum psp_ring_type
-{
+enum psp_ring_type {
 	PSP_RING_TYPE__INVALID = 0,
 	/*
 	 * These values map to the way the PSP kernel identifies the
@@ -89,8 +90,7 @@ enum psp_ring_type
 	PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */
 };
 
-struct psp_ring
-{
+struct psp_ring {
 	enum psp_ring_type		ring_type;
 	struct psp_gfx_rb_frame	*ring_mem;
 	uint64_t			ring_mem_mc_addr;
@@ -107,8 +107,7 @@ enum psp_reg_prog_id {
 	PSP_REG_LAST
 };
 
-struct psp_funcs
-{
+struct psp_funcs {
 	int (*init_microcode)(struct psp_context *psp);
 	int (*bootloader_load_kdb)(struct psp_context *psp);
 	int (*bootloader_load_spl)(struct psp_context *psp);
@@ -307,8 +306,7 @@ struct psp_runtime_scpm_entry {
 	enum psp_runtime_scpm_authentication scpm_status;
 };
 
-struct psp_context
-{
+struct psp_context {
 	struct amdgpu_device		*adev;
 	struct psp_ring			km_ring;
 	struct psp_gfx_cmd_resp		*cmd;
 
@@ -35,6 +35,7 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "nbio_v4_3.h"
+#include "nbio_v7_9.h"
 #include "atom.h"
 #include "amdgpu_reset.h"
 
@@ -757,16 +758,6 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_ras_check_feature_allowed(struct amdgpu_device *adev,
-		struct ras_common_if *head)
-{
-	if (amdgpu_ras_is_feature_allowed(adev, head) ||
-		amdgpu_ras_is_poison_mode_supported(adev))
-		return 1;
-	else
-		return 0;
-}
-
 /* wrapper of psp_ras_enable_features */
 int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 		struct ras_common_if *head, bool enable)
@@ -778,7 +769,16 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	if (!con)
 		return -EINVAL;
 
-	if (head->block == AMDGPU_RAS_BLOCK__GFX) {
+	/* Do not enable ras feature if it is not allowed */
+	if (enable &&
+	    head->block != AMDGPU_RAS_BLOCK__GFX &&
+	    !amdgpu_ras_is_feature_allowed(adev, head))
+		goto out;
+
+	/* Only enable gfx ras feature from host side */
+	if (head->block == AMDGPU_RAS_BLOCK__GFX &&
+	    !amdgpu_sriov_vf(adev) &&
+	    !amdgpu_ras_intr_triggered()) {
 		info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
@@ -794,16 +794,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 				.error_type = amdgpu_ras_error_to_ta(head->type),
 			};
 		}
-	}
 
-	/* Do not enable if it is not allowed. */
-	if (enable && !amdgpu_ras_check_feature_allowed(adev, head))
-		goto out;
-
-	/* Only enable ras feature operation handle on host side */
-	if (head->block == AMDGPU_RAS_BLOCK__GFX &&
-	    !amdgpu_sriov_vf(adev) &&
-	    !amdgpu_ras_intr_triggered()) {
 		ret = psp_ras_enable_features(&adev->psp, info, enable);
 		if (ret) {
 			dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
@@ -2654,6 +2645,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 		 * check DF RAS */
 		adev->nbio.ras = &nbio_v4_3_ras;
 		break;
+	case IP_VERSION(7, 9, 0):
+		if (!adev->gmc.is_app_apu)
+			adev->nbio.ras = &nbio_v7_9_ras;
+		break;
 	default:
 		/* nbio ras is not available */
 		break;
@@ -2777,23 +2772,28 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
 			goto cleanup;
 	}
 
-	r = amdgpu_ras_sysfs_create(adev, ras_block);
-	if (r)
-		goto interrupt;
+	if (ras_obj->hw_ops &&
+	    (ras_obj->hw_ops->query_ras_error_count ||
+	     ras_obj->hw_ops->query_ras_error_status)) {
+		r = amdgpu_ras_sysfs_create(adev, ras_block);
+		if (r)
+			goto interrupt;
 
-	/* Those are the cached values at init.
-	 */
-	query_info = kzalloc(sizeof(struct ras_query_if), GFP_KERNEL);
-	if (!query_info)
-		return -ENOMEM;
-	memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
+		/* Those are the cached values at init.
+		 */
+		query_info = kzalloc(sizeof(*query_info), GFP_KERNEL);
+		if (!query_info)
+			return -ENOMEM;
+		memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
 
-	if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
-		atomic_set(&con->ras_ce_count, ce_count);
-		atomic_set(&con->ras_ue_count, ue_count);
+		if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
+			atomic_set(&con->ras_ce_count, ce_count);
+			atomic_set(&con->ras_ue_count, ue_count);
+		}
+
+		kfree(query_info);
 	}
 
-	kfree(query_info);
 	return 0;
 
 interrupt:
@@ -3148,6 +3148,10 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 	 * that the ras block supports ras function.
 	 */
 	if (!ret &&
+	    (block == AMDGPU_RAS_BLOCK__GFX ||
+	     block == AMDGPU_RAS_BLOCK__SDMA ||
+	     block == AMDGPU_RAS_BLOCK__VCN ||
+	     block == AMDGPU_RAS_BLOCK__JPEG) &&
 	    amdgpu_ras_is_poison_mode_supported(adev) &&
 	    amdgpu_ras_get_ras_block(adev, block, 0))
 		ret = 1;
 
@@ -90,6 +90,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
 		cur->node = block;
 		break;
 	case TTM_PL_TT:
+	case AMDGPU_PL_DOORBELL:
 		node = to_ttm_range_mgr_node(res)->mm_nodes;
 		while (start >= node->size << PAGE_SHIFT)
 			start -= node++->size << PAGE_SHIFT;
@@ -152,6 +153,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 		cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
 		break;
 	case TTM_PL_TT:
+	case AMDGPU_PL_DOORBELL:
 		node = cur->node;
 
 		cur->node = ++node;
 
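For context, amdgpu_res_first()/amdgpu_res_next() walk a TTM resource as a sequence of contiguous chunks; the two hunks above simply let AMDGPU_PL_DOORBELL reuse the TTM_PL_TT range-manager path. A rough sketch of the usual iteration pattern, under the assumption that handle_chunk() is a placeholder rather than a real driver function:

	struct amdgpu_res_cursor cursor;

	amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
	while (cursor.remaining) {
		/* cursor.start is the chunk offset inside the placement
		 * (VRAM, GTT or, after this change, the doorbell BAR),
		 * cursor.size its length in bytes. */
		handle_chunk(cursor.start, cursor.size);
		amdgpu_res_next(&cursor, cursor.size);
	}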
@@ -389,7 +389,7 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 	occupied = ring->wptr & ring->buf_mask;
 	dst = (void *)&ring->ring[occupied];
 	chunk1 = ring->buf_mask + 1 - occupied;
-	chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+	chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
 	chunk2 = count_dw - chunk1;
 	chunk1 <<= 2;
 	chunk2 <<= 2;
 
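The chunk1/chunk2 arithmetic in amdgpu_ring_write_multiple() splits a write that may wrap the ring: chunk1 is the part that fits before the end of the buffer, chunk2 the remainder that continues at offset 0, and the `<<= 2` converts dword counts to bytes. A standalone sketch of the same split (illustrative only; plain C with a power-of-two ring, not driver code):

	#include <stdint.h>
	#include <string.h>

	/* Copy count_dw dwords into a ring of (mask + 1) dwords, wrapping at the end. */
	static void ring_write_wrapped(uint32_t *ring, uint32_t mask, uint64_t wptr,
				       const uint32_t *src, unsigned int count_dw)
	{
		unsigned int occupied = wptr & mask;
		unsigned int chunk1 = mask + 1 - occupied;	/* dwords left before the end */
		unsigned int chunk2;

		chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
		chunk2 = count_dw - chunk1;			/* dwords that wrap to the start */

		memcpy(&ring[occupied], src, chunk1 * sizeof(uint32_t));
		if (chunk2)
			memcpy(ring, src + chunk1, chunk2 * sizeof(uint32_t));
	}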
@@ -432,7 +432,7 @@ TRACE_EVENT(amdgpu_vm_flush,
 	    ),
 	    TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
 		      __get_str(ring), __entry->vmid,
-		      __entry->vm_hub,__entry->pd_addr)
+		      __entry->vm_hub, __entry->pd_addr)
 );
 
 DECLARE_EVENT_CLASS(amdgpu_pasid,
@@ -494,7 +494,7 @@ TRACE_EVENT(amdgpu_cs_bo_status,
 );
 
 TRACE_EVENT(amdgpu_bo_move,
-	    TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
+	    TP_PROTO(struct amdgpu_bo *bo, uint32_t new_placement, uint32_t old_placement),
 	    TP_ARGS(bo, new_placement, old_placement),
 	    TP_STRUCT__entry(
 			__field(struct amdgpu_bo *, bo)
 
@@ -127,6 +127,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	case AMDGPU_PL_GDS:
 	case AMDGPU_PL_GWS:
 	case AMDGPU_PL_OA:
+	case AMDGPU_PL_DOORBELL:
 		placement->num_placement = 0;
 		placement->num_busy_placement = 0;
 		return;
@@ -496,9 +497,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 	if (old_mem->mem_type == AMDGPU_PL_GDS ||
 	    old_mem->mem_type == AMDGPU_PL_GWS ||
 	    old_mem->mem_type == AMDGPU_PL_OA ||
+	    old_mem->mem_type == AMDGPU_PL_DOORBELL ||
 	    new_mem->mem_type == AMDGPU_PL_GDS ||
 	    new_mem->mem_type == AMDGPU_PL_GWS ||
-	    new_mem->mem_type == AMDGPU_PL_OA) {
+	    new_mem->mem_type == AMDGPU_PL_OA ||
+	    new_mem->mem_type == AMDGPU_PL_DOORBELL) {
 		/* Nothing to save here */
 		ttm_bo_move_null(bo, new_mem);
 		goto out;
@@ -582,6 +585,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
 		mem->bus.offset += adev->gmc.aper_base;
 		mem->bus.is_iomem = true;
 		break;
+	case AMDGPU_PL_DOORBELL:
+		mem->bus.offset = mem->start << PAGE_SHIFT;
+		mem->bus.offset += adev->doorbell.base;
+		mem->bus.is_iomem = true;
+		mem->bus.caching = ttm_uncached;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -596,6 +605,10 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 
 	amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
 			 &cursor);
+
+	if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
+		return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+
 	return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
 }
 
@@ -1305,6 +1318,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
 		flags |= AMDGPU_PTE_VALID;
 
 	if (mem && (mem->mem_type == TTM_PL_TT ||
+		    mem->mem_type == AMDGPU_PL_DOORBELL ||
 		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
 		flags |= AMDGPU_PTE_SYSTEM;
 
@@ -1924,6 +1938,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
 		 (unsigned int)(gtt_size / (1024 * 1024)));
 
+	/* Initiailize doorbell pool on PCI BAR */
+	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
+	if (r) {
+		DRM_ERROR("Failed initializing doorbell heap.\n");
+		return r;
+	}
+
+	/* Create a boorbell page for kernel usages */
+	r = amdgpu_doorbell_create_kernel_doorbells(adev);
+	if (r) {
+		DRM_ERROR("Failed to initialize kernel doorbells.\n");
+		return r;
+	}
+
 	/* Initialize preemptible memory pool */
 	r = amdgpu_preempt_mgr_init(adev);
 	if (r) {
 
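Taken together, these hunks put the doorbell BAR under TTM: a new AMDGPU_PL_DOORBELL range manager is sized to adev->doorbell.size / PAGE_SIZE pages, and reserving I/O memory for such a resource resolves to an offset from adev->doorbell.base. Written out as standalone arithmetic (helper names here are illustrative, not driver API):

	#include <stdint.h>

	/* Bus address of page 'start_page' of a doorbell allocation, mirroring the
	 * io_mem_reserve hunk: offset = (start << PAGE_SHIFT) + doorbell base. */
	static uint64_t doorbell_bus_offset(uint64_t doorbell_base,
					    uint64_t start_page, unsigned int page_shift)
	{
		return doorbell_base + (start_page << page_shift);
	}

	/* Number of pages handed to the doorbell pool, mirroring the ttm_init hunk. */
	static uint64_t doorbell_pool_pages(uint64_t doorbell_size, unsigned int page_shift)
	{
		return doorbell_size >> page_shift;
	}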
@@ -33,12 +33,16 @@
 #define AMDGPU_PL_GWS		(TTM_PL_PRIV + 1)
 #define AMDGPU_PL_OA		(TTM_PL_PRIV + 2)
 #define AMDGPU_PL_PREEMPT	(TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DOORBELL	(TTM_PL_PRIV + 4)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
 
 #define AMDGPU_POISON	0xd0bed0be
 
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+
 struct hmm_range;
 
 struct amdgpu_gtt_mgr {
 
@@ -703,6 +703,8 @@ FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
 FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
 FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
 FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK);
 
 static struct attribute *fw_attrs[] = {
 	&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
@@ -716,6 +718,7 @@ static struct attribute *fw_attrs[] = {
 	&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
 	&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
 	&dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr,
+	&dev_attr_mes_fw_version.attr, &dev_attr_mes_kiq_fw_version.attr,
 	NULL
 };
 
@@ -520,7 +520,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
 		tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels;
 		adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels);
 	}
-	if((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
+	if ((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
 		adev->virt.is_mm_bw_enabled = true;
 
 	adev->unique_id =
 
@@ -442,8 +442,7 @@ static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
 	adev->irq.ih_funcs = &cik_ih_funcs;
 }
 
-const struct amdgpu_ip_block_version cik_ih_ip_block =
-{
+const struct amdgpu_ip_block_version cik_ih_ip_block = {
 	.type = AMD_IP_BLOCK_TYPE_IH,
 	.major = 2,
 	.minor = 0,
 
@ -52,8 +52,7 @@
|
|||
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
|
||||
static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
|
||||
|
||||
static const u32 crtc_offsets[] =
|
||||
{
|
||||
static const u32 crtc_offsets[] = {
|
||||
CRTC0_REGISTER_OFFSET,
|
||||
CRTC1_REGISTER_OFFSET,
|
||||
CRTC2_REGISTER_OFFSET,
|
||||
|
@ -63,8 +62,7 @@ static const u32 crtc_offsets[] =
|
|||
CRTC6_REGISTER_OFFSET
|
||||
};
|
||||
|
||||
static const u32 hpd_offsets[] =
|
||||
{
|
||||
static const u32 hpd_offsets[] = {
|
||||
HPD0_REGISTER_OFFSET,
|
||||
HPD1_REGISTER_OFFSET,
|
||||
HPD2_REGISTER_OFFSET,
|
||||
|
@ -121,30 +119,26 @@ static const struct {
|
|||
.hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
|
||||
} };
|
||||
|
||||
static const u32 golden_settings_tonga_a11[] =
|
||||
{
|
||||
static const u32 golden_settings_tonga_a11[] = {
|
||||
mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
|
||||
mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
|
||||
mmFBC_MISC, 0x1f311fff, 0x12300000,
|
||||
mmHDMI_CONTROL, 0x31000111, 0x00000011,
|
||||
};
|
||||
|
||||
static const u32 tonga_mgcg_cgcg_init[] =
|
||||
{
|
||||
static const u32 tonga_mgcg_cgcg_init[] = {
|
||||
mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
|
||||
mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
|
||||
};
|
||||
|
||||
static const u32 golden_settings_fiji_a10[] =
|
||||
{
|
||||
static const u32 golden_settings_fiji_a10[] = {
|
||||
mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
|
||||
mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
|
||||
mmFBC_MISC, 0x1f311fff, 0x12300000,
|
||||
mmHDMI_CONTROL, 0x31000111, 0x00000011,
|
||||
};
|
||||
|
||||
static const u32 fiji_mgcg_cgcg_init[] =
|
||||
{
|
||||
static const u32 fiji_mgcg_cgcg_init[] = {
|
||||
mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
|
||||
mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
|
||||
};
|
||||
|
@ -1425,8 +1419,7 @@ static void dce_v10_0_audio_enable(struct amdgpu_device *adev,
|
|||
enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
|
||||
}
|
||||
|
||||
static const u32 pin_offsets[] =
|
||||
{
|
||||
static const u32 pin_offsets[] = {
|
||||
AUD0_REGISTER_OFFSET,
|
||||
AUD1_REGISTER_OFFSET,
|
||||
AUD2_REGISTER_OFFSET,
|
||||
|
@ -1811,8 +1804,7 @@ static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
static const u32 vga_control_regs[6] =
|
||||
{
|
||||
static const u32 vga_control_regs[6] = {
|
||||
mmD1VGA_CONTROL,
|
||||
mmD2VGA_CONTROL,
|
||||
mmD3VGA_CONTROL,
|
||||
|
@ -3651,8 +3643,7 @@ static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev)
|
|||
adev->hpd_irq.funcs = &dce_v10_0_hpd_irq_funcs;
|
||||
}
|
||||
|
||||
const struct amdgpu_ip_block_version dce_v10_0_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version dce_v10_0_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_DCE,
|
||||
.major = 10,
|
||||
.minor = 0,
|
||||
|
@ -3660,8 +3651,7 @@ const struct amdgpu_ip_block_version dce_v10_0_ip_block =
|
|||
.funcs = &dce_v10_0_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version dce_v10_1_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version dce_v10_1_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_DCE,
|
||||
.major = 10,
|
||||
.minor = 1,
|
||||
|
|
|
@ -53,8 +53,7 @@
|
|||
static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev);
|
||||
static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev);
|
||||
|
||||
static const u32 crtc_offsets[6] =
|
||||
{
|
||||
static const u32 crtc_offsets[6] = {
|
||||
CRTC0_REGISTER_OFFSET,
|
||||
CRTC1_REGISTER_OFFSET,
|
||||
CRTC2_REGISTER_OFFSET,
|
||||
|
@ -63,8 +62,7 @@ static const u32 crtc_offsets[6] =
|
|||
CRTC5_REGISTER_OFFSET
|
||||
};
|
||||
|
||||
static const u32 hpd_offsets[] =
|
||||
{
|
||||
static const u32 hpd_offsets[] = {
|
||||
HPD0_REGISTER_OFFSET,
|
||||
HPD1_REGISTER_OFFSET,
|
||||
HPD2_REGISTER_OFFSET,
|
||||
|
@ -1345,9 +1343,9 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
|
|||
if (sad->channels > max_channels) {
|
||||
value = (sad->channels <<
|
||||
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
|
||||
(sad->byte2 <<
|
||||
(sad->byte2 <<
|
||||
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
|
||||
(sad->freq <<
|
||||
(sad->freq <<
|
||||
AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
|
||||
max_channels = sad->channels;
|
||||
}
|
||||
|
@ -1379,8 +1377,7 @@ static void dce_v8_0_audio_enable(struct amdgpu_device *adev,
|
|||
enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
|
||||
}
|
||||
|
||||
static const u32 pin_offsets[7] =
|
||||
{
|
||||
static const u32 pin_offsets[7] = {
|
||||
(0x1780 - 0x1780),
|
||||
(0x1786 - 0x1780),
|
||||
(0x178c - 0x1780),
|
||||
|
@ -1740,8 +1737,7 @@ static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
static const u32 vga_control_regs[6] =
|
||||
{
|
||||
static const u32 vga_control_regs[6] = {
|
||||
mmD1VGA_CONTROL,
|
||||
mmD2VGA_CONTROL,
|
||||
mmD3VGA_CONTROL,
|
||||
|
@ -1895,9 +1891,9 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
|
|||
case DRM_FORMAT_XBGR8888:
|
||||
case DRM_FORMAT_ABGR8888:
|
||||
fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
|
||||
(GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
(GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
|
||||
fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
|
||||
(GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
|
||||
(GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
|
||||
#ifdef __BIG_ENDIAN
|
||||
fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
|
||||
#endif
|
||||
|
@ -3151,7 +3147,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
|
|||
|
||||
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
|
||||
works = amdgpu_crtc->pflip_works;
|
||||
if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
|
||||
if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
|
||||
DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
|
||||
"AMDGPU_FLIP_SUBMITTED(%d)\n",
|
||||
amdgpu_crtc->pflip_status,
|
||||
|
@ -3544,8 +3540,7 @@ static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev)
|
|||
adev->hpd_irq.funcs = &dce_v8_0_hpd_irq_funcs;
|
||||
}
|
||||
|
||||
const struct amdgpu_ip_block_version dce_v8_0_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version dce_v8_0_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_DCE,
|
||||
.major = 8,
|
||||
.minor = 0,
|
||||
|
@ -3553,8 +3548,7 @@ const struct amdgpu_ip_block_version dce_v8_0_ip_block =
|
|||
.funcs = &dce_v8_0_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version dce_v8_1_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version dce_v8_1_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_DCE,
|
||||
.major = 8,
|
||||
.minor = 1,
|
||||
|
@ -3562,8 +3556,7 @@ const struct amdgpu_ip_block_version dce_v8_1_ip_block =
|
|||
.funcs = &dce_v8_0_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version dce_v8_2_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version dce_v8_2_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_DCE,
|
||||
.major = 8,
|
||||
.minor = 2,
|
||||
|
@ -3571,8 +3564,7 @@ const struct amdgpu_ip_block_version dce_v8_2_ip_block =
|
|||
.funcs = &dce_v8_0_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version dce_v8_3_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version dce_v8_3_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_DCE,
|
||||
.major = 8,
|
||||
.minor = 3,
|
||||
|
@ -3580,8 +3572,7 @@ const struct amdgpu_ip_block_version dce_v8_3_ip_block =
|
|||
.funcs = &dce_v8_0_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version dce_v8_5_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version dce_v8_5_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_DCE,
|
||||
.major = 8,
|
||||
.minor = 5,
|
||||
|
|
|
@ -471,8 +471,12 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
|
|||
case IP_VERSION(11, 0, 3):
|
||||
if ((adev->gfx.me_fw_version >= 1505) &&
|
||||
(adev->gfx.pfp_fw_version >= 1600) &&
|
||||
(adev->gfx.mec_fw_version >= 512))
|
||||
adev->gfx.cp_gfx_shadow = true;
|
||||
(adev->gfx.mec_fw_version >= 512)) {
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
adev->gfx.cp_gfx_shadow = true;
|
||||
else
|
||||
adev->gfx.cp_gfx_shadow = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
adev->gfx.cp_gfx_shadow = false;
|
||||
|
@ -4650,26 +4654,6 @@ static int gfx_v11_0_early_init(void *handle)
|
|||
return gfx_v11_0_init_microcode(adev);
|
||||
}
|
||||
|
||||
static int gfx_v11_0_ras_late_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
struct ras_common_if *gfx_common_if;
|
||||
int ret;
|
||||
|
||||
gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
|
||||
if (!gfx_common_if)
|
||||
return -ENOMEM;
|
||||
|
||||
gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
|
||||
|
||||
ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
|
||||
if (ret)
|
||||
dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n");
|
||||
|
||||
kfree(gfx_common_if);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gfx_v11_0_late_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
@ -4683,12 +4667,6 @@ static int gfx_v11_0_late_init(void *handle)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
|
||||
r = gfx_v11_0_ras_late_init(handle);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -90,8 +90,7 @@ MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
|
|||
MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
|
||||
MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
|
||||
|
||||
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
|
||||
{
|
||||
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = {
|
||||
{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
|
||||
{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
|
||||
{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
|
||||
|
@ -110,8 +109,7 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
|
|||
{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
|
||||
};
|
||||
|
||||
static const u32 spectre_rlc_save_restore_register_list[] =
|
||||
{
|
||||
static const u32 spectre_rlc_save_restore_register_list[] = {
|
||||
(0x0e00 << 16) | (0xc12c >> 2),
|
||||
0x00000000,
|
||||
(0x0e00 << 16) | (0xc140 >> 2),
|
||||
|
@ -557,8 +555,7 @@ static const u32 spectre_rlc_save_restore_register_list[] =
|
|||
(0x0e00 << 16) | (0x9600 >> 2),
|
||||
};
|
||||
|
||||
static const u32 kalindi_rlc_save_restore_register_list[] =
|
||||
{
|
||||
static const u32 kalindi_rlc_save_restore_register_list[] = {
|
||||
(0x0e00 << 16) | (0xc12c >> 2),
|
||||
0x00000000,
|
||||
(0x0e00 << 16) | (0xc140 >> 2),
|
||||
|
@ -933,7 +930,8 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
|
|||
case CHIP_MULLINS:
|
||||
chip_name = "mullins";
|
||||
break;
|
||||
default: BUG();
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
|
||||
|
@ -2759,8 +2757,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
struct hqd_registers
|
||||
{
|
||||
struct hqd_registers {
|
||||
u32 cp_mqd_base_addr;
|
||||
u32 cp_mqd_base_addr_hi;
|
||||
u32 cp_hqd_active;
|
||||
|
@ -5124,11 +5121,11 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
|
|||
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
|
||||
cu_info->bitmap[i][j] = bitmap;
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
|
||||
if (bitmap & mask) {
|
||||
if (counter < ao_cu_num)
|
||||
ao_bitmap |= mask;
|
||||
counter ++;
|
||||
counter++;
|
||||
}
|
||||
mask <<= 1;
|
||||
}
|
||||
|
@ -5150,8 +5147,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
|
|||
cu_info->lds_size = 64;
|
||||
}
|
||||
|
||||
const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version gfx_v7_1_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_GFX,
|
||||
.major = 7,
|
||||
.minor = 1,
|
||||
|
@ -5159,8 +5155,7 @@ const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
|
|||
.funcs = &gfx_v7_0_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version gfx_v7_2_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_GFX,
|
||||
.major = 7,
|
||||
.minor = 2,
|
||||
|
@ -5168,8 +5163,7 @@ const struct amdgpu_ip_block_version gfx_v7_2_ip_block =
|
|||
.funcs = &gfx_v7_0_ip_funcs,
|
||||
};
|
||||
|
||||
const struct amdgpu_ip_block_version gfx_v7_3_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version gfx_v7_3_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_GFX,
|
||||
.major = 7,
|
||||
.minor = 3,
|
||||
|
|
|
@ -402,22 +402,6 @@ static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev,
|
|||
static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev,
|
||||
uint32_t xcc_mask)
|
||||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
|
||||
* VF copy registers so vbios post doesn't program them, for
|
||||
* SRIOV driver need to program them
|
||||
*/
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
for_each_inst(i, xcc_mask) {
|
||||
WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE,
|
||||
adev->gmc.vram_start >> 24);
|
||||
WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP,
|
||||
adev->gmc.vram_end >> 24);
|
||||
}
|
||||
}
|
||||
|
||||
/* GART Enable. */
|
||||
gfxhub_v1_2_xcc_init_gart_aperture_regs(adev, xcc_mask);
|
||||
gfxhub_v1_2_xcc_init_system_aperture_regs(adev, xcc_mask);
|
||||
|
|
|
@ -494,7 +494,8 @@ static int ih_v6_0_self_irq(struct amdgpu_device *adev,
|
|||
*adev->irq.ih1.wptr_cpu = wptr;
|
||||
schedule_work(&adev->irq.ih1_work);
|
||||
break;
|
||||
default: break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -759,8 +760,7 @@ static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev)
|
|||
adev->irq.ih_funcs = &ih_v6_0_funcs;
|
||||
}
|
||||
|
||||
const struct amdgpu_ip_block_version ih_v6_0_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version ih_v6_0_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_IH,
|
||||
.major = 6,
|
||||
.minor = 0,
|
||||
|
|
|
@ -0,0 +1,769 @@
|
|||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/pci.h>
|
||||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_ih.h"
|
||||
|
||||
#include "oss/osssys_6_1_0_offset.h"
|
||||
#include "oss/osssys_6_1_0_sh_mask.h"
|
||||
|
||||
#include "soc15_common.h"
|
||||
#include "ih_v6_1.h"
|
||||
|
||||
#define MAX_REARM_RETRY 10
|
||||
|
||||
static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev);
|
||||
|
||||
/**
|
||||
* ih_v6_1_init_register_offset - Initialize register offset for ih rings
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Initialize register offset ih rings (IH_V6_0).
|
||||
*/
|
||||
static void ih_v6_1_init_register_offset(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ih_regs *ih_regs;
|
||||
|
||||
/* ih ring 2 is removed
|
||||
* ih ring and ih ring 1 are available */
|
||||
if (adev->irq.ih.ring_size) {
|
||||
ih_regs = &adev->irq.ih.ih_regs;
|
||||
ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE);
|
||||
ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI);
|
||||
ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL);
|
||||
ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR);
|
||||
ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR);
|
||||
ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR);
|
||||
ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO);
|
||||
ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI);
|
||||
ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL;
|
||||
}
|
||||
|
||||
if (adev->irq.ih1.ring_size) {
|
||||
ih_regs = &adev->irq.ih1.ih_regs;
|
||||
ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1);
|
||||
ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1);
|
||||
ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1);
|
||||
ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1);
|
||||
ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1);
|
||||
ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1);
|
||||
ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* force_update_wptr_for_self_int - Force update the wptr for self interrupt
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @threshold: threshold to trigger the wptr reporting
|
||||
* @timeout: timeout to trigger the wptr reporting
|
||||
* @enabled: Enable/disable timeout flush mechanism
|
||||
*
|
||||
* threshold input range: 0 ~ 15, default 0,
|
||||
* real_threshold = 2^threshold
|
||||
* timeout input range: 0 ~ 20, default 8,
|
||||
* real_timeout = (2^timeout) * 1024 / (socclk_freq)
|
||||
*
|
||||
* Force update wptr for self interrupt ( >= SIENNA_CICHLID).
|
||||
*/
|
||||
static void
|
||||
force_update_wptr_for_self_int(struct amdgpu_device *adev,
|
||||
u32 threshold, u32 timeout, bool enabled)
|
||||
{
|
||||
u32 ih_cntl, ih_rb_cntl;
|
||||
|
||||
ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2);
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1);
|
||||
|
||||
ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
|
||||
SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout);
|
||||
ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2,
|
||||
SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
||||
RB_USED_INT_THRESHOLD, threshold);
|
||||
|
||||
if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl))
|
||||
return;
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
}
|
||||
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl);
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_toggle_ring_interrupts - toggle the interrupt ring buffer
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @ih: amdgpu_ih_ring pointer
|
||||
* @enable: true - enable the interrupts, false - disable the interrupts
|
||||
*
|
||||
* Toggle the interrupt ring buffer (IH_V6_0)
|
||||
*/
|
||||
static int ih_v6_1_toggle_ring_interrupts(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih,
|
||||
bool enable)
|
||||
{
|
||||
struct amdgpu_ih_regs *ih_regs;
|
||||
uint32_t tmp;
|
||||
|
||||
ih_regs = &ih->ih_regs;
|
||||
|
||||
tmp = RREG32(ih_regs->ih_rb_cntl);
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
|
||||
/* enable_intr field is only valid in ring0 */
|
||||
if (ih == &adev->irq.ih)
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
|
||||
|
||||
if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
|
||||
if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
|
||||
return -ETIMEDOUT;
|
||||
} else {
|
||||
WREG32(ih_regs->ih_rb_cntl, tmp);
|
||||
}
|
||||
|
||||
if (enable) {
|
||||
ih->enabled = true;
|
||||
} else {
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32(ih_regs->ih_rb_rptr, 0);
|
||||
WREG32(ih_regs->ih_rb_wptr, 0);
|
||||
ih->enabled = false;
|
||||
ih->rptr = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_toggle_interrupts - Toggle all the available interrupt ring buffers
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @enable: enable or disable interrupt ring buffers
|
||||
*
|
||||
* Toggle all the available interrupt ring buffers (IH_V6_0).
|
||||
*/
|
||||
static int ih_v6_1_toggle_interrupts(struct amdgpu_device *adev, bool enable)
|
||||
{
|
||||
struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
|
||||
int i;
|
||||
int r;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ih); i++) {
|
||||
if (ih[i]->ring_size) {
|
||||
r = ih_v6_1_toggle_ring_interrupts(adev, ih[i], enable);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t ih_v6_1_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
|
||||
{
|
||||
int rb_bufsz = order_base_2(ih->ring_size / 4);
|
||||
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
|
||||
MC_SPACE, ih->use_bus_addr ? 2 : 4);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
|
||||
WPTR_OVERFLOW_CLEAR, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
|
||||
WPTR_OVERFLOW_ENABLE, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
|
||||
/* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
|
||||
* value is written to memory
|
||||
*/
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
|
||||
WPTR_WRITEBACK_ENABLE, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
|
||||
|
||||
return ih_rb_cntl;
|
||||
}
|
||||
|
||||
static uint32_t ih_v6_1_doorbell_rptr(struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
u32 ih_doorbell_rtpr = 0;
|
||||
|
||||
if (ih->use_doorbell) {
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
|
||||
IH_DOORBELL_RPTR, OFFSET,
|
||||
ih->doorbell_index);
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
|
||||
IH_DOORBELL_RPTR,
|
||||
ENABLE, 1);
|
||||
} else {
|
||||
ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
|
||||
IH_DOORBELL_RPTR,
|
||||
ENABLE, 0);
|
||||
}
|
||||
return ih_doorbell_rtpr;
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_enable_ring - enable an ih ring buffer
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @ih: amdgpu_ih_ring pointer
|
||||
*
|
||||
* Enable an ih ring buffer (IH_V6_0)
|
||||
*/
|
||||
static int ih_v6_1_enable_ring(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
struct amdgpu_ih_regs *ih_regs;
|
||||
uint32_t tmp;
|
||||
|
||||
ih_regs = &ih->ih_regs;
|
||||
|
||||
/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
|
||||
WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8);
|
||||
WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff);
|
||||
|
||||
tmp = RREG32(ih_regs->ih_rb_cntl);
|
||||
tmp = ih_v6_1_rb_cntl(ih, tmp);
|
||||
if (ih == &adev->irq.ih)
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
|
||||
if (ih == &adev->irq.ih1) {
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
|
||||
if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
|
||||
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
} else {
|
||||
WREG32(ih_regs->ih_rb_cntl, tmp);
|
||||
}
|
||||
|
||||
if (ih == &adev->irq.ih) {
|
||||
/* set the ih ring 0 writeback address whether it's enabled or not */
|
||||
WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr));
|
||||
WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF);
|
||||
}
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32(ih_regs->ih_rb_wptr, 0);
|
||||
WREG32(ih_regs->ih_rb_rptr, 0);
|
||||
|
||||
WREG32(ih_regs->ih_doorbell_rptr, ih_v6_1_doorbell_rptr(ih));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_irq_init - init and enable the interrupt ring
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Allocate a ring buffer for the interrupt controller,
|
||||
* enable the RLC, disable interrupts, enable the IH
|
||||
* ring buffer and enable it.
|
||||
* Called at device load and reume.
|
||||
* Returns 0 for success, errors for failure.
|
||||
*/
|
||||
static int ih_v6_1_irq_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1};
|
||||
u32 ih_chicken;
|
||||
u32 tmp;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
/* disable irqs */
|
||||
ret = ih_v6_1_toggle_interrupts(adev, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
adev->nbio.funcs->ih_control(adev);
|
||||
|
||||
if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
|
||||
(adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) {
|
||||
if (ih[0]->use_bus_addr) {
|
||||
ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN);
|
||||
ih_chicken = REG_SET_FIELD(ih_chicken,
|
||||
IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ih); i++) {
|
||||
if (ih[i]->ring_size) {
|
||||
ret = ih_v6_1_enable_ring(adev, ih[i]);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/* update doorbell range for ih ring 0 */
|
||||
adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
|
||||
ih[0]->doorbell_index);
|
||||
|
||||
tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
|
||||
CLIENT18_IS_STORM_CLIENT, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp);
|
||||
|
||||
tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp);
|
||||
|
||||
/* GC/MMHUB UTCL2 page fault interrupts are configured as
|
||||
* MSI storm capable interrupts by deafult. The delay is
|
||||
* used to avoid ISR being called too frequently
|
||||
* when page fault happens on several continuous page
|
||||
* and thus avoid MSI storm */
|
||||
tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL);
|
||||
tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL,
|
||||
DELAY, 3);
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
|
||||
|
||||
pci_set_master(adev->pdev);
|
||||
|
||||
/* enable interrupts */
|
||||
ret = ih_v6_1_toggle_interrupts(adev, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
/* enable wptr force update for self int */
|
||||
force_update_wptr_for_self_int(adev, 0, 8, true);
|
||||
|
||||
if (adev->irq.ih_soft.ring_size)
|
||||
adev->irq.ih_soft.enabled = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_irq_disable - disable interrupts
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Disable interrupts on the hw.
|
||||
*/
|
||||
static void ih_v6_1_irq_disable(struct amdgpu_device *adev)
|
||||
{
|
||||
force_update_wptr_for_self_int(adev, 0, 8, false);
|
||||
ih_v6_1_toggle_interrupts(adev, false);
|
||||
|
||||
/* Wait and acknowledge irq */
|
||||
mdelay(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_get_wptr - get the IH ring buffer wptr
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @ih: amdgpu_ih_ring pointer
|
||||
*
|
||||
* Get the IH ring buffer wptr from either the register
|
||||
* or the writeback memory buffer. Also check for
|
||||
* ring buffer overflow and deal with it.
|
||||
* Returns the value of the wptr.
|
||||
*/
|
||||
static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
u32 wptr, tmp;
|
||||
struct amdgpu_ih_regs *ih_regs;
|
||||
|
||||
wptr = le32_to_cpu(*ih->wptr_cpu);
|
||||
ih_regs = &ih->ih_regs;
|
||||
|
||||
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
|
||||
goto out;
|
||||
|
||||
wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
|
||||
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
|
||||
goto out;
|
||||
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
|
||||
|
||||
/* When a ring buffer overflow happen start parsing interrupt
|
||||
* from the last not overwritten vector (wptr + 32). Hopefully
|
||||
* this should allow us to catch up.
|
||||
*/
|
||||
tmp = (wptr + 32) & ih->ptr_mask;
|
||||
dev_warn(adev->dev, "IH ring buffer overflow "
|
||||
"(0x%08X, 0x%08X, 0x%08X)\n",
|
||||
wptr, ih->rptr, tmp);
|
||||
ih->rptr = tmp;
|
||||
|
||||
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
|
||||
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
|
||||
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
|
||||
out:
|
||||
return (wptr & ih->ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_irq_rearm - rearm IRQ if lost
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @ih: amdgpu_ih_ring pointer
|
||||
*
|
||||
*/
|
||||
static void ih_v6_1_irq_rearm(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
uint32_t v = 0;
|
||||
uint32_t i = 0;
|
||||
struct amdgpu_ih_regs *ih_regs;
|
||||
|
||||
ih_regs = &ih->ih_regs;
|
||||
|
||||
/* Rearm IRQ / re-write doorbell if doorbell write is lost */
|
||||
for (i = 0; i < MAX_REARM_RETRY; i++) {
|
||||
v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr);
|
||||
if ((v < ih->ring_size) && (v != ih->rptr))
|
||||
WDOORBELL32(ih->doorbell_index, ih->rptr);
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_set_rptr - set the IH ring buffer rptr
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @ih: amdgpu_ih_ring pointer
|
||||
*
|
||||
* Set the IH ring buffer rptr.
|
||||
*/
|
||||
static void ih_v6_1_set_rptr(struct amdgpu_device *adev,
|
||||
struct amdgpu_ih_ring *ih)
|
||||
{
|
||||
struct amdgpu_ih_regs *ih_regs;
|
||||
|
||||
if (ih->use_doorbell) {
|
||||
/* XXX check if swapping is necessary on BE */
|
||||
*ih->rptr_cpu = ih->rptr;
|
||||
WDOORBELL32(ih->doorbell_index, ih->rptr);
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
ih_v6_1_irq_rearm(adev, ih);
|
||||
} else {
|
||||
ih_regs = &ih->ih_regs;
|
||||
WREG32(ih_regs->ih_rb_rptr, ih->rptr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* ih_v6_1_self_irq - dispatch work for ring 1
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @source: irq source
|
||||
* @entry: IV with WPTR update
|
||||
*
|
||||
* Update the WPTR from the IV and schedule work to handle the entries.
|
||||
*/
|
||||
static int ih_v6_1_self_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
uint32_t wptr = cpu_to_le32(entry->src_data[0]);
|
||||
|
||||
switch (entry->ring_id) {
|
||||
case 1:
|
||||
*adev->irq.ih1.wptr_cpu = wptr;
|
||||
schedule_work(&adev->irq.ih1_work);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct amdgpu_irq_src_funcs ih_v6_1_self_irq_funcs = {
|
||||
.process = ih_v6_1_self_irq,
|
||||
};
|
||||
|
||||
static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->irq.self_irq.num_types = 0;
|
||||
adev->irq.self_irq.funcs = &ih_v6_1_self_irq_funcs;
|
||||
}
|
||||
|
||||
static int ih_v6_1_early_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
ih_v6_1_set_interrupt_funcs(adev);
|
||||
ih_v6_1_set_self_irq_funcs(adev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ih_v6_1_sw_init(void *handle)
|
||||
{
|
||||
int r;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
bool use_bus_addr;
|
||||
|
||||
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
|
||||
&adev->irq.self_irq);
|
||||
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* use gpu virtual address for ih ring
|
||||
* until ih_checken is programmed to allow
|
||||
* use bus address for ih ring by psp bl */
|
||||
use_bus_addr =
|
||||
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
|
||||
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
adev->irq.ih.use_doorbell = true;
|
||||
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
|
||||
|
||||
adev->irq.ih1.ring_size = 0;
|
||||
adev->irq.ih2.ring_size = 0;
|
||||
|
||||
/* initialize ih control register offset */
|
||||
ih_v6_1_init_register_offset(adev);
|
||||
|
||||
r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_irq_init(adev);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int ih_v6_1_sw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
amdgpu_irq_fini_sw(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ih_v6_1_hw_init(void *handle)
|
||||
{
|
||||
int r;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
r = ih_v6_1_irq_init(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ih_v6_1_hw_fini(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
ih_v6_1_irq_disable(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ih_v6_1_suspend(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
return ih_v6_1_hw_fini(adev);
|
||||
}
|
||||
|
||||
static int ih_v6_1_resume(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
return ih_v6_1_hw_init(adev);
|
||||
}
|
||||
|
||||
static bool ih_v6_1_is_idle(void *handle)
|
||||
{
|
||||
/* todo */
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ih_v6_1_wait_for_idle(void *handle)
|
||||
{
|
||||
/* todo */
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
static int ih_v6_1_soft_reset(void *handle)
|
||||
{
|
||||
/* todo */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
uint32_t data, def, field_val;
|
||||
|
||||
if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
|
||||
def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL);
|
||||
field_val = enable ? 0 : 1;
|
||||
data = REG_SET_FIELD(data, IH_CLK_CTRL,
|
||||
DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
|
||||
data = REG_SET_FIELD(data, IH_CLK_CTRL,
|
||||
OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
|
||||
data = REG_SET_FIELD(data, IH_CLK_CTRL,
|
||||
LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
|
||||
data = REG_SET_FIELD(data, IH_CLK_CTRL,
|
||||
DYN_CLK_SOFT_OVERRIDE, field_val);
|
||||
data = REG_SET_FIELD(data, IH_CLK_CTRL,
|
||||
REG_CLK_SOFT_OVERRIDE, field_val);
|
||||
if (def != data)
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static int ih_v6_1_set_clockgating_state(void *handle,
|
||||
enum amd_clockgating_state state)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
ih_v6_1_update_clockgating_state(adev,
|
||||
state == AMD_CG_STATE_GATE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev,
|
||||
bool enable)
|
||||
{
|
||||
uint32_t ih_mem_pwr_cntl;
|
||||
|
||||
/* Disable ih sram power cntl before switch powergating mode */
|
||||
ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_CTRL_EN, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
|
||||
|
||||
/* It is recommended to set mem powergating mode to DS mode */
|
||||
if (enable) {
|
||||
/* mem power mode */
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_LS_EN, 0);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_DS_EN, 1);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_SD_EN, 0);
|
||||
/* cam mem power mode */
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
|
||||
/* re-enable power cntl */
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_CTRL_EN, 1);
|
||||
} else {
|
||||
/* mem power mode */
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_LS_EN, 0);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_DS_EN, 0);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_SD_EN, 0);
|
||||
/* cam mem power mode */
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0);
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0);
|
||||
/* re-enable power cntl*/
|
||||
ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL,
|
||||
IH_BUFFER_MEM_POWER_CTRL_EN, 1);
|
||||
}
|
||||
|
||||
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
|
||||
}
|
||||
|
||||
static int ih_v6_1_set_powergating_state(void *handle,
|
||||
enum amd_powergating_state state)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
bool enable = (state == AMD_PG_STATE_GATE);
|
||||
|
||||
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
|
||||
ih_v6_1_update_ih_mem_power_gating(adev, enable);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ih_v6_1_get_clockgating_state(void *handle, u64 *flags)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
|
||||
*flags |= AMD_CG_SUPPORT_IH_CG;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static const struct amd_ip_funcs ih_v6_1_ip_funcs = {
|
||||
.name = "ih_v6_1",
|
||||
.early_init = ih_v6_1_early_init,
|
||||
.late_init = NULL,
|
||||
.sw_init = ih_v6_1_sw_init,
|
||||
.sw_fini = ih_v6_1_sw_fini,
|
||||
.hw_init = ih_v6_1_hw_init,
|
||||
.hw_fini = ih_v6_1_hw_fini,
|
||||
.suspend = ih_v6_1_suspend,
|
||||
.resume = ih_v6_1_resume,
|
||||
.is_idle = ih_v6_1_is_idle,
|
||||
.wait_for_idle = ih_v6_1_wait_for_idle,
|
||||
.soft_reset = ih_v6_1_soft_reset,
|
||||
.set_clockgating_state = ih_v6_1_set_clockgating_state,
|
||||
.set_powergating_state = ih_v6_1_set_powergating_state,
|
||||
.get_clockgating_state = ih_v6_1_get_clockgating_state,
|
||||
};
|
||||
|
||||
static const struct amdgpu_ih_funcs ih_v6_1_funcs = {
|
||||
.get_wptr = ih_v6_1_get_wptr,
|
||||
.decode_iv = amdgpu_ih_decode_iv_helper,
|
||||
.decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
|
||||
.set_rptr = ih_v6_1_set_rptr
|
||||
};
|
||||
|
||||
static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev)
|
||||
{
|
||||
adev->irq.ih_funcs = &ih_v6_1_funcs;
|
||||
}
|
||||
|
||||
const struct amdgpu_ip_block_version ih_v6_1_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_IH,
|
||||
.major = 6,
|
||||
.minor = 0,
|
||||
.rev = 0,
|
||||
.funcs = &ih_v6_1_ip_funcs,
|
||||
};
|
|
@ -0,0 +1,28 @@
|
|||
/*
|
||||
* Copyright 2023 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef __IH_V6_1_IH_H__
|
||||
#define __IH_V6_1_IH_H__
|
||||
|
||||
extern const struct amdgpu_ip_block_version ih_v6_1_ip_block;
|
||||
|
||||
#endif
|
|
@ -807,8 +807,7 @@ static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev)
|
|||
adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs;
|
||||
}
|
||||
|
||||
const struct amdgpu_ip_block_version jpeg_v2_0_ip_block =
|
||||
{
|
||||
const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = {
|
||||
.type = AMD_IP_BLOCK_TYPE_JPEG,
|
||||
.major = 2,
|
||||
.minor = 0,
|
||||
|
|
|
@ -479,7 +479,7 @@ static int jpeg_v3_0_set_clockgating_state(void *handle,
|
|||
enum amd_clockgating_state state)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
|
||||
bool enable = state == AMD_CG_STATE_GATE;
|
||||
|
||||
if (enable) {
|
||||
if (!jpeg_v3_0_is_idle(handle))
|
||||
|
|
|
@ -626,7 +626,7 @@ static int jpeg_v4_0_set_clockgating_state(void *handle,
|
|||
enum amd_clockgating_state state)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
|
||||
bool enable = state == AMD_CG_STATE_GATE;
|
||||
|
||||
if (enable) {
|
||||
if (!jpeg_v4_0_is_idle(handle))
|
||||
|
|
|
@@ -785,7 +785,7 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
bool enable = state == AMD_CG_STATE_GATE;
int i;

for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -788,8 +788,7 @@ static int mes_v11_0_mqd_init(struct amdgpu_ring *ring)
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
}
else
} else
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
mqd->cp_hqd_pq_doorbell_control = tmp;
@@ -413,18 +413,6 @@ static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev)

static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
/*
 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
 * VF copy registers so vbios post doesn't program them, for
 * SRIOV driver need to program them
 */
WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE,
adev->gmc.vram_start >> 24);
WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP,
adev->gmc.vram_end >> 24);
}

/* GART Enable. */
mmhub_v1_8_init_gart_aperture_regs(adev);
mmhub_v1_8_init_system_aperture_regs(adev);
@@ -331,7 +331,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev)
static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned i;
unsigned int i;

for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0,
@@ -406,6 +406,7 @@ static void mmhub_v2_3_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;

tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
@@ -499,11 +500,11 @@ mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
data &= ~MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK;
data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);

} else {
data |= MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK;
@@ -593,13 +594,13 @@ static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags)

/* AMD_CG_SUPPORT_MC_MGCG */
if (!(data & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))
DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))
&& !(data1 & MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK)) {
*flags |= AMD_CG_SUPPORT_MC_MGCG;
*flags |= AMD_CG_SUPPORT_MC_MGCG;
}

/* AMD_CG_SUPPORT_MC_LS */
@@ -108,7 +108,7 @@ static void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi
}

static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
int hubid)
int hubid)
{
uint64_t value;
uint32_t tmp;
@@ -1568,7 +1568,7 @@ static int mmhub_v9_4_get_ras_error_count(struct amdgpu_device *adev,
uint32_t sec_cnt, ded_cnt;

for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) {
if(mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
if (mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset)
continue;

sec_cnt = (value &
@@ -183,12 +183,10 @@ send_request:
if (req != IDH_REQ_GPU_INIT_DATA) {
pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
return r;
}
else /* host doesn't support REQ_GPU_INIT_DATA handshake */
} else /* host doesn't support REQ_GPU_INIT_DATA handshake */
adev->virt.req_init_data_ver = 0;
} else {
if (req == IDH_REQ_GPU_INIT_DATA)
{
if (req == IDH_REQ_GPU_INIT_DATA) {
adev->virt.req_init_data_ver =
RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
@@ -334,7 +334,7 @@ static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev)
break;
}
mdelay(1);
timeout -=1;
timeout -= 1;

reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL);
}
@@ -32,6 +32,18 @@

#define NPS_MODE_MASK 0x000000FFL

/* Core 0 Port 0 counter */
#define smnPCIEP_NAK_COUNTER 0x1A340218

#define smnPCIE_PERF_CNTL_TXCLK3 0x1A38021c
#define smnPCIE_PERF_CNTL_TXCLK7 0x1A380888
#define smnPCIE_PERF_COUNT_CNTL 0x1A380200
#define smnPCIE_PERF_COUNT0_TXCLK3 0x1A380220
#define smnPCIE_PERF_COUNT0_TXCLK7 0x1A38088C
#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK3 0x1A3808F8
#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK7 0x1A380918


static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL,
@@ -427,6 +439,75 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
}
}

static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev)
{
u32 val, nak_r, nak_g;

if (adev->flags & AMD_IS_APU)
return 0;

/* Get the number of NAKs received and generated */
val = RREG32_PCIE(smnPCIEP_NAK_COUNTER);
nak_r = val & 0xFFFF;
nak_g = val >> 16;

/* Add the total number of NAKs, i.e the number of replays */
return (nak_r + nak_g);
}

static void nbio_v7_9_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
uint64_t *count1)
{
uint32_t perfctrrx = 0;
uint32_t perfctrtx = 0;

/* This reports 0 on APUs, so return to avoid writing/reading registers
 * that may or may not be different from their GPU counterparts
 */
if (adev->flags & AMD_IS_APU)
return;

/* Use TXCLK3 counter group for rx event */
/* Use TXCLK7 counter group for tx event */
/* Set the 2 events that we wish to watch, defined above */
/* 40 is event# for received msgs */
/* 2 is event# of posted requests sent */
perfctrrx = REG_SET_FIELD(perfctrrx, PCIE_PERF_CNTL_TXCLK3, EVENT0_SEL, 40);
perfctrtx = REG_SET_FIELD(perfctrtx, PCIE_PERF_CNTL_TXCLK7, EVENT0_SEL, 2);

/* Write to enable desired perf counters */
WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctrrx);
WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK7, perfctrtx);

/* Zero out and enable SHADOW_WR
 * Write 0x6:
 * Bit 1 = Global Shadow wr(1)
 * Bit 2 = Global counter reset enable(1)
 */
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006);

/* Enable Gloabl Counter
 * Write 0x1:
 * Bit 0 = Global Counter Enable(1)
 */
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000001);

msleep(1000);

/* Disable Global Counter, Reset and enable SHADOW_WR
 * Write 0x6:
 * Bit 1 = Global Shadow wr(1)
 * Bit 2 = Global counter reset enable(1)
 */
WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006);

/* Get the upper and lower count */
*count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) |
((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK3) << 32);
*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK7) |
((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK7) << 32);
}

const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset,
@@ -450,4 +531,193 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode,
.get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
.init_registers = nbio_v7_9_init_registers,
.get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count,
.get_pcie_usage = nbio_v7_9_get_pcie_usage,
};

static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
return;
}

static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
{
uint32_t bif_doorbell_intr_cntl;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
struct ras_err_data err_data = {0, 0, 0, NULL};
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);

if (REG_GET_FIELD(bif_doorbell_intr_cntl,
BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
/* driver has to clear the interrupt status when bif ring is disabled */
bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
BIF_BX0_BIF_DOORBELL_INT_CNTL,
RAS_CNTLR_INTERRUPT_CLEAR, 1);
WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);

if (!ras->disable_ras_err_cnt_harvest) {
/*
 * clear error status after ras_controller_intr
 * according to hw team and count ue number
 * for query
 */
nbio_v7_9_query_ras_error_count(adev, &err_data);

/* logging on error cnt and printing for awareness */
obj->err_data.ue_count += err_data.ue_count;
obj->err_data.ce_count += err_data.ce_count;

if (err_data.ce_count)
dev_info(adev->dev, "%ld correctable hardware "
"errors detected in %s block, "
"no user action is needed.\n",
obj->err_data.ce_count,
get_ras_block_str(adev->nbio.ras_if));

if (err_data.ue_count)
dev_info(adev->dev, "%ld uncorrectable hardware "
"errors detected in %s block\n",
obj->err_data.ue_count,
get_ras_block_str(adev->nbio.ras_if));
}

dev_info(adev->dev, "RAS controller interrupt triggered "
"by NBIF error\n");

/* ras_controller_int is dedicated for nbif ras error,
 * not the global interrupt for sync flood
 */
amdgpu_ras_reset_gpu(adev);
}
}

static void nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
{
uint32_t bif_doorbell_intr_cntl;

bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);

if (REG_GET_FIELD(bif_doorbell_intr_cntl,
BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
/* driver has to clear the interrupt status when bif ring is disabled */
bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
BIF_BX0_BIF_DOORBELL_INT_CNTL,
RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);

WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl);

amdgpu_ras_global_ras_isr(adev);
}
}

static int nbio_v7_9_set_ras_controller_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
{
/* Dummy function, there is no initialization operation in driver */

return 0;
}

static int nbio_v7_9_process_ras_controller_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
/* By design, the ih cookie for ras_controller_irq should be written
 * to BIFring instead of general iv ring. However, due to known bif ring
 * hw bug, it has to be disabled. There is no chance the process function
 * will be involked. Just left it as a dummy one.
 */
return 0;
}

static int nbio_v7_9_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
{
/* Dummy function, there is no initialization operation in driver */

return 0;
}

static int nbio_v7_9_process_err_event_athub_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
/* By design, the ih cookie for err_event_athub_irq should be written
 * to BIFring instead of general iv ring. However, due to known bif ring
 * hw bug, it has to be disabled. There is no chance the process function
 * will be involked. Just left it as a dummy one.
 */
return 0;
}

static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_controller_irq_funcs = {
.set = nbio_v7_9_set_ras_controller_irq_state,
.process = nbio_v7_9_process_ras_controller_irq,
};

static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_err_event_athub_irq_funcs = {
.set = nbio_v7_9_set_ras_err_event_athub_irq_state,
.process = nbio_v7_9_process_err_event_athub_irq,
};

static int nbio_v7_9_init_ras_controller_interrupt (struct amdgpu_device *adev)
{
int r;

/* init the irq funcs */
adev->nbio.ras_controller_irq.funcs =
&nbio_v7_9_ras_controller_irq_funcs;
adev->nbio.ras_controller_irq.num_types = 1;

/* register ras controller interrupt */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT,
&adev->nbio.ras_controller_irq);

return r;
}

static int nbio_v7_9_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev)
{

int r;

/* init the irq funcs */
adev->nbio.ras_err_event_athub_irq.funcs =
&nbio_v7_9_ras_err_event_athub_irq_funcs;
adev->nbio.ras_err_event_athub_irq.num_types = 1;

/* register ras err event athub interrupt */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF,
NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
&adev->nbio.ras_err_event_athub_irq);

return r;
}

const struct amdgpu_ras_block_hw_ops nbio_v7_9_ras_hw_ops = {
.query_ras_error_count = nbio_v7_9_query_ras_error_count,
};

struct amdgpu_nbio_ras nbio_v7_9_ras = {
.ras_block = {
.ras_comm = {
.name = "pcie_bif",
.block = AMDGPU_RAS_BLOCK__PCIE_BIF,
.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
},
.hw_ops = &nbio_v7_9_ras_hw_ops,
.ras_late_init = amdgpu_nbio_ras_late_init,
},
.handle_ras_controller_intr_no_bifring = nbio_v7_9_handle_ras_controller_intr_no_bifring,
.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring,
.init_ras_controller_interrupt = nbio_v7_9_init_ras_controller_interrupt,
.init_ras_err_event_athub_interrupt = nbio_v7_9_init_ras_err_event_athub_interrupt,
};
@@ -28,5 +28,6 @@

extern const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbio_v7_9_funcs;
extern struct amdgpu_nbio_ras nbio_v7_9_ras;

#endif
@@ -67,21 +67,18 @@
static const struct amd_ip_funcs nv_common_ip_funcs;

/* Navi */
static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] =
{
static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
};

static const struct amdgpu_video_codecs nv_video_codecs_encode =
{
static const struct amdgpu_video_codecs nv_video_codecs_encode = {
.codec_count = ARRAY_SIZE(nv_video_codecs_encode_array),
.codec_array = nv_video_codecs_encode_array,
};

/* Navi1x */
static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
{
static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
@@ -91,8 +88,7 @@ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};

static const struct amdgpu_video_codecs nv_video_codecs_decode =
{
static const struct amdgpu_video_codecs nv_video_codecs_decode = {
.codec_count = ARRAY_SIZE(nv_video_codecs_decode_array),
.codec_array = nv_video_codecs_decode_array,
};
@@ -108,8 +104,7 @@ static const struct amdgpu_video_codecs sc_video_codecs_encode = {
.codec_array = sc_video_codecs_encode_array,
};

static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] =
{
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
@@ -120,8 +115,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[]
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};

static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] =
{
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
@@ -131,27 +125,23 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[]
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};

static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn0 =
{
static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn0 = {
.codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn0),
.codec_array = sc_video_codecs_decode_array_vcn0,
};

static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 =
{
static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 = {
.codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn1),
.codec_array = sc_video_codecs_decode_array_vcn1,
};

/* SRIOV Sienna Cichlid, not const since data is controlled by host */
static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
{
static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
};

static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] =
{
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
@@ -162,8 +152,7 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[]
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};

static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] =
{
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
@@ -173,20 +162,17 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[]
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};

static struct amdgpu_video_codecs sriov_sc_video_codecs_encode =
{
static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = {
.codec_count = ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
.codec_array = sriov_sc_video_codecs_encode_array,
};

static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn0 =
{
static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn0 = {
.codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0),
.codec_array = sriov_sc_video_codecs_decode_array_vcn0,
};

static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn1 =
{
static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn1 = {
.codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1),
.codec_array = sriov_sc_video_codecs_decode_array_vcn1,
};
@@ -536,8 +522,7 @@ static void nv_program_aspm(struct amdgpu_device *adev)

}

const struct amdgpu_ip_block_version nv_common_ip_block =
{
const struct amdgpu_ip_block_version nv_common_ip_block = {
.type = AMD_IP_BLOCK_TYPE_COMMON,
.major = 1,
.minor = 0,
@@ -572,16 +557,6 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev)
return false;
}

static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev)
{

/* TODO
 * dummy implement for pcie_replay_count sysfs interface
 * */

return 0;
}

static void nv_init_doorbell_index(struct amdgpu_device *adev)
{
adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
@@ -642,8 +617,7 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev,
return 0;
}

static const struct amdgpu_asic_funcs nv_asic_funcs =
{
static const struct amdgpu_asic_funcs nv_asic_funcs = {
.read_disabled_bios = &nv_read_disabled_bios,
.read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
.read_register = &nv_read_register,
@@ -656,7 +630,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
.init_doorbell_index = &nv_init_doorbell_index,
.need_full_reset = &nv_need_full_reset,
.need_reset_on_init = &nv_need_reset_on_init,
.get_pcie_replay_count = &nv_get_pcie_replay_count,
.get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &amdgpu_dpm_is_baco_supported,
.pre_asic_init = &nv_pre_asic_init,
.update_umd_stable_pstate = &nv_update_umd_stable_pstate,
@@ -889,7 +863,8 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_SDMA_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
@@ -950,7 +925,8 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_IH_CG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
AMD_CG_SUPPORT_JPEG_MGCG |
AMD_CG_SUPPORT_SDMA_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG |
@@ -140,14 +140,15 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
int ret;
int retry_loop;

/* Wait for bootloader to signify that it is ready having bit 31 of
 * C2PMSG_35 set to 1. All other bits are expected to be cleared.
 * If there is an error in processing command, bits[7:0] will be set.
 * This is applicable for PSP v13.0.6 and newer.
 */
for (retry_loop = 0; retry_loop < 10; retry_loop++) {
/* Wait for bootloader to signify that is
   ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(psp,
SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
0x80000000,
0x80000000,
false);
ret = psp_wait_for(
psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
0x80000000, 0xffffffff, false);

if (ret == 0)
return 0;
@@ -1507,6 +1507,30 @@ static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}

static bool sdma_v5_2_firmware_mgcg_support(struct amdgpu_device *adev,
int i)
{
switch (adev->ip_versions[SDMA0_HWIP][0]) {
case IP_VERSION(5, 2, 1):
if (adev->sdma.instance[i].fw_version < 70)
return false;
break;
case IP_VERSION(5, 2, 3):
if (adev->sdma.instance[i].fw_version < 47)
return false;
break;
case IP_VERSION(5, 2, 7):
if (adev->sdma.instance[i].fw_version < 9)
return false;
break;
default:
return true;
}

return true;

}

static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
@@ -1515,7 +1539,7 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade

for (i = 0; i < adev->sdma.num_instances; i++) {

if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1))
if (!sdma_v5_2_firmware_mgcg_support(adev, i))
adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;

if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
@@ -1589,6 +1613,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle,
case IP_VERSION(5, 2, 5):
case IP_VERSION(5, 2, 6):
case IP_VERSION(5, 2, 3):
case IP_VERSION(5, 2, 7):
sdma_v5_2_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
sdma_v5_2_update_medium_grain_light_sleep(adev,
@@ -48,6 +48,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");

#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880
@@ -893,9 +893,9 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
.get_config_memsize = &soc15_get_config_memsize,
.need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &aqua_vanjaram_doorbell_index_init,
.get_pcie_usage = &vega20_get_pcie_usage,
.get_pcie_usage = &amdgpu_nbio_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
.get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &soc15_supports_baco,
.pre_asic_init = &soc15_pre_asic_init,
.query_video_codecs = &soc15_query_video_codecs,
@@ -48,33 +48,28 @@
static const struct amd_ip_funcs soc21_common_ip_funcs;

/* SOC21 */
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] =
{
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};

static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] =
{
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
};

static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 =
{
static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = {
.codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn0),
.codec_array = vcn_4_0_0_video_codecs_encode_array_vcn0,
};

static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 =
{
static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = {
.codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn1),
.codec_array = vcn_4_0_0_video_codecs_encode_array_vcn1,
};

static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] =
{
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
@@ -82,22 +77,19 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};

static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] =
{
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};

static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn0 =
{
static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn0 = {
.codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn0),
.codec_array = vcn_4_0_0_video_codecs_decode_array_vcn0,
};

static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 =
{
static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = {
.codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn1),
.codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1,
};
@@ -445,8 +437,7 @@ static void soc21_program_aspm(struct amdgpu_device *adev)
adev->nbio.funcs->program_aspm(adev);
}

const struct amdgpu_ip_block_version soc21_common_ip_block =
{
const struct amdgpu_ip_block_version soc21_common_ip_block = {
.type = AMD_IP_BLOCK_TYPE_COMMON,
.major = 1,
.minor = 0,
@@ -484,16 +475,6 @@ static bool soc21_need_reset_on_init(struct amdgpu_device *adev)
return false;
}

static uint64_t soc21_get_pcie_replay_count(struct amdgpu_device *adev)
{

/* TODO
 * dummy implement for pcie_replay_count sysfs interface
 * */

return 0;
}

static void soc21_init_doorbell_index(struct amdgpu_device *adev)
{
adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
@@ -547,8 +528,7 @@ static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
return 0;
}

static const struct amdgpu_asic_funcs soc21_asic_funcs =
{
static const struct amdgpu_asic_funcs soc21_asic_funcs = {
.read_disabled_bios = &soc21_read_disabled_bios,
.read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
.read_register = &soc21_read_register,
@@ -561,7 +541,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs =
.init_doorbell_index = &soc21_init_doorbell_index,
.need_full_reset = &soc21_need_full_reset,
.need_reset_on_init = &soc21_need_reset_on_init,
.get_pcie_replay_count = &soc21_get_pcie_replay_count,
.get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count,
.supports_baco = &amdgpu_dpm_is_baco_supported,
.pre_asic_init = &soc21_pre_asic_init,
.query_video_codecs = &soc21_query_video_codecs,
@@ -493,8 +493,7 @@ static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &tonga_ih_funcs;
}

const struct amdgpu_ip_block_version tonga_ih_ip_block =
{
const struct amdgpu_ip_block_version tonga_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 3,
.minor = 0,
@@ -815,8 +815,7 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.set_powergating_state = uvd_v3_1_set_powergating_state,
};

const struct amdgpu_ip_block_version uvd_v3_1_ip_block =
{
const struct amdgpu_ip_block_version uvd_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_UVD,
.major = 3,
.minor = 1,
@@ -679,11 +679,11 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
i == 0 ?
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo:
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo :
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_lo);
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
i == 0 ?
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi:
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi :
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_hi);
WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
offset = 0;
@@ -1908,8 +1908,7 @@ static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
}
}

const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
{
const struct amdgpu_ip_block_version uvd_v7_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_UVD,
.major = 7,
.minor = 0,
@@ -998,8 +998,7 @@ static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
};

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
const struct amdgpu_ip_block_version vce_v3_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 0,
@@ -1007,8 +1006,7 @@ const struct amdgpu_ip_block_version vce_v3_0_ip_block =
.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
const struct amdgpu_ip_block_version vce_v3_1_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 1,
@@ -1016,8 +1014,7 @@ const struct amdgpu_ip_block_version vce_v3_1_ip_block =
.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
const struct amdgpu_ip_block_version vce_v3_4_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCE,
.major = 3,
.minor = 4,
@@ -473,7 +473,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
else
data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;

data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
@@ -1772,7 +1772,7 @@ static int vcn_v1_0_set_powergating_state(void *handle,
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;

if(state == adev->vcn.cur_state)
if (state == adev->vcn.cur_state)
return 0;

if (state == AMD_PG_STATE_GATE)
@@ -1780,7 +1780,7 @@ static int vcn_v1_0_set_powergating_state(void *handle,
else
ret = vcn_v1_0_start(adev);

if(!ret)
if (!ret)
adev->vcn.cur_state = state;
return ret;
}
@@ -2065,8 +2065,7 @@ static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;
}

const struct amdgpu_ip_block_version vcn_v1_0_ip_block =
{
const struct amdgpu_ip_block_version vcn_v1_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCN,
.major = 1,
.minor = 0,
@@ -1105,7 +1105,7 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;

if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
continue;
}
@@ -1789,7 +1789,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
struct amdgpu_bo *bo;
uint64_t start, end;
unsigned int i;
void * ptr;
void *ptr;
int r;

addr &= AMDGPU_GMC_HOLE_MASK;
@@ -2095,7 +2095,7 @@ static int vcn_v3_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
bool enable = state == AMD_CG_STATE_GATE;
int i;

for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -2129,7 +2129,7 @@ static int vcn_v3_0_set_powergating_state(void *handle,
return 0;
}

if(state == adev->vcn.cur_state)
if (state == adev->vcn.cur_state)
return 0;

if (state == AMD_PG_STATE_GATE)
@@ -2137,7 +2137,7 @@ static int vcn_v3_0_set_powergating_state(void *handle,
else
ret = vcn_v3_0_start(adev);

if(!ret)
if (!ret)
adev->vcn.cur_state = state;

return ret;
@@ -2228,8 +2228,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
.set_powergating_state = vcn_v3_0_set_powergating_state,
};

const struct amdgpu_ip_block_version vcn_v3_0_ip_block =
{
const struct amdgpu_ip_block_version vcn_v3_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCN,
.major = 3,
.minor = 0,
@@ -1139,11 +1139,11 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
if (status & 2)
break;
mdelay(10);
if (amdgpu_emu_mode==1)
if (amdgpu_emu_mode == 1)
msleep(1);
}

if (amdgpu_emu_mode==1) {
if (amdgpu_emu_mode == 1) {
r = -1;
if (status & 2) {
r = 0;
@@ -1918,7 +1918,7 @@ static int vcn_v4_0_wait_for_idle(void *handle)
static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
bool enable = state == AMD_CG_STATE_GATE;
int i;

for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1959,7 +1959,7 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta
return 0;
}

if(state == adev->vcn.cur_state)
if (state == adev->vcn.cur_state)
return 0;

if (state == AMD_PG_STATE_GATE)
@@ -1967,7 +1967,7 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta
else
ret = vcn_v4_0_start(adev);

if(!ret)
if (!ret)
adev->vcn.cur_state = state;

return ret;
@@ -2101,8 +2101,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
.set_powergating_state = vcn_v4_0_set_powergating_state,
};

const struct amdgpu_ip_block_version vcn_v4_0_ip_block =
{
const struct amdgpu_ip_block_version vcn_v4_0_ip_block = {
.type = AMD_IP_BLOCK_TYPE_VCN,
.major = 4,
.minor = 0,
@@ -1287,7 +1287,7 @@ static int vcn_v4_0_3_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
bool enable = state == AMD_CG_STATE_GATE;
int i;

for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -500,7 +500,8 @@ static int vega20_ih_self_irq(struct amdgpu_device *adev,
case 2:
schedule_work(&adev->irq.ih2_work);
break;
default: break;
default:
break;
}
return 0;
}
@@ -710,8 +711,7 @@ static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev)
adev->irq.ih_funcs = &vega20_ih_funcs;
}

const struct amdgpu_ip_block_version vega20_ih_ip_block =
{
const struct amdgpu_ip_block_version vega20_ih_ip_block = {
.type = AMD_IP_BLOCK_TYPE_IH,
.major = 4,
.minor = 2,
@@ -6,7 +6,6 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select HMM_MIRROR
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
@@ -59,10 +59,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_crat.o \
$(AMDKFD_PATH)/kfd_debug.o

ifneq ($(CONFIG_AMD_IOMMU_V2),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
endif

ifneq ($(CONFIG_DEBUG_FS),)
AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
endif
(File diff suppressed because it is too large.)
@@ -276,6 +276,11 @@ L_FETCH_2ND_TRAP:
#endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8

s_bitcmp1_b32 ttmp15, 0xF
s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
s_or_b32 ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:

s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
@@ -283,6 +283,11 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8

s_bitcmp1_b32 ttmp15, 0xF
s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
s_or_b32 ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:

s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
@@ -333,10 +333,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}

if (!pdd->doorbell_index &&
kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) {
err = -ENOMEM;
goto err_alloc_doorbells;
if (!pdd->qpd.proc_doorbells) {
err = kfd_alloc_process_doorbells(dev->kfd, pdd);
if (err) {
pr_debug("failed to allocate process doorbells\n");
goto err_bind_process;
}
}

/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
@@ -417,7 +419,6 @@ err_create_queue:
if (wptr_bo)
amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
err_wptr_map_gart:
err_alloc_doorbells:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);
@@ -1025,9 +1026,6 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev)
return true;
}

if (dev->kfd->use_iommu_v2)
return false;

if (dev->local_mem_info.local_mem_size_private == 0 &&
dev->local_mem_info.local_mem_size_public > 0)
return true;
@@ -2266,10 +2264,10 @@ static int criu_restore_devices(struct kfd_process *p,
goto exit;
}

if (!pdd->doorbell_index &&
kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
ret = -ENOMEM;
goto exit;
if (!pdd->qpd.proc_doorbells) {
ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
if (ret)
goto exit;
}
}
@@ -26,7 +26,6 @@
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"

@@ -1536,76 +1535,6 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
return num_of_cache_types;
}

static bool kfd_ignore_crat(void)
{
bool ret;

if (ignore_crat)
return true;

#ifndef KFD_SUPPORT_IOMMU_V2
ret = true;
#else
ret = false;
#endif

return ret;
}

/*
 * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
 * copies CRAT from ACPI (if available).
 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
 *
 * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then
 * crat_image will be NULL
 * @size: [OUT] size of crat_image
 *
 * Return 0 if successful else return error code
 */
int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
{
struct acpi_table_header *crat_table;
acpi_status status;
void *pcrat_image;
int rc = 0;

if (!crat_image)
return -EINVAL;

*crat_image = NULL;

if (kfd_ignore_crat()) {
pr_info("CRAT table disabled by module option\n");
return -ENODATA;
}

/* Fetch the CRAT table from ACPI */
status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
if (status == AE_NOT_FOUND) {
pr_info("CRAT table not found\n");
return -ENODATA;
} else if (ACPI_FAILURE(status)) {
const char *err = acpi_format_exception(status);

pr_err("CRAT table error: %s\n", err);
return -EINVAL;
}

pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
if (!pcrat_image) {
rc = -ENOMEM;
goto out;
}

memcpy(pcrat_image, crat_table, crat_table->length);
*crat_image = pcrat_image;
*size = crat_table->length;
out:
acpi_put_table(crat_table);
return rc;
}

/* Memory required to create Virtual CRAT.
 * Since there is no easy way to predict the amount of memory required, the
 * following amount is allocated for GPU Virtual CRAT. This is
@@ -2173,12 +2102,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,

cu->hsa_capability = 0;

/* Check if this node supports IOMMU. During parsing this flag will
 * translate to HSA_CAP_ATS_PRESENT
 */
if (!kfd_iommu_check_device(kdev->kfd))
cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;

crat_table->length += sub_type_hdr->length;
crat_table->total_entries++;
@@ -307,7 +307,6 @@ struct kfd_gpu_cache_info {
};
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info);

int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
void kfd_destroy_crat_image(void *crat_image);
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
uint32_t proximity_domain);
@@ -29,7 +29,6 @@
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_svm.h"
@@ -62,7 +61,6 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume_iommu(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_node *kfd);

static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
@@ -194,11 +192,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,

kfd_device_info_set_event_interrupt_class(kfd);

/* Raven */
if (gc_version == IP_VERSION(9, 1, 0) ||
gc_version == IP_VERSION(9, 2, 2))
kfd->device_info.needs_iommu_device = true;

if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
if (gc_version == IP_VERSION(10, 3, 6))
@@ -233,10 +226,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
asic_type != CHIP_TONGA)
kfd->device_info.supports_cwsr = true;

if (asic_type == CHIP_KAVERI ||
asic_type == CHIP_CARRIZO)
kfd->device_info.needs_iommu_device = true;

if (asic_type != CHIP_HAWAII && !vf)
kfd->device_info.needs_pci_atomics = true;
}
@@ -249,7 +238,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
uint32_t gfx_target_version = 0;

switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
gfx_target_version = 70000;
@@ -262,7 +250,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
gfx_target_version = 70001;
@@ -298,7 +285,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
#ifdef KFD_SUPPORT_IOMMU_V2
/* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
@@ -306,7 +292,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
#endif
/* Vega12 */
case IP_VERSION(9, 2, 1):
gfx_target_version = 90004;
@@ -455,8 +440,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
atomic_set(&kfd->compute_profile, 0);

mutex_init(&kfd->doorbell_mutex);
memset(&kfd->doorbell_available_index, 0,
sizeof(kfd->doorbell_available_index));

ida_init(&kfd->doorbell_ida);

@@ -770,15 +753,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

kfd->noretry = kfd->adev->gmc.noretry;

/* If CRAT is broken, won't set iommu enabled */
kfd_double_confirm_iommu_support(kfd);

if (kfd_iommu_device_init(kfd)) {
kfd->use_iommu_v2 = false;
dev_err(kfd_device, "Error initializing iommuv2\n");
goto device_iommu_error;
}

kfd_cwsr_init(kfd);

dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
@@ -853,9 +827,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

svm_range_set_max_pages(kfd->adev);

if (kfd_resume_iommu(kfd))
goto kfd_resume_iommu_error;

spin_lock_init(&kfd->watch_points_lock);

kfd->init_complete = true;
@@ -867,11 +838,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

goto out;

kfd_resume_iommu_error:
node_init_error:
node_alloc_error:
kfd_cleanup_nodes(kfd, i);
device_iommu_error:
kfd_doorbell_fini(kfd);
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
@@ -986,7 +955,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
node = kfd->nodes[i];
node->dqm->ops.stop(node->dqm);
}
kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
@@ -1016,26 +984,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
return ret;
}

int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
if (!kfd->init_complete)
return 0;

return kfd_resume_iommu(kfd);
}

static int kfd_resume_iommu(struct kfd_dev *kfd)
{
int err = 0;

err = kfd_iommu_resume(kfd);
if (err)
dev_err(kfd_device,
"Failed to resume IOMMU for device %x:%x\n",
kfd->adev->pdev->vendor, kfd->adev->pdev->device);
return err;
}

static int kfd_resume(struct kfd_node *node)
{
int err = 0;
@@ -396,7 +396,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
unsigned int found;

found = find_first_zero_bit(qpd->doorbell_bitmap,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
pr_debug("No doorbells available");
return -EBUSY;
@@ -406,9 +406,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
}
}

q->properties.doorbell_off =
kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd),
q->doorbell_id);
q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
qpd->proc_doorbells,
q->doorbell_id);
return 0;
}

@@ -2558,32 +2558,26 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
}

switch (dev->adev->asic_type) {
case CHIP_CARRIZO:
device_queue_manager_init_vi(&dqm->asic_ops);
break;

case CHIP_KAVERI:
case CHIP_HAWAII:
device_queue_manager_init_cik(&dqm->asic_ops);
break;

case CHIP_HAWAII:
device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
break;

case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
device_queue_manager_init_vi(&dqm->asic_ops);
break;

default:
if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
device_queue_manager_init_v11(&dqm->asic_ops);
else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
device_queue_manager_init_v10_navi10(&dqm->asic_ops);
device_queue_manager_init_v10(&dqm->asic_ops);
else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
device_queue_manager_init_v9(&dqm->asic_ops);
else {
@ -269,15 +269,11 @@ struct device_queue_manager {

void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_cik_hawaii(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v10_navi10(
void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops);

@ -34,17 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);

void device_queue_manager_init_cik(
struct device_queue_manager_asic_ops *asic_ops)
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik;

@ -52,15 +48,6 @@ void device_queue_manager_init_cik(
asic_ops->mqd_manager_init = mqd_manager_init_cik;
}

void device_queue_manager_init_cik_hawaii(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik_hawaii;
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
}

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,

@ -115,41 +102,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
}

static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;

pdd = qpd_to_pdd(qpd);

/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
DEFAULT_MTYPE(MTYPE_NONCACHED) |
APE1_MTYPE(MTYPE_NONCACHED);
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}

if (qpd->pqm->process->is_32bit_user_mode) {
temp = get_sh_mem_bases_32(pdd);
qpd->sh_mem_bases = SHARED_BASE(temp);
qpd->sh_mem_config |= PTR32;
} else {
temp = get_sh_mem_bases_nybble_64(pdd);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
}

pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);

return 0;
}

static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;

@ -178,25 +131,9 @@ static int update_qpd_cik_hawaii(struct device_queue_manager *dqm,
return 0;
}

static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);

if (q->process->is_32bit_user_mode)
value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
get_sh_mem_bases_32(qpd_to_pdd(qpd));
else
value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;

q->properties.sdma_vm_addr = value;
}

static void init_sdma_vm_hawaii(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
static void init_sdma_vm(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.

@ -32,7 +32,7 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);

void device_queue_manager_init_v10_navi10(
void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->update_qpd = update_qpd_v10;

@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

if (dqm->dev->kfd->noretry && !dqm->dev->kfd->use_iommu_v2)
if (dqm->dev->kfd->noretry)
qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;

if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3))

@ -28,29 +28,19 @@
#include "oss/oss_3_0_sh_mask.h"

static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);

void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops)
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
asic_ops->update_qpd = update_qpd_vi;

@ -58,15 +48,6 @@ void device_queue_manager_init_vi(
asic_ops->mqd_manager_init = mqd_manager_init_vi;
}

void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops)
{
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
asic_ops->update_qpd = update_qpd_vi_tonga;
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
}

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,

@ -96,35 +77,6 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
}

static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
uint32_t default_mtype;
uint32_t ape1_mtype;

default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_CC :
MTYPE_NC;

ape1_mtype = (alternate_policy == cache_policy_coherent) ?
MTYPE_CC :
MTYPE_NC;

qpd->sh_mem_config = (qpd->sh_mem_config &
SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
SH_MEM_CONFIG__PRIVATE_ATC_MASK;

return true;
}

static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,

@ -152,48 +104,7 @@ static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm,
}

static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;

pdd = qpd_to_pdd(qpd);

/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
SH_MEM_CONFIG__PRIVATE_ATC_MASK;

qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}

if (qpd->pqm->process->is_32bit_user_mode) {
temp = get_sh_mem_bases_32(pdd);
qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
} else {
temp = get_sh_mem_bases_nybble_64(pdd);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
qpd->sh_mem_config |= 1 <<
SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
}

pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);

return 0;
}

static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;

@ -226,25 +137,9 @@ static int update_qpd_vi_tonga(struct device_queue_manager *dqm,
return 0;
}

static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);

if (q->process->is_32bit_user_mode)
value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
get_sh_mem_bases_32(qpd_to_pdd(qpd));
else
value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;

q->properties.sdma_vm_addr = value;
}

static void init_sdma_vm_tonga(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
static void init_sdma_vm(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd)
{
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.

@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
/* Doorbell calculations for device init. */
int kfd_doorbell_init(struct kfd_dev *kfd)
{
size_t doorbell_start_offset;
size_t doorbell_aperture_size;
size_t doorbell_process_limit;
int size = PAGE_SIZE;
int r;

/*
 * With MES enabled, just set the doorbell base as it is needed
 * to calculate doorbell physical address.
 * Todo: KFD kernel level operations need only one doorbell for
 * ring test/HWS. So instead of reserving a whole page here for
 * kernel, reserve and consume a doorbell from existing KGD kernel
 * doorbell page.
 */
if (kfd->shared_resources.enable_mes) {
kfd->doorbell_base =
kfd->shared_resources.doorbell_physical_address;
return 0;

/* Bitmap to dynamically allocate doorbells from kernel page */
kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
if (!kfd->doorbell_bitmap) {
DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
return -ENOMEM;
}

/*
 * We start with calculations in bytes because the input data might
 * only be byte-aligned.
 * Only after we have done the rounding can we assume any alignment.
 */

doorbell_start_offset =
roundup(kfd->shared_resources.doorbell_start_offset,
kfd_doorbell_process_slice(kfd));

doorbell_aperture_size =
rounddown(kfd->shared_resources.doorbell_aperture_size,
kfd_doorbell_process_slice(kfd));

if (doorbell_aperture_size > doorbell_start_offset)
doorbell_process_limit =
(doorbell_aperture_size - doorbell_start_offset) /
kfd_doorbell_process_slice(kfd);
else
return -ENOSPC;

if (!kfd->max_doorbell_slices ||
doorbell_process_limit < kfd->max_doorbell_slices)
kfd->max_doorbell_slices = doorbell_process_limit;

kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;

kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);

kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
kfd_doorbell_process_slice(kfd));

if (!kfd->doorbell_kernel_ptr)
return -ENOMEM;

pr_debug("Doorbell initialization:\n");
pr_debug("doorbell base == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);

pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
kfd->doorbell_base_dw_offset);

pr_debug("doorbell_process_limit == 0x%08lX\n",
doorbell_process_limit);

pr_debug("doorbell_kernel_offset == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);

pr_debug("doorbell aperture size == 0x%08lX\n",
kfd->shared_resources.doorbell_aperture_size);

pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
/* Alloc a doorbell page for KFD kernel usages */
r = amdgpu_bo_create_kernel(kfd->adev,
size,
PAGE_SIZE,
AMDGPU_GEM_DOMAIN_DOORBELL,
&kfd->doorbells,
NULL,
(void **)&kfd->doorbell_kernel_ptr);
if (r) {
pr_err("failed to allocate kernel doorbells\n");
bitmap_free(kfd->doorbell_bitmap);
return r;
}

pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
return 0;
}

void kfd_doorbell_fini(struct kfd_dev *kfd)
{
if (kfd->doorbell_kernel_ptr)
iounmap(kfd->doorbell_kernel_ptr);
bitmap_free(kfd->doorbell_bitmap);
amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
(void **)&kfd->doorbell_kernel_ptr);
}

int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,

@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
u32 inx;

mutex_lock(&kfd->doorbell_mutex);
inx = find_first_zero_bit(kfd->doorbell_available_index,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));

__set_bit(inx, kfd->doorbell_available_index);
__set_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex);

if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;

inx *= kfd->device_info.doorbell_size / sizeof(u32);

/*
 * Calculating the kernel doorbell offset using the first
 * doorbell page.
 */
*doorbell_off = kfd->doorbell_base_dw_offset + inx;
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);

pr_debug("Get kernel queue doorbell\n"
"     doorbell offset   == 0x%08X\n"

@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{
unsigned int inx;

inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
* sizeof(u32) / kfd->device_info.doorbell_size;
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);

mutex_lock(&kfd->doorbell_mutex);
__clear_bit(inx, kfd->doorbell_available_index);
__clear_bit(inx, kfd->doorbell_bitmap);
mutex_unlock(&kfd->doorbell_mutex);
}

@ -243,80 +200,96 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
}

unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process_device *pdd,
unsigned int doorbell_id)
static int init_doorbell_bitmap(struct qcm_process_device *qpd,
struct kfd_dev *dev)
{
/*
 * doorbell_base_dw_offset accounts for doorbells taken by KGD.
 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
 * the process's doorbells. The offset returned is in dword
 * units regardless of the ASIC-dependent doorbell size.
 */
if (!kfd->shared_resources.enable_mes)
return kfd->doorbell_base_dw_offset +
pdd->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id *
kfd->device_info.doorbell_size / sizeof(u32);
else
return amdgpu_mes_get_doorbell_dw_offset_in_bar(
(struct amdgpu_device *)kfd->adev,
pdd->doorbell_index, doorbell_id);
}
unsigned int i;
int range_start = dev->shared_resources.non_cp_doorbells_start;
int range_end = dev->shared_resources.non_cp_doorbells_end;

uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
kfd->shared_resources.doorbell_start_offset) /
kfd_doorbell_process_slice(kfd) + 1;
if (!KFD_IS_SOC15(dev))
return 0;

return num_of_elems;
/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
if (i >= range_start && i <= range_end) {
__set_bit(i, qpd->doorbell_bitmap);
__set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
qpd->doorbell_bitmap);
}
}

return 0;
}

phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
{
if (!pdd->doorbell_index) {
int r = kfd_alloc_process_doorbells(pdd->dev->kfd,
&pdd->doorbell_index);
if (r < 0)
struct amdgpu_device *adev = pdd->dev->adev;
uint32_t first_db_index;

if (!pdd->qpd.proc_doorbells) {
if (kfd_alloc_process_doorbells(pdd->dev->kfd, pdd))
/* phys_addr_t 0 is error */
return 0;
}

return pdd->dev->kfd->doorbell_base +
pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev->kfd);
first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
return adev->doorbell.base + first_db_index * sizeof(uint32_t);
}

int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
{
int r = 0;
int r;
struct qcm_process_device *qpd = &pdd->qpd;

if (!kfd->shared_resources.enable_mes)
r = ida_simple_get(&kfd->doorbell_ida, 1,
kfd->max_doorbell_slices, GFP_KERNEL);
else
r = amdgpu_mes_alloc_process_doorbells(
(struct amdgpu_device *)kfd->adev,
doorbell_index);
/* Allocate bitmap for dynamic doorbell allocation */
qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
GFP_KERNEL);
if (!qpd->doorbell_bitmap) {
DRM_ERROR("Failed to allocate process doorbell bitmap\n");
return -ENOMEM;
}

if (r > 0)
*doorbell_index = r;
r = init_doorbell_bitmap(&pdd->qpd, kfd);
if (r) {
DRM_ERROR("Failed to initialize process doorbells\n");
r = -ENOMEM;
goto err;
}

if (r < 0)
pr_err("Failed to allocate process doorbells\n");
/* Allocate doorbells for this process */
r = amdgpu_bo_create_kernel(kfd->adev,
kfd_doorbell_process_slice(kfd),
PAGE_SIZE,
AMDGPU_GEM_DOMAIN_DOORBELL,
&qpd->proc_doorbells,
NULL,
NULL);
if (r) {
DRM_ERROR("Failed to allocate process doorbells\n");
goto err;
}
return 0;

err:
bitmap_free(qpd->doorbell_bitmap);
qpd->doorbell_bitmap = NULL;
return r;
}

void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
{
if (doorbell_index) {
if (!kfd->shared_resources.enable_mes)
ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
else
amdgpu_mes_free_process_doorbells(
(struct amdgpu_device *)kfd->adev,
doorbell_index);
struct qcm_process_device *qpd = &pdd->qpd;

if (qpd->doorbell_bitmap) {
bitmap_free(qpd->doorbell_bitmap);
qpd->doorbell_bitmap = NULL;
}

amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
}

@ -31,7 +31,6 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
#include "kfd_iommu.h"
#include <linux/device.h>

/*

@ -1146,87 +1145,6 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
rcu_read_unlock();
}

#ifdef KFD_SUPPORT_IOMMU_V2
void kfd_signal_iommu_event(struct kfd_node *dev, u32 pasid,
unsigned long address, bool is_write_requested,
bool is_execute_requested)
{
struct kfd_hsa_memory_exception_data memory_exception_data;
struct vm_area_struct *vma;
int user_gpu_id;

/*
 * Because we are called from arbitrary context (workqueue) as opposed
 * to process context, kfd_process could attempt to exit while we are
 * running so the lookup function increments the process ref count.
 */
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
struct mm_struct *mm;

if (!p)
return; /* Presumably process exited. */

/* Take a safe reference to the mm_struct, which may otherwise
 * disappear even while the kfd_process is still referenced.
 */
mm = get_task_mm(p->lead_thread);
if (!mm) {
kfd_unref_process(p);
return; /* Process is exiting */
}

user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
if (unlikely(user_gpu_id == -EINVAL)) {
WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
return;
}
memset(&memory_exception_data, 0, sizeof(memory_exception_data));

mmap_read_lock(mm);
vma = find_vma(mm, address);

memory_exception_data.gpu_id = user_gpu_id;
memory_exception_data.va = address;
/* Set failure reason */
memory_exception_data.failure.NotPresent = 1;
memory_exception_data.failure.NoExecute = 0;
memory_exception_data.failure.ReadOnly = 0;
if (vma && address >= vma->vm_start) {
memory_exception_data.failure.NotPresent = 0;

if (is_write_requested && !(vma->vm_flags & VM_WRITE))
memory_exception_data.failure.ReadOnly = 1;
else
memory_exception_data.failure.ReadOnly = 0;

if (is_execute_requested && !(vma->vm_flags & VM_EXEC))
memory_exception_data.failure.NoExecute = 1;
else
memory_exception_data.failure.NoExecute = 0;
}

mmap_read_unlock(mm);
mmput(mm);

pr_debug("notpresent %d, noexecute %d, readonly %d\n",
memory_exception_data.failure.NotPresent,
memory_exception_data.failure.NoExecute,
memory_exception_data.failure.ReadOnly);

/* Workaround on Raven to not kill the process when memory is freed
 * before IOMMU is able to finish processing all the excessive PPRs
 */

if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) &&
KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) &&
KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0))
lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY,
&memory_exception_data);

kfd_unref_process(p);
}
#endif /* KFD_SUPPORT_IOMMU_V2 */

void kfd_signal_hw_exception_event(u32 pasid)
{
/*

@ -322,22 +322,19 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_VI();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);

if (!pdd->dev->kfd->use_iommu_v2) {
/* dGPUs: SVM aperture starting at 0
 * with small reserved space for kernel.
 * Set them to CANONICAL addresses.
 */
pdd->gpuvm_base = SVM_USER_BASE;
pdd->gpuvm_limit =
pdd->dev->kfd->shared_resources.gpuvm_size - 1;
} else {
/* set them to non CANONICAL addresses, and no SVM is
 * allocated.
 */
pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
pdd->dev->kfd->shared_resources.gpuvm_size);
}
/* dGPUs: SVM aperture starting at 0
 * with small reserved space for kernel.
 * Set them to CANONICAL addresses.
 */
pdd->gpuvm_base = SVM_USER_BASE;
pdd->gpuvm_limit =
pdd->dev->kfd->shared_resources.gpuvm_size - 1;

/* dGPUs: the reserved space for kernel
 * before SVM
 */
pdd->qpd.cwsr_base = SVM_CWSR_BASE;
pdd->qpd.ib_base = SVM_IB_BASE;

pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);

@ -348,18 +345,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
pdd->lds_base = MAKE_LDS_APP_BASE_V9();
pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);

/* Raven needs SVM to support graphic handle, etc. Leave the small
 * reserved space before SVM on Raven as well, even though we don't
 * have to.
 * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
 * are used in Thunk to reserve SVM.
 */
pdd->gpuvm_base = SVM_USER_BASE;
pdd->gpuvm_base = PAGE_SIZE;
pdd->gpuvm_limit =
pdd->dev->kfd->shared_resources.gpuvm_size - 1;

pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);

/*
 * Place TBA/TMA on opposite side of VM hole to prevent
 * stray faults from triggering SVM on these pages.
 */
pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
}

int kfd_init_apertures(struct kfd_process *process)

@ -416,14 +413,6 @@ int kfd_init_apertures(struct kfd_process *process)
return -EINVAL;
}
}

if (!dev->kfd->use_iommu_v2) {
/* dGPUs: the reserved space for kernel
 * before SVM
 */
pdd->qpd.cwsr_base = SVM_CWSR_BASE;
pdd->qpd.ib_base = SVM_IB_BASE;
}
}

dev_dbg(kfd_device, "node id %u\n", id);

@ -1,356 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2018-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/kconfig.h>

#if IS_REACHABLE(CONFIG_AMD_IOMMU_V2)

#include <linux/printk.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/amd-iommu.h>
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"

static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
AMD_IOMMU_DEVICE_FLAG_PASID_SUP;

/** kfd_iommu_check_device - Check whether IOMMU is available for device
 */
int kfd_iommu_check_device(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
int err;

if (!kfd->use_iommu_v2)
return -ENODEV;

iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info);
if (err)
return err;

if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags)
return -ENODEV;

return 0;
}

/** kfd_iommu_device_init - Initialize IOMMU for device
 */
int kfd_iommu_device_init(struct kfd_dev *kfd)
{
struct amd_iommu_device_info iommu_info;
unsigned int pasid_limit;
int err;

if (!kfd->use_iommu_v2)
return 0;

iommu_info.flags = 0;
err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info);
if (err < 0) {
dev_err(kfd_device,
"error getting iommu info. is the iommu enabled?\n");
return -ENODEV;
}

if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
dev_err(kfd_device,
"error required iommu flags ats %i, pri %i, pasid %i\n",
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
!= 0);
return -ENODEV;
}

pasid_limit = min_t(unsigned int,
(unsigned int)(1 << kfd->device_info.max_pasid_bits),
iommu_info.max_pasids);

if (!kfd_set_pasid_limit(pasid_limit)) {
dev_err(kfd_device, "error setting pasid limit\n");
return -EBUSY;
}

return 0;
}

/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process
 *
 * Binds the given process to the given device using its PASID. This
 * enables IOMMUv2 address translation for the process on the device.
 *
 * This function assumes that the process mutex is held.
 */
int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
{
struct kfd_node *dev = pdd->dev;
struct kfd_process *p = pdd->process;
int err;

if (!dev->kfd->use_iommu_v2 || pdd->bound == PDD_BOUND)
return 0;

if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
return -EINVAL;
}

if (!kfd_is_first_node(dev)) {
dev_warn_once(kfd_device,
"IOMMU supported only on first node\n");
return 0;
}

err = amd_iommu_bind_pasid(dev->adev->pdev, p->pasid, p->lead_thread);
if (!err)
pdd->bound = PDD_BOUND;

return err;
}

/** kfd_iommu_unbind_process - Unbind process from all devices
 *
 * This removes all IOMMU device bindings of the process. To be used
 * before process termination.
 */
void kfd_iommu_unbind_process(struct kfd_process *p)
{
int i;

for (i = 0; i < p->n_pdds; i++)
if ((p->pdds[i]->bound == PDD_BOUND) &&
(kfd_is_first_node((p->pdds[i]->dev))))
amd_iommu_unbind_pasid(p->pdds[i]->dev->adev->pdev,
p->pasid);
}

/* Callback for process shutdown invoked by the IOMMU driver */
static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid)
{
struct kfd_node *dev = kfd_device_by_pci_dev(pdev);
struct kfd_process *p;
struct kfd_process_device *pdd;

if (!dev)
return;

/*
 * Look for the process that matches the pasid. If there is no such
 * process, we either released it in amdkfd's own notifier, or there
 * is a bug. Unfortunately, there is no way to tell...
 */
p = kfd_lookup_process_by_pasid(pasid);
if (!p)
return;

pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);

mutex_lock(&p->mutex);

pdd = kfd_get_process_device_data(dev, p);
if (pdd)
/* For GPU relying on IOMMU, we need to dequeue here
 * when PASID is still bound.
 */
kfd_process_dequeue_from_device(pdd);

mutex_unlock(&p->mutex);

kfd_unref_process(p);
}

/* This function called by IOMMU driver on PPR failure */
static int iommu_invalid_ppr_cb(struct pci_dev *pdev, u32 pasid,
unsigned long address, u16 flags)
{
struct kfd_node *dev;

dev_warn_ratelimited(kfd_device,
"Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn),
pasid,
address,
flags);

dev = kfd_device_by_pci_dev(pdev);
if (!WARN_ON(!dev))
kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);

return AMD_IOMMU_INV_PRI_RSP_INVALID;
}

/*
 * Bind processes do the device that have been temporarily unbound
 * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device.
 */
static int kfd_bind_processes_to_device(struct kfd_node *knode)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;
int err = 0;

int idx = srcu_read_lock(&kfd_processes_srcu);

hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(knode, p);

if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) {
mutex_unlock(&p->mutex);
continue;
}

err = amd_iommu_bind_pasid(knode->adev->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
pr_err("Unexpected pasid 0x%x binding failure\n",
p->pasid);
mutex_unlock(&p->mutex);
break;
}

pdd->bound = PDD_BOUND;
mutex_unlock(&p->mutex);
}

srcu_read_unlock(&kfd_processes_srcu, idx);

return err;
}

/*
 * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
 * processes will be restored to PDD_BOUND state in
 * kfd_bind_processes_to_device.
 */
static void kfd_unbind_processes_from_device(struct kfd_node *knode)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
unsigned int temp;

int idx = srcu_read_lock(&kfd_processes_srcu);

hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(knode, p);

if (WARN_ON(!pdd)) {
mutex_unlock(&p->mutex);
continue;
}

if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(&p->mutex);
}

srcu_read_unlock(&kfd_processes_srcu, idx);
}

/** kfd_iommu_suspend - Prepare IOMMU for suspend
 *
 * This unbinds processes from the device and disables the IOMMU for
 * the device.
 */
void kfd_iommu_suspend(struct kfd_dev *kfd)
{
if (!kfd->use_iommu_v2)
return;

kfd_unbind_processes_from_device(kfd->nodes[0]);

amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
amd_iommu_free_device(kfd->adev->pdev);
}

/** kfd_iommu_resume - Restore IOMMU after resume
 *
 * This reinitializes the IOMMU for the device and re-binds previously
 * suspended processes to the device.
 */
int kfd_iommu_resume(struct kfd_dev *kfd)
{
unsigned int pasid_limit;
int err;

if (!kfd->use_iommu_v2)
return 0;

pasid_limit = kfd_get_pasid_limit();

err = amd_iommu_init_device(kfd->adev->pdev, pasid_limit);
if (err)
return -ENXIO;

amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev,
iommu_invalid_ppr_cb);

err = kfd_bind_processes_to_device(kfd->nodes[0]);
if (err) {
amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
amd_iommu_free_device(kfd->adev->pdev);
return err;
}

return 0;
}

/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology
 */
int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
struct kfd_perf_properties *props;

if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT))
return 0;

if (!amd_iommu_pc_supported())
return 0;

props = kfd_alloc_struct(props);
if (!props)
return -ENOMEM;
strcpy(props->block_name, "iommu");
props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
amd_iommu_pc_get_max_counters(0); /* assume one iommu */
list_add_tail(&props->list, &kdev->perf_props);

return 0;
}

#endif

@ -1,84 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright 2018-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef __KFD_IOMMU_H__
#define __KFD_IOMMU_H__

#include <linux/kconfig.h>

#if IS_REACHABLE(CONFIG_AMD_IOMMU_V2)

#define KFD_SUPPORT_IOMMU_V2

int kfd_iommu_check_device(struct kfd_dev *kfd);
int kfd_iommu_device_init(struct kfd_dev *kfd);

int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd);
void kfd_iommu_unbind_process(struct kfd_process *p);

void kfd_iommu_suspend(struct kfd_dev *kfd);
int kfd_iommu_resume(struct kfd_dev *kfd);

int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev);

#else

static inline int kfd_iommu_check_device(struct kfd_dev *kfd)
{
return -ENODEV;
}
static inline int kfd_iommu_device_init(struct kfd_dev *kfd)
{
#if IS_MODULE(CONFIG_AMD_IOMMU_V2)
WARN_ONCE(1, "iommu_v2 module is not usable by built-in KFD");
#endif
return 0;
}

static inline int kfd_iommu_bind_process_to_device(
struct kfd_process_device *pdd)
{
return 0;
}
static inline void kfd_iommu_unbind_process(struct kfd_process *p)
{
/* empty */
}

static inline void kfd_iommu_suspend(struct kfd_dev *kfd)
{
/* empty */
}
static inline int kfd_iommu_resume(struct kfd_dev *kfd)
{
return 0;
}

static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev)
{
return 0;
}

#endif /* IS_REACHABLE(CONFIG_AMD_IOMMU_V2) */

#endif /* __KFD_IOMMU_H__ */

@ -461,7 +461,6 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
0, node->id, trigger);

svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);

out_free:
kvfree(buf);

@ -543,10 +542,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
addr = next;
}

if (cpages)
if (cpages) {
prange->actual_loc = best_loc;
else
svm_range_free_dma_mappings(prange, true);
} else {
svm_range_vram_node_free(prange);
}

return r < 0 ? r : 0;
}

@ -206,13 +206,6 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}

static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
{
__update_mqd(mm, mqd, q, minfo, 1);
}

static uint32_t read_doorbell_id(void *mqd)
{
struct cik_mqd *m = (struct cik_mqd *)mqd;

@ -220,9 +213,9 @@ static uint32_t read_doorbell_id(void *mqd)
return m->queue_doorbell_id0;
}

static void update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
{
__update_mqd(mm, mqd, q, minfo, 0);
}

@ -387,7 +380,6 @@ static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)

#endif


struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_node *dev)
{

@ -470,16 +462,3 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,

return mqd;
}

struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
struct kfd_node *dev)
{
struct mqd_manager *mqd;

mqd = mqd_manager_init_cik(type, dev);
if (!mqd)
return NULL;
if (type == KFD_MQD_TYPE_CP)
mqd->update_mqd = update_mqd_hawaii;
return mqd;
}

@ -237,14 +237,6 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}


static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
{
__update_mqd(mm, mqd, q, minfo, MTYPE_CC, 1);
}

static uint32_t read_doorbell_id(void *mqd)
{
struct vi_mqd *m = (struct vi_mqd *)mqd;

@ -252,9 +244,9 @@ static uint32_t read_doorbell_id(void *mqd)
return m->queue_doorbell_id0;
}

static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
static void update_mqd(struct mqd_manager *mm, void *mqd,
struct queue_properties *q,
struct mqd_update_info *minfo)
{
__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}

@ -529,16 +521,3 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,

return mqd;
}

struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_node *dev)
{
struct mqd_manager *mqd;

mqd = mqd_manager_init_vi(type, dev);
if (!mqd)
return NULL;
if (type == KFD_MQD_TYPE_CP)
mqd->update_mqd = update_mqd_tonga;
return mqd;
}

@ -175,12 +175,6 @@ extern int send_sigterm;
 */
extern int debug_largebar;

/*
 * Ignore CRAT table during KFD initialization, can be used to work around
 * broken CRAT tables on some AMD systems
 */
extern int ignore_crat;

/* Set sh_mem_config.retry_disable on GFX v9 */
extern int amdgpu_noretry;

@ -234,7 +228,6 @@ struct kfd_device_info {
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
bool supports_cwsr;
bool needs_iommu_device;
bool needs_pci_atomics;
uint32_t no_atomic_fw_version;
unsigned int num_sdma_queues_per_engine;

@ -323,15 +316,6 @@ struct kfd_dev {

struct kfd_device_info device_info;

phys_addr_t doorbell_base; /* Start of actual doorbells used by
 * KFD. It is aligned for mapping
 * into user mode
 */
size_t doorbell_base_dw_offset; /* Offset from the start of the PCI
 * doorbell BAR to the first KFD
 * doorbell in dwords. GFX reserves
 * the segment before this offset.
 */
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
 * page used by kernel queue
 */

@ -340,8 +324,6 @@ struct kfd_dev {

const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
DECLARE_BITMAP(doorbell_available_index,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

void *gtt_mem;
uint64_t gtt_start_gpu_addr;

@ -368,9 +350,6 @@ struct kfd_dev {

bool pci_atomic_requested;

/* Use IOMMU v2 flag */
bool use_iommu_v2;

/* Compute Profile ref. count */
atomic_t compute_profile;

@ -385,6 +364,12 @@ struct kfd_dev {
/* Track per device allocated watch points */
uint32_t alloc_watch_ids;
spinlock_t watch_points_lock;

/* Kernel doorbells for KFD device */
struct amdgpu_bo *doorbells;

/* bitmap for dynamic doorbell allocation from doorbell object */
unsigned long *doorbell_bitmap;
};

enum kfd_mempool {

@ -702,7 +687,10 @@ struct qcm_process_device {
uint64_t ib_base;
void *ib_kaddr;

/* doorbell resources per process per device */
/* doorbells for kfd process */
struct amdgpu_bo *proc_doorbells;

/* bitmap for dynamic doorbell allocation from the bo */
unsigned long *doorbell_bitmap;
};

@ -792,7 +780,6 @@ struct kfd_process_device {
struct attribute attr_evict;

struct kobject *kobj_stats;
unsigned int doorbell_index;

/*
 * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process

@ -1100,9 +1087,9 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
unsigned int *doorbell_index);
struct kfd_process_device *pdd);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
unsigned int doorbell_index);
struct kfd_process_device *pdd);
/* GTT Sub-Allocator */

int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,

@ -1152,7 +1139,6 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
}
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);

/* Interrupts */
#define KFD_IRQ_FENCE_CLIENTID 0xff

@ -1299,12 +1285,8 @@ void print_queue(struct queue *q);

struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,

@ -1459,9 +1441,6 @@ int kfd_wait_on_events(struct kfd_process *p,
uint32_t *wait_result);
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_node *dev,
u32 pasid, unsigned long address,
bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);

@ -28,7 +28,6 @@
#include <linux/sched/task.h>
#include <linux/mmu_context.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>
#include <linux/mman.h>

@ -41,7 +40,6 @@ struct mm_struct;

#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_iommu.h"
#include "kfd_svm.h"
#include "kfd_smi_events.h"
#include "kfd_debug.h"

@ -1035,10 +1033,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));

bitmap_free(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);

kfd_free_process_doorbells(pdd->dev->kfd, pdd->doorbell_index);
kfd_free_process_doorbells(pdd->dev->kfd, pdd);

if (pdd->dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,

@ -1123,7 +1120,6 @@ static void kfd_process_wq_release(struct work_struct *work)
dma_fence_signal(p->ef);

kfd_process_remove_sysfs(p);
kfd_iommu_unbind_process(p);

kfd_process_kunmap_signal_bo(p);
kfd_process_free_outstanding_kfd_bos(p);

@ -1550,38 +1546,6 @@ err_alloc_process:
return ERR_PTR(err);
}

static int init_doorbell_bitmap(struct qcm_process_device *qpd,
struct kfd_dev *dev)
{
unsigned int i;
int range_start = dev->shared_resources.non_cp_doorbells_start;
int range_end = dev->shared_resources.non_cp_doorbells_end;

if (!KFD_IS_SOC15(dev))
return 0;

qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
GFP_KERNEL);
if (!qpd->doorbell_bitmap)
return -ENOMEM;

/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);

for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
if (i >= range_start && i <= range_end) {
__set_bit(i, qpd->doorbell_bitmap);
__set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
qpd->doorbell_bitmap);
}
}

return 0;
}

struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{

@ -1606,11 +1570,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
if (!pdd)
return NULL;

if (init_doorbell_bitmap(&pdd->qpd, dev->kfd)) {
pr_err("Failed to init doorbell for process\n");
goto err_free_pdd;
}

pdd->dev = dev;
INIT_LIST_HEAD(&pdd->qpd.queues_list);
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);

@ -1766,10 +1725,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
}
}

err = kfd_iommu_bind_process_to_device(pdd);
if (err)
goto out;

/*
 * make sure that runtime_usage counter is incremented just once
 * per pdd

@ -1777,15 +1732,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
pdd->runtime_inuse = true;

return pdd;

out:
/* balance runpm reference count and exit with error */
if (!pdd->runtime_inuse) {
pm_runtime_mark_last_busy(adev_to_drm(dev->adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev);
}

return ERR_PTR(err);
}

/* Create specific handle mapped to mem from process local memory idr

@ -368,17 +368,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}

if (q && p_doorbell_offset_in_process)
if (q && p_doorbell_offset_in_process) {
/* Return the doorbell offset within the doorbell page
 * to the caller so it can be passed up to user mode
 * (in bytes).
 * There are always 1024 doorbells per process, so in case
 * of 8-byte doorbells, there are two doorbell pages per
 * process.
 * relative doorbell index = Absolute doorbell index -
 * absolute index of first doorbell in the page.
 */
*p_doorbell_offset_in_process =
(q->properties.doorbell_off * sizeof(uint32_t)) &
(kfd_doorbell_process_slice(dev->kfd) - 1);
uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
pdd->qpd.proc_doorbells,
0);

*p_doorbell_offset_in_process = (q->properties.doorbell_off
- first_db_index) * sizeof(uint32_t);
}

pr_debug("PQM After DQM create queue\n");

@ -933,12 +936,6 @@ int kfd_criu_restore_queue(struct kfd_process *p,
goto exit;
}

if (!pdd->doorbell_index &&
kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
ret = -ENOMEM;
goto exit;
}

/* data stored in this order: mqd, ctl_stack */
mqd = q_extra_data;
ctl_stack = mqd + q_data->mqd_size;