From 73339a71547c07f7c28d0b48ad06591f8c4433d8 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 24 Mar 2020 16:39:37 -0400 Subject: [PATCH 01/17] drm/amdgpu: Add AQUIRE_MEM PACKET3 fields defintion Add this for gfx10 and gfx9. v2: Fix identation Signed-off-by: Andrey Grodzovsky Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nvd.h | 48 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/soc15d.h | 25 ++++++++++++++- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h index f3d8771ebed4..fd6b58243b03 100644 --- a/drivers/gpu/drm/amd/amdgpu/nvd.h +++ b/drivers/gpu/drm/amd/amdgpu/nvd.h @@ -256,6 +256,54 @@ #define PACKET3_BLK_CNTX_UPDATE 0x53 #define PACKET3_INCR_UPDT_STATE 0x55 #define PACKET3_ACQUIRE_MEM 0x58 +/* 1. HEADER + * 2. COHER_CNTL [30:0] + * 2.1 ENGINE_SEL [31:31] + * 2. COHER_SIZE [31:0] + * 3. COHER_SIZE_HI [7:0] + * 4. COHER_BASE_LO [31:0] + * 5. COHER_BASE_HI [23:0] + * 7. POLL_INTERVAL [15:0] + * 8. GCR_CNTL [18:0] + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) ((x) << 0) + /* + * 0:NOP + * 1:ALL + * 2:RANGE + * 3:FIRST_LAST + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_RANGE(x) ((x) << 2) + /* + * 0:ALL + * 1:reserved + * 2:RANGE + * 3:FIRST_LAST + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(x) ((x) << 4) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(x) ((x) << 5) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_WB(x) ((x) << 6) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(x) ((x) << 7) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(x) ((x) << 8) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(x) ((x) << 9) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_US(x) ((x) << 10) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_RANGE(x) ((x) << 11) + /* + * 0:ALL + * 1:VOL + * 2:RANGE + * 3:FIRST_LAST + */ +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_DISCARD(x) ((x) << 13) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(x) ((x) << 14) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(x) ((x) << 15) +#define PACKET3_ACQUIRE_MEM_GCR_CNTL_SEQ(x) ((x) << 16) + /* + * 0: PARALLEL + * 1: FORWARD + * 2: REVERSE + */ +#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18) #define PACKET3_REWIND 0x59 #define PACKET3_INTERRUPT 0x5A #define PACKET3_GEN_PDEPTE 0x5B diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 295d68c5811d..799925d22fc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -253,7 +253,30 @@ # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) -#define PACKET3_AQUIRE_MEM 0x58 +#define PACKET3_ACQUIRE_MEM 0x58 +/* 1. HEADER + * 2. COHER_CNTL [30:0] + * 2.1 ENGINE_SEL [31:31] + * 3. COHER_SIZE [31:0] + * 4. COHER_SIZE_HI [7:0] + * 5. COHER_BASE_LO [31:0] + * 6. COHER_BASE_HI [23:0] + * 7. POLL_INTERVAL [15:0] + */ +/* COHER_CNTL fields for CP_COHER_CNTL */ +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_NC_ACTION_ENA(x) ((x) << 3) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WC_ACTION_ENA(x) ((x) << 4) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_INV_METADATA_ACTION_ENA(x) ((x) << 5) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_VOL_ACTION_ENA(x) ((x) << 15) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(x) ((x) << 18) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(x) ((x) << 22) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(x) ((x) << 23) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_CB_ACTION_ENA(x) ((x) << 25) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_DB_ACTION_ENA(x) ((x) << 26) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(x) ((x) << 27) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_VOL_ACTION_ENA(x) ((x) << 28) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29) +#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30) #define PACKET3_REWIND 0x59 #define PACKET3_LOAD_UCONFIG_REG 0x5E #define PACKET3_LOAD_SH_REG 0x5F From 9495220577416632675959caf122e968469ffd16 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Sat, 9 May 2020 13:26:00 +0800 Subject: [PATCH 02/17] drm/amd/powerplay: try to do a graceful shutdown on SW CTF Normally this(SW CTF) should not happen. And by doing graceful shutdown we can prevent further damage. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/smu_helper.c | 21 +++++++++++++++---- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 7 +++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c index 91b5d96db674..782f6d295202 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c @@ -22,6 +22,7 @@ */ #include +#include #include "hwmgr.h" #include "pp_debug.h" @@ -595,12 +596,18 @@ int phm_irq_process(struct amdgpu_device *adev, uint32_t src_id = entry->src_id; if (client_id == AMDGPU_IRQ_CLIENTID_LEGACY) { - if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH) + if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH) { pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n", PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); - else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW) + /* + * SW CTF just occurred. + * Try to do a graceful shutdown to prevent further damage. + */ + dev_emerg(adev->dev, "System is going to shutdown due to SW CTF!\n"); + orderly_poweroff(true); + } else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW) pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n", PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), @@ -611,12 +618,18 @@ int phm_irq_process(struct amdgpu_device *adev, PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); } else if (client_id == SOC15_IH_CLIENTID_THM) { - if (src_id == 0) + if (src_id == 0) { pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n", PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); - else + /* + * SW CTF just occurred. + * Try to do a graceful shutdown to prevent further damage. + */ + dev_emerg(adev->dev, "System is going to shutdown due to SW CTF!\n"); + orderly_poweroff(true); + } else pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n", PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index d5a3487ccfac..d1fe762c8d3d 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -23,6 +23,7 @@ #include #include #include +#include #define SMU_11_0_PARTIAL_PPTABLE @@ -1561,6 +1562,12 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); + /* + * SW CTF just occurred. + * Try to do a graceful shutdown to prevent further damage. + */ + dev_emerg(adev->dev, "System is going to shutdown due to SW CTF!\n"); + orderly_poweroff(true); break; case THM_11_0__SRCID__THM_DIG_THERM_H2L: pr_warn("GPU under temperature range detected on PCIe %d:%d.%d!\n", From e528ccf9320c3634395e39d467bb6daed3bec4e8 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Sat, 9 May 2020 13:49:26 +0800 Subject: [PATCH 03/17] drm/amd/powerplay: shutdown on HW CTF To prevent further damage. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/smu_helper.c | 16 +++++++++++++-- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 20 ++++++++++++++++++- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c index 782f6d295202..4279f95ba779 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c @@ -612,11 +612,17 @@ int phm_irq_process(struct amdgpu_device *adev, PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); - else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) + else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) { pr_warn("GPU Critical Temperature Fault detected on PCIe %d:%d.%d!\n", PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); + /* + * HW CTF just occurred. Shutdown to prevent further damage. + */ + dev_emerg(adev->dev, "System is going to shutdown due to HW CTF!\n"); + orderly_poweroff(true); + } } else if (client_id == SOC15_IH_CLIENTID_THM) { if (src_id == 0) { pr_warn("GPU over temperature range detected on PCIe %d:%d.%d!\n", @@ -634,11 +640,17 @@ int phm_irq_process(struct amdgpu_device *adev, PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); - } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) + } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) { pr_warn("GPU Critical Temperature Fault detected on PCIe %d:%d.%d!\n", PCI_BUS_NUM(adev->pdev->devfn), PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); + /* + * HW CTF just occurred. Shutdown to prevent further damage. + */ + dev_emerg(adev->dev, "System is going to shutdown due to HW CTF!\n"); + orderly_poweroff(true); + } return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index d1fe762c8d3d..3979973680ec 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1548,6 +1548,8 @@ static int smu_v11_0_ack_ac_dc_interrupt(struct smu_context *smu) #define THM_11_0__SRCID__THM_DIG_THERM_L2H 0 /* ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */ #define THM_11_0__SRCID__THM_DIG_THERM_H2L 1 /* ASIC_TEMP < CG_THERMAL_INT.DIG_THERM_INTL */ +#define SMUIO_11_0__SRCID__SMUIO_GPIO19 83 + static int smu_v11_0_irq_process(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -1582,8 +1584,17 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, PCI_SLOT(adev->pdev->devfn), PCI_FUNC(adev->pdev->devfn)); break; - } + } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) { + pr_warn("GPU Critical Temperature Fault detected on PCIe %d:%d.%d!\n", + PCI_BUS_NUM(adev->pdev->devfn), + PCI_SLOT(adev->pdev->devfn), + PCI_FUNC(adev->pdev->devfn)); + /* + * HW CTF just occurred. Shutdown to prevent further damage. + */ + dev_emerg(adev->dev, "System is going to shutdown due to HW CTF!\n"); + orderly_poweroff(true); } else if (client_id == SOC15_IH_CLIENTID_MP1) { if (src_id == 0xfe) smu_v11_0_ack_ac_dc_interrupt(&adev->smu); @@ -1626,6 +1637,13 @@ int smu_v11_0_register_irq_handler(struct smu_context *smu) if (ret) return ret; + /* Register CTF(GPIO_19) interrupt */ + ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_ROM_SMUIO, + SMUIO_11_0__SRCID__SMUIO_GPIO19, + irq_src); + if (ret) + return ret; + ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1, 0xfe, irq_src); From cd598d6cfd217f18e53f6c26b2397692666eb3a4 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 8 May 2020 17:55:42 +0800 Subject: [PATCH 04/17] drm/amd/powerplay: report correct AC/DC event based on ctxid V2 'ctxid' is used to distinguish different events raised from SMC. 0x3 and 0x4 are for AC and DC power mode. V2: update the way to retrieve the ctxid and change the log level to debug Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 3979973680ec..3391d1c4bbbe 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1556,6 +1556,11 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, { uint32_t client_id = entry->client_id; uint32_t src_id = entry->src_id; + /* + * ctxid is used to distinguish different + * events for SMCToHost interrupt. + */ + uint32_t ctxid = entry->src_data[0]; if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { @@ -1596,8 +1601,18 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, dev_emerg(adev->dev, "System is going to shutdown due to HW CTF!\n"); orderly_poweroff(true); } else if (client_id == SOC15_IH_CLIENTID_MP1) { - if (src_id == 0xfe) - smu_v11_0_ack_ac_dc_interrupt(&adev->smu); + if (src_id == 0xfe) { + switch (ctxid) { + case 0x3: + dev_dbg(adev->dev, "Switched to AC mode!\n"); + smu_v11_0_ack_ac_dc_interrupt(&adev->smu); + break; + case 0x4: + dev_dbg(adev->dev, "Switched to DC mode!\n"); + smu_v11_0_ack_ac_dc_interrupt(&adev->smu); + break; + } + } } return 0; From 72d99b395f6c1b924f91e7964de231fb78e47c93 Mon Sep 17 00:00:00 2001 From: Yintian Tao Date: Tue, 12 May 2020 18:10:42 +0800 Subject: [PATCH 05/17] drm/amdgpu: turn back rlcg write for gfx_v10 There is no need to use amdgpu_mm_wreg_mmio_rlc() during initialization time because this interface is only designed for debugfs case to access the registers which are only permitted by RLCG during run-time. Therefore, turn back rlcg write for gfx_v10. If we not turn back it, it will raise amdgpu load failure. [ 54.904333] amdgpu: SMU driver if version not matched [ 54.904393] amdgpu: SMU is initialized successfully! [ 54.905971] [drm] kiq ring mec 2 pipe 1 q 0 [ 55.115416] amdgpu 0000:00:06.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring gfx_0.0.0 test failed (-110) [ 55.118877] [drm:amdgpu_device_init [amdgpu]] *ERROR* hw_init of IP block failed -110 [ 55.126587] amdgpu 0000:00:06.0: amdgpu_device_ip_init failed [ 55.133466] amdgpu 0000:00:06.0: Fatal error during GPU init Signed-off-by: Yintian Tao Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 64080d209223..84aa0d4f3c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4577,13 +4577,11 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - /* amdgpu_mm_wreg_mmio_rlc will fall back to mmio if doesn't support rlcg_write */ - amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), - adev->gfx.rlc.clear_state_gpu_addr >> 32, 0); - amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), - adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc, 0); - amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), - adev->gfx.rlc.clear_state_size, 0); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); return 0; } @@ -5192,7 +5190,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); - amdgpu_mm_wreg_mmio_rlc(adev, SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) From 29c1ec244ca1c2acfe6a8918967d678e104705c2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 12 May 2020 13:48:06 +0100 Subject: [PATCH 06/17] drm/amdgpu: remove redundant assignment to variable ret The variable ret is being initializeed with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 90610b4f2c75..e9e59bc68c9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -441,7 +441,7 @@ out: int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev) { - int ret = -EINVAL; + int ret; /* Each psp need to set the latest topology */ ret = psp_xgmi_set_topology_info(&adev->psp, From ab9c21124d6e03460c9c59006a61cc076fefa82e Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 13 May 2020 17:45:06 +0800 Subject: [PATCH 07/17] drm/amdgpu: Add cmd to control XGMI link sleep Added host to SMU FW cmd to enable/disable XGMI link power down Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 22 +++++++++++++ drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 31 +++++++++++++++++++ .../gpu/drm/amd/powerplay/inc/amdgpu_smu.h | 2 ++ .../drm/amd/powerplay/inc/arcturus_ppsmc.h | 3 +- drivers/gpu/drm/amd/powerplay/inc/smu_types.h | 1 + 5 files changed, 58 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index de14542de775..8c684a6e0156 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -2100,6 +2100,28 @@ int smu_set_df_cstate(struct smu_context *smu, return ret; } +int smu_allow_xgmi_power_down(struct smu_context *smu, bool en) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + if (!adev->pm.dpm_enabled) + return -EINVAL; + + if (!smu->ppt_funcs || !smu->ppt_funcs->allow_xgmi_power_down) + return 0; + + mutex_lock(&smu->mutex); + + ret = smu->ppt_funcs->allow_xgmi_power_down(smu, en); + if (ret) + pr_err("[AllowXgmiPowerDown] failed!\n"); + + mutex_unlock(&smu->mutex); + + return ret; +} + int smu_write_watermarks_table(struct smu_context *smu) { void *watermarks_table = smu->smu_table.watermarks_table; diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index cfae4bcaf32e..4874a20ccdf1 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -128,6 +128,7 @@ static struct smu_11_0_cmn2aisc_mapping arcturus_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(SetXgmiMode, PPSMC_MSG_SetXgmiMode), MSG_MAP(SetMemoryChannelEnable, PPSMC_MSG_SetMemoryChannelEnable), MSG_MAP(DFCstateControl, PPSMC_MSG_DFCstateControl), + MSG_MAP(GmiPwrDnControl, PPSMC_MSG_GmiPwrDnControl), }; static struct smu_11_0_cmn2aisc_mapping arcturus_clk_map[SMU_CLK_COUNT] = { @@ -2286,6 +2287,35 @@ static int arcturus_set_df_cstate(struct smu_context *smu, return smu_send_smc_msg_with_param(smu, SMU_MSG_DFCstateControl, state, NULL); } +static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en) +{ + uint32_t smu_version; + int ret; + + ret = smu_get_smc_version(smu, NULL, &smu_version); + if (ret) { + pr_err("Failed to get smu version!\n"); + return ret; + } + + /* PPSMC_MSG_GmiPwrDnControl is supported by 54.20.0 and onwards */ + if (smu_version < 0x365000) { + pr_err("XGMI power down control is only supported by PMFW 54.20.0 and onwards\n"); + return -EINVAL; + } + + if (en) + return smu_send_smc_msg_with_param(smu, + SMU_MSG_GmiPwrDnControl, + 1, + NULL); + + return smu_send_smc_msg_with_param(smu, + SMU_MSG_GmiPwrDnControl, + 0, + NULL); +} + static const struct pptable_funcs arcturus_ppt_funcs = { /* translate smu index into arcturus specific index */ .get_smu_msg_index = arcturus_get_smu_msg_index, @@ -2379,6 +2409,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .override_pcie_parameters = smu_v11_0_override_pcie_parameters, .get_pptable_power_limit = arcturus_get_pptable_power_limit, .set_df_cstate = arcturus_set_df_cstate, + .allow_xgmi_power_down = arcturus_allow_xgmi_power_down, }; void arcturus_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 928eed220f93..4d1c2a44a8b6 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -491,6 +491,7 @@ struct pptable_funcs { int (*get_dpm_clk_limited)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t dpm_level, uint32_t *freq); int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state); + int (*allow_xgmi_power_down)(struct smu_context *smu, bool en); int (*update_pcie_parameters)(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap); int (*i2c_eeprom_init)(struct i2c_adapter *control); void (*i2c_eeprom_fini)(struct i2c_adapter *control); @@ -731,6 +732,7 @@ int smu_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state); int smu_set_df_cstate(struct smu_context *smu, enum pp_df_cstate state); +int smu_allow_xgmi_power_down(struct smu_context *smu, bool en); int smu_get_max_sustainable_clocks_by_dc(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks); diff --git a/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h index f736d773f9d6..e07478b6ac04 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/arcturus_ppsmc.h @@ -114,7 +114,8 @@ #define PPSMC_MSG_SetNumBadHbmPagesRetired 0x3A #define PPSMC_MSG_DFCstateControl 0x3B -#define PPSMC_Message_Count 0x3C +#define PPSMC_MSG_GmiPwrDnControl 0x3D +#define PPSMC_Message_Count 0x3E typedef uint32_t PPSMC_Result; typedef uint32_t PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h index a5b4df146713..ee7dac4693d4 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h @@ -170,6 +170,7 @@ __SMU_DUMMY_MAP(SetSoftMinJpeg), \ __SMU_DUMMY_MAP(SetHardMinFclkByFreq), \ __SMU_DUMMY_MAP(DFCstateControl), \ + __SMU_DUMMY_MAP(GmiPwrDnControl), \ __SMU_DUMMY_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE), \ __SMU_DUMMY_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE), \ From 5e7067b24fcf1549c72988dd92de6d17ff3d2077 Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 13 May 2020 17:45:57 +0800 Subject: [PATCH 08/17] drm/amdgpu: Add DPM function for XGMI link power down control Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 10 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 0e8018c9aa8e..d2a105e3bf7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -1188,3 +1188,13 @@ int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev, return ret; } + +int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en) +{ + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) + return smu_allow_xgmi_power_down(smu, en); + + return 0; +} \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 936d85aa0fbc..956f6c710670 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -538,4 +538,6 @@ int amdgpu_dpm_baco_enter(struct amdgpu_device *adev); int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev, uint32_t cstate); +int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en); + #endif From 5c23e9e05e42b5ea56a87a17f1da9ccf9b100465 Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 13 May 2020 20:23:51 +0800 Subject: [PATCH 09/17] drm/amdgpu: Update RAS XGMI error inject sequence Disable XGMI link power down prior to issuing a XGMI RAS error Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 30 ++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 7348619253c7..50fe08bf2f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -811,6 +811,32 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev, return 0; } +/* Trigger XGMI/WAFL error */ +int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, + struct ta_ras_trigger_error_input *block_info) +{ + int ret; + + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) + dev_warn(adev->dev, "Failed to disallow df cstate"); + + if (amdgpu_dpm_allow_xgmi_power_down(adev, false)) + dev_warn(adev->dev, "Failed to disallow XGMI power down"); + + ret = psp_ras_trigger_error(&adev->psp, block_info); + + if (amdgpu_ras_intr_triggered()) + return ret; + + if (amdgpu_dpm_allow_xgmi_power_down(adev, true)) + dev_warn(adev->dev, "Failed to allow XGMI power down"); + + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) + dev_warn(adev->dev, "Failed to allow df cstate"); + + return ret; +} + /* wrapper of psp_ras_trigger_error */ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info) @@ -844,10 +870,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, break; case AMDGPU_RAS_BLOCK__UMC: case AMDGPU_RAS_BLOCK__MMHUB: - case AMDGPU_RAS_BLOCK__XGMI_WAFL: case AMDGPU_RAS_BLOCK__PCIE_BIF: ret = psp_ras_trigger_error(&adev->psp, &block_info); break; + case AMDGPU_RAS_BLOCK__XGMI_WAFL: + ret = amdgpu_ras_error_inject_xgmi(adev, &block_info); + break; default: dev_info(adev->dev, "%s error injection is not supported yet\n", ras_block_str(info->head.block)); From b7f0656a25467fc26eb7fc375caf38ee99f5d004 Mon Sep 17 00:00:00 2001 From: John Clements Date: Thu, 14 May 2020 11:21:01 +0800 Subject: [PATCH 10/17] drm/amdgpu: Updated XGMI power down control support check Updated SMC FW version check to determine if XGMI power down control is supported Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/arcturus_ppt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 4874a20ccdf1..cbf70122de9b 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -2298,9 +2298,9 @@ static int arcturus_allow_xgmi_power_down(struct smu_context *smu, bool en) return ret; } - /* PPSMC_MSG_GmiPwrDnControl is supported by 54.20.0 and onwards */ - if (smu_version < 0x365000) { - pr_err("XGMI power down control is only supported by PMFW 54.20.0 and onwards\n"); + /* PPSMC_MSG_GmiPwrDnControl is supported by 54.23.0 and onwards */ + if (smu_version < 0x00361700) { + pr_err("XGMI power down control is only supported by PMFW 54.23.0 and onwards\n"); return -EINVAL; } From 728e7e0cd61899208e924472b9e641dbeb0775c4 Mon Sep 17 00:00:00 2001 From: Jiange Zhao Date: Sun, 26 Apr 2020 17:57:00 +0800 Subject: [PATCH 11/17] drm/amdgpu: Add autodump debugfs node for gpu reset v8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When GPU got timeout, it would notify an interested part of an opportunity to dump info before actual GPU reset. A usermode app would open 'autodump' node under debugfs system and poll() for readable/writable. When a GPU reset is due, amdgpu would notify usermode app through wait_queue_head and give it 10 minutes to dump info. After usermode app has done its work, this 'autodump' node is closed. On node closure, amdgpu gets to know the dump is done through the completion that is triggered in release(). There is no write or read callback because necessary info can be obtained through dmesg and umr. Messages back and forth between usermode app and amdgpu are unnecessary. v2: (1) changed 'registered' to 'app_listening' (2) add a mutex in open() to prevent race condition v3 (chk): grab the reset lock to avoid race in autodump_open, rename debugfs file to amdgpu_autodump, provide autodump_read as well, style and code cleanups v4: add 'bool app_listening' to differentiate situations, so that the node can be reopened; also, there is no need to wait for completion when no app is waiting for a dump. v5: change 'bool app_listening' to 'enum amdgpu_autodump_state' add 'app_state_mutex' for race conditions: (1)Only 1 user can open this file node (2)wait_dump() can only take effect after poll() executed. (3)eliminated the race condition between release() and wait_dump() v6: removed 'enum amdgpu_autodump_state' and 'app_state_mutex' removed state checking in amdgpu_debugfs_wait_dump Improve on top of version 3 so that the node can be reopened. v7: move reinit_completion into open() so that only one user can open it. v8: remove complete_all() from amdgpu_debugfs_wait_dump(). Signed-off-by: Jiange Zhao Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 78 ++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + 4 files changed, 87 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4a03a24348f4..7975f8e157df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -989,6 +989,8 @@ struct amdgpu_device { char product_number[16]; char product_name[32]; char serial[16]; + + struct amdgpu_autodump autodump; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 1a4894fa3693..d33cb344be69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -27,7 +27,7 @@ #include #include #include - +#include #include #include "amdgpu.h" @@ -74,7 +74,81 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, return 0; } +int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev) +{ #if defined(CONFIG_DEBUG_FS) + unsigned long timeout = 600 * HZ; + int ret; + + wake_up_interruptible(&adev->autodump.gpu_hang); + + ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout); + if (ret == 0) { + pr_err("autodump: timeout, move on to gpu recovery\n"); + return -ETIMEDOUT; + } +#endif + return 0; +} + +#if defined(CONFIG_DEBUG_FS) + +static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file) +{ + struct amdgpu_device *adev = inode->i_private; + int ret; + + file->private_data = adev; + + mutex_lock(&adev->lock_reset); + if (adev->autodump.dumping.done) { + reinit_completion(&adev->autodump.dumping); + ret = 0; + } else { + ret = -EBUSY; + } + mutex_unlock(&adev->lock_reset); + + return ret; +} + +static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file) +{ + struct amdgpu_device *adev = file->private_data; + + complete_all(&adev->autodump.dumping); + return 0; +} + +static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table) +{ + struct amdgpu_device *adev = file->private_data; + + poll_wait(file, &adev->autodump.gpu_hang, poll_table); + + if (adev->in_gpu_reset) + return POLLIN | POLLRDNORM | POLLWRNORM; + + return 0; +} + +static const struct file_operations autodump_debug_fops = { + .owner = THIS_MODULE, + .open = amdgpu_debugfs_autodump_open, + .poll = amdgpu_debugfs_autodump_poll, + .release = amdgpu_debugfs_autodump_release, +}; + +static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev) +{ + init_completion(&adev->autodump.dumping); + complete_all(&adev->autodump.dumping); + init_waitqueue_head(&adev->autodump.gpu_hang); + + debugfs_create_file("amdgpu_autodump", 0600, + adev->ddev->primary->debugfs_root, + adev, &autodump_debug_fops); +} /** * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes @@ -1434,6 +1508,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_ras_debugfs_create_all(adev); + amdgpu_debugfs_autodump_init(adev); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list, ARRAY_SIZE(amdgpu_debugfs_list)); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index de12d1101526..2803884d338d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -31,6 +31,11 @@ struct amdgpu_debugfs { unsigned num_files; }; +struct amdgpu_autodump { + struct completion dumping; + struct wait_queue_head gpu_hang; +}; + int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); int amdgpu_debugfs_init(struct amdgpu_device *adev); void amdgpu_debugfs_fini(struct amdgpu_device *adev); @@ -40,3 +45,4 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, int amdgpu_debugfs_fence_init(struct amdgpu_device *adev); int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); int amdgpu_debugfs_gem_init(struct amdgpu_device *adev); +int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cc41e8f5ad14..545beebcf43e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3927,6 +3927,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, int i, r = 0; bool need_full_reset = *need_full_reset_arg; + amdgpu_debugfs_wait_dump(adev); + /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; From a7f28103374787ae43b936cd2ec2f8388958668e Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Wed, 29 Apr 2020 18:49:23 +0800 Subject: [PATCH 12/17] drm/amdgpu: add amdgpu_virt_get_vf_mode helper function the swsmu or powerplay(hwmgr) need to handle task according to different VF mode, this function to help query vf mode. vf mode: 1. SRIOV_VF_MODE_BARE_METAL: the driver work on host OS (PF) 2. SRIOV_VF_MODE_ONE_VF : the driver work on guest OS with one VF 3. SRIOV_VF_MODE_MULTI_VF : the driver work on guest OS with multi VF Signed-off-by: Kevin Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 16 ++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index cbbb8d02535a..f3b38c9e04ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -370,3 +370,19 @@ void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME; } + +enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev) +{ + enum amdgpu_sriov_vf_mode mode; + + if (amdgpu_sriov_vf(adev)) { + if (amdgpu_sriov_is_pp_one_vf(adev)) + mode = SRIOV_VF_MODE_ONE_VF; + else + mode = SRIOV_VF_MODE_MULTI_VF; + } else { + mode = SRIOV_VF_MODE_BARE_METAL; + } + + return mode; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index de27308802c9..b90e822cebd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -35,6 +35,12 @@ /* tonga/fiji use this offset */ #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503 +enum amdgpu_sriov_vf_mode { + SRIOV_VF_MODE_BARE_METAL = 0, + SRIOV_VF_MODE_ONE_VF, + SRIOV_VF_MODE_MULTI_VF, +}; + struct amdgpu_mm_table { struct amdgpu_bo *bo; uint32_t *cpu_addr; @@ -323,4 +329,6 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev); bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev); + +enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev); #endif From 4e01847c38f7a5e2b0ffa8ff74d6bf0e85924240 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Mon, 27 Apr 2020 23:45:49 +0800 Subject: [PATCH 13/17] drm/amdgpu: optimize amdgpu device attribute code unified amdgpu device attribute node functions: 1. add some helper functions to create amdgpu device attribute node. 2. create device node according to device attr flags on different VF mode. 3. rename some functions name to adapt a new interface. v2: 1. remove ATTR_STATE_DEAD, ATTR_STATE_ALIVE enum. 2. rename callback function perform to attr_update. 3. modify some variable names Signed-off-by: Kevin Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 492 +++++++++++-------------- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h | 46 +++ 2 files changed, 261 insertions(+), 277 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index c762deb5abc7..b75362bf0742 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -154,9 +154,9 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso * */ -static ssize_t amdgpu_get_dpm_state(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_power_dpm_state(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; @@ -189,10 +189,10 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); } -static ssize_t amdgpu_set_dpm_state(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) +static ssize_t amdgpu_set_power_dpm_state(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; @@ -294,9 +294,9 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, * */ -static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; @@ -332,10 +332,10 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, "unknown"); } -static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) +static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; @@ -873,10 +873,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, * the corresponding bit from original ppfeature masks and input the * new ppfeature masks. */ -static ssize_t amdgpu_set_pp_feature_status(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) +static ssize_t amdgpu_set_pp_features(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; @@ -917,9 +917,9 @@ static ssize_t amdgpu_set_pp_feature_status(struct device *dev, return count; } -static ssize_t amdgpu_get_pp_feature_status(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_pp_features(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; @@ -1663,9 +1663,9 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, * The SMU firmware computes a percentage of load based on the * aggregate activity level in the IP cores. */ -static ssize_t amdgpu_get_busy_percent(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; @@ -1699,9 +1699,9 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, * The SMU firmware computes a percentage of load based on the * aggregate activity level in the IP cores. */ -static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; @@ -1790,57 +1790,174 @@ static ssize_t amdgpu_get_unique_id(struct device *dev, return 0; } -static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); -static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, - amdgpu_get_dpm_forced_performance_level, - amdgpu_set_dpm_forced_performance_level); -static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL); -static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL); -static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR, - amdgpu_get_pp_force_state, - amdgpu_set_pp_force_state); -static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR, - amdgpu_get_pp_table, - amdgpu_set_pp_table); -static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR, - amdgpu_get_pp_dpm_sclk, - amdgpu_set_pp_dpm_sclk); -static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR, - amdgpu_get_pp_dpm_mclk, - amdgpu_set_pp_dpm_mclk); -static DEVICE_ATTR(pp_dpm_socclk, S_IRUGO | S_IWUSR, - amdgpu_get_pp_dpm_socclk, - amdgpu_set_pp_dpm_socclk); -static DEVICE_ATTR(pp_dpm_fclk, S_IRUGO | S_IWUSR, - amdgpu_get_pp_dpm_fclk, - amdgpu_set_pp_dpm_fclk); -static DEVICE_ATTR(pp_dpm_dcefclk, S_IRUGO | S_IWUSR, - amdgpu_get_pp_dpm_dcefclk, - amdgpu_set_pp_dpm_dcefclk); -static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR, - amdgpu_get_pp_dpm_pcie, - amdgpu_set_pp_dpm_pcie); -static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR, - amdgpu_get_pp_sclk_od, - amdgpu_set_pp_sclk_od); -static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR, - amdgpu_get_pp_mclk_od, - amdgpu_set_pp_mclk_od); -static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR, - amdgpu_get_pp_power_profile_mode, - amdgpu_set_pp_power_profile_mode); -static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, - amdgpu_get_pp_od_clk_voltage, - amdgpu_set_pp_od_clk_voltage); -static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, - amdgpu_get_busy_percent, NULL); -static DEVICE_ATTR(mem_busy_percent, S_IRUGO, - amdgpu_get_memory_busy_percent, NULL); -static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); -static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR, - amdgpu_get_pp_feature_status, - amdgpu_set_pp_feature_status); -static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); +static struct amdgpu_device_attr amdgpu_device_attrs[] = { + AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RO(pp_num_states, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(pp_cur_state, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_force_state, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_table, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_dpm_sclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(pp_dpm_mclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(pp_dpm_fclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(pp_dpm_dcefclk, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_sclk_od, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_mclk_od, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_power_profile_mode, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_od_clk_voltage, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(gpu_busy_percent, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(mem_busy_percent, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(pcie_bw, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RW(pp_features, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(unique_id, ATTR_FLAG_BASIC), +}; + +static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, + uint32_t mask) +{ + struct device_attribute *dev_attr = &attr->dev_attr; + const char *attr_name = dev_attr->attr.name; + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + enum amd_asic_type asic_type = adev->asic_type; + + if (!(attr->flags & mask)) { + attr->states = ATTR_STATE_UNSUPPORTED; + return 0; + } + +#define DEVICE_ATTR_IS(_name) (!strcmp(attr_name, #_name)) + + if (DEVICE_ATTR_IS(pp_dpm_socclk)) { + if (asic_type <= CHIP_VEGA10) + attr->states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { + if (asic_type <= CHIP_VEGA10 || asic_type == CHIP_ARCTURUS) + attr->states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_fclk)) { + if (asic_type < CHIP_VEGA20) + attr->states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { + if (asic_type == CHIP_ARCTURUS) + attr->states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) { + attr->states = ATTR_STATE_UNSUPPORTED; + if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || + (!is_support_sw_smu(adev) && hwmgr->od_enabled)) + attr->states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(mem_busy_percent)) { + if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10) + attr->states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pcie_bw)) { + /* PCIe Perf counters won't work on APU nodes */ + if (adev->flags & AMD_IS_APU) + attr->states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(unique_id)) { + if (!adev->unique_id) + attr->states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_features)) { + if (adev->flags & AMD_IS_APU || asic_type <= CHIP_VEGA10) + attr->states = ATTR_STATE_UNSUPPORTED; + } + + if (asic_type == CHIP_ARCTURUS) { + /* Arcturus does not support standalone mclk/socclk/fclk level setting */ + if (DEVICE_ATTR_IS(pp_dpm_mclk) || + DEVICE_ATTR_IS(pp_dpm_socclk) || + DEVICE_ATTR_IS(pp_dpm_fclk)) { + dev_attr->attr.mode &= ~S_IWUGO; + dev_attr->store = NULL; + } + } + +#undef DEVICE_ATTR_IS + + return 0; +} + + +static int amdgpu_device_attr_create(struct amdgpu_device *adev, + struct amdgpu_device_attr *attr, + uint32_t mask) +{ + int ret = 0; + struct device_attribute *dev_attr = &attr->dev_attr; + const char *name = dev_attr->attr.name; + int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, + uint32_t mask) = default_attr_update; + + BUG_ON(!attr); + + attr_update = attr->attr_update ? attr_update : default_attr_update; + + ret = attr_update(adev, attr, mask); + if (ret) { + dev_err(adev->dev, "failed to update device file %s, ret = %d\n", + name, ret); + return ret; + } + + /* the attr->states maybe changed after call attr->attr_update function */ + if (attr->states == ATTR_STATE_UNSUPPORTED) + return 0; + + ret = device_create_file(adev->dev, dev_attr); + if (ret) { + dev_err(adev->dev, "failed to create device file %s, ret = %d\n", + name, ret); + } + + attr->states = ATTR_STATE_SUPPORTED; + + return ret; +} + +static void amdgpu_device_attr_remove(struct amdgpu_device *adev, struct amdgpu_device_attr *attr) +{ + struct device_attribute *dev_attr = &attr->dev_attr; + + if (attr->states == ATTR_STATE_UNSUPPORTED) + return; + + device_remove_file(adev->dev, dev_attr); + + attr->states = ATTR_STATE_UNSUPPORTED; +} + +static int amdgpu_device_attr_create_groups(struct amdgpu_device *adev, + struct amdgpu_device_attr *attrs, + uint32_t counts, + uint32_t mask) +{ + int ret = 0; + uint32_t i = 0; + + for (i = 0; i < counts; i++) { + ret = amdgpu_device_attr_create(adev, &attrs[i], mask); + if (ret) + goto failed; + } + + return 0; + +failed: + for (; i > 0; i--) { + amdgpu_device_attr_remove(adev, &attrs[i]); + } + + return ret; +} + +static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev, + struct amdgpu_device_attr *attrs, + uint32_t counts) +{ + uint32_t i = 0; + + for (i = 0; i < counts; i++) + amdgpu_device_attr_remove(adev, &attrs[i]); +} static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -3241,8 +3358,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { - struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; int ret; + uint32_t mask = 0; if (adev->pm.sysfs_initialized) return 0; @@ -3260,168 +3377,25 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } - ret = device_create_file(adev->dev, &dev_attr_power_dpm_state); - if (ret) { - DRM_ERROR("failed to create device file for dpm state\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level); - if (ret) { - DRM_ERROR("failed to create device file for dpm state\n"); - return ret; + switch (amdgpu_virt_get_sriov_vf_mode(adev)) { + case SRIOV_VF_MODE_ONE_VF: + mask = ATTR_FLAG_ONEVF; + break; + case SRIOV_VF_MODE_MULTI_VF: + mask = 0; + break; + case SRIOV_VF_MODE_BARE_METAL: + default: + mask = ATTR_FLAG_MASK_ALL; + break; } - if (!amdgpu_sriov_vf(adev)) { - ret = device_create_file(adev->dev, &dev_attr_pp_num_states); - if (ret) { - DRM_ERROR("failed to create device file pp_num_states\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_cur_state); - if (ret) { - DRM_ERROR("failed to create device file pp_cur_state\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_force_state); - if (ret) { - DRM_ERROR("failed to create device file pp_force_state\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_table); - if (ret) { - DRM_ERROR("failed to create device file pp_table\n"); - return ret; - } - } - - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_sclk\n"); + ret = amdgpu_device_attr_create_groups(adev, + amdgpu_device_attrs, + ARRAY_SIZE(amdgpu_device_attrs), + mask); + if (ret) return ret; - } - - /* Arcturus does not support standalone mclk/socclk/fclk level setting */ - if (adev->asic_type == CHIP_ARCTURUS) { - dev_attr_pp_dpm_mclk.attr.mode &= ~S_IWUGO; - dev_attr_pp_dpm_mclk.store = NULL; - - dev_attr_pp_dpm_socclk.attr.mode &= ~S_IWUGO; - dev_attr_pp_dpm_socclk.store = NULL; - - dev_attr_pp_dpm_fclk.attr.mode &= ~S_IWUGO; - dev_attr_pp_dpm_fclk.store = NULL; - } - - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_mclk\n"); - return ret; - } - if (adev->asic_type >= CHIP_VEGA10) { - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_socclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_socclk\n"); - return ret; - } - if (adev->asic_type != CHIP_ARCTURUS) { - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); - return ret; - } - } - } - if (adev->asic_type >= CHIP_VEGA20) { - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_fclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_fclk\n"); - return ret; - } - } - - /* the reset are not needed for SRIOV one vf mode */ - if (amdgpu_sriov_vf(adev)) { - adev->pm.sysfs_initialized = true; - return ret; - } - - if (adev->asic_type != CHIP_ARCTURUS) { - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_pcie\n"); - return ret; - } - } - ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od); - if (ret) { - DRM_ERROR("failed to create device file pp_sclk_od\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_mclk_od); - if (ret) { - DRM_ERROR("failed to create device file pp_mclk_od\n"); - return ret; - } - ret = device_create_file(adev->dev, - &dev_attr_pp_power_profile_mode); - if (ret) { - DRM_ERROR("failed to create device file " - "pp_power_profile_mode\n"); - return ret; - } - if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || - (!is_support_sw_smu(adev) && hwmgr->od_enabled)) { - ret = device_create_file(adev->dev, - &dev_attr_pp_od_clk_voltage); - if (ret) { - DRM_ERROR("failed to create device file " - "pp_od_clk_voltage\n"); - return ret; - } - } - ret = device_create_file(adev->dev, - &dev_attr_gpu_busy_percent); - if (ret) { - DRM_ERROR("failed to create device file " - "gpu_busy_level\n"); - return ret; - } - /* APU does not have its own dedicated memory */ - if (!(adev->flags & AMD_IS_APU) && - (adev->asic_type != CHIP_VEGA10)) { - ret = device_create_file(adev->dev, - &dev_attr_mem_busy_percent); - if (ret) { - DRM_ERROR("failed to create device file " - "mem_busy_percent\n"); - return ret; - } - } - /* PCIe Perf counters won't work on APU nodes */ - if (!(adev->flags & AMD_IS_APU)) { - ret = device_create_file(adev->dev, &dev_attr_pcie_bw); - if (ret) { - DRM_ERROR("failed to create device file pcie_bw\n"); - return ret; - } - } - if (adev->unique_id) - ret = device_create_file(adev->dev, &dev_attr_unique_id); - if (ret) { - DRM_ERROR("failed to create device file unique_id\n"); - return ret; - } - - if ((adev->asic_type >= CHIP_VEGA10) && - !(adev->flags & AMD_IS_APU)) { - ret = device_create_file(adev->dev, - &dev_attr_pp_features); - if (ret) { - DRM_ERROR("failed to create device file " - "pp_features\n"); - return ret; - } - } adev->pm.sysfs_initialized = true; @@ -3430,51 +3404,15 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) { - struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - if (adev->pm.dpm_enabled == 0) return; if (adev->pm.int_hwmon_dev) hwmon_device_unregister(adev->pm.int_hwmon_dev); - device_remove_file(adev->dev, &dev_attr_power_dpm_state); - device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); - device_remove_file(adev->dev, &dev_attr_pp_num_states); - device_remove_file(adev->dev, &dev_attr_pp_cur_state); - device_remove_file(adev->dev, &dev_attr_pp_force_state); - device_remove_file(adev->dev, &dev_attr_pp_table); - - device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); - device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); - if (adev->asic_type >= CHIP_VEGA10) { - device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); - if (adev->asic_type != CHIP_ARCTURUS) - device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); - } - if (adev->asic_type != CHIP_ARCTURUS) - device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); - if (adev->asic_type >= CHIP_VEGA20) - device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk); - device_remove_file(adev->dev, &dev_attr_pp_sclk_od); - device_remove_file(adev->dev, &dev_attr_pp_mclk_od); - device_remove_file(adev->dev, - &dev_attr_pp_power_profile_mode); - if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || - (!is_support_sw_smu(adev) && hwmgr->od_enabled)) - device_remove_file(adev->dev, - &dev_attr_pp_od_clk_voltage); - device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); - if (!(adev->flags & AMD_IS_APU) && - (adev->asic_type != CHIP_VEGA10)) - device_remove_file(adev->dev, &dev_attr_mem_busy_percent); - if (!(adev->flags & AMD_IS_APU)) - device_remove_file(adev->dev, &dev_attr_pcie_bw); - if (adev->unique_id) - device_remove_file(adev->dev, &dev_attr_unique_id); - if ((adev->asic_type >= CHIP_VEGA10) && - !(adev->flags & AMD_IS_APU)) - device_remove_file(adev->dev, &dev_attr_pp_features); + amdgpu_device_attr_remove_groups(adev, + amdgpu_device_attrs, + ARRAY_SIZE(amdgpu_device_attrs)); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h index 5db0ef86e84c..48e8086baf33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h @@ -30,6 +30,52 @@ struct cg_flag_name const char *name; }; +enum amdgpu_device_attr_flags { + ATTR_FLAG_BASIC = (1 << 0), + ATTR_FLAG_ONEVF = (1 << 16), +}; + +#define ATTR_FLAG_TYPE_MASK (0x0000ffff) +#define ATTR_FLAG_MODE_MASK (0xffff0000) +#define ATTR_FLAG_MASK_ALL (0xffffffff) + +enum amdgpu_device_attr_states { + ATTR_STATE_UNSUPPORTED = 0, + ATTR_STATE_SUPPORTED, +}; + +struct amdgpu_device_attr { + struct device_attribute dev_attr; + enum amdgpu_device_attr_flags flags; + enum amdgpu_device_attr_states states; + int (*attr_update)(struct amdgpu_device *adev, + struct amdgpu_device_attr* attr, + uint32_t mask); +}; + +#define to_amdgpu_device_attr(_dev_attr) \ + container_of(_dev_attr, struct amdgpu_device_attr, dev_attr) + +#define __AMDGPU_DEVICE_ATTR(_name, _mode, _show, _store, _flags, ...) \ + { .dev_attr = __ATTR(_name, _mode, _show, _store), \ + .flags = _flags, \ + .states = ATTR_STATE_SUPPORTED, \ + ##__VA_ARGS__, } + +#define AMDGPU_DEVICE_ATTR(_name, _mode, _flags, ...) \ + __AMDGPU_DEVICE_ATTR(_name, _mode, \ + amdgpu_get_##_name, amdgpu_set_##_name, \ + _flags, ##__VA_ARGS__) + +#define AMDGPU_DEVICE_ATTR_RW(_name, _flags, ...) \ + AMDGPU_DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \ + _flags, ##__VA_ARGS__) + +#define AMDGPU_DEVICE_ATTR_RO(_name, _flags, ...) \ + __AMDGPU_DEVICE_ATTR(_name, S_IRUGO, \ + amdgpu_get_##_name, NULL, \ + _flags, ##__VA_ARGS__) + void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); int amdgpu_pm_sysfs_init(struct amdgpu_device *adev); int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev); From 22301177dbcb99eb1101ed5698c5b2239d024f8f Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 24 Mar 2020 16:37:01 -0400 Subject: [PATCH 14/17] drm/amdgpu: Add new ring callback to insert memory sync Used to flush and invalidate various caches. v2: Rename function hook Signed-off-by: Andrey Grodzovsky Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 107e80063553..be218754629a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -195,6 +195,7 @@ struct amdgpu_ring_funcs { /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); int (*preempt_ib)(struct amdgpu_ring *ring); + void (*emit_mem_sync)(struct amdgpu_ring *ring); }; struct amdgpu_ring { From 2f9ce2a3860588803dc2ce4c866c06b29f5a201c Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Fri, 8 May 2020 14:34:26 -0400 Subject: [PATCH 15/17] drm/amdgpu: Add mem_sync implementation for all the ASICs. Implement the .mem_sync hook defined earlier. v2: Rename functions Signed-off-by: Andrey Grodzovsky Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 +++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 16 ++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 16 ++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 17 +++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 23 +++++++++++++++++++++- 5 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 84aa0d4f3c3e..7ab6f6ae9a63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -8020,6 +8020,29 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + const unsigned int gcr_cntl = + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | + PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); + + /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); + amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ + amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ +} + static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .name = "gfx_v10_0", .early_init = gfx_v10_0_early_init, @@ -8067,7 +8090,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ - 2, /* SWITCH_BUFFER */ + 2 + /* SWITCH_BUFFER */ + 8, /* gfx_v10_0_emit_mem_sync */ .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */ .emit_ib = gfx_v10_0_ring_emit_ib_gfx, .emit_fence = gfx_v10_0_ring_emit_fence, @@ -8089,6 +8113,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v10_0_ring_soft_recovery, + .emit_mem_sync = gfx_v10_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index aa1e1be852dd..96112fb9273b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3465,6 +3465,18 @@ static int gfx_v6_0_set_powergating_state(void *handle, return 0; } +static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + static const struct amd_ip_funcs gfx_v6_0_ip_funcs = { .name = "gfx_v6_0", .early_init = gfx_v6_0_early_init, @@ -3495,7 +3507,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */ - 3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */ + 3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */ + 5, /* SURFACE_SYNC */ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ .emit_ib = gfx_v6_0_ring_emit_ib, .emit_fence = gfx_v6_0_ring_emit_fence, @@ -3506,6 +3519,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { .insert_nop = amdgpu_ring_insert_nop, .emit_cntxcntl = gfx_v6_ring_emit_cntxcntl, .emit_wreg = gfx_v6_0_ring_emit_wreg, + .emit_mem_sync = gfx_v6_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index e5a88cad44cb..b2f10e39eff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4998,6 +4998,18 @@ static int gfx_v7_0_set_powergating_state(void *handle, return 0; } +static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -5030,7 +5042,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ - 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ + 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ + 5, /* SURFACE_SYNC */ .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ .emit_ib = gfx_v7_0_ring_emit_ib_gfx, .emit_fence = gfx_v7_0_ring_emit_fence_gfx, @@ -5045,6 +5058,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, + .emit_mem_sync = gfx_v7_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2fcf6865abba..6ae78b9e9551 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6817,6 +6817,19 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA | + PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6863,7 +6876,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ 12 + 12 + /* FENCE x2 */ - 2, /* SWITCH_BUFFER */ + 2 + /* SWITCH_BUFFER */ + 5, /* SURFACE_SYNC */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, @@ -6881,6 +6895,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, + .emit_mem_sync = gfx_v8_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a0988634aeaf..b6916f82c705 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -6634,6 +6634,25 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, return 0; } +static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + const unsigned int cp_coher_cntl = + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); + + /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -6680,7 +6699,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ - 2, /* SWITCH_BUFFER */ + 2 + /* SWITCH_BUFFER */ + 7, /* gfx_v9_0_emit_mem_sync */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -6701,6 +6721,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, + .emit_mem_sync = gfx_v9_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { From d35745bbec0958ec09a6580dea537828a571c632 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Apr 2020 16:04:47 -0400 Subject: [PATCH 16/17] drm/amdgpu: apply AMDGPU_IB_FLAG_EMIT_MEM_SYNC to compute IBs too (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compute IBs need this too. v2: split out version bump v3: squash in emit frame count fixes Signed-off-by: Marek Olšák Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cikd.h | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 18 +++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 19 ++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/vid.h | 2 +- 7 files changed, 46 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index cee6e8a3ad9c..5f3f6ebfb387 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -450,7 +450,7 @@ # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) -#define PACKET3_AQUIRE_MEM 0x58 +#define PACKET3_ACQUIRE_MEM 0x58 #define PACKET3_REWIND 0x59 #define PACKET3_LOAD_UCONFIG_REG 0x5E #define PACKET3_LOAD_SH_REG 0x5F diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 7ab6f6ae9a63..bd5dd4f64311 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -8133,7 +8133,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v10_0_ring_emit_vm_flush */ - 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ + 8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ + 8, /* gfx_v10_0_emit_mem_sync */ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ .emit_ib = gfx_v10_0_ring_emit_ib_compute, .emit_fence = gfx_v10_0_ring_emit_fence, @@ -8148,6 +8149,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + .emit_mem_sync = gfx_v10_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 96112fb9273b..79c52c7a02e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3533,7 +3533,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { 5 + 5 + /* hdp flush / invalidate */ 7 + /* gfx_v6_0_ring_emit_pipeline_sync */ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */ - 14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ + 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ + 5, /* SURFACE_SYNC */ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ .emit_ib = gfx_v6_0_ring_emit_ib, .emit_fence = gfx_v6_0_ring_emit_fence, @@ -3543,6 +3544,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { .test_ib = gfx_v6_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .emit_wreg = gfx_v6_0_ring_emit_wreg, + .emit_mem_sync = gfx_v6_0_emit_mem_sync, }; static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index b2f10e39eff1..0cc011f9190d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5010,6 +5010,20 @@ static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -5075,7 +5089,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { 5 + /* hdp invalidate */ 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ - 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7, /* gfx_v7_0_emit_mem_sync_compute */ .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, @@ -5088,6 +5103,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 6ae78b9e9551..1d4128227ffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6830,6 +6830,21 @@ static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA | + PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6912,7 +6927,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 5 + /* hdp_invalidate */ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ - 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7, /* gfx_v8_0_emit_mem_sync_compute */ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, @@ -6925,6 +6941,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v8_0_ring_emit_wreg, + .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b6916f82c705..1573ac1f03b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -6741,7 +6741,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v9_0_ring_emit_vm_flush */ - 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ + 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ + 7, /* gfx_v9_0_emit_mem_sync */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -6756,6 +6757,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .emit_mem_sync = gfx_v9_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 19ddd2312e00..7a01e6133798 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -332,7 +332,7 @@ # define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) # define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) # define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) -#define PACKET3_AQUIRE_MEM 0x58 +#define PACKET3_ACQUIRE_MEM 0x58 #define PACKET3_REWIND 0x59 #define PACKET3_LOAD_UCONFIG_REG 0x5E #define PACKET3_LOAD_SH_REG 0x5F From 43c8546bcd854806736d8a635a0d696504dd4c21 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 28 Apr 2020 01:28:43 -0400 Subject: [PATCH 17/17] drm/amdgpu: Add a UAPI flag for user to call mem_sync When this flag is set in the CS IB flags, it causes a memory cache flush of the GFX. v2: Move new flag to drm_amdgpu_cs_chunk_ib.flags Bump up UAPI version Remove condition on job != null to emit mem_sync Signed-off-by: Andrey Grodzovsky Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index beb35dd12964..a0e5b54b6e47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -86,9 +86,10 @@ * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask * - 3.36.0 - Allow reading more status registers on si/cik * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness + * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 37 +#define KMS_DRIVER_MINOR 38 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index c24366aacf3a..b91853fd66d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -189,6 +189,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dma_fence_put(tmp); } + if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync) + ring->funcs->emit_mem_sync(ring); + if (ring->funcs->insert_start) ring->funcs->insert_start(ring); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index e01b673f0449..4e873dcbe68f 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -602,6 +602,10 @@ union drm_amdgpu_cs { */ #define AMDGPU_IB_FLAGS_SECURE (1 << 5) +/* Tell KMD to flush and invalidate caches + */ +#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */