habanalabs/gaudi: split host irq interfaces towards FW

Current implementation uses a single interrupt interface towards
FW, this interface is causing races between interrupt types.
We split this interface to interface per interrupt type.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2021-05-25 22:09:13 +03:00 committed by Oded Gabbay
parent 135ade0c6a
commit 5bc691d849
4 changed files with 44 additions and 8 deletions

View File

@ -1782,7 +1782,8 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
/* Read boot_cpu status bits */
if (prop->fw_cpu_boot_dev_sts0_valid) {
prop->fw_bootfit_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
prop->fw_bootfit_cpu_boot_dev_sts0 =
RREG32(cpu_boot_dev_sts0_reg);
if (prop->fw_bootfit_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
@ -1793,7 +1794,8 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
}
if (prop->fw_cpu_boot_dev_sts1_valid) {
prop->fw_bootfit_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
prop->fw_bootfit_cpu_boot_dev_sts1 =
RREG32(cpu_boot_dev_sts1_reg);
dev_dbg(hdev->dev, "Firmware boot CPU status1 %#x\n",
prop->fw_bootfit_cpu_boot_dev_sts1);
@ -1803,6 +1805,24 @@ static void hl_fw_boot_fit_update_state(struct hl_device *hdev,
prop->hard_reset_done_by_fw ? "enabled" : "disabled");
}
static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
{
struct cpu_dyn_regs *dyn_regs =
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
/* Check whether all 3 interrupt interfaces are set, if not use a
* single interface
*/
if (!hdev->asic_prop.gic_interrupts_enable &&
!(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_irq_ctrl;
dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_irq_ctrl;
dev_warn(hdev->dev,
"Using a single interrupt interface towards cpucp");
}
}
/**
* hl_fw_dynamic_load_image - load FW image using dynamic protocol
*
@ -2150,6 +2170,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hl_fw_linux_update_state(hdev, le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
hl_fw_dynamic_update_linux_interrupt_if(hdev);
return 0;
protocol_err:

View File

@ -3962,7 +3962,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
irq_handler_offset = prop->gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
WREG32(irq_handler_offset, GAUDI_EVENT_PI_UPDATE);
@ -4148,7 +4148,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
if (hdev->fw_loader.linux_loaded) {
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
le32_to_cpu(dyn_regs->gic_host_halt_irq);
WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
} else {
@ -4681,7 +4681,7 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
WREG32(irq_handler_offset, GAUDI_EVENT_PI_UPDATE);
}
@ -8909,7 +8909,7 @@ static void gaudi_enable_events_from_fw(struct hl_device *hdev)
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
le32_to_cpu(dyn_regs->gic_host_ints_irq);
WREG32(irq_handler_offset, GAUDI_EVENT_INTS_REGISTER);
}

View File

@ -205,6 +205,10 @@
* was not served before.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to
* prevent IRQs overriding each other.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the
* running FW populates the device status
@ -235,6 +239,7 @@
#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19)
#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << 20)
#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << 21)
#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << 22)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
#define CPU_BOOT_DEV_STS1_ENABLED (1 << 31)
@ -308,13 +313,18 @@ struct cpu_dyn_regs {
__le32 hw_state;
__le32 kmd_msg_to_cpu;
__le32 cpu_cmd_status_to_host;
__le32 gic_host_irq_ctrl;
union {
__le32 gic_host_irq_ctrl;
__le32 gic_host_pi_upd_irq;
};
__le32 gic_tpc_qm_irq_ctrl;
__le32 gic_mme_qm_irq_ctrl;
__le32 gic_dma_qm_irq_ctrl;
__le32 gic_nic_qm_irq_ctrl;
__le32 gic_dma_core_irq_ctrl;
__le32 reserved1[26]; /* reserve for future use */
__le32 gic_host_halt_irq;
__le32 gic_host_ints_irq;
__le32 reserved1[24]; /* reserve for future use */
};
/* TODO: remove the desc magic after the code is updated to use message */

View File

@ -12,12 +12,16 @@
* PSOC scratch-pad registers
*/
#define mmHW_STATE mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */
#define mmGIC_HOST_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmGIC_HOST_PI_UPD_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
#define mmGIC_MME_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
#define mmGIC_DMA_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
#define mmGIC_NIC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
#define mmGIC_DMA_CR_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
#define mmGIC_HOST_HALT_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
#define mmGIC_HOST_INTS_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
#define mmCPU_BOOT_DEV_STS0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_20
#define mmCPU_BOOT_DEV_STS1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_21
#define mmFUSE_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_22