habanalabs/gaudi: handle axi errors from NIC engines
Various AXI errors can occur in the NIC engines and are reported to the driver by the f/w. Add code to print the errors and ack them to the f/w. Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
f23f280277
commit
26ef1c000b
|
@ -7665,6 +7665,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev,
|
||||||
fw_alive->thread_id, fw_alive->uptime_seconds);
|
fw_alive->thread_id, fw_alive->uptime_seconds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
|
||||||
|
void *data)
|
||||||
|
{
|
||||||
|
char desc[64] = "", *type;
|
||||||
|
struct eq_nic_sei_event *eq_nic_sei = data;
|
||||||
|
u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
|
||||||
|
|
||||||
|
switch (eq_nic_sei->axi_error_cause) {
|
||||||
|
case RXB:
|
||||||
|
type = "RXB";
|
||||||
|
break;
|
||||||
|
case RXE:
|
||||||
|
type = "RXE";
|
||||||
|
break;
|
||||||
|
case TXS:
|
||||||
|
type = "TXS";
|
||||||
|
break;
|
||||||
|
case TXE:
|
||||||
|
type = "TXE";
|
||||||
|
break;
|
||||||
|
case QPC_RESP:
|
||||||
|
type = "QPC_RESP";
|
||||||
|
break;
|
||||||
|
case NON_AXI_ERR:
|
||||||
|
type = "NON_AXI_ERR";
|
||||||
|
break;
|
||||||
|
case TMR:
|
||||||
|
type = "TMR";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
|
||||||
|
eq_nic_sei->axi_error_cause);
|
||||||
|
type = "N/A";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
|
||||||
|
eq_nic_sei->id);
|
||||||
|
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
|
||||||
|
event_type, desc);
|
||||||
|
}
|
||||||
|
|
||||||
static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
|
static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
|
||||||
{
|
{
|
||||||
/* GAUDI doesn't support any reset except hard-reset */
|
/* GAUDI doesn't support any reset except hard-reset */
|
||||||
|
@ -7898,6 +7940,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
||||||
struct hl_eq_entry *eq_entry)
|
struct hl_eq_entry *eq_entry)
|
||||||
{
|
{
|
||||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||||
|
u64 data = le64_to_cpu(eq_entry->data[0]);
|
||||||
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
|
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
|
||||||
u32 fw_fatal_err_flag = 0;
|
u32 fw_fatal_err_flag = 0;
|
||||||
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
|
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
|
||||||
|
@ -8095,6 +8138,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
||||||
hl_fw_unmask_irq(hdev, event_type);
|
hl_fw_unmask_irq(hdev, event_type);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
|
||||||
|
gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
|
||||||
|
hl_fw_unmask_irq(hdev, event_type);
|
||||||
|
break;
|
||||||
|
|
||||||
case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
|
case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
|
||||||
gaudi_print_irq_info(hdev, event_type, false);
|
gaudi_print_irq_info(hdev, event_type, false);
|
||||||
gaudi_print_sm_sei_info(hdev, event_type,
|
gaudi_print_sm_sei_info(hdev, event_type,
|
||||||
|
|
Loading…
Reference in New Issue