habanalabs/gaudi: handle axi errors from NIC engines
Various AXI errors can occur in the NIC engines and are reported to the driver by the f/w.

Add code to print the errors and ack them to the f/w.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
f23f280277
commit
26ef1c000b
|
@ -7665,6 +7665,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev,
|
|||
fw_alive->thread_id, fw_alive->uptime_seconds);
|
||||
}
|
||||
|
||||
/*
 * gaudi_print_nic_axi_irq_info() - log a NIC SEI event with its error cause.
 * @hdev: habanalabs device; messages are emitted through hdev->dev.
 * @event_type: event id. The NIC index printed in the message is the offset
 *              of this value from GAUDI_EVENT_NIC_SEI_0.
 * @data: event payload, read as a struct eq_nic_sei_event.
 *
 * Translates the payload's axi_error_cause into a short label, builds a
 * descriptor of the form "NIC<index>_<label><id>" and prints it with a
 * rate-limited error message. A cause that matches none of the known values
 * is reported with its own (not rate-limited) error line and is labelled
 * "N/A" in the descriptor.
 */
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
						void *data)
{
	struct eq_nic_sei_event *sei = data;
	u16 nic_idx = event_type - GAUDI_EVENT_NIC_SEI_0;
	const char *cause_name;
	char irq_desc[64] = "";

	/* Map the reported cause to the label used in the log descriptor */
	switch (sei->axi_error_cause) {
	case RXB:
		cause_name = "RXB";
		break;
	case RXE:
		cause_name = "RXE";
		break;
	case TXS:
		cause_name = "TXS";
		break;
	case TXE:
		cause_name = "TXE";
		break;
	case QPC_RESP:
		cause_name = "QPC_RESP";
		break;
	case NON_AXI_ERR:
		cause_name = "NON_AXI_ERR";
		break;
	case TMR:
		cause_name = "TMR";
		break;
	default:
		/* Report the raw value first so it is not lost behind "N/A" */
		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
			sei->axi_error_cause);
		cause_name = "N/A";
		break;
	}

	/* sizeof() bounds the write, so an over-long descriptor is truncated */
	snprintf(irq_desc, sizeof(irq_desc), "NIC%d_%s%d", nic_idx, cause_name,
			sei->id);

	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, irq_desc);
}
|
||||
|
||||
static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
|
||||
{
|
||||
/* GAUDI doesn't support any reset except hard-reset */
|
||||
|
@ -7898,6 +7940,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
|||
struct hl_eq_entry *eq_entry)
|
||||
{
|
||||
struct gaudi_device *gaudi = hdev->asic_specific;
|
||||
u64 data = le64_to_cpu(eq_entry->data[0]);
|
||||
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
|
||||
u32 fw_fatal_err_flag = 0;
|
||||
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
|
||||
|
@ -8095,6 +8138,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
|||
hl_fw_unmask_irq(hdev, event_type);
|
||||
break;
|
||||
|
||||
case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
|
||||
gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
|
||||
hl_fw_unmask_irq(hdev, event_type);
|
||||
break;
|
||||
|
||||
case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
|
||||
gaudi_print_irq_info(hdev, event_type, false);
|
||||
gaudi_print_sm_sei_info(hdev, event_type,
|
||||
|
|
Loading…
Reference in New Issue