habanalabs/gaudi2: support abrupt device reset event
In certain scenarios, the firmware might encounter a fatal event for which a device reset is required. Hence, a proper notification is needed so that the driver is aware of the event and can initiate a reset sequence. In secured environments, the reset will be performed by the firmware without an explicit request from the driver. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
086ab54ac0
commit
b219d209ab
|
@ -9226,6 +9226,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
|||
break;
|
||||
|
||||
case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
|
||||
case GAUDI2_EVENT_DEV_RESET_REQ:
|
||||
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
|
||||
error_count = GAUDI2_NA_EVENT_CAUSE;
|
||||
is_critical = true;
|
||||
|
|
|
@ -958,6 +958,7 @@ enum gaudi2_async_event_id {
|
|||
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1 = 1318,
|
||||
GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
|
||||
GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
|
||||
GAUDI2_EVENT_DEV_RESET_REQ = 1321,
|
||||
GAUDI2_EVENT_SIZE,
|
||||
};
|
||||
|
||||
|
|
|
@ -2665,6 +2665,8 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
|
|||
.msg = 1, .reset = 0, .name = "ARC_DCCM_FULL" },
|
||||
{ .fc_id = 1320, .cpu_id = 626, .valid = 1,
|
||||
.msg = 1, .reset = 1, .name = "FP32_NOT_SUPPORTED" },
|
||||
{ .fc_id = 1321, .cpu_id = 627, .valid = 1,
|
||||
.msg = 1, .reset = 1, .name = "DEV_RESET_REQ" },
|
||||
};
|
||||
|
||||
#endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */
|
||||
|
|
Loading…
Reference in New Issue