accel/habanalabs: in {e/p}dma_core events read the err cause reg
Since the err_cause register is unprivileged, we should read it from the driver instead of using the param that came from the FW.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
f8d139a71b
commit
5d8a5f2965
|
@ -8689,14 +8689,13 @@ static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
|
||||||
return error_count;
|
return error_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type,
|
static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, int sts_addr)
|
||||||
u64 intr_cause_data)
|
|
||||||
{
|
{
|
||||||
u32 error_count = 0;
|
u32 error_count = 0, sts_val = RREG32(sts_addr);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
|
for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
|
||||||
if (intr_cause_data & BIT(i)) {
|
if (sts_val & BIT(i)) {
|
||||||
gaudi2_print_event(hdev, event_type, true,
|
gaudi2_print_event(hdev, event_type, true,
|
||||||
"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
|
"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
|
||||||
error_count++;
|
error_count++;
|
||||||
|
@ -8707,6 +8706,27 @@ static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type,
|
||||||
return error_count;
|
return error_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int gaudi2_handle_pdma_core_event(struct hl_device *hdev, u16 event_type, int pdma_idx)
|
||||||
|
{
|
||||||
|
u32 sts_addr;
|
||||||
|
|
||||||
|
sts_addr = mmPDMA0_CORE_ERR_CAUSE + pdma_idx * PDMA_OFFSET;
|
||||||
|
return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int gaudi2_handle_edma_core_event(struct hl_device *hdev, u16 event_type, int edma_idx)
|
||||||
|
{
|
||||||
|
static const int edma_event_index_map[] = {2, 3, 0, 1, 6, 7, 4, 5};
|
||||||
|
u32 sts_addr, index;
|
||||||
|
|
||||||
|
index = edma_event_index_map[edma_idx];
|
||||||
|
|
||||||
|
sts_addr = mmDCORE0_EDMA0_CORE_ERR_CAUSE +
|
||||||
|
DCORE_OFFSET * (index / NUM_OF_EDMA_PER_DCORE) +
|
||||||
|
DCORE_EDMA_OFFSET * (index % NUM_OF_EDMA_PER_DCORE);
|
||||||
|
return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
|
||||||
|
}
|
||||||
|
|
||||||
static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
|
static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
|
||||||
{
|
{
|
||||||
u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
|
u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
|
||||||
|
@ -9524,9 +9544,15 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||||
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
|
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
|
case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
|
||||||
error_count = gaudi2_handle_dma_core_event(hdev, event_type,
|
index = event_type - GAUDI2_EVENT_HDMA2_CORE;
|
||||||
le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
|
error_count = gaudi2_handle_edma_core_event(hdev, event_type, index);
|
||||||
|
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
|
||||||
|
index = event_type - GAUDI2_EVENT_PDMA0_CORE;
|
||||||
|
error_count = gaudi2_handle_pdma_core_event(hdev, event_type, index);
|
||||||
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
|
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue