staging/rdma/hfi1: Adds software counters for bitfields within various error status fields

Provides error status counters for CceErrStatus, Send*ErrStatus,
RcvErrStatus and MISC_ERR_STATUS

Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Joel Rosenzweig <joel.b.rosenzweig@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Joel Rosenzweig 2015-12-01 15:38:19 -05:00 committed by Greg Kroah-Hartman
parent 11a5909b26
commit 2c5b521ae6
3 changed files with 3487 additions and 1 deletions

File diff suppressed because it is too large Load Diff

View File

@ -787,6 +787,275 @@ enum {
C_SW_PIO_WAIT,
C_SW_KMEM_WAIT,
C_SW_SEND_SCHED,
/* MISC_ERR_STATUS */
C_MISC_PLL_LOCK_FAIL_ERR,
C_MISC_MBIST_FAIL_ERR,
C_MISC_INVALID_EEP_CMD_ERR,
C_MISC_EFUSE_DONE_PARITY_ERR,
C_MISC_EFUSE_WRITE_ERR,
C_MISC_EFUSE_READ_BAD_ADDR_ERR,
C_MISC_EFUSE_CSR_PARITY_ERR,
C_MISC_FW_AUTH_FAILED_ERR,
C_MISC_KEY_MISMATCH_ERR,
C_MISC_SBUS_WRITE_FAILED_ERR,
C_MISC_CSR_WRITE_BAD_ADDR_ERR,
C_MISC_CSR_READ_BAD_ADDR_ERR,
C_MISC_CSR_PARITY_ERR,
/* CceErrStatus */
/*
* A special counter that is the aggregate count
* of all the cce_err_status errors. The remainder
* are actual bits in the CceErrStatus register.
*/
C_CCE_ERR_STATUS_AGGREGATED_CNT,
C_CCE_MSIX_CSR_PARITY_ERR,
C_CCE_INT_MAP_UNC_ERR,
C_CCE_INT_MAP_COR_ERR,
C_CCE_MSIX_TABLE_UNC_ERR,
C_CCE_MSIX_TABLE_COR_ERR,
C_CCE_RXDMA_CONV_FIFO_PARITY_ERR,
C_CCE_RCPL_ASYNC_FIFO_PARITY_ERR,
C_CCE_SEG_WRITE_BAD_ADDR_ERR,
C_CCE_SEG_READ_BAD_ADDR_ERR,
C_LA_TRIGGERED,
C_CCE_TRGT_CPL_TIMEOUT_ERR,
C_PCIC_RECEIVE_PARITY_ERR,
C_PCIC_TRANSMIT_BACK_PARITY_ERR,
C_PCIC_TRANSMIT_FRONT_PARITY_ERR,
C_PCIC_CPL_DAT_Q_UNC_ERR,
C_PCIC_CPL_HD_Q_UNC_ERR,
C_PCIC_POST_DAT_Q_UNC_ERR,
C_PCIC_POST_HD_Q_UNC_ERR,
C_PCIC_RETRY_SOT_MEM_UNC_ERR,
C_PCIC_RETRY_MEM_UNC_ERR,
C_PCIC_N_POST_DAT_Q_PARITY_ERR,
C_PCIC_N_POST_H_Q_PARITY_ERR,
C_PCIC_CPL_DAT_Q_COR_ERR,
C_PCIC_CPL_HD_Q_COR_ERR,
C_PCIC_POST_DAT_Q_COR_ERR,
C_PCIC_POST_HD_Q_COR_ERR,
C_PCIC_RETRY_SOT_MEM_COR_ERR,
C_PCIC_RETRY_MEM_COR_ERR,
C_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERR,
C_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERR,
C_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR,
C_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR,
C_CCE_CLI2_ASYNC_FIFO_PARITY_ERR,
C_CCE_CSR_CFG_BUS_PARITY_ERR,
C_CCE_CLI0_ASYNC_FIFO_PARTIY_ERR,
C_CCE_RSPD_DATA_PARITY_ERR,
C_CCE_TRGT_ACCESS_ERR,
C_CCE_TRGT_ASYNC_FIFO_PARITY_ERR,
C_CCE_CSR_WRITE_BAD_ADDR_ERR,
C_CCE_CSR_READ_BAD_ADDR_ERR,
C_CCE_CSR_PARITY_ERR,
/* RcvErrStatus */
C_RX_CSR_PARITY_ERR,
C_RX_CSR_WRITE_BAD_ADDR_ERR,
C_RX_CSR_READ_BAD_ADDR_ERR,
C_RX_DMA_CSR_UNC_ERR,
C_RX_DMA_DQ_FSM_ENCODING_ERR,
C_RX_DMA_EQ_FSM_ENCODING_ERR,
C_RX_DMA_CSR_PARITY_ERR,
C_RX_RBUF_DATA_COR_ERR,
C_RX_RBUF_DATA_UNC_ERR,
C_RX_DMA_DATA_FIFO_RD_COR_ERR,
C_RX_DMA_DATA_FIFO_RD_UNC_ERR,
C_RX_DMA_HDR_FIFO_RD_COR_ERR,
C_RX_DMA_HDR_FIFO_RD_UNC_ERR,
C_RX_RBUF_DESC_PART2_COR_ERR,
C_RX_RBUF_DESC_PART2_UNC_ERR,
C_RX_RBUF_DESC_PART1_COR_ERR,
C_RX_RBUF_DESC_PART1_UNC_ERR,
C_RX_HQ_INTR_FSM_ERR,
C_RX_HQ_INTR_CSR_PARITY_ERR,
C_RX_LOOKUP_CSR_PARITY_ERR,
C_RX_LOOKUP_RCV_ARRAY_COR_ERR,
C_RX_LOOKUP_RCV_ARRAY_UNC_ERR,
C_RX_LOOKUP_DES_PART2_PARITY_ERR,
C_RX_LOOKUP_DES_PART1_UNC_COR_ERR,
C_RX_LOOKUP_DES_PART1_UNC_ERR,
C_RX_RBUF_NEXT_FREE_BUF_COR_ERR,
C_RX_RBUF_NEXT_FREE_BUF_UNC_ERR,
C_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR,
C_RX_RBUF_FL_INITDONE_PARITY_ERR,
C_RX_RBUF_FL_WRITE_ADDR_PARITY_ERR,
C_RX_RBUF_FL_RD_ADDR_PARITY_ERR,
C_RX_RBUF_EMPTY_ERR,
C_RX_RBUF_FULL_ERR,
C_RX_RBUF_BAD_LOOKUP_ERR,
C_RX_RBUF_CTX_ID_PARITY_ERR,
C_RX_RBUF_CSR_QEOPDW_PARITY_ERR,
C_RX_RBUF_CSR_Q_NUM_OF_PKT_PARITY_ERR,
C_RX_RBUF_CSR_Q_T1_PTR_PARITY_ERR,
C_RX_RBUF_CSR_Q_HD_PTR_PARITY_ERR,
C_RX_RBUF_CSR_Q_VLD_BIT_PARITY_ERR,
C_RX_RBUF_CSR_Q_NEXT_BUF_PARITY_ERR,
C_RX_RBUF_CSR_Q_ENT_CNT_PARITY_ERR,
C_RX_RBUF_CSR_Q_HEAD_BUF_NUM_PARITY_ERR,
C_RX_RBUF_BLOCK_LIST_READ_COR_ERR,
C_RX_RBUF_BLOCK_LIST_READ_UNC_ERR,
C_RX_RBUF_LOOKUP_DES_COR_ERR,
C_RX_RBUF_LOOKUP_DES_UNC_ERR,
C_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR,
C_RX_RBUF_LOOKUP_DES_REG_UNC_ERR,
C_RX_RBUF_FREE_LIST_COR_ERR,
C_RX_RBUF_FREE_LIST_UNC_ERR,
C_RX_RCV_FSM_ENCODING_ERR,
C_RX_DMA_FLAG_COR_ERR,
C_RX_DMA_FLAG_UNC_ERR,
C_RX_DC_SOP_EOP_PARITY_ERR,
C_RX_RCV_CSR_PARITY_ERR,
C_RX_RCV_QP_MAP_TABLE_COR_ERR,
C_RX_RCV_QP_MAP_TABLE_UNC_ERR,
C_RX_RCV_DATA_COR_ERR,
C_RX_RCV_DATA_UNC_ERR,
C_RX_RCV_HDR_COR_ERR,
C_RX_RCV_HDR_UNC_ERR,
C_RX_DC_INTF_PARITY_ERR,
C_RX_DMA_CSR_COR_ERR,
/* SendPioErrStatus */
C_PIO_PEC_SOP_HEAD_PARITY_ERR,
C_PIO_PCC_SOP_HEAD_PARITY_ERR,
C_PIO_LAST_RETURNED_CNT_PARITY_ERR,
C_PIO_CURRENT_FREE_CNT_PARITY_ERR,
C_PIO_RSVD_31_ERR,
C_PIO_RSVD_30_ERR,
C_PIO_PPMC_SOP_LEN_ERR,
C_PIO_PPMC_BQC_MEM_PARITY_ERR,
C_PIO_VL_FIFO_PARITY_ERR,
C_PIO_VLF_SOP_PARITY_ERR,
C_PIO_VLF_V1_LEN_PARITY_ERR,
C_PIO_BLOCK_QW_COUNT_PARITY_ERR,
C_PIO_WRITE_QW_VALID_PARITY_ERR,
C_PIO_STATE_MACHINE_ERR,
C_PIO_WRITE_DATA_PARITY_ERR,
C_PIO_HOST_ADDR_MEM_COR_ERR,
C_PIO_HOST_ADDR_MEM_UNC_ERR,
C_PIO_PKT_EVICT_SM_OR_ARM_SM_ERR,
C_PIO_INIT_SM_IN_ERR,
C_PIO_PPMC_PBL_FIFO_ERR,
C_PIO_CREDIT_RET_FIFO_PARITY_ERR,
C_PIO_V1_LEN_MEM_BANK1_COR_ERR,
C_PIO_V1_LEN_MEM_BANK0_COR_ERR,
C_PIO_V1_LEN_MEM_BANK1_UNC_ERR,
C_PIO_V1_LEN_MEM_BANK0_UNC_ERR,
C_PIO_SM_PKT_RESET_PARITY_ERR,
C_PIO_PKT_EVICT_FIFO_PARITY_ERR,
C_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR,
C_PIO_SBRDCTL_CRREL_PARITY_ERR,
C_PIO_PEC_FIFO_PARITY_ERR,
C_PIO_PCC_FIFO_PARITY_ERR,
C_PIO_SB_MEM_FIFO1_ERR,
C_PIO_SB_MEM_FIFO0_ERR,
C_PIO_CSR_PARITY_ERR,
C_PIO_WRITE_ADDR_PARITY_ERR,
C_PIO_WRITE_BAD_CTXT_ERR,
/* SendDmaErrStatus */
C_SDMA_PCIE_REQ_TRACKING_COR_ERR,
C_SDMA_PCIE_REQ_TRACKING_UNC_ERR,
C_SDMA_CSR_PARITY_ERR,
C_SDMA_RPY_TAG_ERR,
/* SendEgressErrStatus */
C_TX_READ_PIO_MEMORY_CSR_UNC_ERR,
C_TX_READ_SDMA_MEMORY_CSR_UNC_ERR,
C_TX_EGRESS_FIFO_COR_ERR,
C_TX_READ_PIO_MEMORY_COR_ERR,
C_TX_READ_SDMA_MEMORY_COR_ERR,
C_TX_SB_HDR_COR_ERR,
C_TX_CREDIT_OVERRUN_ERR,
C_TX_LAUNCH_FIFO8_COR_ERR,
C_TX_LAUNCH_FIFO7_COR_ERR,
C_TX_LAUNCH_FIFO6_COR_ERR,
C_TX_LAUNCH_FIFO5_COR_ERR,
C_TX_LAUNCH_FIFO4_COR_ERR,
C_TX_LAUNCH_FIFO3_COR_ERR,
C_TX_LAUNCH_FIFO2_COR_ERR,
C_TX_LAUNCH_FIFO1_COR_ERR,
C_TX_LAUNCH_FIFO0_COR_ERR,
C_TX_CREDIT_RETURN_VL_ERR,
C_TX_HCRC_INSERTION_ERR,
C_TX_EGRESS_FIFI_UNC_ERR,
C_TX_READ_PIO_MEMORY_UNC_ERR,
C_TX_READ_SDMA_MEMORY_UNC_ERR,
C_TX_SB_HDR_UNC_ERR,
C_TX_CREDIT_RETURN_PARITY_ERR,
C_TX_LAUNCH_FIFO8_UNC_OR_PARITY_ERR,
C_TX_LAUNCH_FIFO7_UNC_OR_PARITY_ERR,
C_TX_LAUNCH_FIFO6_UNC_OR_PARITY_ERR,
C_TX_LAUNCH_FIFO5_UNC_OR_PARITY_ERR,
C_TX_LAUNCH_FIFO4_UNC_OR_PARITY_ERR,
C_TX_LAUNCH_FIFO3_UNC_OR_PARITY_ERR,
C_TX_LAUNCH_FIFO2_UNC_OR_PARITY_ERR,
C_TX_LAUNCH_FIFO1_UNC_OR_PARITY_ERR,
C_TX_LAUNCH_FIFO0_UNC_OR_PARITY_ERR,
C_TX_SDMA15_DISALLOWED_PACKET_ERR,
C_TX_SDMA14_DISALLOWED_PACKET_ERR,
C_TX_SDMA13_DISALLOWED_PACKET_ERR,
C_TX_SDMA12_DISALLOWED_PACKET_ERR,
C_TX_SDMA11_DISALLOWED_PACKET_ERR,
C_TX_SDMA10_DISALLOWED_PACKET_ERR,
C_TX_SDMA9_DISALLOWED_PACKET_ERR,
C_TX_SDMA8_DISALLOWED_PACKET_ERR,
C_TX_SDMA7_DISALLOWED_PACKET_ERR,
C_TX_SDMA6_DISALLOWED_PACKET_ERR,
C_TX_SDMA5_DISALLOWED_PACKET_ERR,
C_TX_SDMA4_DISALLOWED_PACKET_ERR,
C_TX_SDMA3_DISALLOWED_PACKET_ERR,
C_TX_SDMA2_DISALLOWED_PACKET_ERR,
C_TX_SDMA1_DISALLOWED_PACKET_ERR,
C_TX_SDMA0_DISALLOWED_PACKET_ERR,
C_TX_CONFIG_PARITY_ERR,
C_TX_SBRD_CTL_CSR_PARITY_ERR,
C_TX_LAUNCH_CSR_PARITY_ERR,
C_TX_ILLEGAL_CL_ERR,
C_TX_SBRD_CTL_STATE_MACHINE_PARITY_ERR,
C_TX_RESERVED_10,
C_TX_RESERVED_9,
C_TX_SDMA_LAUNCH_INTF_PARITY_ERR,
C_TX_PIO_LAUNCH_INTF_PARITY_ERR,
C_TX_RESERVED_6,
C_TX_INCORRECT_LINK_STATE_ERR,
C_TX_LINK_DOWN_ERR,
C_TX_EGRESS_FIFO_UNDERRUN_OR_PARITY_ERR,
C_TX_RESERVED_2,
C_TX_PKT_INTEGRITY_MEM_UNC_ERR,
C_TX_PKT_INTEGRITY_MEM_COR_ERR,
/* SendErrStatus */
C_SEND_CSR_WRITE_BAD_ADDR_ERR,
C_SEND_CSR_READ_BAD_ADD_ERR,
C_SEND_CSR_PARITY_ERR,
/* SendCtxtErrStatus */
C_PIO_WRITE_OUT_OF_BOUNDS_ERR,
C_PIO_WRITE_OVERFLOW_ERR,
C_PIO_WRITE_CROSSES_BOUNDARY_ERR,
C_PIO_DISALLOWED_PACKET_ERR,
C_PIO_INCONSISTENT_SOP_ERR,
/* SendDmaEngErrStatus */
C_SDMA_HEADER_REQUEST_FIFO_COR_ERR,
C_SDMA_HEADER_STORAGE_COR_ERR,
C_SDMA_PACKET_TRACKING_COR_ERR,
C_SDMA_ASSEMBLY_COR_ERR,
C_SDMA_DESC_TABLE_COR_ERR,
C_SDMA_HEADER_REQUEST_FIFO_UNC_ERR,
C_SDMA_HEADER_STORAGE_UNC_ERR,
C_SDMA_PACKET_TRACKING_UNC_ERR,
C_SDMA_ASSEMBLY_UNC_ERR,
C_SDMA_DESC_TABLE_UNC_ERR,
C_SDMA_TIMEOUT_ERR,
C_SDMA_HEADER_LENGTH_ERR,
C_SDMA_HEADER_ADDRESS_ERR,
C_SDMA_HEADER_SELECT_ERR,
C_SMDA_RESERVED_9,
C_SDMA_PACKET_DESC_OVERFLOW_ERR,
C_SDMA_LENGTH_MISMATCH_ERR,
C_SDMA_HALT_ERR,
C_SDMA_MEM_READ_ERR,
C_SDMA_FIRST_DESC_ERR,
C_SDMA_TAIL_OUT_OF_BOUNDS_ERR,
C_SDMA_TOO_LONG_ERR,
C_SDMA_GEN_MISMATCH_ERR,
C_SDMA_WRONG_DW_ERR,
DEV_CNTR_LAST /* Must be kept last */
};

View File

@ -105,6 +105,20 @@ extern unsigned long hfi1_cap_mask;
*/
#define HFI1_CTRL_CTXT 0
/*
 * Sizes of the software counter arrays kept in the driver context.
 * Each constant is the number of events counted for one error status
 * register; there is one counter per event.
 */
#define NUM_CCE_ERR_STATUS_COUNTERS		41
#define NUM_RCV_ERR_STATUS_COUNTERS		64
#define NUM_MISC_ERR_STATUS_COUNTERS		13
#define NUM_SEND_PIO_ERR_STATUS_COUNTERS	36
#define NUM_SEND_DMA_ERR_STATUS_COUNTERS	4
#define NUM_SEND_EGRESS_ERR_STATUS_COUNTERS	64
#define NUM_SEND_ERR_STATUS_COUNTERS		3
#define NUM_SEND_CTXT_ERR_STATUS_COUNTERS	5
#define NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS	24
/*
* per driver stats, neither device- nor port-specific, or
* summed over all of the devices and ports.
@ -1046,6 +1060,26 @@ struct hfi1_devdata {
atomic_t drop_packet;
u8 do_drop;
/*
* Software counters for the status bits defined by the
* associated error status registers
*/
u64 cce_err_status_cnt[NUM_CCE_ERR_STATUS_COUNTERS];
u64 rcv_err_status_cnt[NUM_RCV_ERR_STATUS_COUNTERS];
u64 misc_err_status_cnt[NUM_MISC_ERR_STATUS_COUNTERS];
u64 send_pio_err_status_cnt[NUM_SEND_PIO_ERR_STATUS_COUNTERS];
u64 send_dma_err_status_cnt[NUM_SEND_DMA_ERR_STATUS_COUNTERS];
u64 send_egress_err_status_cnt[NUM_SEND_EGRESS_ERR_STATUS_COUNTERS];
u64 send_err_status_cnt[NUM_SEND_ERR_STATUS_COUNTERS];
/* Software counter that spans all contexts */
u64 sw_ctxt_err_status_cnt[NUM_SEND_CTXT_ERR_STATUS_COUNTERS];
/* Software counter that spans all DMA engines */
u64 sw_send_dma_eng_err_status_cnt[
NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS];
/* Software counter that aggregates all cce_err_status errors */
u64 sw_cce_err_status_aggregate;
/* receive interrupt functions */
rhf_rcv_function_ptr *rhf_rcv_function_map;
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];