From abef378c434e6f5abd46fd536e9972374fb74e98 Mon Sep 17 00:00:00 2001 From: Arumugam Kolappan Date: Tue, 1 Nov 2022 00:27:44 -0700 Subject: [PATCH] RDMA/mlx5: Change debug log level for remote access error syndromes The mlx5 driver dumps the entire CQE buffer by default for few syndromes. Some syndromes are expected due to the application behavior [ex: MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR, MLX5_CQE_SYNDROME_REMOTE_OP_ERR and MLX5_CQE_SYNDROME_LOCAL_PROT_ERR]. Hence, for these syndromes, the patch converts the log level from KERN_WARNING to KERN_DEBUG. This enables the application to get the CQE buffer dump by changing to KERN_DEBUG level as and when needed. Suggested-by: Leon Romanovsky Signed-off-by: Arumugam Kolappan Link: https://lore.kernel.org/r/1667287664-19377-1-git-send-email-aru.kolappan@oracle.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cq.c | 27 +++++++++++++++------------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index be189e0525de..efc9e4a6df04 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; } -static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) +static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, + struct ib_wc *wc, const char *level) { - mlx5_ib_warn(dev, "dump error cqe\n"); - mlx5_dump_err_cqe(dev->mdev, cqe); + mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status, + ib_wc_status_msg(wc->status)); + print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1, + cqe, sizeof(*cqe), false); } static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, struct ib_wc *wc) { - int dump = 1; + const char *dump = KERN_WARNING; switch (cqe->syndrome) { case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: @@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, wc->status = IB_WC_LOC_QP_OP_ERR; break; case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_LOC_PROT_ERR; break; case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: - dump = 0; + dump = NULL; wc->status = IB_WC_WR_FLUSH_ERR; break; case MLX5_CQE_SYNDROME_MW_BIND_ERR: @@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, wc->status = IB_WC_REM_INV_REQ_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_REM_ACCESS_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_REM_OP_ERR; break; case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + dump = NULL; wc->status = IB_WC_RETRY_EXC_ERR; - dump = 0; break; case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + dump = NULL; wc->status = IB_WC_RNR_RETRY_EXC_ERR; - dump = 0; break; case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: wc->status = IB_WC_REM_ABORT_ERR; @@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, } wc->vendor_err = cqe->vendor_err_synd; - if (dump) { - mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status, - ib_wc_status_msg(wc->status)); - dump_cqe(dev, cqe); - } + if (dump) + dump_cqe(dev, cqe, wc, dump); } static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4a7f7064bd0e..8b91babdd4c0 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -38,6 +38,10 @@ dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) +#define mlx5_ib_log(lvl, _dev, format, arg...) \ + dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##arg) + #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)