RDMA/mlx5: Change debug log level for remote access error syndromes

The mlx5 driver dumps the entire CQE buffer by default for few syndromes.
Some syndromes are expected due to the application behavior [ex:
MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR, MLX5_CQE_SYNDROME_REMOTE_OP_ERR and
MLX5_CQE_SYNDROME_LOCAL_PROT_ERR]. Hence, for these syndromes, the patch
converts the log level from KERN_WARNING to KERN_DEBUG. This enables the
application to get the CQE buffer dump by changing to KERN_DEBUG level
as and when needed.

Suggested-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Arumugam Kolappan <aru.kolappan@oracle.com>
Link: https://lore.kernel.org/r/1667287664-19377-1-git-send-email-aru.kolappan@oracle.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
This commit is contained in:
Arumugam Kolappan 2022-11-01 00:27:44 -07:00 committed by Leon Romanovsky
parent 692373d186
commit abef378c43
2 changed files with 19 additions and 12 deletions

View File

@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
struct ib_wc *wc, const char *level)
{
mlx5_ib_warn(dev, "dump error cqe\n");
mlx5_dump_err_cqe(dev->mdev, cqe);
mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
ib_wc_status_msg(wc->status));
print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
cqe, sizeof(*cqe), false);
}
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
struct mlx5_err_cqe *cqe,
struct ib_wc *wc)
{
int dump = 1;
const char *dump = KERN_WARNING;
switch (cqe->syndrome) {
case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_LOC_QP_OP_ERR;
break;
case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_LOC_PROT_ERR;
break;
case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
dump = 0;
dump = NULL;
wc->status = IB_WC_WR_FLUSH_ERR;
break;
case MLX5_CQE_SYNDROME_MW_BIND_ERR:
@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_REM_INV_REQ_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_REM_ACCESS_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_REM_OP_ERR;
break;
case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
dump = NULL;
wc->status = IB_WC_RETRY_EXC_ERR;
dump = 0;
break;
case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
dump = NULL;
wc->status = IB_WC_RNR_RETRY_EXC_ERR;
dump = 0;
break;
case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
wc->status = IB_WC_REM_ABORT_ERR;
@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
}
wc->vendor_err = cqe->vendor_err_synd;
if (dump) {
mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
ib_wc_status_msg(wc->status));
dump_cqe(dev, cqe);
}
if (dump)
dump_cqe(dev, cqe, wc, dump);
}
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,

View File

@ -38,6 +38,10 @@
dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
__LINE__, current->pid, ##arg)
#define mlx5_ib_log(lvl, _dev, format, arg...) \
dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \
__func__, __LINE__, current->pid, ##arg)
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)