Merge branch 'mlx5_dcs' into rdma.git for-next

Leon Romanovsky says:

====================
Add ConnectX DCS offload support

This patchset from Lior adds DCI stream channel (DCS) support.

DCS is an offload to SW load balancing of DC initiator work requests.

A single DC QP initiator (DCI) can be connected to only one target at a
time and can't start a new connection until the previous work request is
completed.

This limitation causes delays when the initiator process needs to
transfer data to multiple targets at the same time.
====================

* branch 'mlx5_dcs':
  RDMA/mlx5: Add DCS offload support
  RDMA/mlx5: Separate DCI QP creation logic
  net/mlx5: Add DCS caps & fields support
This commit is contained in:
Jason Gunthorpe 2021-07-20 15:11:38 -03:00
commit 07d0f314ba
4 changed files with 204 additions and 5 deletions

View File

@ -1174,6 +1174,16 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
}
if (offsetofend(typeof(resp), dci_streams_caps) <= uhw_outlen) {
resp.response_length += sizeof(resp.dci_streams_caps);
resp.dci_streams_caps.max_log_num_concurent =
MLX5_CAP_GEN(mdev, log_max_dci_stream_channels);
resp.dci_streams_caps.max_log_num_errored =
MLX5_CAP_GEN(mdev, log_max_dci_errored_streams);
}
if (uhw_outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);

View File

@ -1982,6 +1982,167 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return 0;
}
/*
 * create_dci() - create a DCI QP on behalf of a user-space caller.
 *
 * @dev:    mlx5 IB device the QP is created on.
 * @pd:     protection domain; its pdn is written into the QP context.
 * @qp:     driver QP object to initialise.
 * @params: creation parameters; this function reads ->attr, ->ucmd, ->udata
 *          and ->uidx, and writes ->resp (via _create_user_qp() and, when ECE
 *          is supported, ->resp.ece_options).
 *
 * Validates the user-supplied queue geometry, has _create_user_qp() set up
 * the user resources and the create_qp_in command buffer, fills in the QP
 * context (including the DCI stream fields when MLX5_QP_FLAG_DCI_STREAM is
 * enabled), issues the command through mlx5_qpc_create_qp() and finally links
 * the QP into the device and CQ lists used by the reset flow.
 *
 * Return: 0 on success; -EINVAL if the QP type has no mlx5 service type or
 * the user-supplied RQ/SQ sizes are rejected; otherwise the error returned by
 * set_rq_size(), get_qp_ts_format(), _create_user_qp() or
 * mlx5_qpc_create_qp().
 */
static int create_dci(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp,
struct mlx5_create_qp_params *params)
{
struct ib_qp_init_attr *init_attr = params->attr;
struct mlx5_ib_create_qp *ucmd = params->ucmd;
u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
struct ib_udata *udata = params->udata;
u32 uidx = params->uidx;
struct mlx5_ib_resources *devr = &dev->devr;
int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_cq *send_cq;
struct mlx5_ib_cq *recv_cq;
unsigned long flags;
struct mlx5_ib_qp_base *base;
int ts_format;
int mlx5_st;
void *qpc;
u32 *in;
int err;
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
/* Translate the QP type to the mlx5 service type; negative means none. */
mlx5_st = to_mlx5_st(qp->type);
if (mlx5_st < 0)
return -EINVAL;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
base = &qp->trans_qp.base;
qp->has_rq = qp_has_rq(init_attr);
err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
/*
 * The RQ geometry supplied by user space must match what set_rq_size()
 * stored in qp->rq.
 */
if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
ucmd->rq_wqe_count != qp->rq.wqe_cnt)
return -EINVAL;
/* Reject an SQ larger than 2^log_max_qp_sz WQEs. */
if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
return -EINVAL;
/* A negative ts_format is an error code and is returned as-is. */
ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq));
if (ts_format < 0)
return ts_format;
/*
 * On success 'in' points at the command buffer and 'inlen' holds its
 * length; the buffer is released with kvfree() after the command is
 * issued below.
 */
err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
&inlen, base, ucmd);
if (err)
return err;
/* Forward the user's ECE options only when the device supports ECE. */
if (MLX5_CAP_GEN(mdev, ece_support))
MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
/* From here on, fill in the QP context embedded in the command. */
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
MLX5_SET(qpc, qpc, st, mlx5_st);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
MLX5_SET(qpc, qpc, wq_signature, 1);
if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
MLX5_SET(qpc, qpc, cd_master, 1);
if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
MLX5_SET(qpc, qpc, cd_slave_send, 1);
if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE)
configure_requester_scat_cqe(dev, qp, init_attr, qpc);
/* RQ size and stride are programmed only when the RQ has entries. */
if (qp->rq.wqe_cnt) {
MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
/*
 * DCI stream channels: copy the user-requested log2 counts of concurrent
 * and errored streams into the QP context. They are not range-checked
 * here.
 */
if (qp->flags_en & MLX5_QP_FLAG_DCI_STREAM) {
MLX5_SET(qpc, qpc, log_num_dci_stream_channels,
ucmd->dci_streams.log_num_concurent);
MLX5_SET(qpc, qpc, log_num_dci_errored_streams,
ucmd->dci_streams.log_num_errored);
}
MLX5_SET(qpc, qpc, ts_format, ts_format);
MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
/* Set default resources */
if (init_attr->srq) {
MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
to_msrq(init_attr->srq)->msrq.srqn);
} else {
/* No SRQ supplied: use the device's s1 SRQ and xrcdn1. */
MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
to_msrq(devr->s1)->msrq.srqn);
}
if (init_attr->send_cq)
MLX5_SET(qpc, qpc, cqn_snd,
to_mcq(init_attr->send_cq)->mcq.cqn);
if (init_attr->recv_cq)
MLX5_SET(qpc, qpc, cqn_rcv,
to_mcq(init_attr->recv_cq)->mcq.cqn);
MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
/* 0xffffff means we ask to work with cqe version 0 */
if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
MLX5_SET(qpc, qpc, user_index, uidx);
if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
MLX5_SET(qpc, qpc, end_padding_mode,
MLX5_WQ_END_PAD_MODE_ALIGN);
/* Special case to clean flag */
qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
}
err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
/* The command buffer is freed before the error check, on both paths. */
kvfree(in);
if (err)
goto err_create;
base->container_mibqp = qp;
base->mqp.event = mlx5_ib_qp_event;
/* Report the ECE value returned in the command output to user space. */
if (MLX5_CAP_GEN(mdev, ece_support))
params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
&send_cq, &recv_cq);
/*
 * Lock order: the device reset-flow lock (taken with spin_lock_irqsave)
 * first, then the CQ locks; released in reverse order below.
 */
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
/* Maintain device to QPs access, needed for further handling via reset
 * flow
 */
list_add_tail(&qp->qps_list, &dev->qp_list);
/* Maintain CQ to QPs access, needed for further handling via reset flow
 */
if (send_cq)
list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
if (recv_cq)
list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
return 0;
err_create:
/*
 * mlx5_qpc_create_qp() failed: destroy_qp() is the cleanup for this
 * path (presumably undoing what _create_user_qp() set up - confirm).
 */
destroy_qp(dev, qp, base, udata);
return err;
}
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp,
struct mlx5_create_qp_params *params)
@ -2653,6 +2814,10 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp);
process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp);
process_vendor_flag(dev, &flags, MLX5_QP_FLAG_DCI_STREAM,
MLX5_CAP_GEN(mdev, log_max_dci_stream_channels) &&
MLX5_CAP_GEN(mdev, log_max_dci_errored_streams),
qp);
process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
@ -2848,6 +3013,9 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
case MLX5_IB_QPT_DCT:
err = create_dct(dev, pd, qp, params);
break;
case MLX5_IB_QPT_DCI:
err = create_dci(dev, pd, qp, params);
break;
case IB_QPT_XRC_TGT:
err = create_xrc_tgt_qp(dev, qp, params);
break;

View File

@ -1651,7 +1651,13 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 max_geneve_tlv_option_data_len[0x5];
u8 reserved_at_570[0x10];
u8 reserved_at_580[0x33];
u8 reserved_at_580[0xb];
u8 log_max_dci_stream_channels[0x5];
u8 reserved_at_590[0x3];
u8 log_max_dci_errored_streams[0x5];
u8 reserved_at_598[0x8];
u8 reserved_at_5a0[0x13];
u8 log_max_dek[0x5];
u8 reserved_at_5b8[0x4];
u8 mini_cqe_resp_stride_index[0x1];
@ -3020,10 +3026,12 @@ struct mlx5_ifc_qpc_bits {
u8 reserved_at_3c0[0x8];
u8 next_send_psn[0x18];
u8 reserved_at_3e0[0x8];
u8 reserved_at_3e0[0x3];
u8 log_num_dci_stream_channels[0x5];
u8 cqn_snd[0x18];
u8 reserved_at_400[0x8];
u8 reserved_at_400[0x3];
u8 log_num_dci_errored_streams[0x5];
u8 deth_sqpn[0x18];
u8 reserved_at_420[0x20];

View File

@ -50,6 +50,7 @@ enum {
MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8,
MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE = 1 << 9,
MLX5_QP_FLAG_UAR_PAGE_INDEX = 1 << 10,
MLX5_QP_FLAG_DCI_STREAM = 1 << 11,
};
enum {
@ -238,6 +239,11 @@ struct mlx5_ib_striding_rq_caps {
__u32 reserved;
};
/*
 * DCI stream capabilities reported to user space as the dci_streams_caps
 * member of struct mlx5_ib_query_device_resp. The driver fills both fields
 * from the HCA general capabilities; the "log" naming suggests log2 values
 * (TODO confirm against the device specification).
 */
struct mlx5_ib_dci_streams_caps {
/* From HCA capability log_max_dci_stream_channels. */
__u8 max_log_num_concurent;
/* From HCA capability log_max_dci_errored_streams. */
__u8 max_log_num_errored;
};
enum mlx5_ib_query_dev_resp_flags {
/* Support 128B CQE compression */
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
@ -266,7 +272,8 @@ struct mlx5_ib_query_device_resp {
struct mlx5_ib_sw_parsing_caps sw_parsing_caps;
struct mlx5_ib_striding_rq_caps striding_rq_caps;
__u32 tunnel_offloads_caps; /* enum mlx5_ib_tunnel_offloads */
__u32 reserved;
struct mlx5_ib_dci_streams_caps dci_streams_caps;
__u16 reserved;
};
enum mlx5_ib_create_cq_flags {
@ -313,6 +320,11 @@ struct mlx5_ib_create_srq_resp {
__u32 reserved;
};
/*
 * DCI stream parameters supplied by user space as the dci_streams member of
 * struct mlx5_ib_create_qp. The driver copies them into the QP context only
 * when MLX5_QP_FLAG_DCI_STREAM is enabled for the QP.
 */
struct mlx5_ib_create_qp_dci_streams {
/* Written to the QP context field log_num_dci_stream_channels. */
__u8 log_num_concurent;
/* Written to the QP context field log_num_dci_errored_streams. */
__u8 log_num_errored;
};
struct mlx5_ib_create_qp {
__aligned_u64 buf_addr;
__aligned_u64 db_addr;
@ -327,7 +339,8 @@ struct mlx5_ib_create_qp {
__aligned_u64 access_key;
};
__u32 ece_options;
__u32 reserved;
struct mlx5_ib_create_qp_dci_streams dci_streams;
__u16 reserved;
};
/* RX Hash function flags */