RDMA subsystem updates for 5.4-rc
Bug fixes for old bugs in the hns and hfi1 drivers:

 - Calculate various values in hns properly to avoid over/underflows in
   some cases

 - Fix an oops, PCI negotiation on Gen4 systems, and bugs related to
   retries

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEfB7FMLh+8QxL+6i3OG33FX4gmxoFAl3NbgAACgkQOG33FX4g
mxoRZQ//cCm6L9q6KJJ4Cg2gHcR3zKQzWiA2slQhRIttjS2fNcoI8a3l6unk8mwZ
qaXhFVDnIRBIhfS0shCzrC6eFYE702DzmZ+3CDo5EtFdNZFqWtOWI9YeUh40dGex
1zd7bo0lE7Tx/Bv76wNVuLuP/LToY794Rd9s9YFd0KkZu+2gjUDeChl4aD4PNbr3
ky08rS1WchzglX5DjD5SYZXodZf1DSjOurWEZpV/cgRsIYJw5edKegD5I4NNSawe
gWW/Qh1WBL5DO5tdgq8w06ZrHYCMOdN7pAwBOUfh2oEflNBsmvY1pyMoNfycUkbQ
jcd6W3FCt5KrTes2nhh2gX3dbtB0iIwrVJkXS2gFhOinQNsbR9dbIc3zm2Nt2VGR
JJbxZ3ROaXuMYCSp9GJTFNfkFCX1GX13T1uMbjsUXFdbbFPVcCYxwe4lA0hj7YVO
Zw27TsIvOU3NWrMv0deTZaXs5ghGk0JCqkFPB61TPRb67a2Cr2qREdiqhPugI8K/
yAsUaL/mQxM9081U89UZNUjeImt14MvnoenLyCZD4SgFjo6avA30VbKWN4KAAG6n
XCXnt2A5CkrcMdHXmKmHBYk+vqA+rXCYRLOZST9mGQRYsy4HaC0n7Ipfkz91/ew1
fTr5U7yR3HTyXJBwsfb/uQpiAMNTG2GrQ/i1za2vAsMCFlJ1mtg=
=mPKm
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull RDMA fixes from Jason Gunthorpe:
 "Bug fixes for old bugs in the hns and hfi1 drivers:

  - Calculate various values in hns properly to avoid over/underflows
    in some cases

  - Fix an oops, PCI negotiation on Gen4 systems, and bugs related to
    retries"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/hns: Correct the value of srq_desc_size
  RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN
  IB/hfi1: TID RDMA WRITE should not return IB_WC_RNR_RETRY_EXC_ERR
  IB/hfi1: Calculate flow weight based on QP MTU for TID RDMA
  IB/hfi1: Ensure r_tid_ack is valid before building TID RDMA ACK packet
  IB/hfi1: Ensure full Gen3 speed in a Gen4 system
commit 4e84608c78
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void)
                goto bail_dev;
        }
 
-       hfi1_compute_tid_rdma_flow_wt();
        /*
         * These must be called before the driver is registered with
         * the PCI subsystem.
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
        /*
         * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
         */
-       if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+       if (parent &&
+           (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+            dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
                dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
                dd->link_gen3_capable = 0;
        }
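
Why the fix works: the old test treated any parent link speed other than exactly 8 GT/s as "no Gen3 support", so a Gen4 (16 GT/s) bridge was wrongly capped. The new test only excludes bridges whose maximum speed is genuinely below Gen3. A minimal standalone sketch of the two predicates (hypothetical enum values for illustration, not the driver's own code):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel's pci_bus_speed constants. */
enum bus_speed { GT2_5, GT5_0, GT8_0, GT16_0 };

/* Old predicate: anything that is not exactly Gen3 "cannot" do Gen3. */
static bool old_no_gen3(enum bus_speed s) { return s != GT8_0; }

/* New predicate: only links capped below Gen3 are excluded. */
static bool new_no_gen3(enum bus_speed s) { return s == GT2_5 || s == GT5_0; }

int main(void)
{
        /* Gen4 parent bridge: old=1 (wrongly capped), new=0 (full speed). */
        printf("Gen4 parent: old=%d new=%d\n",
               old_no_gen3(GT16_0), new_no_gen3(GT16_0));
        return 0;
}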
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
                if (qp->s_flags & RVT_S_WAIT_RNR)
                        goto bail_stop;
                rdi = ib_to_rvt(qp->ibqp.device);
-               if (qp->s_rnr_retry == 0 &&
-                   !((rdi->post_parms[wqe->wr.opcode].flags &
-                      RVT_OPERATION_IGN_RNR_CNT) &&
-                     qp->s_rnr_retry_cnt == 0)) {
-                       status = IB_WC_RNR_RETRY_EXC_ERR;
-                       goto class_b;
+               if (!(rdi->post_parms[wqe->wr.opcode].flags &
+                     RVT_OPERATION_IGN_RNR_CNT)) {
+                       if (qp->s_rnr_retry == 0) {
+                               status = IB_WC_RNR_RETRY_EXC_ERR;
+                               goto class_b;
+                       }
+                       if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+                               qp->s_rnr_retry--;
                }
-               if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
-                       qp->s_rnr_retry--;
 
                /*
                 * The last valid PSN is the previous PSN. For TID RDMA WRITE
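
The rework gates all RNR retry accounting on RVT_OPERATION_IGN_RNR_CNT: opcodes carrying that flag (TID RDMA WRITE) now neither consume s_rnr_retry nor complete with IB_WC_RNR_RETRY_EXC_ERR. A simplified sketch of the new decision flow (a hypothetical standalone helper, written for illustration under assumed semantics):

#include <stdbool.h>

struct rnr_state {
        unsigned int retry;     /* retries remaining (s_rnr_retry) */
        unsigned int retry_cnt; /* configured count; 7 means unlimited */
        bool ignore_cnt;        /* opcode has RVT_OPERATION_IGN_RNR_CNT? */
};

/* Returns true when the send must fail with IB_WC_RNR_RETRY_EXC_ERR. */
static bool rnr_retries_exhausted(struct rnr_state *s)
{
        if (s->ignore_cnt)
                return false;           /* TID RDMA WRITE: retry forever */
        if (s->retry == 0)
                return true;            /* ordinary op is out of retries */
        if (s->retry_cnt < 7 && s->retry_cnt > 0)
                s->retry--;             /* 7 encodes "unlimited": no decrement */
        return false;
}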
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
  * C - Capcode
  */
 
-static u32 tid_rdma_flow_wt;
-
 static void tid_rdma_trigger_resume(struct work_struct *work);
 static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
 static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
                                   struct tid_rdma_flow *flow,
                                   bool fecn);
 
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+       if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+               priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       priv->s_flags |= RVT_S_ACK_PENDING;
+       hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+       validate_r_tid_ack(qp->priv);
+       tid_rdma_schedule_ack(qp);
+}
+
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
        return
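
These three helpers fold a sequence that was open-coded at every ACK site into one call, and make the r_tid_ack sanity check impossible to forget; the remaining hunks in this file are the call-site conversions. The net change at a typical site, reconstructed from the hunks below:

/* before: repeated at each call site, validity check sometimes omitted */
qpriv->s_flags |= RVT_S_ACK_PENDING;
if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
        qpriv->r_tid_ack = qpriv->r_tid_tail;
hfi1_schedule_tid_send(qp);

/* after: one call that always validates r_tid_ack first */
tid_rdma_trigger_ack(qp);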
@@ -3005,10 +3023,7 @@ nak_psn:
                qpriv->s_nak_state = IB_NAK_PSN_ERROR;
                /* We are NAK'ing the next expected PSN */
                qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
-               qpriv->s_flags |= RVT_S_ACK_PENDING;
-               if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
-                       qpriv->r_tid_ack = qpriv->r_tid_tail;
-               hfi1_schedule_tid_send(qp);
+               tid_rdma_trigger_ack(qp);
        }
        goto unlock;
 }
@@ -3371,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
        return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
 }
 
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
 {
        /*
         * Heuristic for computing the RNR timeout when waiting on the flow
         * queue. Rather than a computationaly expensive exact estimate of when
         * a flow will be available, we assume that if a QP is at position N in
         * the flow queue it has to wait approximately (N + 1) * (number of
-        * segments between two sync points), assuming PMTU of 4K. The rationale
-        * for this is that flows are released and recycled at each sync point.
+        * segments between two sync points). The rationale for this is that
+        * flows are released and recycled at each sync point.
         */
-       tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
-               TID_RDMA_MAX_SEGMENT_SIZE;
+       return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
 }
 
 static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
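
To see what moving from a module-global weight to a per-QP one changes, here is a hedged standalone sketch; MAX_TID_FLOW_PSN is assumed here to be BIT(11) = 2048 (its value is not shown in this diff), while TID_RDMA_SEGMENT_SHIFT = 18 comes from the tid_rdma.h hunk further down:

#include <stdio.h>

#define MAX_TID_FLOW_PSN        (1u << 11)      /* assumed value */
#define TID_RDMA_SEGMENT_SHIFT  18              /* 256 KiB segments */

int main(void)
{
        unsigned int pmtu;

        /* Old code: one global weight, fixed 4 KiB MTU assumption. */
        printf("old global weight: %u segments\n",
               MAX_TID_FLOW_PSN * 4096 / (1u << TID_RDMA_SEGMENT_SHIFT));

        /* New code: weight scales with the QP's actual path MTU. */
        for (pmtu = 1024; pmtu <= 8192; pmtu *= 2)
                printf("pmtu %4u -> weight %2u segments\n", pmtu,
                       (MAX_TID_FLOW_PSN * pmtu) >> TID_RDMA_SEGMENT_SHIFT);
        return 0;
}

Under these assumptions a 2 KiB path MTU yields a weight of 16, half the fixed-4K value of 32, so the old global constant misestimated the RNR wait for any QP not actually running at a 4 KiB MTU.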
@@ -3505,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
                        ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
                        if (ret) {
-                               to_seg = tid_rdma_flow_wt *
+                               to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
                                         position_in_queue(qpriv,
                                                           &rcd->flow_queue);
                                break;
@@ -3526,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                /*
                 * If overtaking req->acked_tail, send an RNR NAK. Because the
                 * QP is not queued in this case, and the issue can only be
-                * caused due a delay in scheduling the second leg which we
+                * caused by a delay in scheduling the second leg which we
                 * cannot estimate, we use a rather arbitrary RNR timeout of
                 * (MAX_FLOWS / 2) segments
                 */
@@ -3534,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                             MAX_FLOWS)) {
                        ret = -EAGAIN;
                        to_seg = MAX_FLOWS >> 1;
-                       qpriv->s_flags |= RVT_S_ACK_PENDING;
-                       hfi1_schedule_tid_send(qp);
+                       tid_rdma_trigger_ack(qp);
                        break;
                }
 
@@ -4335,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
        trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
                                          req);
        trace_hfi1_tid_write_rsp_rcv_data(qp);
-       if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-               priv->r_tid_ack = priv->r_tid_tail;
+       validate_r_tid_ack(priv);
 
        if (opcode == TID_OP(WRITE_DATA_LAST)) {
                release_rdma_sge_mr(e);
@@ -4375,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
        }
 
 done:
-       priv->s_flags |= RVT_S_ACK_PENDING;
-       hfi1_schedule_tid_send(qp);
+       tid_rdma_schedule_ack(qp);
 exit:
        priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
        if (fecn)
@@ -4388,10 +4399,7 @@ send_nak:
        if (!priv->s_nak_state) {
                priv->s_nak_state = IB_NAK_PSN_ERROR;
                priv->s_nak_psn = flow->flow_state.r_next_psn;
-               priv->s_flags |= RVT_S_ACK_PENDING;
-               if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-                       priv->r_tid_ack = priv->r_tid_tail;
-               hfi1_schedule_tid_send(qp);
+               tid_rdma_trigger_ack(qp);
        }
        goto done;
 }
@@ -4939,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
        qpriv->resync = true;
        /* RESYNC request always gets a TID RDMA ACK. */
        qpriv->s_nak_state = 0;
-       qpriv->s_flags |= RVT_S_ACK_PENDING;
-       hfi1_schedule_tid_send(qp);
+       tid_rdma_trigger_ack(qp);
 bail:
        if (fecn)
                qp->s_flags |= RVT_S_ECN;
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -17,6 +17,7 @@
 #define TID_RDMA_MIN_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_PAGES              (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT          18
 
 /*
  * Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
                                  struct ib_other_headers *ohdr,
                                  u32 *bth1, u32 *bth2, u32 *len);
 
-void hfi1_compute_tid_rdma_flow_wt(void);
-
 void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
 
 u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -59,7 +59,7 @@ enum {
 
 #define HNS_ROCE_HEM_CHUNK_LEN  \
        ((256 - sizeof(struct list_head) - 2 * sizeof(int)) /   \
-       (sizeof(struct scatterlist)))
+       (sizeof(struct scatterlist) + sizeof(void *)))
 
 #define check_whether_bt_num_3(type, hop_num) \
        (type < HEM_TYPE_MTT && hop_num == 2)
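
This macro sizes the per-chunk arrays of struct hns_roce_hem_chunk against a 256-byte budget; the old divisor ignored the void * that each entry also consumes in the buf[] array, so the struct outgrew the budget. A hedged sketch of the arithmetic (struct layout as declared alongside this macro in hns_roce_hem.h; sizes assume a typical 64-bit build):

/* budget = 256 - sizeof(struct list_head) - 2 * sizeof(int)
 *        = 256 - 16 - 8 = 232 bytes for both arrays together
 *
 * e.g. if sizeof(struct scatterlist) == 32:
 *   old: LEN = 232 / 32       = 7 -> 7*32 + 7*8 = 280 bytes, over budget
 *   new: LEN = 232 / (32 + 8) = 5 -> 5*32 + 5*8 = 200 bytes, fits
 */
struct hns_roce_hem_chunk {
        struct list_head         list;
        int                      npages;
        int                      nsg;
        struct scatterlist       mem[HNS_ROCE_HEM_CHUNK_LEN];
        void                    *buf[HNS_ROCE_HEM_CHUNK_LEN];
};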
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
        srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
        srq->max_gs = srq_init_attr->attr.max_sge;
 
-       srq_desc_size = max(16, 16 * srq->max_gs);
+       srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
 
        srq->wqe_shift = ilog2(srq_desc_size);
 
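
The descriptor size feeds ilog2(), which rounds down, so any non-power-of-two size silently shrank the WQE stride below what max_gs requires. A worked example of the truncation, using the function's own arithmetic:

/* max_sge = 3:
 *   srq_desc_size = max(16, 16 * 3) = 48 bytes needed per WQE
 *
 * old: srq->wqe_shift = ilog2(48) = 5   -> 32-byte stride, too small;
 *      the SGEs of one WQE would spill into the next
 * new: roundup_pow_of_two(48) = 64      -> ilog2(64) = 6, a 64-byte
 *      stride with room for all three SGEs
 */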