RDMA/cxgb4: Fastreg NSMR fixes
- Remove dsgl support - doesn't work in T4.
- Wrap the immediate PBL as needed when building it in the wr.
- Adjust max pbl depth allowed based on ulptx alignment requirements.
- Bump the slots per SQ to 5 to allow up to 128MB fast registers.
- Advertise fastreg support by default.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
commit 40dbf6ee38 (parent 410ade4c26)
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -54,9 +54,9 @@
 
 #include "iw_cxgb4.h"
 
-static int fastreg_support;
+static int fastreg_support = 1;
 module_param(fastreg_support, int, 0644);
-MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=0)");
+MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
 
 static int c4iw_modify_port(struct ib_device *ibdev,
                             u8 port, int port_modify_mask,
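Note that flipping the default to 1 only changes what is advertised out of the box: the parameter is registered with mode 0644, so it can still be disabled at load time (for example `modprobe iw_cxgb4 fastreg_support=0`, assuming the standard iw_cxgb4 module name) or read back under /sys/module/iw_cxgb4/parameters/fastreg_support.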
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -505,13 +505,15 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
 	return 0;
 }
 
-static int build_fastreg(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
+			 struct ib_send_wr *wr, u8 *len16)
 {
 
 	struct fw_ri_immd *imdp;
 	__be64 *p;
 	int i;
 	int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
+	int rem;
 
 	if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH)
 		return -EINVAL;
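A quick worked example of the length math above, as a standalone userspace sketch (ROUNDUP is a stand-in for the kernel's roundup(); the 8-byte entry size matches sizeof(u64)):

#include <stdio.h>

/* Stand-in for the kernel's roundup() macro. */
#define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        int page_list_len = 5;                       /* five 8-byte PBL entries */
        int pbllen = ROUNDUP(page_list_len * 8, 32); /* 40 rounds up to 64 */
        int rem = pbllen - page_list_len * 8;        /* 24 bytes left to zero-fill */

        printf("pbllen=%d pad=%d\n", pbllen, rem);   /* prints: pbllen=64 pad=24 */
        return 0;
}

The new rem counter in build_fastreg() tracks exactly that leftover: how many bytes of the 32-byte-rounded immediate length still need to be written after the real PBL entries.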
@@ -526,32 +528,28 @@ static int build_fastreg(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 	wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
 	wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start &
 					0xffffffff);
-	if (pbllen > T4_MAX_FR_IMMD) {
-		struct c4iw_fr_page_list *c4pl =
-			to_c4iw_fr_page_list(wr->wr.fast_reg.page_list);
-		struct fw_ri_dsgl *sglp;
-
-		sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
-		sglp->op = FW_RI_DATA_DSGL;
-		sglp->r1 = 0;
-		sglp->nsge = cpu_to_be16(1);
-		sglp->addr0 = cpu_to_be64(c4pl->dma_addr);
-		sglp->len0 = cpu_to_be32(pbllen);
-
-		*len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *sglp, 16);
-	} else {
-		imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
-		imdp->op = FW_RI_DATA_IMMD;
-		imdp->r1 = 0;
-		imdp->r2 = 0;
-		imdp->immdlen = cpu_to_be32(pbllen);
-		p = (__be64 *)(imdp + 1);
-		for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++)
-			*p = cpu_to_be64(
-				(u64)wr->wr.fast_reg.page_list->page_list[i]);
-		*len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen,
-				      16);
-	}
+	WARN_ON(pbllen > T4_MAX_FR_IMMD);
+	imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
+	imdp->op = FW_RI_DATA_IMMD;
+	imdp->r1 = 0;
+	imdp->r2 = 0;
+	imdp->immdlen = cpu_to_be32(pbllen);
+	p = (__be64 *)(imdp + 1);
+	rem = pbllen;
+	for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+		*p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
+		rem -= sizeof *p;
+		if (++p == (__be64 *)&sq->queue[sq->size])
+			p = (__be64 *)sq->queue;
+	}
+	BUG_ON(rem < 0);
+	while (rem) {
+		*p = 0;
+		rem -= sizeof *p;
+		if (++p == (__be64 *)&sq->queue[sq->size])
+			p = (__be64 *)sq->queue;
+	}
+	*len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16);
 	return 0;
 }
 
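The heart of the change is the wrap check on every increment of p: since the fast-register WR can now span the end of the SQ, the PBL copy must jump from the last queue slot back to the first. A minimal userspace model of that pattern, with ring and RING_SLOTS as illustrative stand-ins for the driver's sq->queue and sq->size:

#include <stdint.h>
#include <stddef.h>

#define RING_SLOTS 8    /* illustrative stand-in for the SQ's 64-bit slot count */

static uint64_t ring[RING_SLOTS];

/* Copy nentries PBL entries into the ring starting at *start, wrapping
 * from the end of the ring back to its beginning, then zero-fill up to
 * pbllen (the 32-byte-rounded immediate length). */
static void copy_pbl_wrapped(uint64_t *start, const uint64_t *pbl,
                             int nentries, size_t pbllen)
{
        uint64_t *p = start;
        size_t rem = pbllen;
        int i;

        for (i = 0; i < nentries; i++) {
                *p = pbl[i];
                rem -= sizeof(*p);
                if (++p == &ring[RING_SLOTS])   /* ran off the end... */
                        p = ring;               /* ...wrap to the start */
        }
        while (rem) {                           /* pad out to pbllen with zeros */
                *p = 0;
                rem -= sizeof(*p);
                if (++p == &ring[RING_SLOTS])
                        p = ring;
        }
}

int main(void)
{
        uint64_t pbl[5] = { 1, 2, 3, 4, 5 };

        /* Start two slots from the end so the copy visibly wraps:
         * entries land in slots 6, 7, 0, 1, 2 and padding in 3, 4, 5. */
        copy_pbl_wrapped(&ring[6], pbl, 5, 64);
        return 0;
}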
@@ -652,7 +650,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	case IB_WR_FAST_REG_MR:
 		fw_opcode = FW_RI_FR_NSMR_WR;
 		swsqe->opcode = FW_RI_FAST_REGISTER;
-		err = build_fastreg(wqe, wr, &len16);
+		err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16);
 		break;
 	case IB_WR_LOCAL_INV:
 		if (wr->send_flags & IB_SEND_FENCE)
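Threading the SQ pointer through is what makes the wrap possible: build_fastreg() previously only saw the WQE it was filling, but to wrap the immediate PBL it has to know where the queue memory begins and ends, so c4iw_post_send() now passes &qhp->wq.sq along.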
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -66,7 +66,7 @@ struct t4_status_page {
 
 #define T4_EQ_ENTRY_SIZE 64
 
-#define T4_SQ_NUM_SLOTS 4
+#define T4_SQ_NUM_SLOTS 5
 #define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS)
 #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
 			sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
@@ -79,7 +79,7 @@ struct t4_status_page {
 			sizeof(struct fw_ri_rdma_write_wr) - \
 			sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
 #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \
-			sizeof(struct fw_ri_immd)))
+			sizeof(struct fw_ri_immd)) & ~31UL)
 #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
 
 #define T4_RQ_NUM_SLOTS 2
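The new & ~31UL mask rounds the byte budget down to a multiple of 32, matching the ulptx alignment requirement the commit message cites, and T4_MAX_FR_DEPTH then falls out as the number of 8-byte PBL entries that fit. A sketch of the arithmetic, using made-up placeholder sizes for the two firmware structs (the real values come from t4fw_ri_api.h):

#include <stdio.h>
#include <stdint.h>

#define EQ_ENTRY_SIZE 64
#define SQ_NUM_SLOTS  5
#define SQ_NUM_BYTES  (EQ_ENTRY_SIZE * SQ_NUM_SLOTS)   /* 320 bytes with 5 slots */

int main(void)
{
        /* Placeholder sizes, for illustration only. */
        unsigned long fr_wr_size = 32;  /* stand-in for sizeof(struct fw_ri_fr_nsmr_wr) */
        unsigned long immd_size  = 16;  /* stand-in for sizeof(struct fw_ri_immd) */

        /* & ~31UL rounds down to a 32-byte multiple (ulptx alignment). */
        unsigned long max_fr_immd  = (SQ_NUM_BYTES - fr_wr_size - immd_size) & ~31UL;
        unsigned long max_fr_depth = max_fr_immd / sizeof(uint64_t);

        printf("max immd = %lu bytes -> depth = %lu entries\n",
               max_fr_immd, max_fr_depth);              /* 256 bytes -> 32 entries */
        return 0;
}

Under these placeholder sizes the depth comes out to 32 entries; combined with the large per-entry page sizes the hardware supports, that is presumably how the commit message's 128MB figure arises.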