RDMA/erdma: Add verbs implementation
The RDMA verbs implementation of erdma is divided into three files:
erdma_qp.c, erdma_cq.c, and erdma_verbs.c. Internally used functions and
the datapath functions of the QP/CQ are placed in erdma_qp.c and
erdma_cq.c; the rest is in erdma_verbs.c. This commit also fixes some
static-check warnings.

Link: https://lore.kernel.org/r/20220727014927.76564-8-chengyou@linux.alibaba.com
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent db23ae64ca
commit 1550557717
erdma_cq.c
@@ -0,0 +1,205 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include <rdma/ib_verbs.h>

#include "erdma_hw.h"
#include "erdma_verbs.h"

static void *get_next_valid_cqe(struct erdma_cq *cq)
{
        __be32 *cqe = get_queue_entry(cq->kern_cq.qbuf, cq->kern_cq.ci,
                                      cq->depth, CQE_SHIFT);
        u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
                              __be32_to_cpu(READ_ONCE(*cqe)));

        return owner ^ !!(cq->kern_cq.ci & cq->depth) ? cqe : NULL;
}

static void notify_cq(struct erdma_cq *cq, u8 solcitied)
{
        u64 db_data =
                FIELD_PREP(ERDMA_CQDB_IDX_MASK, (cq->kern_cq.notify_cnt)) |
                FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->cqn) |
                FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
                FIELD_PREP(ERDMA_CQDB_SOL_MASK, solcitied) |
                FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) |
                FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci);

        *cq->kern_cq.db_record = db_data;
        writeq(db_data, cq->kern_cq.db);
}

int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct erdma_cq *cq = to_ecq(ibcq);
        unsigned long irq_flags;
        int ret = 0;

        spin_lock_irqsave(&cq->kern_cq.lock, irq_flags);

        notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);

        if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq))
                ret = 1;

        cq->kern_cq.notify_cnt++;

        spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags);

        return ret;
}

static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
        [ERDMA_OP_WRITE] = IB_WC_RDMA_WRITE,
        [ERDMA_OP_READ] = IB_WC_RDMA_READ,
        [ERDMA_OP_SEND] = IB_WC_SEND,
        [ERDMA_OP_SEND_WITH_IMM] = IB_WC_SEND,
        [ERDMA_OP_RECEIVE] = IB_WC_RECV,
        [ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM,
        [ERDMA_OP_RECV_INV] = IB_WC_RECV,
        [ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
        [ERDMA_OP_INVALIDATE] = IB_WC_LOCAL_INV,
        [ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV,
        [ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND,
        [ERDMA_OP_REG_MR] = IB_WC_REG_MR,
        [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
        [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
};

static const struct {
        enum erdma_wc_status erdma;
        enum ib_wc_status base;
        enum erdma_vendor_err vendor;
} map_cqe_status[ERDMA_NUM_WC_STATUS] = {
        { ERDMA_WC_SUCCESS, IB_WC_SUCCESS, ERDMA_WC_VENDOR_NO_ERR },
        { ERDMA_WC_GENERAL_ERR, IB_WC_GENERAL_ERR, ERDMA_WC_VENDOR_NO_ERR },
        { ERDMA_WC_RECV_WQE_FORMAT_ERR, IB_WC_GENERAL_ERR,
          ERDMA_WC_VENDOR_INVALID_RQE },
        { ERDMA_WC_RECV_STAG_INVALID_ERR, IB_WC_REM_ACCESS_ERR,
          ERDMA_WC_VENDOR_RQE_INVALID_STAG },
        { ERDMA_WC_RECV_ADDR_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
          ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION },
        { ERDMA_WC_RECV_RIGHT_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
          ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR },
        { ERDMA_WC_RECV_PDID_ERR, IB_WC_REM_ACCESS_ERR,
          ERDMA_WC_VENDOR_RQE_INVALID_PD },
        { ERDMA_WC_RECV_WARRPING_ERR, IB_WC_REM_ACCESS_ERR,
          ERDMA_WC_VENDOR_RQE_WRAP_ERR },
        { ERDMA_WC_SEND_WQE_FORMAT_ERR, IB_WC_LOC_QP_OP_ERR,
          ERDMA_WC_VENDOR_INVALID_SQE },
        { ERDMA_WC_SEND_WQE_ORD_EXCEED, IB_WC_GENERAL_ERR,
          ERDMA_WC_VENDOR_ZERO_ORD },
        { ERDMA_WC_SEND_STAG_INVALID_ERR, IB_WC_LOC_ACCESS_ERR,
          ERDMA_WC_VENDOR_SQE_INVALID_STAG },
        { ERDMA_WC_SEND_ADDR_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
          ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION },
        { ERDMA_WC_SEND_RIGHT_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
          ERDMA_WC_VENDOR_SQE_ACCESS_ERR },
        { ERDMA_WC_SEND_PDID_ERR, IB_WC_LOC_ACCESS_ERR,
          ERDMA_WC_VENDOR_SQE_INVALID_PD },
        { ERDMA_WC_SEND_WARRPING_ERR, IB_WC_LOC_ACCESS_ERR,
          ERDMA_WC_VENDOR_SQE_WARP_ERR },
        { ERDMA_WC_FLUSH_ERR, IB_WC_WR_FLUSH_ERR, ERDMA_WC_VENDOR_NO_ERR },
        { ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR },
};

#define ERDMA_POLLCQ_NO_QP 1

static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
{
        struct erdma_dev *dev = to_edev(cq->ibcq.device);
        u8 opcode, syndrome, qtype;
        struct erdma_kqp *kern_qp;
        struct erdma_cqe *cqe;
        struct erdma_qp *qp;
        u16 wqe_idx, depth;
        u32 qpn, cqe_hdr;
        u64 *id_table;
        u64 *wqe_hdr;

        cqe = get_next_valid_cqe(cq);
        if (!cqe)
                return -EAGAIN;

        cq->kern_cq.ci++;

        /* cqbuf should be ready when we poll */
        dma_rmb();

        qpn = be32_to_cpu(cqe->qpn);
        wqe_idx = be32_to_cpu(cqe->qe_idx);
        cqe_hdr = be32_to_cpu(cqe->hdr);

        qp = find_qp_by_qpn(dev, qpn);
        if (!qp)
                return ERDMA_POLLCQ_NO_QP;

        kern_qp = &qp->kern_qp;

        qtype = FIELD_GET(ERDMA_CQE_HDR_QTYPE_MASK, cqe_hdr);
        syndrome = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, cqe_hdr);
        opcode = FIELD_GET(ERDMA_CQE_HDR_OPCODE_MASK, cqe_hdr);

        if (qtype == ERDMA_CQE_QTYPE_SQ) {
                id_table = kern_qp->swr_tbl;
                depth = qp->attrs.sq_size;
                wqe_hdr = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
                                          qp->attrs.sq_size, SQEBB_SHIFT);
                kern_qp->sq_ci =
                        FIELD_GET(ERDMA_SQE_HDR_WQEBB_CNT_MASK, *wqe_hdr) +
                        wqe_idx + 1;
        } else {
                id_table = kern_qp->rwr_tbl;
                depth = qp->attrs.rq_size;
        }
        wc->wr_id = id_table[wqe_idx & (depth - 1)];
        wc->byte_len = be32_to_cpu(cqe->size);

        wc->wc_flags = 0;

        wc->opcode = wc_mapping_table[opcode];
        if (opcode == ERDMA_OP_RECV_IMM || opcode == ERDMA_OP_RSP_SEND_IMM) {
                wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->imm_data));
                wc->wc_flags |= IB_WC_WITH_IMM;
        } else if (opcode == ERDMA_OP_RECV_INV) {
                wc->ex.invalidate_rkey = be32_to_cpu(cqe->inv_rkey);
                wc->wc_flags |= IB_WC_WITH_INVALIDATE;
        }

        if (syndrome >= ERDMA_NUM_WC_STATUS)
                syndrome = ERDMA_WC_GENERAL_ERR;

        wc->status = map_cqe_status[syndrome].base;
        wc->vendor_err = map_cqe_status[syndrome].vendor;
        wc->qp = &qp->ibqp;

        return 0;
}

int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        struct erdma_cq *cq = to_ecq(ibcq);
        unsigned long flags;
        int npolled, ret;

        spin_lock_irqsave(&cq->kern_cq.lock, flags);

        for (npolled = 0; npolled < num_entries;) {
                ret = erdma_poll_one_cqe(cq, wc + npolled);

                if (ret == -EAGAIN) /* no new CQEs received. */
                        break;
                else if (ret) /* ignore invalid CQEs. */
                        continue;

                npolled++;
        }

        spin_unlock_irqrestore(&cq->kern_cq.lock, flags);

        return npolled;
}
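In erdma_cq.c above, get_next_valid_cqe() treats a CQE as new only when its owner bit disagrees with the phase derived from the consumer index, `owner ^ !!(ci & depth)`, so the expected owner value flips each time the consumer wraps the ring. The standalone C sketch below is not part of the commit: the toy producer, the phase convention, and all names are illustrative assumptions (depth a power of two). It walks a producer and consumer through two full passes to show how such a check separates fresh entries from stale ones.

/*
 * Standalone sketch (not part of the commit) of an owner-bit validity
 * check like the one in get_next_valid_cqe(). The "hardware" producer is
 * simulated; all names and the phase convention are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define DEPTH 4 /* assumed power-of-two CQ depth */

struct fake_cqe {
        uint32_t owner; /* owner bit written by the producer */
};

static struct fake_cqe ring[DEPTH]; /* owner starts at 0 everywhere */

/* Producer writes entry pi and stamps it with the current pass's owner bit. */
static void produce(uint32_t pi)
{
        ring[pi % DEPTH].owner = !(pi & DEPTH); /* 1 on even passes, 0 on odd */
}

/* Consumer-side check: non-zero only for entries written during this pass. */
static int cqe_is_valid(uint32_t ci)
{
        return ring[ci % DEPTH].owner ^ !!(ci & DEPTH);
}

int main(void)
{
        uint32_t ci = 0, pi;

        for (pi = 0; pi < 2 * DEPTH; pi++) {
                produce(pi);
                while (cqe_is_valid(ci)) {
                        printf("consumed CQE %u (slot %u)\n",
                               (unsigned)ci, (unsigned)(ci % DEPTH));
                        ci++;
                }
        }
        return 0;
}

Because the consumer index keeps counting past the ring size, `ci & DEPTH` toggles exactly once per wrap, which is why no separate "valid" flag has to be cleared when the ring is reused.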
erdma_qp.c
@@ -0,0 +1,566 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2021, Alibaba Group */
/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/types.h>

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"

void erdma_qp_llp_close(struct erdma_qp *qp)
{
        struct erdma_qp_attrs qp_attrs;

        down_write(&qp->state_lock);

        switch (qp->attrs.state) {
        case ERDMA_QP_STATE_RTS:
        case ERDMA_QP_STATE_RTR:
        case ERDMA_QP_STATE_IDLE:
        case ERDMA_QP_STATE_TERMINATE:
                qp_attrs.state = ERDMA_QP_STATE_CLOSING;
                erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
                break;
        case ERDMA_QP_STATE_CLOSING:
                qp->attrs.state = ERDMA_QP_STATE_IDLE;
                break;
        default:
                break;
        }

        if (qp->cep) {
                erdma_cep_put(qp->cep);
                qp->cep = NULL;
        }

        up_write(&qp->state_lock);
}

struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
{
        struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);

        if (qp)
                return &qp->ibqp;

        return NULL;
}

static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
                                        struct erdma_qp_attrs *attrs,
                                        enum erdma_qp_attr_mask mask)
{
        int ret;
        struct erdma_dev *dev = qp->dev;
        struct erdma_cmdq_modify_qp_req req;
        struct tcp_sock *tp;
        struct erdma_cep *cep = qp->cep;
        struct sockaddr_storage local_addr, remote_addr;

        if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE))
                return -EINVAL;

        if (!(mask & ERDMA_QP_ATTR_MPA))
                return -EINVAL;

        ret = getname_local(cep->sock, &local_addr);
        if (ret < 0)
                return ret;

        ret = getname_peer(cep->sock, &remote_addr);
        if (ret < 0)
                return ret;

        qp->attrs.state = ERDMA_QP_STATE_RTS;

        tp = tcp_sk(qp->cep->sock->sk);

        erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
                                CMDQ_OPCODE_MODIFY_QP);

        req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) |
                  FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) |
                  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

        req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie);
        req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
        req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
        req.dport = to_sockaddr_in(remote_addr).sin_port;
        req.sport = to_sockaddr_in(local_addr).sin_port;

        req.send_nxt = tp->snd_nxt;
        /* rsvd tcp seq for mpa-rsp in server. */
        if (qp->attrs.qp_type == ERDMA_QP_PASSIVE)
                req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len;
        req.recv_nxt = tp->rcv_nxt;

        return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
                                   NULL);
}

static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
                                         struct erdma_qp_attrs *attrs,
                                         enum erdma_qp_attr_mask mask)
{
        struct erdma_dev *dev = qp->dev;
        struct erdma_cmdq_modify_qp_req req;

        qp->attrs.state = attrs->state;

        erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
                                CMDQ_OPCODE_MODIFY_QP);

        req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) |
                  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

        return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
                                   NULL);
}

int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
                             enum erdma_qp_attr_mask mask)
{
        int drop_conn, ret = 0;

        if (!mask)
                return 0;

        if (!(mask & ERDMA_QP_ATTR_STATE))
                return 0;

        switch (qp->attrs.state) {
        case ERDMA_QP_STATE_IDLE:
        case ERDMA_QP_STATE_RTR:
                if (attrs->state == ERDMA_QP_STATE_RTS) {
                        ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
                } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
                        qp->attrs.state = ERDMA_QP_STATE_ERROR;
                        if (qp->cep) {
                                erdma_cep_put(qp->cep);
                                qp->cep = NULL;
                        }
                        ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
                }
                break;
        case ERDMA_QP_STATE_RTS:
                drop_conn = 0;

                if (attrs->state == ERDMA_QP_STATE_CLOSING) {
                        ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
                        drop_conn = 1;
                } else if (attrs->state == ERDMA_QP_STATE_TERMINATE) {
                        qp->attrs.state = ERDMA_QP_STATE_TERMINATE;
                        ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
                        drop_conn = 1;
                } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
                        ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
                        qp->attrs.state = ERDMA_QP_STATE_ERROR;
                        drop_conn = 1;
                }

                if (drop_conn)
                        erdma_qp_cm_drop(qp);

                break;
        case ERDMA_QP_STATE_TERMINATE:
                if (attrs->state == ERDMA_QP_STATE_ERROR)
                        qp->attrs.state = ERDMA_QP_STATE_ERROR;
                break;
        case ERDMA_QP_STATE_CLOSING:
                if (attrs->state == ERDMA_QP_STATE_IDLE) {
                        qp->attrs.state = ERDMA_QP_STATE_IDLE;
                } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
                        ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
                        qp->attrs.state = ERDMA_QP_STATE_ERROR;
                } else if (attrs->state != ERDMA_QP_STATE_CLOSING) {
                        return -ECONNABORTED;
                }
                break;
        default:
                break;
        }

        return ret;
}

static void erdma_qp_safe_free(struct kref *ref)
{
        struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);

        complete(&qp->safe_free);
}

void erdma_qp_put(struct erdma_qp *qp)
{
        WARN_ON(kref_read(&qp->ref) < 1);
        kref_put(&qp->ref, erdma_qp_safe_free);
}

void erdma_qp_get(struct erdma_qp *qp)
{
        kref_get(&qp->ref);
}

static int fill_inline_data(struct erdma_qp *qp,
                            const struct ib_send_wr *send_wr, u16 wqe_idx,
                            u32 sgl_offset, __le32 *length_field)
{
        u32 remain_size, copy_size, data_off, bytes = 0;
        char *data;
        int i = 0;

        wqe_idx += (sgl_offset >> SQEBB_SHIFT);
        sgl_offset &= (SQEBB_SIZE - 1);
        data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
                               SQEBB_SHIFT);

        while (i < send_wr->num_sge) {
                bytes += send_wr->sg_list[i].length;
                if (bytes > (int)ERDMA_MAX_INLINE)
                        return -EINVAL;

                remain_size = send_wr->sg_list[i].length;
                data_off = 0;

                while (1) {
                        copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);

                        memcpy(data + sgl_offset,
                               (void *)(uintptr_t)send_wr->sg_list[i].addr +
                                       data_off,
                               copy_size);
                        remain_size -= copy_size;
                        data_off += copy_size;
                        sgl_offset += copy_size;
                        wqe_idx += (sgl_offset >> SQEBB_SHIFT);
                        sgl_offset &= (SQEBB_SIZE - 1);

                        data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
                                               qp->attrs.sq_size, SQEBB_SHIFT);
                        if (!remain_size)
                                break;
                }

                i++;
        }
        *length_field = cpu_to_le32(bytes);

        return bytes;
}

static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
                    u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
{
        int i = 0;
        u32 bytes = 0;
        char *sgl;

        if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
                return -EINVAL;

        if (sgl_offset & 0xF)
                return -EINVAL;

        while (i < send_wr->num_sge) {
                wqe_idx += (sgl_offset >> SQEBB_SHIFT);
                sgl_offset &= (SQEBB_SIZE - 1);
                sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
                                      qp->attrs.sq_size, SQEBB_SHIFT);

                bytes += send_wr->sg_list[i].length;
                memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
                       sizeof(struct ib_sge));

                sgl_offset += sizeof(struct ib_sge);
                i++;
        }

        *length_field = cpu_to_le32(bytes);
        return 0;
}

static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
                              const struct ib_send_wr *send_wr)
{
        u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
        u32 idx = *pi & (qp->attrs.sq_size - 1);
        enum ib_wr_opcode op = send_wr->opcode;
        struct erdma_readreq_sqe *read_sqe;
        struct erdma_reg_mr_sqe *regmr_sge;
        struct erdma_write_sqe *write_sqe;
        struct erdma_send_sqe *send_sqe;
        struct ib_rdma_wr *rdma_wr;
        struct erdma_mr *mr;
        __le32 *length_field;
        u64 wqe_hdr, *entry;
        struct ib_sge *sge;
        u32 attrs;
        int ret;

        entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
                                SQEBB_SHIFT);

        /* Clear the SQE header section. */
        *entry = 0;

        qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
        flags = send_wr->send_flags;
        wqe_hdr = FIELD_PREP(
                ERDMA_SQE_HDR_CE_MASK,
                ((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
        wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
                              flags & IB_SEND_SOLICITED ? 1 : 0);
        wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
                              flags & IB_SEND_FENCE ? 1 : 0);
        wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
                              flags & IB_SEND_INLINE ? 1 : 0);
        wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));

        switch (op) {
        case IB_WR_RDMA_WRITE:
        case IB_WR_RDMA_WRITE_WITH_IMM:
                hw_op = ERDMA_OP_WRITE;
                if (op == IB_WR_RDMA_WRITE_WITH_IMM)
                        hw_op = ERDMA_OP_WRITE_WITH_IMM;
                wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
                rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
                write_sqe = (struct erdma_write_sqe *)entry;

                write_sqe->imm_data = send_wr->ex.imm_data;
                write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
                write_sqe->sink_to_h =
                        cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
                write_sqe->sink_to_l =
                        cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));

                length_field = &write_sqe->length;
                wqe_size = sizeof(struct erdma_write_sqe);
                sgl_offset = wqe_size;
                break;
        case IB_WR_RDMA_READ:
        case IB_WR_RDMA_READ_WITH_INV:
                read_sqe = (struct erdma_readreq_sqe *)entry;
                if (unlikely(send_wr->num_sge != 1))
                        return -EINVAL;
                hw_op = ERDMA_OP_READ;
                if (op == IB_WR_RDMA_READ_WITH_INV) {
                        hw_op = ERDMA_OP_READ_WITH_INV;
                        read_sqe->invalid_stag =
                                cpu_to_le32(send_wr->ex.invalidate_rkey);
                }

                wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
                rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
                read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
                read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
                read_sqe->sink_to_l =
                        cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
                read_sqe->sink_to_h =
                        cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));

                sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
                                      qp->attrs.sq_size, SQEBB_SHIFT);
                sge->addr = rdma_wr->remote_addr;
                sge->lkey = rdma_wr->rkey;
                sge->length = send_wr->sg_list[0].length;
                wqe_size = sizeof(struct erdma_readreq_sqe) +
                           send_wr->num_sge * sizeof(struct ib_sge);

                goto out;
        case IB_WR_SEND:
        case IB_WR_SEND_WITH_IMM:
        case IB_WR_SEND_WITH_INV:
                send_sqe = (struct erdma_send_sqe *)entry;
                hw_op = ERDMA_OP_SEND;
                if (op == IB_WR_SEND_WITH_IMM) {
                        hw_op = ERDMA_OP_SEND_WITH_IMM;
                        send_sqe->imm_data = send_wr->ex.imm_data;
                } else if (op == IB_WR_SEND_WITH_INV) {
                        hw_op = ERDMA_OP_SEND_WITH_INV;
                        send_sqe->invalid_stag =
                                cpu_to_le32(send_wr->ex.invalidate_rkey);
                }
                wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
                length_field = &send_sqe->length;
                wqe_size = sizeof(struct erdma_send_sqe);
                sgl_offset = wqe_size;

                break;
        case IB_WR_REG_MR:
                wqe_hdr |=
                        FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
                regmr_sge = (struct erdma_reg_mr_sqe *)entry;
                mr = to_emr(reg_wr(send_wr)->mr);

                mr->access = ERDMA_MR_ACC_LR |
                             to_erdma_access_flags(reg_wr(send_wr)->access);
                regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
                regmr_sge->length = cpu_to_le32(mr->ibmr.length);
                regmr_sge->stag = cpu_to_le32(mr->ibmr.lkey);
                attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) |
                        FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
                        FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
                                   mr->mem.mtt_nents);

                if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) {
                        attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
                        /* Copy SGLs to SQE content to accelerate */
                        memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
                                               qp->attrs.sq_size, SQEBB_SHIFT),
                               mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents));
                        wqe_size = sizeof(struct erdma_reg_mr_sqe) +
                                   MTT_SIZE(mr->mem.mtt_nents);
                } else {
                        attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
                        wqe_size = sizeof(struct erdma_reg_mr_sqe);
                }

                regmr_sge->attrs = cpu_to_le32(attrs);
                goto out;
        case IB_WR_LOCAL_INV:
                wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
                                      ERDMA_OP_LOCAL_INV);
                regmr_sge = (struct erdma_reg_mr_sqe *)entry;
                regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
                wqe_size = sizeof(struct erdma_reg_mr_sqe);
                goto out;
        default:
                return -EOPNOTSUPP;
        }

        if (flags & IB_SEND_INLINE) {
                ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
                                       length_field);
                if (ret < 0)
                        return -EINVAL;
                wqe_size += ret;
                wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
        } else {
                ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
                if (ret)
                        return -EINVAL;
                wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
                wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
                                      send_wr->num_sge);
        }

out:
        wqebb_cnt = SQEBB_COUNT(wqe_size);
        wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
        *pi += wqebb_cnt;
        wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);

        *entry = wqe_hdr;

        return 0;
}

static void kick_sq_db(struct erdma_qp *qp, u16 pi)
{
        u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
                      FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);

        *(u64 *)qp->kern_qp.sq_db_info = db_data;
        writeq(db_data, qp->kern_qp.hw_sq_db);
}

int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
                    const struct ib_send_wr **bad_send_wr)
{
        struct erdma_qp *qp = to_eqp(ibqp);
        int ret = 0;
        const struct ib_send_wr *wr = send_wr;
        unsigned long flags;
        u16 sq_pi;

        if (!send_wr)
                return -EINVAL;

        spin_lock_irqsave(&qp->lock, flags);
        sq_pi = qp->kern_qp.sq_pi;

        while (wr) {
                if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
                        ret = -ENOMEM;
                        *bad_send_wr = send_wr;
                        break;
                }

                ret = erdma_push_one_sqe(qp, &sq_pi, wr);
                if (ret) {
                        *bad_send_wr = wr;
                        break;
                }
                qp->kern_qp.sq_pi = sq_pi;
                kick_sq_db(qp, sq_pi);

                wr = wr->next;
        }
        spin_unlock_irqrestore(&qp->lock, flags);

        return ret;
}

static int erdma_post_recv_one(struct erdma_qp *qp,
                               const struct ib_recv_wr *recv_wr)
{
        struct erdma_rqe *rqe =
                get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
                                qp->attrs.rq_size, RQE_SHIFT);

        rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
        rqe->qpn = cpu_to_le32(QP_ID(qp));

        if (recv_wr->num_sge == 0) {
                rqe->length = 0;
        } else if (recv_wr->num_sge == 1) {
                rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
                rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
                rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
        } else {
                return -EINVAL;
        }

        *(u64 *)qp->kern_qp.rq_db_info = *(u64 *)rqe;
        writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);

        qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
                recv_wr->wr_id;
        qp->kern_qp.rq_pi++;

        return 0;
}

int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
                    const struct ib_recv_wr **bad_recv_wr)
{
        const struct ib_recv_wr *wr = recv_wr;
        struct erdma_qp *qp = to_eqp(ibqp);
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&qp->lock, flags);

        while (wr) {
                ret = erdma_post_recv_one(qp, wr);
                if (ret) {
                        *bad_recv_wr = wr;
                        break;
                }
                wr = wr->next;
        }

        spin_unlock_irqrestore(&qp->lock, flags);
        return ret;
}
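erdma_post_send() above guards against overrunning the SQ ring with `(u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size`: the producer and consumer indexes run freely as 16-bit counters, and the wrapped slot is recovered with `pi & (sq_size - 1)` only when the entry is written. The standalone C sketch below is not part of the commit; the names are illustrative and the queue depth is assumed to be a power of two. It drives the counters across the 16-bit boundary to show that the occupancy computed this way stays correct under wraparound.

/*
 * Standalone sketch (not part of the commit) of free-running ring indexes
 * like sq_pi/sq_ci in erdma_post_send(). Names here are illustrative.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SQ_SIZE 8 /* assumed power-of-two queue depth, <= 65536 */

static uint16_t sq_pi; /* producer index, bumped by the post path */
static uint16_t sq_ci; /* consumer index, bumped by completions */

/* Occupancy in WQEBBs; the u16 cast makes counter wraparound harmless. */
static uint16_t sq_used(void)
{
        return (uint16_t)(sq_pi - sq_ci);
}

static int post_one_wqebb(void)
{
        if (sq_used() >= SQ_SIZE)
                return -1; /* queue full, mirrors the -ENOMEM case */
        /* The new entry occupies slot sq_pi & (SQ_SIZE - 1). */
        sq_pi++;
        return 0;
}

int main(void)
{
        int i;

        /* Start near the 16-bit boundary to show the check still holds. */
        sq_pi = sq_ci = 65530;

        for (i = 0; i < 12; i++)
                assert(post_one_wqebb() == 0 || sq_used() == SQ_SIZE);

        printf("pi=%u ci=%u used=%u\n", (unsigned)sq_pi, (unsigned)sq_ci,
               (unsigned)sq_used());

        sq_ci += 4; /* pretend four completions were polled */
        printf("after completions: used=%u\n", (unsigned)sq_used());
        return 0;
}

Keeping the indexes free-running is what lets erdma_poll_one_cqe() advance sq_ci by the WQEBB count taken from the completed SQE header without ever reconciling a separate wrap flag.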
erdma_verbs.c: file diff not shown here because it is too large.