{net, IB}/mlx4: Initialize CQ buffers in the driver when possible
Perform CQ initialization in the driver when the capability is supported by the FW. When passing the CQ to HW, indicate that the CQ buffer has been pre-initialized. Doing so decreases CQ creation time.

Testing on P8 showed that the creation time of a single 2048-entry CQ was reduced from ~395us to ~170us, which is 2.3x faster.

Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1d0795ea9c
commit e45678973d
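The heart of the change is the pair mlx4_init_user_cqes()/mlx4_init_kernel_cqes() in the diff below: every CQE is filled with 0xcc (the value the FW expects in an uninitialized entry) before SW2HW_CQ is issued with a non-zero opmod. For user CQs the driver copies from a single pre-filled page-sized staging buffer, so it needs only one copy_to_user() per page. The following is a minimal stand-alone user-space sketch of that staging-buffer pattern, not the driver code itself: memcpy() stands in for copy_to_user(), and PAGE_SIZE/CQE_SIZE are assumed values chosen for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096		/* assumed page size for the sketch */
#define CQE_SIZE  64		/* assumed CQE size for the sketch */

/* Fill 'entries' CQEs at 'buf' with 0xcc, copying at most one page at a
 * time from a pre-filled staging buffer (mirrors mlx4_init_user_cqes();
 * memcpy() stands in for copy_to_user()).
 */
static int init_cqes(void *buf, int entries, int cqe_size)
{
	int entries_per_copy = PAGE_SIZE / cqe_size;
	void *init_ents;
	int i;

	init_ents = malloc(PAGE_SIZE);
	if (!init_ents)
		return -1;

	/* 0xcc is the initialization value the FW expects. */
	memset(init_ents, 0xcc, PAGE_SIZE);

	if (entries_per_copy < entries) {
		for (i = 0; i < entries / entries_per_copy; i++) {
			memcpy(buf, init_ents, PAGE_SIZE);
			buf = (char *)buf + PAGE_SIZE;
		}
	} else {
		memcpy(buf, init_ents, (size_t)entries * cqe_size);
	}

	free(init_ents);
	return 0;
}

int main(void)
{
	int entries = 2048;	/* CQ size used in the commit's timing test */
	unsigned char *cq_buf = malloc((size_t)entries * CQE_SIZE);

	if (!cq_buf || init_cqes(cq_buf, entries, CQE_SIZE))
		return 1;

	printf("first CQE byte: 0x%02x\n", cq_buf[0]);	/* prints 0xcc */
	free(cq_buf);
	return 0;
}

The single copy in the else branch covers CQs smaller than one page; for larger CQs the mlx4 entry count is a power of two, so it divides evenly into page-sized chunks and the loop needs no remainder handling.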
drivers/infiniband/hw/mlx4/cq.c

@@ -181,6 +181,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_cq *cq;
 	struct mlx4_uar *uar;
+	void *buf_addr;
 	int err;
 
 	if (entries < 1 || entries > dev->dev->caps.max_cqes)
@@ -211,6 +212,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 			goto err_cq;
 		}
 
+		buf_addr = (void *)(unsigned long)ucmd.buf_addr;
+
 		err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
 					  ucmd.buf_addr, entries);
 		if (err)
@@ -237,6 +240,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 		if (err)
 			goto err_db;
 
+		buf_addr = &cq->buf.buf;
+
 		uar = &dev->priv_uar;
 		cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
 	}
@@ -246,7 +251,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 
 	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
 			    cq->db.dma, &cq->mcq, vector, 0,
-			    !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
+			    !!(cq->create_flags &
+			       IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION),
+			    buf_addr, !!context);
 	if (err)
 		goto err_dbmap;
drivers/net/ethernet/mellanox/mlx4/cq.c

@@ -144,9 +144,9 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
 }
 
 static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
-			 int cq_num)
+			 int cq_num, u8 opmod)
 {
-	return mlx4_cmd(dev, mailbox->dma, cq_num, 0,
+	return mlx4_cmd(dev, mailbox->dma, cq_num, opmod,
 			MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A,
 			MLX4_CMD_WRAPPED);
 }
@@ -287,11 +287,61 @@ static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
 	__mlx4_cq_free_icm(dev, cqn);
 }
 
+static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size)
+{
+	int entries_per_copy = PAGE_SIZE / cqe_size;
+	void *init_ents;
+	int err = 0;
+	int i;
+
+	init_ents = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!init_ents)
+		return -ENOMEM;
+
+	/* Populate a list of CQ entries to reduce the number of
+	 * copy_to_user calls. 0xcc is the initialization value
+	 * required by the FW.
+	 */
+	memset(init_ents, 0xcc, PAGE_SIZE);
+
+	if (entries_per_copy < entries) {
+		for (i = 0; i < entries / entries_per_copy; i++) {
+			err = copy_to_user(buf, init_ents, PAGE_SIZE);
+			if (err)
+				goto out;
+
+			buf += PAGE_SIZE;
+		}
+	} else {
+		err = copy_to_user(buf, init_ents, entries * cqe_size);
+	}
+
+out:
+	kfree(init_ents);
+
+	return err;
+}
+
+static void mlx4_init_kernel_cqes(struct mlx4_buf *buf,
+				  int entries,
+				  int cqe_size)
+{
+	int i;
+
+	if (buf->nbufs == 1)
+		memset(buf->direct.buf, 0xcc, entries * cqe_size);
+	else
+		for (i = 0; i < buf->npages; i++)
+			memset(buf->page_list[i].buf, 0xcc,
+			       1UL << buf->page_shift);
+}
+
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 		  struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec,
 		  struct mlx4_cq *cq, unsigned vector, int collapsed,
-		  int timestamp_en)
+		  int timestamp_en, void *buf_addr, bool user_cq)
 {
+	bool sw_cq_init = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT;
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cq_table *cq_table = &priv->cq_table;
 	struct mlx4_cmd_mailbox *mailbox;
@@ -336,7 +386,20 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 	cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
 	cq_context->db_rec_addr = cpu_to_be64(db_rec);
 
-	err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn);
+	if (sw_cq_init) {
+		if (user_cq) {
+			err = mlx4_init_user_cqes(buf_addr, nent,
+						  dev->caps.cqe_size);
+			if (err)
+				sw_cq_init = false;
+		} else {
+			mlx4_init_kernel_cqes(buf_addr, nent,
+					      dev->caps.cqe_size);
+		}
+	}
+
+	err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn, sw_cq_init);
+
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	if (err)
 		goto err_radix;
drivers/net/ethernet/mellanox/mlx4/en_cq.c

@@ -143,7 +143,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 	cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
 	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
 			    &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq,
-			    cq->vector, 0, timestamp_en);
+			    cq->vector, 0, timestamp_en, &cq->wqres.buf, false);
 	if (err)
 		goto free_eq;
drivers/net/ethernet/mellanox/mlx4/fw.c

@@ -166,6 +166,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[37] = "sl to vl mapping table change event support",
 		[38] = "user MAC support",
 		[39] = "Report driver version to FW support",
+		[40] = "SW CQ initialization support",
 	};
 	int i;
 
@@ -1098,6 +1099,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM;
 	if (field32 & (1 << 21))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS;
+	if (field32 & (1 << 23))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SW_CQ_INIT;
 
 	for (i = 1; i <= dev_cap->num_ports; i++) {
 		err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i);
include/linux/mlx4/device.h

@@ -226,6 +226,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37,
 	MLX4_DEV_CAP_FLAG2_USER_MAC_EN = 1ULL << 38,
 	MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW = 1ULL << 39,
+	MLX4_DEV_CAP_FLAG2_SW_CQ_INIT = 1ULL << 40,
 };
 
 enum {
@@ -1136,7 +1137,8 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  unsigned vector, int collapsed, int timestamp_en);
+		  unsigned int vector, int collapsed, int timestamp_en,
+		  void *buf_addr, bool user_cq);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
 			  int *base, u8 flags, u8 usage);