{net, IB}/mlx4: Initialize CQ buffers in the driver when possible

Perform CQ initialization in the driver when the capability is supported
by the FW. When passing the CQ to HW, indicate that the CQ buffer has
been pre-initialized.

Doing so decreases CQ creation time. Testing on P8 showed that creation
of a single 2048-entry CQ dropped from ~395us to ~170us, about 2.3x
faster.

Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Daniel Jurgens, 2018-11-21 17:12:05 +02:00, committed by David S. Miller
parent 1d0795ea9c
commit e45678973d
5 changed files with 82 additions and 7 deletions
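In outline: CQ buffer pre-initialization moves into the driver, gated on a new FW capability bit, and the opcode modifier of the SW2HW_CQ command tells the FW whether the buffer was already initialized, so it can skip its own slower init. Below is a minimal standalone C model of that control flow; the 0xcc fill value and the capability/opmod plumbing follow the diff, while the function and macro names here are illustrative only, not the driver's.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SW_CQ_INIT_CAP  (1ULL << 40)  /* models MLX4_DEV_CAP_FLAG2_SW_CQ_INIT */
#define CQE_INIT_VAL    0xcc          /* initialization value required by the FW */

/* Returns the opcode modifier for SW2HW_CQ: 1 = "software already
 * initialized the CQ buffer", 0 = legacy behavior (FW initializes). */
static uint8_t prepare_cq_buffer(uint64_t dev_caps, void *buf, size_t len)
{
        bool sw_cq_init = dev_caps & SW_CQ_INIT_CAP;

        if (sw_cq_init)
                memset(buf, CQE_INIT_VAL, len); /* kernel-CQ path; user CQs
                                                 * go through copy_to_user() */
        return sw_cq_init ? 1 : 0;
}

int main(void)
{
        static uint8_t cq_buf[2048 * 32];  /* 2048 CQEs of 32 bytes, as in the test */
        uint8_t opmod = prepare_cq_buffer(SW_CQ_INIT_CAP, cq_buf, sizeof(cq_buf));

        printf("SW2HW_CQ opmod=%u first_byte=0x%02x\n", opmod, cq_buf[0]);
        return 0;
}

On hardware that does not report the capability, the opmod stays 0 and behavior is unchanged.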

drivers/infiniband/hw/mlx4/cq.c

@@ -181,6 +181,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
         struct mlx4_ib_dev *dev = to_mdev(ibdev);
         struct mlx4_ib_cq *cq;
         struct mlx4_uar *uar;
+        void *buf_addr;
         int err;
 
         if (entries < 1 || entries > dev->dev->caps.max_cqes)
@@ -211,6 +212,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
                         goto err_cq;
                 }
 
+                buf_addr = (void *)(unsigned long)ucmd.buf_addr;
+
                 err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
                                           ucmd.buf_addr, entries);
                 if (err)
@@ -237,6 +240,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
                 if (err)
                         goto err_db;
 
+                buf_addr = &cq->buf.buf;
+
                 uar = &dev->priv_uar;
                 cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
         }
@@ -246,7 +251,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 
         err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
                             cq->db.dma, &cq->mcq, vector, 0,
-                            !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
+                            !!(cq->create_flags &
+                               IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION),
+                            buf_addr, !!context);
         if (err)
                 goto err_dbmap;
 
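One detail worth noting in the user-CQ branch: ucmd.buf_addr arrives from the uverbs ABI as a 64-bit integer, and the double cast through unsigned long is the usual kernel idiom for turning it into a pointer. A standalone illustration of just that idiom (the helper name is mine, not the driver's):

#include <stdint.h>

/* Illustrative only: convert an ABI-provided 64-bit address into a
 * pointer, as in "(void *)(unsigned long)ucmd.buf_addr" above. The
 * intermediate unsigned long matches pointer width on both 32- and
 * 64-bit kernels, avoiding an int-to-pointer size-mismatch warning. */
void *u64_to_ptr(uint64_t addr)
{
        return (void *)(unsigned long)addr;
}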

drivers/net/ethernet/mellanox/mlx4/cq.c

@@ -144,9 +144,9 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
 }
 
 static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
-                         int cq_num)
+                         int cq_num, u8 opmod)
 {
-        return mlx4_cmd(dev, mailbox->dma, cq_num, 0,
+        return mlx4_cmd(dev, mailbox->dma, cq_num, opmod,
                         MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A,
                         MLX4_CMD_WRAPPED);
 }
@@ -287,11 +287,61 @@ static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
         __mlx4_cq_free_icm(dev, cqn);
 }
 
+static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size)
+{
+        int entries_per_copy = PAGE_SIZE / cqe_size;
+        void *init_ents;
+        int err = 0;
+        int i;
+
+        init_ents = kmalloc(PAGE_SIZE, GFP_KERNEL);
+        if (!init_ents)
+                return -ENOMEM;
+
+        /* Populate a list of CQ entries to reduce the number of
+         * copy_to_user calls. 0xcc is the initialization value
+         * required by the FW.
+         */
+        memset(init_ents, 0xcc, PAGE_SIZE);
+
+        if (entries_per_copy < entries) {
+                for (i = 0; i < entries / entries_per_copy; i++) {
+                        err = copy_to_user(buf, init_ents, PAGE_SIZE);
+                        if (err)
+                                goto out;
+
+                        buf += PAGE_SIZE;
+                }
+        } else {
+                err = copy_to_user(buf, init_ents, entries * cqe_size);
+        }
+
+out:
+        kfree(init_ents);
+
+        return err;
+}
+
+static void mlx4_init_kernel_cqes(struct mlx4_buf *buf,
+                                  int entries,
+                                  int cqe_size)
+{
+        int i;
+
+        if (buf->nbufs == 1)
+                memset(buf->direct.buf, 0xcc, entries * cqe_size);
+        else
+                for (i = 0; i < buf->npages; i++)
+                        memset(buf->page_list[i].buf, 0xcc,
+                               1UL << buf->page_shift);
+}
+
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
                   struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec,
                   struct mlx4_cq *cq, unsigned vector, int collapsed,
-                  int timestamp_en)
+                  int timestamp_en, void *buf_addr, bool user_cq)
 {
+        bool sw_cq_init = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT;
         struct mlx4_priv *priv = mlx4_priv(dev);
         struct mlx4_cq_table *cq_table = &priv->cq_table;
         struct mlx4_cmd_mailbox *mailbox;
@@ -336,7 +386,20 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
         cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
         cq_context->db_rec_addr = cpu_to_be64(db_rec);
 
-        err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn);
+        if (sw_cq_init) {
+                if (user_cq) {
+                        err = mlx4_init_user_cqes(buf_addr, nent,
+                                                  dev->caps.cqe_size);
+                        if (err)
+                                sw_cq_init = false;
+                } else {
+                        mlx4_init_kernel_cqes(buf_addr, nent,
+                                              dev->caps.cqe_size);
+                }
+        }
+
+        err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn, sw_cq_init);
+
         mlx4_free_cmd_mailbox(dev, mailbox);
         if (err)
                 goto err_radix;
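The new mlx4_init_user_cqes() amortizes the cost of copy_to_user() by filling one page with the FW-required 0xcc pattern and copying that page repeatedly, rather than issuing a copy per CQE; if the user copy fails, the caller simply clears sw_cq_init so the FW falls back to initializing the buffer itself. Below is a standalone userspace analog of the same batching pattern, with memcpy standing in for copy_to_user; PAGE_SIZE, the 0xcc value, and the loop shape follow the hunk above, the rest is illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE    4096
#define CQE_INIT_VAL 0xcc

/* Fill 'entries' CQEs of 'cqe_size' bytes at 'buf' one page at a time,
 * mirroring mlx4_init_user_cqes(); memcpy models copy_to_user(). */
static int init_cqes(unsigned char *buf, int entries, int cqe_size)
{
        int entries_per_copy = PAGE_SIZE / cqe_size;
        unsigned char *init_ents = malloc(PAGE_SIZE);
        int i;

        if (!init_ents)
                return -1;
        memset(init_ents, CQE_INIT_VAL, PAGE_SIZE);

        if (entries_per_copy < entries) {
                /* Whole pages; mlx4 rounds CQ depths up to powers of
                 * two, so the division below is exact. */
                for (i = 0; i < entries / entries_per_copy; i++)
                        memcpy(buf + (size_t)i * PAGE_SIZE, init_ents, PAGE_SIZE);
        } else {
                memcpy(buf, init_ents, (size_t)entries * cqe_size);
        }
        free(init_ents);
        return 0;
}

int main(void)
{
        enum { ENTRIES = 2048, CQE_SIZE = 32 };
        unsigned char *cq = malloc((size_t)ENTRIES * CQE_SIZE);

        if (!cq || init_cqes(cq, ENTRIES, CQE_SIZE))
                return 1;
        printf("last byte: 0x%02x\n", cq[ENTRIES * CQE_SIZE - 1]);
        free(cq);
        return 0;
}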

drivers/net/ethernet/mellanox/mlx4/en_cq.c

@@ -143,7 +143,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
         cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
         err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
                             &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq,
-                            cq->vector, 0, timestamp_en);
+                            cq->vector, 0, timestamp_en, &cq->wqres.buf, false);
         if (err)
                 goto free_eq;
 

drivers/net/ethernet/mellanox/mlx4/fw.c

@@ -166,6 +166,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
                 [37] = "sl to vl mapping table change event support",
                 [38] = "user MAC support",
                 [39] = "Report driver version to FW support",
+                [40] = "SW CQ initialization support",
         };
         int i;
 
@@ -1098,6 +1099,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
                 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM;
         if (field32 & (1 << 21))
                 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS;
+        if (field32 & (1 << 23))
+                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SW_CQ_INIT;
 
         for (i = 1; i <= dev_cap->num_ports; i++) {
                 err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i);
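These fw.c hunks plumb the feature through device discovery: QUERY_DEV_CAP reports it as bit 23 of a 32-bit extended-flags word, which the driver re-homes as bit 40 of its 64-bit flags2 space, the same index used by the dump table above. A small standalone model of that translation (the flag constant is copied from the diff; parse_ext_flags() is an illustrative name):

#include <stdint.h>
#include <stdio.h>

#define MLX4_DEV_CAP_FLAG2_SW_CQ_INIT (1ULL << 40)

/* Models the mapping in mlx4_QUERY_DEV_CAP(): wire bit 23 of the
 * extended-flags dword becomes flags2 bit 40 in the driver. */
static uint64_t parse_ext_flags(uint32_t field32)
{
        uint64_t flags2 = 0;

        if (field32 & (1u << 23))
                flags2 |= MLX4_DEV_CAP_FLAG2_SW_CQ_INIT;
        return flags2;
}

int main(void)
{
        uint64_t flags2 = parse_ext_flags(1u << 23);

        printf("SW CQ init supported: %s\n",
               (flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT) ? "yes" : "no");
        return 0;
}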

include/linux/mlx4/device.h

@@ -226,6 +226,7 @@ enum {
         MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37,
         MLX4_DEV_CAP_FLAG2_USER_MAC_EN           = 1ULL << 38,
         MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW  = 1ULL << 39,
+        MLX4_DEV_CAP_FLAG2_SW_CQ_INIT            = 1ULL << 40,
 };
 
 enum {
@@ -1136,7 +1137,8 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
                   struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-                  unsigned vector, int collapsed, int timestamp_en);
+                  unsigned int vector, int collapsed, int timestamp_en,
+                  void *buf_addr, bool user_cq);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
                           int *base, u8 flags, u8 usage);