IB/mlx4: Add support for masked atomic operations
Add support for masked atomic operations (masked compare and swap, masked fetch and add). Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
parent
5e80ba8ff0
commit
6fa8f71984
|
@@ -661,6 +661,14 @@ repoll:
|
||||||
wc->opcode = IB_WC_FETCH_ADD;
|
wc->opcode = IB_WC_FETCH_ADD;
|
||||||
wc->byte_len = 8;
|
wc->byte_len = 8;
|
||||||
break;
|
break;
|
||||||
|
case MLX4_OPCODE_MASKED_ATOMIC_CS:
|
||||||
|
wc->opcode = IB_WC_MASKED_COMP_SWAP;
|
||||||
|
wc->byte_len = 8;
|
||||||
|
break;
|
||||||
|
case MLX4_OPCODE_MASKED_ATOMIC_FA:
|
||||||
|
wc->opcode = IB_WC_MASKED_FETCH_ADD;
|
||||||
|
wc->byte_len = 8;
|
||||||
|
break;
|
||||||
case MLX4_OPCODE_BIND_MW:
|
case MLX4_OPCODE_BIND_MW:
|
||||||
wc->opcode = IB_WC_BIND_MW;
|
wc->opcode = IB_WC_BIND_MW;
|
||||||
break;
|
break;
|
||||||
|
|
|
@@ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
|
||||||
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
|
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
|
||||||
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
|
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
|
||||||
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
|
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
|
||||||
|
props->masked_atomic_cap = IB_ATOMIC_HCA;
|
||||||
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
|
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
|
||||||
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
|
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
|
||||||
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
|
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
|
||||||
|
|
|
@@ -74,17 +74,19 @@ enum {
|
||||||
};
|
};
|
||||||
|
|
||||||
static const __be32 mlx4_ib_opcode[] = {
|
static const __be32 mlx4_ib_opcode[] = {
|
||||||
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
|
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
|
||||||
[IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
|
[IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
|
||||||
[IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
|
[IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
|
||||||
[IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
|
[IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
|
||||||
[IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
|
[IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
|
||||||
[IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
|
[IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
|
||||||
[IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
|
[IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
|
||||||
[IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
|
[IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
|
||||||
[IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
|
[IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
|
||||||
[IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
|
[IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
|
||||||
[IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
|
[IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
|
||||||
|
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
|
||||||
|
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
|
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
|
||||||
|
@@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
|
||||||
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
|
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
|
||||||
aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
|
aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
|
||||||
aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
|
aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
|
||||||
|
} else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
|
||||||
|
aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
|
||||||
|
aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
|
||||||
} else {
|
} else {
|
||||||
aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
|
aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
|
||||||
aseg->compare = 0;
|
aseg->compare = 0;
|
||||||
|
@@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
|
||||||
|
struct ib_send_wr *wr)
|
||||||
|
{
|
||||||
|
aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
|
||||||
|
aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
|
||||||
|
aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
|
||||||
|
aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
|
||||||
|
}
|
||||||
|
|
||||||
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
|
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
|
||||||
struct ib_send_wr *wr)
|
struct ib_send_wr *wr)
|
||||||
{
|
{
|
||||||
|
@@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
|
||||||
switch (wr->opcode) {
|
switch (wr->opcode) {
|
||||||
case IB_WR_ATOMIC_CMP_AND_SWP:
|
case IB_WR_ATOMIC_CMP_AND_SWP:
|
||||||
case IB_WR_ATOMIC_FETCH_AND_ADD:
|
case IB_WR_ATOMIC_FETCH_AND_ADD:
|
||||||
|
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
|
||||||
set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
|
set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
|
||||||
wr->wr.atomic.rkey);
|
wr->wr.atomic.rkey);
|
||||||
wqe += sizeof (struct mlx4_wqe_raddr_seg);
|
wqe += sizeof (struct mlx4_wqe_raddr_seg);
|
||||||
|
@@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
|
||||||
|
set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
|
||||||
|
wr->wr.atomic.rkey);
|
||||||
|
wqe += sizeof (struct mlx4_wqe_raddr_seg);
|
||||||
|
|
||||||
|
set_masked_atomic_seg(wqe, wr);
|
||||||
|
wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
|
||||||
|
|
||||||
|
size += (sizeof (struct mlx4_wqe_raddr_seg) +
|
||||||
|
sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
case IB_WR_RDMA_READ:
|
case IB_WR_RDMA_READ:
|
||||||
case IB_WR_RDMA_WRITE:
|
case IB_WR_RDMA_WRITE:
|
||||||
case IB_WR_RDMA_WRITE_WITH_IMM:
|
case IB_WR_RDMA_WRITE_WITH_IMM:
|
||||||
|
|
|
@@ -123,8 +123,8 @@ enum {
|
||||||
MLX4_OPCODE_RDMA_READ = 0x10,
|
MLX4_OPCODE_RDMA_READ = 0x10,
|
||||||
MLX4_OPCODE_ATOMIC_CS = 0x11,
|
MLX4_OPCODE_ATOMIC_CS = 0x11,
|
||||||
MLX4_OPCODE_ATOMIC_FA = 0x12,
|
MLX4_OPCODE_ATOMIC_FA = 0x12,
|
||||||
MLX4_OPCODE_ATOMIC_MASK_CS = 0x14,
|
MLX4_OPCODE_MASKED_ATOMIC_CS = 0x14,
|
||||||
MLX4_OPCODE_ATOMIC_MASK_FA = 0x15,
|
MLX4_OPCODE_MASKED_ATOMIC_FA = 0x15,
|
||||||
MLX4_OPCODE_BIND_MW = 0x18,
|
MLX4_OPCODE_BIND_MW = 0x18,
|
||||||
MLX4_OPCODE_FMR = 0x19,
|
MLX4_OPCODE_FMR = 0x19,
|
||||||
MLX4_OPCODE_LOCAL_INVAL = 0x1b,
|
MLX4_OPCODE_LOCAL_INVAL = 0x1b,
|
||||||
|
|
|
@@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg {
|
||||||
__be64 compare;
|
__be64 compare;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct mlx4_wqe_masked_atomic_seg {
|
||||||
|
__be64 swap_add;
|
||||||
|
__be64 compare;
|
||||||
|
__be64 swap_add_mask;
|
||||||
|
__be64 compare_mask;
|
||||||
|
};
|
||||||
|
|
||||||
struct mlx4_wqe_data_seg {
|
struct mlx4_wqe_data_seg {
|
||||||
__be32 byte_count;
|
__be32 byte_count;
|
||||||
__be32 lkey;
|
__be32 lkey;
|
||||||
|
|
Loading…
Reference in New Issue