Merge branch 'mlx4-XDP-TX-improvements'
Tariq Toukan says:
====================
mlx4_en XDP TX improvements
This patchset contains performance improvements
to the XDP_TX use case in the mlx4 Eth driver.
Patch 1 is a simple change in a function parameter type.
Patch 2 replaces a call to a generic function with the
relevant parts inlined.
Patch 3 moves the write of descriptors' constant values
from data path to control path.
Series generated against net-next commit:
833e0e2f24 ("net: dst: move cpu inside ifdef to avoid compilation warning")
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
df24cd4fda
|
@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev)
|
||||||
mlx4_en_arm_cq(priv, cq);
|
mlx4_en_arm_cq(priv, cq);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring);
|
||||||
mlx4_en_init_recycle_ring(priv, i);
|
mlx4_en_init_recycle_ring(priv, i);
|
||||||
/* XDP TX CQ should never be armed */
|
/* XDP TX CQ should never be armed */
|
||||||
}
|
}
|
||||||
|
|
|
@ -778,7 +778,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
|
||||||
case XDP_PASS:
|
case XDP_PASS:
|
||||||
break;
|
break;
|
||||||
case XDP_TX:
|
case XDP_TX:
|
||||||
if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
|
if (likely(!mlx4_en_xmit_frame(ring, frags, priv,
|
||||||
length, cq_ring,
|
length, cq_ring,
|
||||||
&doorbell_pending))) {
|
&doorbell_pending))) {
|
||||||
frags[0].page = NULL;
|
frags[0].page = NULL;
|
||||||
|
|
|
@ -1085,13 +1085,35 @@ tx_drop:
|
||||||
#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
|
#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
|
||||||
/ 16) & 0x3f)
|
/ 16) & 0x3f)
|
||||||
|
|
||||||
|
void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
|
||||||
|
struct mlx4_en_tx_ring *ring)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < ring->size; i++) {
|
||||||
|
struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
|
||||||
|
struct mlx4_en_tx_desc *tx_desc = ring->buf +
|
||||||
|
(i << LOG_TXBB_SIZE);
|
||||||
|
|
||||||
|
tx_info->map0_byte_count = PAGE_SIZE;
|
||||||
|
tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
|
||||||
|
tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
|
||||||
|
tx_info->ts_requested = 0;
|
||||||
|
tx_info->nr_maps = 1;
|
||||||
|
tx_info->linear = 1;
|
||||||
|
tx_info->inl = 0;
|
||||||
|
|
||||||
|
tx_desc->data.lkey = ring->mr_key;
|
||||||
|
tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
|
||||||
|
tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
|
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
|
||||||
struct mlx4_en_rx_alloc *frame,
|
struct mlx4_en_rx_alloc *frame,
|
||||||
struct net_device *dev, unsigned int length,
|
struct mlx4_en_priv *priv, unsigned int length,
|
||||||
int tx_ind, bool *doorbell_pending)
|
int tx_ind, bool *doorbell_pending)
|
||||||
{
|
{
|
||||||
struct mlx4_en_priv *priv = netdev_priv(dev);
|
|
||||||
union mlx4_wqe_qpn_vlan qpn_vlan = {};
|
|
||||||
struct mlx4_en_tx_desc *tx_desc;
|
struct mlx4_en_tx_desc *tx_desc;
|
||||||
struct mlx4_en_tx_info *tx_info;
|
struct mlx4_en_tx_info *tx_info;
|
||||||
struct mlx4_wqe_data_seg *data;
|
struct mlx4_wqe_data_seg *data;
|
||||||
|
@ -1123,25 +1145,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
|
||||||
tx_info->page = frame->page;
|
tx_info->page = frame->page;
|
||||||
frame->page = NULL;
|
frame->page = NULL;
|
||||||
tx_info->map0_dma = dma;
|
tx_info->map0_dma = dma;
|
||||||
tx_info->map0_byte_count = PAGE_SIZE;
|
|
||||||
tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
|
|
||||||
tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
|
tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
|
||||||
tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
|
|
||||||
tx_info->ts_requested = 0;
|
|
||||||
tx_info->nr_maps = 1;
|
|
||||||
tx_info->linear = 1;
|
|
||||||
tx_info->inl = 0;
|
|
||||||
|
|
||||||
dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
|
dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
|
||||||
length, PCI_DMA_TODEVICE);
|
length, PCI_DMA_TODEVICE);
|
||||||
|
|
||||||
data->addr = cpu_to_be64(dma + frame->page_offset);
|
data->addr = cpu_to_be64(dma + frame->page_offset);
|
||||||
data->lkey = ring->mr_key;
|
|
||||||
dma_wmb();
|
dma_wmb();
|
||||||
data->byte_count = cpu_to_be32(length);
|
data->byte_count = cpu_to_be32(length);
|
||||||
|
|
||||||
/* tx completion can avoid cache line miss for common cases */
|
/* tx completion can avoid cache line miss for common cases */
|
||||||
tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
|
|
||||||
|
|
||||||
op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
|
op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
|
||||||
((ring->prod & ring->size) ?
|
((ring->prod & ring->size) ?
|
||||||
|
@ -1152,10 +1165,13 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
|
||||||
|
|
||||||
ring->prod += MLX4_EN_XDP_TX_NRTXBB;
|
ring->prod += MLX4_EN_XDP_TX_NRTXBB;
|
||||||
|
|
||||||
qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
|
/* Ensure new descriptor hits memory
|
||||||
|
* before setting ownership of this descriptor to HW
|
||||||
|
*/
|
||||||
|
dma_wmb();
|
||||||
|
tx_desc->ctrl.owner_opcode = op_own;
|
||||||
|
ring->xmit_more++;
|
||||||
|
|
||||||
mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
|
|
||||||
op_own, false, false);
|
|
||||||
*doorbell_pending = true;
|
*doorbell_pending = true;
|
||||||
|
|
||||||
return NETDEV_TX_OK;
|
return NETDEV_TX_OK;
|
||||||
|
|
|
@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
|
||||||
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
|
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
|
||||||
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
|
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
|
||||||
struct mlx4_en_rx_alloc *frame,
|
struct mlx4_en_rx_alloc *frame,
|
||||||
struct net_device *dev, unsigned int length,
|
struct mlx4_en_priv *priv, unsigned int length,
|
||||||
int tx_ind, bool *doorbell_pending);
|
int tx_ind, bool *doorbell_pending);
|
||||||
void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
|
void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
|
||||||
bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
|
bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
|
||||||
|
@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
|
||||||
int node, int queue_index);
|
int node, int queue_index);
|
||||||
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
|
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
|
||||||
struct mlx4_en_tx_ring **pring);
|
struct mlx4_en_tx_ring **pring);
|
||||||
|
void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
|
||||||
|
struct mlx4_en_tx_ring *ring);
|
||||||
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
|
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
|
||||||
struct mlx4_en_tx_ring *ring,
|
struct mlx4_en_tx_ring *ring,
|
||||||
int cq, int user_prio);
|
int cq, int user_prio);
|
||||||
|
|
Loading…
Reference in New Issue