Merge branch 'net_prefetch-API'

Tariq Toukan says:

====================
net_prefetch API

This patchset adds a common net API for L1 cacheline size-aware prefetch.

Patch 1 introduces the common API in net and aligns the drivers to use it.
Patches 2 and 3 add usage in mlx4 and mlx5 Eth drivers.

Series generated against net-next commit:
079f921e9f Merge tag 'batadv-next-for-davem-20200824' of git://git.open-mesh.org/linux-merge
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-08-26 15:55:54 -07:00
commit 751e42515e
17 changed files with 51 additions and 86 deletions

View File

@ -2372,10 +2372,7 @@ no_mem:
if (fl->use_pages) {
void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
prefetch(addr);
#if L1_CACHE_BYTES < 128
prefetch(addr + L1_CACHE_BYTES);
#endif
net_prefetch(addr);
__refill_fl(adap, fl);
if (lro > 0) {
lro_add_page(adap, qs, fl,

View File

@ -557,10 +557,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
net_prefetch(va);
skb = *out_skb = napi_alloc_skb(&ring_data->napi,
HNS_RX_HEAD_SIZE);

View File

@ -3091,10 +3091,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
* lines. In such a case, single fetch would suffice to cache in the
* relevant part of the header.
*/
prefetch(ring->va);
#if L1_CACHE_BYTES < 128
prefetch(ring->va + L1_CACHE_BYTES);
#endif
net_prefetch(ring->va);
if (!skb) {
ret = hns3_alloc_skb(ring, length, ring->va);

View File

@ -310,10 +310,7 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
rx_buffer->page_offset;
/* prefetch first cache line of first page */
prefetch(page_addr);
#if L1_CACHE_BYTES < 128
prefetch((void *)((u8 *)page_addr + L1_CACHE_BYTES));
#endif
net_prefetch(page_addr);
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi,

View File

@ -1992,10 +1992,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
prefetch(xdp->data + L1_CACHE_BYTES);
#endif
net_prefetch(xdp->data);
/* Note, we get here by enabling legacy-rx via:
*
* ethtool --set-priv-flags <dev> legacy-rx on
@ -2078,10 +2076,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
prefetch(xdp->data_meta);
#if L1_CACHE_BYTES < 128
prefetch(xdp->data_meta + L1_CACHE_BYTES);
#endif
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))

View File

@ -1309,10 +1309,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
return NULL;
/* prefetch first cache line of first page */
va = page_address(rx_buffer->page) + rx_buffer->page_offset;
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
net_prefetch(va);
/* allocate a skb to store the frags */
skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
@ -1376,10 +1373,8 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
return NULL;
/* prefetch first cache line of first page */
va = page_address(rx_buffer->page) + rx_buffer->page_offset;
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
net_prefetch(va);
/* build an skb around the page buffer */
skb = build_skb(va - IAVF_SKB_PAD, truesize);
if (unlikely(!skb))

View File

@ -919,10 +919,7 @@ ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
prefetch(xdp->data_meta);
#if L1_CACHE_BYTES < 128
prefetch((void *)(xdp->data + L1_CACHE_BYTES));
#endif
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
@ -964,10 +961,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
prefetch((void *)(xdp->data + L1_CACHE_BYTES));
#endif /* L1_CACHE_BYTES */
net_prefetch(xdp->data);
/* allocate a skb to store the frags */
skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,

View File

@ -8047,10 +8047,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
net_prefetch(va);
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
@ -8104,10 +8101,7 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
net_prefetch(va);
/* build an skb around the page buffer */
skb = build_skb(va - IGB_SKB_PAD, truesize);

View File

@ -1550,10 +1550,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
net_prefetch(va);
/* build an skb around the page buffer */
skb = build_skb(va - IGC_SKB_PAD, truesize);
@ -1589,10 +1586,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
#if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES);
#endif
net_prefetch(va);
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);

View File

@ -2095,10 +2095,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
prefetch(xdp->data + L1_CACHE_BYTES);
#endif
net_prefetch(xdp->data);
/* Note, we get here by enabling legacy-rx via:
*
* ethtool --set-priv-flags <dev> legacy-rx on
@ -2161,10 +2159,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
prefetch(xdp->data_meta);
#if L1_CACHE_BYTES < 128
prefetch(xdp->data_meta + L1_CACHE_BYTES);
#endif
net_prefetch(xdp->data_meta);
/* build an skb to around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);

View File

@ -866,10 +866,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
prefetch(xdp->data + L1_CACHE_BYTES);
#endif
net_prefetch(xdp->data);
/* Note, we get here by enabling legacy-rx via:
*
* ethtool --set-priv-flags <dev> legacy-rx on
@ -947,10 +945,7 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
* have a consumer accessing first few bytes of meta data,
* and then actual data.
*/
prefetch(xdp->data_meta);
#if L1_CACHE_BYTES < 128
prefetch(xdp->data_meta + L1_CACHE_BYTES);
#endif
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);

View File

@ -705,7 +705,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
frags = ring->rx_info + (index << priv->log_rx_info);
va = page_address(frags[0].page) + frags[0].page_offset;
prefetchw(va);
net_prefetchw(va);
/*
* make sure we read the CQE after we read the ownership bit
*/

View File

@ -201,7 +201,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
prefetchw(session->wqe->data);
net_prefetchw(session->wqe->data);
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
session->pkt_count = 0;
@ -322,7 +322,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats = sq->stats;
prefetchw(wqe);
net_prefetchw(wqe);
if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
stats->err++;

View File

@ -49,7 +49,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
xdp->data_end = xdp->data + cqe_bcnt32;
xdp_set_data_meta_invalid(xdp);
xsk_buff_dma_sync_for_cpu(xdp);
prefetch(xdp->data);
net_prefetch(xdp->data);
rcu_read_lock();
consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
@ -100,7 +100,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
xdp->data_end = xdp->data + cqe_bcnt;
xdp_set_data_meta_invalid(xdp);
xsk_buff_dma_sync_for_cpu(xdp);
prefetch(xdp->data);
net_prefetch(xdp->data);
if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
rq->stats->wqe_err++;

View File

@ -30,7 +30,6 @@
* SOFTWARE.
*/
#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
@ -1141,8 +1140,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
frag_size, DMA_FROM_DEVICE);
prefetchw(va); /* xdp_frame data area */
prefetch(data);
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(data);
rcu_read_lock();
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
@ -1184,7 +1183,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
return NULL;
}
prefetchw(skb->data);
net_prefetchw(skb->data);
while (byte_cnt) {
u16 frag_consumed_bytes =
@ -1399,7 +1398,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
return NULL;
}
prefetchw(skb->data);
net_prefetchw(skb->data);
if (unlikely(frag_offset >= PAGE_SIZE)) {
di++;
@ -1452,8 +1451,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
frag_size, DMA_FROM_DEVICE);
prefetchw(va); /* xdp_frame data area */
prefetch(data);
net_prefetchw(va); /* xdp_frame data area */
net_prefetch(data);
rcu_read_lock();
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);

View File

@ -30,7 +30,6 @@
* SOFTWARE.
*/
#include <linux/prefetch.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <net/udp.h>
@ -115,7 +114,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
return NULL;
}
prefetchw(skb->data);
net_prefetchw(skb->data);
skb_reserve(skb, NET_IP_ALIGN);
/* Reserve for ethernet and IP header */

View File

@ -2193,6 +2193,22 @@ int netdev_get_num_tc(struct net_device *dev)
return dev->num_tc;
}
static inline void net_prefetch(void *p)
{
prefetch(p);
#if L1_CACHE_BYTES < 128
prefetch((u8 *)p + L1_CACHE_BYTES);
#endif
}
static inline void net_prefetchw(void *p)
{
prefetchw(p);
#if L1_CACHE_BYTES < 128
prefetchw((u8 *)p + L1_CACHE_BYTES);
#endif
}
void netdev_unbind_sb_channel(struct net_device *dev,
struct net_device *sb_dev);
int netdev_bind_sb_channel_queue(struct net_device *dev,