Merge branch 'net_prefetch-API'
Tariq Toukan says:
====================
net_prefetch API
This patchset adds a common net API for L1-cache-line-size-aware prefetch.
Patch 1 introduces the common API in net and converts the existing drivers to use it.
Patches 2 and 3 add usage in the mlx4 and mlx5 Ethernet drivers.
Series generated against net-next commit:
079f921e9f
Merge tag 'batadv-next-for-davem-20200824' of git://git.open-mesh.org/linux-merge
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
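
For reference, a condensed view of the pattern this series consolidates (assuming the usual <linux/prefetch.h> and <linux/cache.h> definitions of prefetch() and L1_CACHE_BYTES): network headers normally fit in the first 128 bytes of a packet, so on architectures whose L1 cache line is smaller than 128 bytes a single prefetch() pulls in only part of the header, and drivers open-coded a second prefetch of the following line. The new helper hides that check; a prefetchw() variant, net_prefetchw(), is added alongside it in the netdev_get_num_tc() hunk at the end of this diff. The before/after below uses a generic `va` header pointer, as in the iavf/igb hunks:

/* new common helper, as added at the end of this diff */
static inline void net_prefetch(void *p)
{
	prefetch(p);
#if L1_CACHE_BYTES < 128
	prefetch((u8 *)p + L1_CACHE_BYTES);
#endif
}

/* before: open-coded in each driver's RX path */
	prefetch(va);
#if L1_CACHE_BYTES < 128
	prefetch(va + L1_CACHE_BYTES);
#endif

/* after */
	net_prefetch(va);
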
commit 751e42515e
@@ -2372,10 +2372,7 @@ no_mem:
 		if (fl->use_pages) {
 			void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
 
-			prefetch(addr);
-#if L1_CACHE_BYTES < 128
-			prefetch(addr + L1_CACHE_BYTES);
-#endif
+			net_prefetch(addr);
 			__refill_fl(adap, fl);
 			if (lro > 0) {
 				lro_add_page(adap, qs, fl,

@@ -557,10 +557,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 	va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	skb = *out_skb = napi_alloc_skb(&ring_data->napi,
 					HNS_RX_HEAD_SIZE);

@@ -3091,10 +3091,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
 	 * lines. In such a case, single fetch would suffice to cache in the
	 * relevant part of the header.
	 */
-	prefetch(ring->va);
-#if L1_CACHE_BYTES < 128
-	prefetch(ring->va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(ring->va);
 
 	if (!skb) {
 		ret = hns3_alloc_skb(ring, length, ring->va);

@@ -310,10 +310,7 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
 				  rx_buffer->page_offset;
 
 		/* prefetch first cache line of first page */
-		prefetch(page_addr);
-#if L1_CACHE_BYTES < 128
-		prefetch((void *)((u8 *)page_addr + L1_CACHE_BYTES));
-#endif
+		net_prefetch(page_addr);
 
 		/* allocate a skb to store the frags */
 		skb = napi_alloc_skb(&rx_ring->q_vector->napi,

@@ -1992,10 +1992,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data);
+
 	/* Note, we get here by enabling legacy-rx via:
 	 *
 	 *	ethtool --set-priv-flags <dev> legacy-rx on

@@ -2078,10 +2076,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 	 * likely have a consumer accessing first few bytes of meta
	 * data, and then actual data.
	 */
-	prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data_meta);
+
 	/* build an skb around the page buffer */
 	skb = build_skb(xdp->data_hard_start, truesize);
 	if (unlikely(!skb))

@@ -1309,10 +1309,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
 		return NULL;
 	/* prefetch first cache line of first page */
 	va = page_address(rx_buffer->page) + rx_buffer->page_offset;
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* allocate a skb to store the frags */
 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,

@@ -1376,10 +1373,8 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
 		return NULL;
 	/* prefetch first cache line of first page */
 	va = page_address(rx_buffer->page) + rx_buffer->page_offset;
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
+
 	/* build an skb around the page buffer */
 	skb = build_skb(va - IAVF_SKB_PAD, truesize);
 	if (unlikely(!skb))

@@ -919,10 +919,7 @@ ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	 * likely have a consumer accessing first few bytes of meta
	 * data, and then actual data.
	 */
-	prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
-	prefetch((void *)(xdp->data + L1_CACHE_BYTES));
-#endif
+	net_prefetch(xdp->data_meta);
 	/* build an skb around the page buffer */
 	skb = build_skb(xdp->data_hard_start, truesize);
 	if (unlikely(!skb))

@@ -964,10 +961,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
-	prefetch((void *)(xdp->data + L1_CACHE_BYTES));
-#endif /* L1_CACHE_BYTES */
+	net_prefetch(xdp->data);
 
 	/* allocate a skb to store the frags */
 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,

@@ -8047,10 +8047,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* allocate a skb to store the frags */
 	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);

@@ -8104,10 +8101,7 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* build an skb around the page buffer */
 	skb = build_skb(va - IGB_SKB_PAD, truesize);

@@ -1550,10 +1550,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* build an skb around the page buffer */
 	skb = build_skb(va - IGC_SKB_PAD, truesize);

@@ -1589,10 +1586,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
-#if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
-#endif
+	net_prefetch(va);
 
 	/* allocate a skb to store the frags */
 	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);

@@ -2095,10 +2095,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data);
+
 	/* Note, we get here by enabling legacy-rx via:
 	 *
 	 *	ethtool --set-priv-flags <dev> legacy-rx on

@@ -2161,10 +2159,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 	 * likely have a consumer accessing first few bytes of meta
	 * data, and then actual data.
	 */
-	prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data_meta);
 
 	/* build an skb to around the page buffer */
 	skb = build_skb(xdp->data_hard_start, truesize);

@@ -866,10 +866,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data);
+
 	/* Note, we get here by enabling legacy-rx via:
 	 *
 	 *	ethtool --set-priv-flags <dev> legacy-rx on

@@ -947,10 +945,7 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
 	 * have a consumer accessing first few bytes of meta data,
	 * and then actual data.
	 */
-	prefetch(xdp->data_meta);
-#if L1_CACHE_BYTES < 128
-	prefetch(xdp->data_meta + L1_CACHE_BYTES);
-#endif
+	net_prefetch(xdp->data_meta);
 
 	/* build an skb around the page buffer */
 	skb = build_skb(xdp->data_hard_start, truesize);

@@ -705,7 +705,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 		frags = ring->rx_info + (index << priv->log_rx_info);
 		va = page_address(frags[0].page) + frags[0].page_offset;
-		prefetchw(va);
+		net_prefetchw(va);
 		/*
 		 * make sure we read the CQE after we read the ownership bit
 		 */

@@ -201,7 +201,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
 	pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
 	session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 
-	prefetchw(session->wqe->data);
+	net_prefetchw(session->wqe->data);
 	session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
 	session->pkt_count = 0;
 

@@ -322,7 +322,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
 
 	struct mlx5e_xdpsq_stats *stats = sq->stats;
 
-	prefetchw(wqe);
+	net_prefetchw(wqe);
 
 	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
 		stats->err++;

@@ -49,7 +49,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 	xdp->data_end = xdp->data + cqe_bcnt32;
 	xdp_set_data_meta_invalid(xdp);
 	xsk_buff_dma_sync_for_cpu(xdp);
-	prefetch(xdp->data);
+	net_prefetch(xdp->data);
 
 	rcu_read_lock();
 	consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);

@@ -100,7 +100,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 	xdp->data_end = xdp->data + cqe_bcnt;
 	xdp_set_data_meta_invalid(xdp);
 	xsk_buff_dma_sync_for_cpu(xdp);
-	prefetch(xdp->data);
+	net_prefetch(xdp->data);
 
 	if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
 		rq->stats->wqe_err++;

@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/prefetch.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/tcp.h>

@@ -1141,8 +1140,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 
 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
 				      frag_size, DMA_FROM_DEVICE);
-	prefetchw(va); /* xdp_frame data area */
-	prefetch(data);
+	net_prefetchw(va); /* xdp_frame data area */
+	net_prefetch(data);
 
 	rcu_read_lock();
 	mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);

@@ -1184,7 +1183,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 		return NULL;
 	}
 
-	prefetchw(skb->data);
+	net_prefetchw(skb->data);
 
 	while (byte_cnt) {
 		u16 frag_consumed_bytes =

@@ -1399,7 +1398,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 		return NULL;
 	}
 
-	prefetchw(skb->data);
+	net_prefetchw(skb->data);
 
 	if (unlikely(frag_offset >= PAGE_SIZE)) {
 		di++;

@@ -1452,8 +1451,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 
 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
 				      frag_size, DMA_FROM_DEVICE);
-	prefetchw(va); /* xdp_frame data area */
-	prefetch(data);
+	net_prefetchw(va); /* xdp_frame data area */
+	net_prefetch(data);
 
 	rcu_read_lock();
 	mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);

@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/prefetch.h>
 #include <linux/ip.h>
 #include <linux/udp.h>
 #include <net/udp.h>

@@ -115,7 +114,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 		return NULL;
 	}
 
-	prefetchw(skb->data);
+	net_prefetchw(skb->data);
 	skb_reserve(skb, NET_IP_ALIGN);
 
 	/* Reserve for ethernet and IP header */

@@ -2193,6 +2193,22 @@ int netdev_get_num_tc(struct net_device *dev)
 	return dev->num_tc;
 }
 
+static inline void net_prefetch(void *p)
+{
+	prefetch(p);
+#if L1_CACHE_BYTES < 128
+	prefetch((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
+static inline void net_prefetchw(void *p)
+{
+	prefetchw(p);
+#if L1_CACHE_BYTES < 128
+	prefetchw((u8 *)p + L1_CACHE_BYTES);
+#endif
+}
+
 void netdev_unbind_sb_channel(struct net_device *dev,
 			      struct net_device *sb_dev);
 int netdev_bind_sb_channel_queue(struct net_device *dev,