igb: add XDP support

Add XDP support to the IGB driver.
The implementation follows the IXGBE XDP implementation
closely and I used the following patches as basis:

1. commit 9247080816 ("ixgbe: add XDP support for pass and drop actions")
2. commit 33fdc82f08 ("ixgbe: add support for XDP_TX action")
3. commit ed93a39871 ("ixgbe: tweak page counting for XDP_REDIRECT")

Due to the hardware constraints of the devices using the
IGB driver we must share the TX queues with XDP which
means locking the TX queue for XDP.

I ran tests on an older device to get better numbers.
Test machine:

Intel(R) Atom(TM) CPU C2338 @ 1.74GHz (2 Cores)
2x Intel I211

Routing Original Driver Network Stack: 382 Kpps

Routing XDP Redirect (xdp_fwd_kern): 1.48 Mpps
XDP Drop: 1.48 Mpps

Using XDP we can achieve line rate forwarding even on
an older Intel Atom CPU.

Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de>
Tested-by: Sandeep Penigalapati <sandeep.penigalapati@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
This commit is contained in:
Sven Auhagen 2020-09-02 13:32:22 -07:00 committed by Tony Nguyen
parent bcbf1be0ad
commit 9cbc948b5a
3 changed files with 481 additions and 36 deletions

View File

@ -19,6 +19,8 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/mdio.h> #include <linux/mdio.h>
#include <net/xdp.h>
struct igb_adapter; struct igb_adapter;
#define E1000_PCS_CFG_IGN_SD 1 #define E1000_PCS_CFG_IGN_SD 1
@ -79,6 +81,12 @@ struct igb_adapter;
#define IGB_I210_RX_LATENCY_100 2213 #define IGB_I210_RX_LATENCY_100 2213
#define IGB_I210_RX_LATENCY_1000 448 #define IGB_I210_RX_LATENCY_1000 448
/* XDP */
#define IGB_XDP_PASS 0
#define IGB_XDP_CONSUMED BIT(0)
#define IGB_XDP_TX BIT(1)
#define IGB_XDP_REDIR BIT(2)
struct vf_data_storage { struct vf_data_storage {
unsigned char vf_mac_addresses[ETH_ALEN]; unsigned char vf_mac_addresses[ETH_ALEN];
u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES];
@ -132,17 +140,62 @@ struct vf_mac_filter {
/* Supported Rx Buffer Sizes */ /* Supported Rx Buffer Sizes */
#define IGB_RXBUFFER_256 256 #define IGB_RXBUFFER_256 256
#define IGB_RXBUFFER_1536 1536
#define IGB_RXBUFFER_2048 2048 #define IGB_RXBUFFER_2048 2048
#define IGB_RXBUFFER_3072 3072 #define IGB_RXBUFFER_3072 3072
#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 #define IGB_RX_HDR_LEN IGB_RXBUFFER_256
#define IGB_TS_HDR_LEN 16 #define IGB_TS_HDR_LEN 16
#define IGB_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) /* Attempt to maximize the headroom available for incoming frames. We
* use a 2K buffer for receives and need 1536/1534 to store the data for
* the frame. This leaves us with 512 bytes of room. From that we need
* to deduct the space needed for the shared info and the padding needed
* to IP align the frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the 3K
* buffers.
*/
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
#define IGB_MAX_FRAME_BUILD_SKB \ #define IGB_MAX_FRAME_BUILD_SKB (IGB_RXBUFFER_1536 - NET_IP_ALIGN)
(SKB_WITH_OVERHEAD(IGB_RXBUFFER_2048) - IGB_SKB_PAD - IGB_TS_HDR_LEN) #define IGB_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + IGB_TS_HDR_LEN + IGB_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IGB_RXBUFFER_2048))
static inline int igb_compute_pad(int rx_buf_len)
{
int page_size, pad_size;
page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
return pad_size;
}
static inline int igb_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (IGB_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = IGB_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = IGB_RXBUFFER_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return igb_compute_pad(rx_buf_len);
}
#define IGB_SKB_PAD igb_skb_pad()
#else #else
#define IGB_MAX_FRAME_BUILD_SKB (IGB_RXBUFFER_2048 - IGB_TS_HDR_LEN) #define IGB_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif #endif
/* How many Rx Buffers do we bundle into one write to the hardware ? */ /* How many Rx Buffers do we bundle into one write to the hardware ? */
@ -194,13 +247,22 @@ enum igb_tx_flags {
#define IGB_SFF_ADDRESSING_MODE 0x4 #define IGB_SFF_ADDRESSING_MODE 0x4
#define IGB_SFF_8472_UNSUP 0x00 #define IGB_SFF_8472_UNSUP 0x00
enum igb_tx_buf_type {
IGB_TYPE_SKB = 0,
IGB_TYPE_XDP,
};
/* wrapper around a pointer to a socket buffer, /* wrapper around a pointer to a socket buffer,
* so a DMA handle can be stored along with the buffer * so a DMA handle can be stored along with the buffer
*/ */
struct igb_tx_buffer { struct igb_tx_buffer {
union e1000_adv_tx_desc *next_to_watch; union e1000_adv_tx_desc *next_to_watch;
unsigned long time_stamp; unsigned long time_stamp;
struct sk_buff *skb; enum igb_tx_buf_type type;
union {
struct sk_buff *skb;
struct xdp_frame *xdpf;
};
unsigned int bytecount; unsigned int bytecount;
u16 gso_segs; u16 gso_segs;
__be16 protocol; __be16 protocol;
@ -248,6 +310,7 @@ struct igb_ring_container {
struct igb_ring { struct igb_ring {
struct igb_q_vector *q_vector; /* backlink to q_vector */ struct igb_q_vector *q_vector; /* backlink to q_vector */
struct net_device *netdev; /* back pointer to net_device */ struct net_device *netdev; /* back pointer to net_device */
struct bpf_prog *xdp_prog;
struct device *dev; /* device pointer for dma mapping */ struct device *dev; /* device pointer for dma mapping */
union { /* array of buffer info structs */ union { /* array of buffer info structs */
struct igb_tx_buffer *tx_buffer_info; struct igb_tx_buffer *tx_buffer_info;
@ -288,6 +351,7 @@ struct igb_ring {
struct u64_stats_sync rx_syncp; struct u64_stats_sync rx_syncp;
}; };
}; };
struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp;
struct igb_q_vector { struct igb_q_vector {
@ -339,7 +403,7 @@ static inline unsigned int igb_rx_bufsz(struct igb_ring *ring)
return IGB_RXBUFFER_3072; return IGB_RXBUFFER_3072;
if (ring_uses_build_skb(ring)) if (ring_uses_build_skb(ring))
return IGB_MAX_FRAME_BUILD_SKB + IGB_TS_HDR_LEN; return IGB_MAX_FRAME_BUILD_SKB;
#endif #endif
return IGB_RXBUFFER_2048; return IGB_RXBUFFER_2048;
} }
@ -467,6 +531,7 @@ struct igb_adapter {
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
struct net_device *netdev; struct net_device *netdev;
struct bpf_prog *xdp_prog;
unsigned long state; unsigned long state;
unsigned int flags; unsigned int flags;
@ -643,6 +708,9 @@ enum igb_boards {
extern char igb_driver_name[]; extern char igb_driver_name[];
int igb_xmit_xdp_ring(struct igb_adapter *adapter,
struct igb_ring *ring,
struct xdp_frame *xdpf);
int igb_open(struct net_device *netdev); int igb_open(struct net_device *netdev);
int igb_close(struct net_device *netdev); int igb_close(struct net_device *netdev);
int igb_up(struct igb_adapter *); int igb_up(struct igb_adapter *);

View File

@ -961,6 +961,10 @@ static int igb_set_ringparam(struct net_device *netdev,
memcpy(&temp_ring[i], adapter->rx_ring[i], memcpy(&temp_ring[i], adapter->rx_ring[i],
sizeof(struct igb_ring)); sizeof(struct igb_ring));
/* Clear copied XDP RX-queue info */
memset(&temp_ring[i].xdp_rxq, 0,
sizeof(temp_ring[i].xdp_rxq));
temp_ring[i].count = new_rx_count; temp_ring[i].count = new_rx_count;
err = igb_setup_rx_resources(&temp_ring[i]); err = igb_setup_rx_resources(&temp_ring[i]);
if (err) { if (err) {

View File

@ -30,6 +30,8 @@
#include <linux/if_ether.h> #include <linux/if_ether.h>
#include <linux/aer.h> #include <linux/aer.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#ifdef CONFIG_IGB_DCA #ifdef CONFIG_IGB_DCA
@ -2823,6 +2825,147 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
} }
} }
static int igb_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
{
int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
struct igb_adapter *adapter = netdev_priv(dev);
bool running = netif_running(dev);
struct bpf_prog *old_prog;
bool need_reset;
/* verify igb ring attributes are sufficient for XDP */
for (i = 0; i < adapter->num_rx_queues; i++) {
struct igb_ring *ring = adapter->rx_ring[i];
if (frame_size > igb_rx_bufsz(ring))
return -EINVAL;
}
old_prog = xchg(&adapter->xdp_prog, prog);
need_reset = (!!prog != !!old_prog);
/* device is up and bpf is added/removed, must setup the RX queues */
if (need_reset && running) {
igb_close(dev);
} else {
for (i = 0; i < adapter->num_rx_queues; i++)
(void)xchg(&adapter->rx_ring[i]->xdp_prog,
adapter->xdp_prog);
}
if (old_prog)
bpf_prog_put(old_prog);
/* bpf is just replaced, RXQ and MTU are already setup */
if (!need_reset)
return 0;
if (running)
igb_open(dev);
return 0;
}
static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return igb_xdp_setup(dev, xdp->prog);
default:
return -EINVAL;
}
}
static void igb_xdp_ring_update_tail(struct igb_ring *ring)
{
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
*/
wmb();
writel(ring->next_to_use, ring->tail);
}
static struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
{
unsigned int r_idx = smp_processor_id();
if (r_idx >= adapter->num_tx_queues)
r_idx = r_idx % adapter->num_tx_queues;
return adapter->tx_ring[r_idx];
}
static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
{
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
int cpu = smp_processor_id();
struct igb_ring *tx_ring;
struct netdev_queue *nq;
u32 ret;
if (unlikely(!xdpf))
return IGB_XDP_CONSUMED;
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
if (unlikely(!tx_ring))
return -ENXIO;
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
__netif_tx_unlock(nq);
return ret;
}
static int igb_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames, u32 flags)
{
struct igb_adapter *adapter = netdev_priv(dev);
int cpu = smp_processor_id();
struct igb_ring *tx_ring;
struct netdev_queue *nq;
int drops = 0;
int i;
if (unlikely(test_bit(__IGB_DOWN, &adapter->state)))
return -ENETDOWN;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
if (unlikely(!tx_ring))
return -ENXIO;
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;
err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
if (err != IGB_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
__netif_tx_unlock(nq);
if (unlikely(flags & XDP_XMIT_FLUSH))
igb_xdp_ring_update_tail(tx_ring);
return n - drops;
}
static const struct net_device_ops igb_netdev_ops = { static const struct net_device_ops igb_netdev_ops = {
.ndo_open = igb_open, .ndo_open = igb_open,
.ndo_stop = igb_close, .ndo_stop = igb_close,
@ -2847,6 +2990,8 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_fdb_add = igb_ndo_fdb_add, .ndo_fdb_add = igb_ndo_fdb_add,
.ndo_features_check = igb_features_check, .ndo_features_check = igb_features_check,
.ndo_setup_tc = igb_setup_tc, .ndo_setup_tc = igb_setup_tc,
.ndo_bpf = igb_xdp,
.ndo_xdp_xmit = igb_xdp_xmit,
}; };
/** /**
@ -4179,6 +4324,7 @@ static void igb_configure_tx(struct igb_adapter *adapter)
**/ **/
int igb_setup_rx_resources(struct igb_ring *rx_ring) int igb_setup_rx_resources(struct igb_ring *rx_ring)
{ {
struct igb_adapter *adapter = netdev_priv(rx_ring->netdev);
struct device *dev = rx_ring->dev; struct device *dev = rx_ring->dev;
int size; int size;
@ -4201,6 +4347,13 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
rx_ring->next_to_clean = 0; rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0; rx_ring->next_to_use = 0;
rx_ring->xdp_prog = adapter->xdp_prog;
/* XDP RX-queue info */
if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->queue_index) < 0)
goto err;
return 0; return 0;
err: err:
@ -4505,6 +4658,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
int reg_idx = ring->reg_idx; int reg_idx = ring->reg_idx;
u32 rxdctl = 0; u32 rxdctl = 0;
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED, NULL));
/* disable the queue */ /* disable the queue */
wr32(E1000_RXDCTL(reg_idx), 0); wr32(E1000_RXDCTL(reg_idx), 0);
@ -4709,6 +4866,8 @@ void igb_free_rx_resources(struct igb_ring *rx_ring)
{ {
igb_clean_rx_ring(rx_ring); igb_clean_rx_ring(rx_ring);
rx_ring->xdp_prog = NULL;
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
vfree(rx_ring->rx_buffer_info); vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL; rx_ring->rx_buffer_info = NULL;
@ -6078,6 +6237,80 @@ dma_error:
return -1; return -1;
} }
int igb_xmit_xdp_ring(struct igb_adapter *adapter,
struct igb_ring *tx_ring,
struct xdp_frame *xdpf)
{
union e1000_adv_tx_desc *tx_desc;
u32 len, cmd_type, olinfo_status;
struct igb_tx_buffer *tx_buffer;
dma_addr_t dma;
u16 i;
len = xdpf->len;
if (unlikely(!igb_desc_unused(tx_ring)))
return IGB_XDP_CONSUMED;
dma = dma_map_single(tx_ring->dev, xdpf->data, len, DMA_TO_DEVICE);
if (dma_mapping_error(tx_ring->dev, dma))
return IGB_XDP_CONSUMED;
/* record the location of the first descriptor for this packet */
tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
tx_buffer->bytecount = len;
tx_buffer->gso_segs = 1;
tx_buffer->protocol = 0;
i = tx_ring->next_to_use;
tx_desc = IGB_TX_DESC(tx_ring, i);
dma_unmap_len_set(tx_buffer, len, len);
dma_unmap_addr_set(tx_buffer, dma, dma);
tx_buffer->type = IGB_TYPE_XDP;
tx_buffer->xdpf = xdpf;
tx_desc->read.buffer_addr = cpu_to_le64(dma);
/* put descriptor type bits */
cmd_type = E1000_ADVTXD_DTYP_DATA |
E1000_ADVTXD_DCMD_DEXT |
E1000_ADVTXD_DCMD_IFCS;
cmd_type |= len | IGB_TXD_DCMD;
tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
olinfo_status = cpu_to_le32(len << E1000_ADVTXD_PAYLEN_SHIFT);
/* 82575 requires a unique index per ring */
if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
olinfo_status |= tx_ring->reg_idx << 4;
tx_desc->read.olinfo_status = olinfo_status;
netdev_tx_sent_queue(txring_txq(tx_ring), tx_buffer->bytecount);
/* set the timestamp */
tx_buffer->time_stamp = jiffies;
/* Avoid any potential race with xdp_xmit and cleanup */
smp_wmb();
/* set next_to_watch value indicating a packet is present */
i++;
if (i == tx_ring->count)
i = 0;
tx_buffer->next_to_watch = tx_desc;
tx_ring->next_to_use = i;
/* Make sure there is space in the ring for the next send. */
igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
writel(i, tx_ring->tail);
return IGB_XDP_TX;
}
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
struct igb_ring *tx_ring) struct igb_ring *tx_ring)
{ {
@ -6106,6 +6339,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
/* record the location of the first descriptor for this packet */ /* record the location of the first descriptor for this packet */
first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
first->type = IGB_TYPE_SKB;
first->skb = skb; first->skb = skb;
first->bytecount = skb->len; first->bytecount = skb->len;
first->gso_segs = 1; first->gso_segs = 1;
@ -6258,6 +6492,19 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
struct igb_adapter *adapter = netdev_priv(netdev); struct igb_adapter *adapter = netdev_priv(netdev);
int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
if (adapter->xdp_prog) {
int i;
for (i = 0; i < adapter->num_rx_queues; i++) {
struct igb_ring *ring = adapter->rx_ring[i];
if (max_frame > igb_rx_bufsz(ring)) {
netdev_warn(adapter->netdev, "Requested MTU size is not supported with XDP\n");
return -EINVAL;
}
}
}
/* adjust max frame to be at least the size of a standard frame */ /* adjust max frame to be at least the size of a standard frame */
if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
@ -7811,7 +8058,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
total_packets += tx_buffer->gso_segs; total_packets += tx_buffer->gso_segs;
/* free the skb */ /* free the skb */
napi_consume_skb(tx_buffer->skb, napi_budget); if (tx_buffer->type == IGB_TYPE_SKB)
napi_consume_skb(tx_buffer->skb, napi_budget);
else
xdp_return_frame(tx_buffer->xdpf);
/* unmap skb header data */ /* unmap skb header data */
dma_unmap_single(tx_ring->dev, dma_unmap_single(tx_ring->dev,
@ -7995,8 +8245,8 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer)
* the pagecnt_bias and page count so that we fully restock the * the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds. * number of references the driver holds.
*/ */
if (unlikely(!pagecnt_bias)) { if (unlikely(pagecnt_bias == 1)) {
page_ref_add(page, USHRT_MAX); page_ref_add(page, USHRT_MAX - 1);
rx_buffer->pagecnt_bias = USHRT_MAX; rx_buffer->pagecnt_bias = USHRT_MAX;
} }
@ -8035,20 +8285,21 @@ static void igb_add_rx_frag(struct igb_ring *rx_ring,
static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
struct igb_rx_buffer *rx_buffer, struct igb_rx_buffer *rx_buffer,
union e1000_adv_rx_desc *rx_desc, struct xdp_buff *xdp,
unsigned int size) union e1000_adv_rx_desc *rx_desc)
{ {
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
#else #else
unsigned int truesize = SKB_DATA_ALIGN(size); unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
xdp->data_hard_start);
#endif #endif
unsigned int size = xdp->data_end - xdp->data;
unsigned int headlen; unsigned int headlen;
struct sk_buff *skb; struct sk_buff *skb;
/* prefetch first cache line of first page */ /* prefetch first cache line of first page */
net_prefetch(va); net_prefetch(xdp->data);
/* allocate a skb to store the frags */ /* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN); skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
@ -8056,24 +8307,24 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
return NULL; return NULL;
if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) { if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb);
va += IGB_TS_HDR_LEN; xdp->data += IGB_TS_HDR_LEN;
size -= IGB_TS_HDR_LEN; size -= IGB_TS_HDR_LEN;
} }
/* Determine available headroom for copy */ /* Determine available headroom for copy */
headlen = size; headlen = size;
if (headlen > IGB_RX_HDR_LEN) if (headlen > IGB_RX_HDR_LEN)
headlen = eth_get_headlen(skb->dev, va, IGB_RX_HDR_LEN); headlen = eth_get_headlen(skb->dev, xdp->data, IGB_RX_HDR_LEN);
/* align pull length to size of long to optimize memcpy performance */ /* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long)));
/* update all of the pointers */ /* update all of the pointers */
size -= headlen; size -= headlen;
if (size) { if (size) {
skb_add_rx_frag(skb, 0, rx_buffer->page, skb_add_rx_frag(skb, 0, rx_buffer->page,
(va + headlen) - page_address(rx_buffer->page), (xdp->data + headlen) - page_address(rx_buffer->page),
size, truesize); size, truesize);
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize; rx_buffer->page_offset ^= truesize;
@ -8089,29 +8340,29 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
struct igb_rx_buffer *rx_buffer, struct igb_rx_buffer *rx_buffer,
union e1000_adv_rx_desc *rx_desc, struct xdp_buff *xdp,
unsigned int size) union e1000_adv_rx_desc *rx_desc)
{ {
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
unsigned int truesize = igb_rx_pg_size(rx_ring) / 2; unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
#else #else
unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
SKB_DATA_ALIGN(IGB_SKB_PAD + size); SKB_DATA_ALIGN(xdp->data_end -
xdp->data_hard_start);
#endif #endif
struct sk_buff *skb; struct sk_buff *skb;
/* prefetch first cache line of first page */ /* prefetch first cache line of first page */
net_prefetch(va); net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */ /* build an skb around the page buffer */
skb = build_skb(va - IGB_SKB_PAD, truesize); skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb)) if (unlikely(!skb))
return NULL; return NULL;
/* update pointers within the skb to store the data */ /* update pointers within the skb to store the data */
skb_reserve(skb, IGB_SKB_PAD); skb_reserve(skb, xdp->data - xdp->data_hard_start);
__skb_put(skb, size); __skb_put(skb, xdp->data_end - xdp->data);
/* pull timestamp out of packet data */ /* pull timestamp out of packet data */
if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
@ -8129,6 +8380,79 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
return skb; return skb;
} }
static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
struct igb_ring *rx_ring,
struct xdp_buff *xdp)
{
int err, result = IGB_XDP_PASS;
struct bpf_prog *xdp_prog;
u32 act;
rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (!xdp_prog)
goto xdp_out;
prefetchw(xdp->data_hard_start); /* xdp_frame write */
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
result = igb_xdp_xmit_back(adapter, xdp);
break;
case XDP_REDIRECT:
err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
if (!err)
result = IGB_XDP_REDIR;
else
result = IGB_XDP_CONSUMED;
break;
default:
bpf_warn_invalid_xdp_action(act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
result = IGB_XDP_CONSUMED;
break;
}
xdp_out:
rcu_read_unlock();
return ERR_PTR(-result);
}
static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;
#if (PAGE_SIZE < 8192)
truesize = igb_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IGB_SKB_PAD + size) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}
static void igb_rx_buffer_flip(struct igb_ring *rx_ring,
struct igb_rx_buffer *rx_buffer,
unsigned int size)
{
unsigned int truesize = igb_rx_frame_truesize(rx_ring, size);
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
#endif
}
static inline void igb_rx_checksum(struct igb_ring *ring, static inline void igb_rx_checksum(struct igb_ring *ring,
union e1000_adv_rx_desc *rx_desc, union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb) struct sk_buff *skb)
@ -8224,6 +8548,10 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc, union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb) struct sk_buff *skb)
{ {
/* XDP packets use error pointer so abort at this point */
if (IS_ERR(skb))
return true;
if (unlikely((igb_test_staterr(rx_desc, if (unlikely((igb_test_staterr(rx_desc,
E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
struct net_device *netdev = rx_ring->netdev; struct net_device *netdev = rx_ring->netdev;
@ -8282,6 +8610,11 @@ static void igb_process_skb_fields(struct igb_ring *rx_ring,
skb->protocol = eth_type_trans(skb, rx_ring->netdev); skb->protocol = eth_type_trans(skb, rx_ring->netdev);
} }
static unsigned int igb_rx_offset(struct igb_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? IGB_SKB_PAD : 0;
}
static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring,
const unsigned int size) const unsigned int size)
{ {
@ -8325,10 +8658,20 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring,
static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
{ {
struct igb_adapter *adapter = q_vector->adapter;
struct igb_ring *rx_ring = q_vector->rx.ring; struct igb_ring *rx_ring = q_vector->rx.ring;
struct sk_buff *skb = rx_ring->skb; struct sk_buff *skb = rx_ring->skb;
unsigned int total_bytes = 0, total_packets = 0; unsigned int total_bytes = 0, total_packets = 0;
u16 cleaned_count = igb_desc_unused(rx_ring); u16 cleaned_count = igb_desc_unused(rx_ring);
unsigned int xdp_xmit = 0;
struct xdp_buff xdp;
xdp.rxq = &rx_ring->xdp_rxq;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
xdp.frame_sz = igb_rx_frame_truesize(rx_ring, 0);
#endif
while (likely(total_packets < budget)) { while (likely(total_packets < budget)) {
union e1000_adv_rx_desc *rx_desc; union e1000_adv_rx_desc *rx_desc;
@ -8355,13 +8698,38 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
rx_buffer = igb_get_rx_buffer(rx_ring, size); rx_buffer = igb_get_rx_buffer(rx_ring, size);
/* retrieve a buffer from the ring */ /* retrieve a buffer from the ring */
if (skb) if (!skb) {
xdp.data = page_address(rx_buffer->page) +
rx_buffer->page_offset;
xdp.data_meta = xdp.data;
xdp.data_hard_start = xdp.data -
igb_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size);
#endif
skb = igb_run_xdp(adapter, rx_ring, &xdp);
}
if (IS_ERR(skb)) {
unsigned int xdp_res = -PTR_ERR(skb);
if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) {
xdp_xmit |= xdp_res;
igb_rx_buffer_flip(rx_ring, rx_buffer, size);
} else {
rx_buffer->pagecnt_bias++;
}
total_packets++;
total_bytes += size;
} else if (skb)
igb_add_rx_frag(rx_ring, rx_buffer, skb, size); igb_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring)) else if (ring_uses_build_skb(rx_ring))
skb = igb_build_skb(rx_ring, rx_buffer, rx_desc, size); skb = igb_build_skb(rx_ring, rx_buffer, &xdp, rx_desc);
else else
skb = igb_construct_skb(rx_ring, rx_buffer, skb = igb_construct_skb(rx_ring, rx_buffer,
rx_desc, size); &xdp, rx_desc);
/* exit if we failed to retrieve a buffer */ /* exit if we failed to retrieve a buffer */
if (!skb) { if (!skb) {
@ -8401,6 +8769,15 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
/* place incomplete frames back on ring for completion */ /* place incomplete frames back on ring for completion */
rx_ring->skb = skb; rx_ring->skb = skb;
if (xdp_xmit & IGB_XDP_REDIR)
xdp_do_flush_map();
if (xdp_xmit & IGB_XDP_TX) {
struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
igb_xdp_ring_update_tail(tx_ring);
}
u64_stats_update_begin(&rx_ring->rx_syncp); u64_stats_update_begin(&rx_ring->rx_syncp);
rx_ring->rx_stats.packets += total_packets; rx_ring->rx_stats.packets += total_packets;
rx_ring->rx_stats.bytes += total_bytes; rx_ring->rx_stats.bytes += total_bytes;
@ -8414,11 +8791,6 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
return total_packets; return total_packets;
} }
static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring)
{
return ring_uses_build_skb(rx_ring) ? IGB_SKB_PAD : 0;
}
static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
struct igb_rx_buffer *bi) struct igb_rx_buffer *bi)
{ {
@ -8455,7 +8827,8 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
bi->dma = dma; bi->dma = dma;
bi->page = page; bi->page = page;
bi->page_offset = igb_rx_offset(rx_ring); bi->page_offset = igb_rx_offset(rx_ring);
bi->pagecnt_bias = 1; page_ref_add(page, USHRT_MAX - 1);
bi->pagecnt_bias = USHRT_MAX;
return true; return true;
} }