Merge branch 'XDP-redirect-memory-return-API'
Jesper Dangaard Brouer says: ==================== XDP redirect memory return API Submitted against net-next, as it contains NIC driver changes. This patchset works towards supporting different XDP RX-ring memory allocators, as this will be needed by the AF_XDP zero-copy mode. The patchset uses mlx5 as the sample driver, which gets an XDP_REDIRECT RX-mode implementation, but not ndo_xdp_xmit (as this API is subject to change throughout the patchset). A new struct xdp_frame is introduced (modeled after cpumap xdp_pkt), and both ndo_xdp_xmit and the new xdp_return_frame end up using it. Support for a driver-supplied allocator is implemented, and a refurbished version of page_pool is the first return allocator type introduced. This will be an integration point for AF_XDP zero-copy. The mlx5 driver evolves into using the page_pool, and sees a performance increase (with ndo_xdp_xmit out via the ixgbe driver) from 6Mpps to 12Mpps. The patchset stops at 16 patches (one over the limit), but more API changes are planned, specifically extending the ndo_xdp_xmit and xdp_return_frame APIs to support bulking, as this will address some known limits.
V2: Updated according to Tariq's feedback V3: Updated based on feedback from Jason Wang and Alex Duyck V4: Updated based on feedback from Tariq and Jason V5: Fix SPDX license, add Tariq's reviews, improve patch desc for perf test V6: Updated based on feedback from Eric Dumazet and Alex Duyck V7: Adapt to i40e that got XDP_REDIRECT support in-between V8: Updated based on feedback from kbuild test robot, and adjust for mlx5 changes; page_pool only compiled into kernel when drivers Kconfig 'select' feature V9: Remove some inline statements, let compiler decide what to inline; fix return value in virtio_net driver; adjust for mlx5 changes in-between submissions V10: Minor adjust for mlx5 requested by Tariq; resubmit against net-next V11: Avoid leaking info stored in frame data on page reuse ==================== Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
684009d4fd
|
@ -638,7 +638,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
|
|||
if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
|
||||
kfree(tx_buffer->raw_buf);
|
||||
else if (ring_is_xdp(ring))
|
||||
page_frag_free(tx_buffer->raw_buf);
|
||||
xdp_return_frame(tx_buffer->xdpf);
|
||||
else
|
||||
dev_kfree_skb_any(tx_buffer->skb);
|
||||
if (dma_unmap_len(tx_buffer, len))
|
||||
|
@ -841,7 +841,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
|
|||
|
||||
/* free the skb/XDP data */
|
||||
if (ring_is_xdp(tx_ring))
|
||||
page_frag_free(tx_buf->raw_buf);
|
||||
xdp_return_frame(tx_buf->xdpf);
|
||||
else
|
||||
napi_consume_skb(tx_buf->skb, napi_budget);
|
||||
|
||||
|
@ -2203,9 +2203,20 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
|
|||
#define I40E_XDP_CONSUMED 1
|
||||
#define I40E_XDP_TX 2
|
||||
|
||||
static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
|
||||
static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
|
||||
struct i40e_ring *xdp_ring);
|
||||
|
||||
static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp,
|
||||
struct i40e_ring *xdp_ring)
|
||||
{
|
||||
struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
|
||||
|
||||
if (unlikely(!xdpf))
|
||||
return I40E_XDP_CONSUMED;
|
||||
|
||||
return i40e_xmit_xdp_ring(xdpf, xdp_ring);
|
||||
}
|
||||
|
||||
/**
|
||||
* i40e_run_xdp - run an XDP program
|
||||
* @rx_ring: Rx ring being processed
|
||||
|
@ -2225,13 +2236,15 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
|
|||
if (!xdp_prog)
|
||||
goto xdp_out;
|
||||
|
||||
prefetchw(xdp->data_hard_start); /* xdp_frame write */
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, xdp);
|
||||
switch (act) {
|
||||
case XDP_PASS:
|
||||
break;
|
||||
case XDP_TX:
|
||||
xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
|
||||
result = i40e_xmit_xdp_ring(xdp, xdp_ring);
|
||||
result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
|
||||
break;
|
||||
case XDP_REDIRECT:
|
||||
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
|
||||
|
@ -3478,13 +3491,13 @@ dma_error:
|
|||
* @xdp: data to transmit
|
||||
* @xdp_ring: XDP Tx ring
|
||||
**/
|
||||
static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
|
||||
static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
|
||||
struct i40e_ring *xdp_ring)
|
||||
{
|
||||
u32 size = xdp->data_end - xdp->data;
|
||||
u16 i = xdp_ring->next_to_use;
|
||||
struct i40e_tx_buffer *tx_bi;
|
||||
struct i40e_tx_desc *tx_desc;
|
||||
u32 size = xdpf->len;
|
||||
dma_addr_t dma;
|
||||
|
||||
if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
|
||||
|
@ -3492,14 +3505,14 @@ static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
|
|||
return I40E_XDP_CONSUMED;
|
||||
}
|
||||
|
||||
dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE);
|
||||
dma = dma_map_single(xdp_ring->dev, xdpf->data, size, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(xdp_ring->dev, dma))
|
||||
return I40E_XDP_CONSUMED;
|
||||
|
||||
tx_bi = &xdp_ring->tx_bi[i];
|
||||
tx_bi->bytecount = size;
|
||||
tx_bi->gso_segs = 1;
|
||||
tx_bi->raw_buf = xdp->data;
|
||||
tx_bi->xdpf = xdpf;
|
||||
|
||||
/* record length, and DMA address */
|
||||
dma_unmap_len_set(tx_bi, len, size);
|
||||
|
@ -3675,7 +3688,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
|
|||
*
|
||||
* Returns Zero if sent, else an error code
|
||||
**/
|
||||
int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
||||
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
|
||||
{
|
||||
struct i40e_netdev_priv *np = netdev_priv(dev);
|
||||
unsigned int queue_index = smp_processor_id();
|
||||
|
@ -3688,7 +3701,7 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
|||
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
|
||||
return -ENXIO;
|
||||
|
||||
err = i40e_xmit_xdp_ring(xdp, vsi->xdp_rings[queue_index]);
|
||||
err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
|
||||
if (err != I40E_XDP_TX)
|
||||
return -ENOSPC;
|
||||
|
||||
|
|
|
@ -306,6 +306,7 @@ static inline unsigned int i40e_txd_use_count(unsigned int size)
|
|||
struct i40e_tx_buffer {
|
||||
struct i40e_tx_desc *next_to_watch;
|
||||
union {
|
||||
struct xdp_frame *xdpf;
|
||||
struct sk_buff *skb;
|
||||
void *raw_buf;
|
||||
};
|
||||
|
@ -510,7 +511,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
|
|||
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
|
||||
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
|
||||
bool __i40e_chk_linearize(struct sk_buff *skb);
|
||||
int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp);
|
||||
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
|
||||
void i40e_xdp_flush(struct net_device *dev);
|
||||
|
||||
/**
|
||||
|
|
|
@ -241,8 +241,7 @@ struct ixgbe_tx_buffer {
|
|||
unsigned long time_stamp;
|
||||
union {
|
||||
struct sk_buff *skb;
|
||||
/* XDP uses address ptr on irq_clean */
|
||||
void *data;
|
||||
struct xdp_frame *xdpf;
|
||||
};
|
||||
unsigned int bytecount;
|
||||
unsigned short gso_segs;
|
||||
|
|
|
@ -1216,7 +1216,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
|
|||
|
||||
/* free the skb */
|
||||
if (ring_is_xdp(tx_ring))
|
||||
page_frag_free(tx_buffer->data);
|
||||
xdp_return_frame(tx_buffer->xdpf);
|
||||
else
|
||||
napi_consume_skb(tx_buffer->skb, napi_budget);
|
||||
|
||||
|
@ -2262,7 +2262,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
|
|||
#define IXGBE_XDP_TX 2
|
||||
|
||||
static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
|
||||
struct xdp_buff *xdp);
|
||||
struct xdp_frame *xdpf);
|
||||
|
||||
static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
|
||||
struct ixgbe_ring *rx_ring,
|
||||
|
@ -2270,6 +2270,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
|
|||
{
|
||||
int err, result = IXGBE_XDP_PASS;
|
||||
struct bpf_prog *xdp_prog;
|
||||
struct xdp_frame *xdpf;
|
||||
u32 act;
|
||||
|
||||
rcu_read_lock();
|
||||
|
@ -2278,12 +2279,19 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
|
|||
if (!xdp_prog)
|
||||
goto xdp_out;
|
||||
|
||||
prefetchw(xdp->data_hard_start); /* xdp_frame write */
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, xdp);
|
||||
switch (act) {
|
||||
case XDP_PASS:
|
||||
break;
|
||||
case XDP_TX:
|
||||
result = ixgbe_xmit_xdp_ring(adapter, xdp);
|
||||
xdpf = convert_to_xdp_frame(xdp);
|
||||
if (unlikely(!xdpf)) {
|
||||
result = IXGBE_XDP_CONSUMED;
|
||||
break;
|
||||
}
|
||||
result = ixgbe_xmit_xdp_ring(adapter, xdpf);
|
||||
break;
|
||||
case XDP_REDIRECT:
|
||||
err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
|
||||
|
@ -5797,7 +5805,7 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
|
|||
|
||||
/* Free all the Tx ring sk_buffs */
|
||||
if (ring_is_xdp(tx_ring))
|
||||
page_frag_free(tx_buffer->data);
|
||||
xdp_return_frame(tx_buffer->xdpf);
|
||||
else
|
||||
dev_kfree_skb_any(tx_buffer->skb);
|
||||
|
||||
|
@ -6370,7 +6378,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
|
|||
struct device *dev = rx_ring->dev;
|
||||
int orig_node = dev_to_node(dev);
|
||||
int ring_node = -1;
|
||||
int size;
|
||||
int size, err;
|
||||
|
||||
size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
|
||||
|
||||
|
@ -6407,6 +6415,13 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
|
|||
rx_ring->queue_index) < 0)
|
||||
goto err;
|
||||
|
||||
err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq,
|
||||
MEM_TYPE_PAGE_SHARED, NULL);
|
||||
if (err) {
|
||||
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
|
||||
goto err;
|
||||
}
|
||||
|
||||
rx_ring->xdp_prog = adapter->xdp_prog;
|
||||
|
||||
return 0;
|
||||
|
@ -8336,7 +8351,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
|
|||
}
|
||||
|
||||
static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
|
||||
struct xdp_buff *xdp)
|
||||
struct xdp_frame *xdpf)
|
||||
{
|
||||
struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
|
||||
struct ixgbe_tx_buffer *tx_buffer;
|
||||
|
@ -8345,12 +8360,12 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
|
|||
dma_addr_t dma;
|
||||
u16 i;
|
||||
|
||||
len = xdp->data_end - xdp->data;
|
||||
len = xdpf->len;
|
||||
|
||||
if (unlikely(!ixgbe_desc_unused(ring)))
|
||||
return IXGBE_XDP_CONSUMED;
|
||||
|
||||
dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
|
||||
dma = dma_map_single(ring->dev, xdpf->data, len, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(ring->dev, dma))
|
||||
return IXGBE_XDP_CONSUMED;
|
||||
|
||||
|
@ -8365,7 +8380,8 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
|
|||
|
||||
dma_unmap_len_set(tx_buffer, len, len);
|
||||
dma_unmap_addr_set(tx_buffer, dma, dma);
|
||||
tx_buffer->data = xdp->data;
|
||||
tx_buffer->xdpf = xdpf;
|
||||
|
||||
tx_desc->read.buffer_addr = cpu_to_le64(dma);
|
||||
|
||||
/* put descriptor type bits */
|
||||
|
@ -9996,7 +10012,7 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
|
|||
}
|
||||
}
|
||||
|
||||
static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
||||
static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
|
||||
{
|
||||
struct ixgbe_adapter *adapter = netdev_priv(dev);
|
||||
struct ixgbe_ring *ring;
|
||||
|
@ -10012,7 +10028,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
|||
if (unlikely(!ring))
|
||||
return -ENXIO;
|
||||
|
||||
err = ixgbe_xmit_xdp_ring(adapter, xdp);
|
||||
err = ixgbe_xmit_xdp_ring(adapter, xdpf);
|
||||
if (err != IXGBE_XDP_TX)
|
||||
return -ENOSPC;
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ config MLX5_CORE_EN
|
|||
bool "Mellanox Technologies ConnectX-4 Ethernet support"
|
||||
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
|
||||
depends on IPV6=y || IPV6=n || MLX5_CORE=m
|
||||
select PAGE_POOL
|
||||
default n
|
||||
---help---
|
||||
Ethernet support in Mellanox Technologies ConnectX-4 NIC.
|
||||
|
|
|
@ -53,6 +53,8 @@
|
|||
#include "mlx5_core.h"
|
||||
#include "en_stats.h"
|
||||
|
||||
struct page_pool;
|
||||
|
||||
#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
|
||||
|
||||
#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
|
||||
|
@ -392,6 +394,7 @@ struct mlx5e_xdpsq {
|
|||
struct {
|
||||
struct mlx5e_dma_info *di;
|
||||
bool doorbell;
|
||||
bool redirect_flush;
|
||||
} db;
|
||||
|
||||
/* read only */
|
||||
|
@ -533,6 +536,7 @@ struct mlx5e_rq {
|
|||
unsigned int hw_mtu;
|
||||
struct mlx5e_xdpsq xdpsq;
|
||||
DECLARE_BITMAP(flags, 8);
|
||||
struct page_pool *page_pool;
|
||||
|
||||
/* control */
|
||||
struct mlx5_wq_ctrl wq_ctrl;
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include <linux/mlx5/fs.h>
|
||||
#include <net/vxlan.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <net/page_pool.h>
|
||||
#include "eswitch.h"
|
||||
#include "en.h"
|
||||
#include "en_tc.h"
|
||||
|
@ -389,10 +390,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
|
|||
struct mlx5e_rq_param *rqp,
|
||||
struct mlx5e_rq *rq)
|
||||
{
|
||||
struct page_pool_params pp_params = { 0 };
|
||||
struct mlx5_core_dev *mdev = c->mdev;
|
||||
void *rqc = rqp->rqc;
|
||||
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
|
||||
u32 byte_count;
|
||||
u32 byte_count, pool_size;
|
||||
int npages;
|
||||
int wq_sz;
|
||||
int err;
|
||||
|
@ -432,9 +434,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
|
|||
|
||||
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
|
||||
rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
|
||||
pool_size = 1 << params->log_rq_mtu_frames;
|
||||
|
||||
switch (rq->wq_type) {
|
||||
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
|
||||
|
||||
pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
|
||||
rq->post_wqes = mlx5e_post_rx_mpwqes;
|
||||
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
|
||||
|
||||
|
@ -512,6 +517,32 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
|
|||
rq->mkey_be = c->mkey_be;
|
||||
}
|
||||
|
||||
/* Create a page_pool and register it with rxq */
|
||||
pp_params.order = rq->buff.page_order;
|
||||
pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
|
||||
pp_params.pool_size = pool_size;
|
||||
pp_params.nid = cpu_to_node(c->cpu);
|
||||
pp_params.dev = c->pdev;
|
||||
pp_params.dma_dir = rq->buff.map_dir;
|
||||
|
||||
/* page_pool can be used even when there is no rq->xdp_prog,
|
||||
* given page_pool does not handle DMA mapping there is no
|
||||
* required state to clear. And page_pool gracefully handle
|
||||
* elevated refcnt.
|
||||
*/
|
||||
rq->page_pool = page_pool_create(&pp_params);
|
||||
if (IS_ERR(rq->page_pool)) {
|
||||
if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
|
||||
kfree(rq->wqe.frag_info);
|
||||
err = PTR_ERR(rq->page_pool);
|
||||
rq->page_pool = NULL;
|
||||
goto err_rq_wq_destroy;
|
||||
}
|
||||
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
|
||||
MEM_TYPE_PAGE_POOL, rq->page_pool);
|
||||
if (err)
|
||||
goto err_rq_wq_destroy;
|
||||
|
||||
for (i = 0; i < wq_sz; i++) {
|
||||
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
|
||||
|
||||
|
@ -548,6 +579,8 @@ err_rq_wq_destroy:
|
|||
if (rq->xdp_prog)
|
||||
bpf_prog_put(rq->xdp_prog);
|
||||
xdp_rxq_info_unreg(&rq->xdp_rxq);
|
||||
if (rq->page_pool)
|
||||
page_pool_destroy(rq->page_pool);
|
||||
mlx5_wq_destroy(&rq->wq_ctrl);
|
||||
|
||||
return err;
|
||||
|
@ -561,6 +594,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
|
|||
bpf_prog_put(rq->xdp_prog);
|
||||
|
||||
xdp_rxq_info_unreg(&rq->xdp_rxq);
|
||||
if (rq->page_pool)
|
||||
page_pool_destroy(rq->page_pool);
|
||||
|
||||
switch (rq->wq_type) {
|
||||
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include <linux/bpf_trace.h>
|
||||
#include <net/busy_poll.h>
|
||||
#include <net/ip6_checksum.h>
|
||||
#include <net/page_pool.h>
|
||||
#include "en.h"
|
||||
#include "en_tc.h"
|
||||
#include "eswitch.h"
|
||||
|
@ -221,7 +222,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
|
|||
if (mlx5e_rx_cache_get(rq, dma_info))
|
||||
return 0;
|
||||
|
||||
dma_info->page = dev_alloc_pages(rq->buff.page_order);
|
||||
dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
|
||||
if (unlikely(!dma_info->page))
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -236,15 +237,26 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void mlx5e_page_dma_unmap(struct mlx5e_rq *rq,
|
||||
struct mlx5e_dma_info *dma_info)
|
||||
{
|
||||
dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
|
||||
rq->buff.map_dir);
|
||||
}
|
||||
|
||||
void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
|
||||
bool recycle)
|
||||
{
|
||||
if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
|
||||
return;
|
||||
if (likely(recycle)) {
|
||||
if (mlx5e_rx_cache_put(rq, dma_info))
|
||||
return;
|
||||
|
||||
dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
|
||||
rq->buff.map_dir);
|
||||
put_page(dma_info->page);
|
||||
mlx5e_page_dma_unmap(rq, dma_info);
|
||||
page_pool_recycle_direct(rq->page_pool, dma_info->page);
|
||||
} else {
|
||||
mlx5e_page_dma_unmap(rq, dma_info);
|
||||
put_page(dma_info->page);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,
|
||||
|
@ -800,9 +812,10 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
|
|||
struct mlx5e_dma_info *di,
|
||||
void *va, u16 *rx_headroom, u32 *len)
|
||||
{
|
||||
const struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
|
||||
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
|
||||
struct xdp_buff xdp;
|
||||
u32 act;
|
||||
int err;
|
||||
|
||||
if (!prog)
|
||||
return false;
|
||||
|
@ -823,6 +836,15 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
|
|||
if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp)))
|
||||
trace_xdp_exception(rq->netdev, prog, act);
|
||||
return true;
|
||||
case XDP_REDIRECT:
|
||||
/* When XDP enabled then page-refcnt==1 here */
|
||||
err = xdp_do_redirect(rq->netdev, &xdp, prog);
|
||||
if (!err) {
|
||||
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
|
||||
rq->xdpsq.db.redirect_flush = true;
|
||||
mlx5e_page_dma_unmap(rq, di);
|
||||
}
|
||||
return true;
|
||||
default:
|
||||
bpf_warn_invalid_xdp_action(act);
|
||||
case XDP_ABORTED:
|
||||
|
@ -868,6 +890,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
|
|||
|
||||
dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
|
||||
frag_size, DMA_FROM_DEVICE);
|
||||
prefetchw(va); /* xdp_frame data area */
|
||||
prefetch(data);
|
||||
wi->offset += frag_size;
|
||||
|
||||
|
@ -1140,6 +1163,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
|
|||
xdpsq->db.doorbell = false;
|
||||
}
|
||||
|
||||
if (xdpsq->db.redirect_flush) {
|
||||
xdp_do_flush_map();
|
||||
xdpsq->db.redirect_flush = false;
|
||||
}
|
||||
|
||||
mlx5_cqwq_update_db_record(&cq->wq);
|
||||
|
||||
/* ensure cq space is freed before enabling more cqes */
|
||||
|
|
|
@ -248,11 +248,11 @@ struct veth {
|
|||
__be16 h_vlan_TCI;
|
||||
};
|
||||
|
||||
bool tun_is_xdp_buff(void *ptr)
|
||||
bool tun_is_xdp_frame(void *ptr)
|
||||
{
|
||||
return (unsigned long)ptr & TUN_XDP_FLAG;
|
||||
}
|
||||
EXPORT_SYMBOL(tun_is_xdp_buff);
|
||||
EXPORT_SYMBOL(tun_is_xdp_frame);
|
||||
|
||||
void *tun_xdp_to_ptr(void *ptr)
|
||||
{
|
||||
|
@ -660,10 +660,10 @@ void tun_ptr_free(void *ptr)
|
|||
{
|
||||
if (!ptr)
|
||||
return;
|
||||
if (tun_is_xdp_buff(ptr)) {
|
||||
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
|
||||
if (tun_is_xdp_frame(ptr)) {
|
||||
struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
|
||||
|
||||
put_page(virt_to_head_page(xdp->data));
|
||||
xdp_return_frame(xdpf);
|
||||
} else {
|
||||
__skb_array_destroy_skb(ptr);
|
||||
}
|
||||
|
@ -854,6 +854,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
|
|||
tun->dev, tfile->queue_index);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
|
||||
MEM_TYPE_PAGE_SHARED, NULL);
|
||||
if (err < 0) {
|
||||
xdp_rxq_info_unreg(&tfile->xdp_rxq);
|
||||
goto out;
|
||||
}
|
||||
err = 0;
|
||||
}
|
||||
|
||||
|
@ -1295,21 +1301,13 @@ static const struct net_device_ops tun_netdev_ops = {
|
|||
.ndo_get_stats64 = tun_net_get_stats64,
|
||||
};
|
||||
|
||||
static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
||||
static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
|
||||
{
|
||||
struct tun_struct *tun = netdev_priv(dev);
|
||||
struct xdp_buff *buff = xdp->data_hard_start;
|
||||
int headroom = xdp->data - xdp->data_hard_start;
|
||||
struct tun_file *tfile;
|
||||
u32 numqueues;
|
||||
int ret = 0;
|
||||
|
||||
/* Assure headroom is available and buff is properly aligned */
|
||||
if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
|
||||
return -ENOSPC;
|
||||
|
||||
*buff = *xdp;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
numqueues = READ_ONCE(tun->numqueues);
|
||||
|
@ -1323,7 +1321,7 @@ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
|||
/* Encode the XDP flag into lowest bit for consumer to differ
|
||||
* XDP buffer from sk_buff.
|
||||
*/
|
||||
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
|
||||
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
|
||||
this_cpu_inc(tun->pcpu_stats->tx_dropped);
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
|
@ -1333,6 +1331,16 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
|
||||
{
|
||||
struct xdp_frame *frame = convert_to_xdp_frame(xdp);
|
||||
|
||||
if (unlikely(!frame))
|
||||
return -EOVERFLOW;
|
||||
|
||||
return tun_xdp_xmit(dev, frame);
|
||||
}
|
||||
|
||||
static void tun_xdp_flush(struct net_device *dev)
|
||||
{
|
||||
struct tun_struct *tun = netdev_priv(dev);
|
||||
|
@ -1680,7 +1688,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
|
|||
case XDP_TX:
|
||||
get_page(alloc_frag->page);
|
||||
alloc_frag->offset += buflen;
|
||||
if (tun_xdp_xmit(tun->dev, &xdp))
|
||||
if (tun_xdp_tx(tun->dev, &xdp))
|
||||
goto err_redirect;
|
||||
tun_xdp_flush(tun->dev);
|
||||
rcu_read_unlock();
|
||||
|
@ -2001,11 +2009,11 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
|||
|
||||
static ssize_t tun_put_user_xdp(struct tun_struct *tun,
|
||||
struct tun_file *tfile,
|
||||
struct xdp_buff *xdp,
|
||||
struct xdp_frame *xdp_frame,
|
||||
struct iov_iter *iter)
|
||||
{
|
||||
int vnet_hdr_sz = 0;
|
||||
size_t size = xdp->data_end - xdp->data;
|
||||
size_t size = xdp_frame->len;
|
||||
struct tun_pcpu_stats *stats;
|
||||
size_t ret;
|
||||
|
||||
|
@ -2021,7 +2029,7 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
|
|||
iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
|
||||
}
|
||||
|
||||
ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
|
||||
ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
|
||||
|
||||
stats = get_cpu_ptr(tun->pcpu_stats);
|
||||
u64_stats_update_begin(&stats->syncp);
|
||||
|
@ -2189,11 +2197,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
|
|||
return err;
|
||||
}
|
||||
|
||||
if (tun_is_xdp_buff(ptr)) {
|
||||
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
|
||||
if (tun_is_xdp_frame(ptr)) {
|
||||
struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
|
||||
|
||||
ret = tun_put_user_xdp(tun, tfile, xdp, to);
|
||||
put_page(virt_to_head_page(xdp->data));
|
||||
ret = tun_put_user_xdp(tun, tfile, xdpf, to);
|
||||
xdp_return_frame(xdpf);
|
||||
} else {
|
||||
struct sk_buff *skb = ptr;
|
||||
|
||||
|
@ -2432,10 +2440,10 @@ out_free:
|
|||
static int tun_ptr_peek_len(void *ptr)
|
||||
{
|
||||
if (likely(ptr)) {
|
||||
if (tun_is_xdp_buff(ptr)) {
|
||||
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
|
||||
if (tun_is_xdp_frame(ptr)) {
|
||||
struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
|
||||
|
||||
return xdp->data_end - xdp->data;
|
||||
return xdpf->len;
|
||||
}
|
||||
return __skb_array_len_with_tag(ptr);
|
||||
} else {
|
||||
|
|
|
@ -415,46 +415,51 @@ static void virtnet_xdp_flush(struct net_device *dev)
|
|||
virtqueue_kick(sq->vq);
|
||||
}
|
||||
|
||||
static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
|
||||
struct xdp_buff *xdp)
|
||||
static int __virtnet_xdp_xmit(struct virtnet_info *vi,
|
||||
struct xdp_frame *xdpf)
|
||||
{
|
||||
struct virtio_net_hdr_mrg_rxbuf *hdr;
|
||||
unsigned int len;
|
||||
struct xdp_frame *xdpf_sent;
|
||||
struct send_queue *sq;
|
||||
unsigned int len;
|
||||
unsigned int qp;
|
||||
void *xdp_sent;
|
||||
int err;
|
||||
|
||||
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
|
||||
sq = &vi->sq[qp];
|
||||
|
||||
/* Free up any pending old buffers before queueing new ones. */
|
||||
while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
|
||||
struct page *sent_page = virt_to_head_page(xdp_sent);
|
||||
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
|
||||
xdp_return_frame(xdpf_sent);
|
||||
|
||||
put_page(sent_page);
|
||||
}
|
||||
/* virtqueue want to use data area in-front of packet */
|
||||
if (unlikely(xdpf->metasize > 0))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
xdp->data -= vi->hdr_len;
|
||||
if (unlikely(xdpf->headroom < vi->hdr_len))
|
||||
return -EOVERFLOW;
|
||||
|
||||
/* Make room for virtqueue hdr (also change xdpf->headroom?) */
|
||||
xdpf->data -= vi->hdr_len;
|
||||
/* Zero header and leave csum up to XDP layers */
|
||||
hdr = xdp->data;
|
||||
hdr = xdpf->data;
|
||||
memset(hdr, 0, vi->hdr_len);
|
||||
xdpf->len += vi->hdr_len;
|
||||
|
||||
sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
|
||||
sg_init_one(sq->sg, xdpf->data, xdpf->len);
|
||||
|
||||
err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
|
||||
err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
|
||||
if (unlikely(err))
|
||||
return false; /* Caller handle free/refcnt */
|
||||
return -ENOSPC; /* Caller handle free/refcnt */
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
||||
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
|
||||
{
|
||||
struct virtnet_info *vi = netdev_priv(dev);
|
||||
struct receive_queue *rq = vi->rq;
|
||||
struct bpf_prog *xdp_prog;
|
||||
bool sent;
|
||||
|
||||
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
|
||||
* indicate XDP resources have been successfully allocated.
|
||||
|
@ -463,10 +468,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
|||
if (!xdp_prog)
|
||||
return -ENXIO;
|
||||
|
||||
sent = __virtnet_xdp_xmit(vi, xdp);
|
||||
if (!sent)
|
||||
return -ENOSPC;
|
||||
return 0;
|
||||
return __virtnet_xdp_xmit(vi, xdpf);
|
||||
}
|
||||
|
||||
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
|
||||
|
@ -555,7 +557,6 @@ static struct sk_buff *receive_small(struct net_device *dev,
|
|||
struct page *page = virt_to_head_page(buf);
|
||||
unsigned int delta = 0;
|
||||
struct page *xdp_page;
|
||||
bool sent;
|
||||
int err;
|
||||
|
||||
len -= vi->hdr_len;
|
||||
|
@ -564,6 +565,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
|
|||
xdp_prog = rcu_dereference(rq->xdp_prog);
|
||||
if (xdp_prog) {
|
||||
struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
|
||||
struct xdp_frame *xdpf;
|
||||
struct xdp_buff xdp;
|
||||
void *orig_data;
|
||||
u32 act;
|
||||
|
@ -606,8 +608,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
|
|||
delta = orig_data - xdp.data;
|
||||
break;
|
||||
case XDP_TX:
|
||||
sent = __virtnet_xdp_xmit(vi, &xdp);
|
||||
if (unlikely(!sent)) {
|
||||
xdpf = convert_to_xdp_frame(&xdp);
|
||||
if (unlikely(!xdpf))
|
||||
goto err_xdp;
|
||||
err = __virtnet_xdp_xmit(vi, xdpf);
|
||||
if (unlikely(err)) {
|
||||
trace_xdp_exception(vi->dev, xdp_prog, act);
|
||||
goto err_xdp;
|
||||
}
|
||||
|
@ -690,7 +695,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
|
|||
struct bpf_prog *xdp_prog;
|
||||
unsigned int truesize;
|
||||
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
|
||||
bool sent;
|
||||
int err;
|
||||
|
||||
head_skb = NULL;
|
||||
|
@ -698,6 +702,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
|
|||
rcu_read_lock();
|
||||
xdp_prog = rcu_dereference(rq->xdp_prog);
|
||||
if (xdp_prog) {
|
||||
struct xdp_frame *xdpf;
|
||||
struct page *xdp_page;
|
||||
struct xdp_buff xdp;
|
||||
void *data;
|
||||
|
@ -762,8 +767,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
|
|||
}
|
||||
break;
|
||||
case XDP_TX:
|
||||
sent = __virtnet_xdp_xmit(vi, &xdp);
|
||||
if (unlikely(!sent)) {
|
||||
xdpf = convert_to_xdp_frame(&xdp);
|
||||
if (unlikely(!xdpf))
|
||||
goto err_xdp;
|
||||
err = __virtnet_xdp_xmit(vi, xdpf);
|
||||
if (unlikely(err)) {
|
||||
trace_xdp_exception(vi->dev, xdp_prog, act);
|
||||
if (unlikely(xdp_page != page))
|
||||
put_page(xdp_page);
|
||||
|
@ -1301,6 +1309,13 @@ static int virtnet_open(struct net_device *dev)
|
|||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
|
||||
MEM_TYPE_PAGE_SHARED, NULL);
|
||||
if (err < 0) {
|
||||
xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
|
||||
return err;
|
||||
}
|
||||
|
||||
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
|
||||
virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include <linux/skbuff.h>
|
||||
|
||||
#include <net/sock.h>
|
||||
#include <net/xdp.h>
|
||||
|
||||
#include "vhost.h"
|
||||
|
||||
|
@ -181,10 +182,10 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
|
|||
|
||||
static int vhost_net_buf_peek_len(void *ptr)
|
||||
{
|
||||
if (tun_is_xdp_buff(ptr)) {
|
||||
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
|
||||
if (tun_is_xdp_frame(ptr)) {
|
||||
struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
|
||||
|
||||
return xdp->data_end - xdp->data;
|
||||
return xdpf->len;
|
||||
}
|
||||
|
||||
return __skb_array_len_with_tag(ptr);
|
||||
|
|
|
@ -30,6 +30,7 @@ struct sock;
|
|||
struct seccomp_data;
|
||||
struct bpf_prog_aux;
|
||||
struct xdp_rxq_info;
|
||||
struct xdp_buff;
|
||||
|
||||
/* ArgX, context and stack frame pointer register positions. Note,
|
||||
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
|
||||
|
@ -500,14 +501,6 @@ struct bpf_skb_data_end {
|
|||
void *data_end;
|
||||
};
|
||||
|
||||
struct xdp_buff {
|
||||
void *data;
|
||||
void *data_end;
|
||||
void *data_meta;
|
||||
void *data_hard_start;
|
||||
struct xdp_rxq_info *rxq;
|
||||
};
|
||||
|
||||
struct sk_msg_buff {
|
||||
void *data;
|
||||
void *data_end;
|
||||
|
@ -772,21 +765,6 @@ int xdp_do_redirect(struct net_device *dev,
|
|||
struct bpf_prog *prog);
|
||||
void xdp_do_flush_map(void);
|
||||
|
||||
/* Drivers not supporting XDP metadata can use this helper, which
|
||||
* rejects any room expansion for metadata as a result.
|
||||
*/
|
||||
static __always_inline void
|
||||
xdp_set_data_meta_invalid(struct xdp_buff *xdp)
|
||||
{
|
||||
xdp->data_meta = xdp->data + 1;
|
||||
}
|
||||
|
||||
static __always_inline bool
|
||||
xdp_data_meta_unsupported(const struct xdp_buff *xdp)
|
||||
{
|
||||
return unlikely(xdp->data_meta > xdp->data);
|
||||
}
|
||||
|
||||
void bpf_warn_invalid_xdp_action(u32 act);
|
||||
|
||||
struct sock *do_sk_redirect_map(struct sk_buff *skb);
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
|
||||
struct socket *tun_get_socket(struct file *);
|
||||
struct ptr_ring *tun_get_tx_ring(struct file *file);
|
||||
bool tun_is_xdp_buff(void *ptr);
|
||||
bool tun_is_xdp_frame(void *ptr);
|
||||
void *tun_xdp_to_ptr(void *ptr);
|
||||
void *tun_ptr_to_xdp(void *ptr);
|
||||
void tun_ptr_free(void *ptr);
|
||||
|
@ -39,7 +39,7 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
|
|||
{
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
static inline bool tun_is_xdp_buff(void *ptr)
|
||||
static inline bool tun_is_xdp_frame(void *ptr)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -1165,7 +1165,7 @@ struct dev_ifalias {
|
|||
* This function is used to set or query state related to XDP on the
|
||||
* netdevice and manage BPF offload. See definition of
|
||||
* enum bpf_netdev_command for details.
|
||||
* int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp);
|
||||
* int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
|
||||
* This function is used to submit a XDP packet for transmit on a
|
||||
* netdevice.
|
||||
* void (*ndo_xdp_flush)(struct net_device *dev);
|
||||
|
@ -1356,7 +1356,7 @@ struct net_device_ops {
|
|||
int (*ndo_bpf)(struct net_device *dev,
|
||||
struct netdev_bpf *bpf);
|
||||
int (*ndo_xdp_xmit)(struct net_device *dev,
|
||||
struct xdp_buff *xdp);
|
||||
struct xdp_frame *xdp);
|
||||
void (*ndo_xdp_flush)(struct net_device *dev);
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* page_pool.h
|
||||
* Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
|
||||
* Copyright (C) 2016 Red Hat, Inc.
|
||||
*/
|
||||
|
||||
/**
|
||||
* DOC: page_pool allocator
|
||||
*
|
||||
* This page_pool allocator is optimized for the XDP mode that
|
||||
* uses one-frame-per-page, but has fallbacks that act like the
|
||||
* regular page allocator APIs.
|
||||
*
|
||||
* Basic use involves replacing alloc_pages() calls with the
|
||||
* page_pool_alloc_pages() call. Drivers should likely use
|
||||
* page_pool_dev_alloc_pages() replacing dev_alloc_pages().
|
||||
*
|
||||
* If page_pool handles DMA mapping (use page->private), then API user
|
||||
* is responsible for invoking page_pool_put_page() once. In-case of
|
||||
* elevated refcnt, the DMA state is released, assuming other users of
|
||||
* the page will eventually call put_page().
|
||||
*
|
||||
* If no DMA mapping is done, then it can act as shim-layer that
|
||||
* fall-through to alloc_page. As no state is kept on the page, the
|
||||
* regular put_page() call is sufficient.
|
||||
*/
|
||||
#ifndef _NET_PAGE_POOL_H
|
||||
#define _NET_PAGE_POOL_H
|
||||
|
||||
#include <linux/mm.h> /* Needed by ptr_ring */
|
||||
#include <linux/ptr_ring.h>
|
||||
#include <linux/dma-direction.h>
|
||||
|
||||
#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
|
||||
#define PP_FLAG_ALL PP_FLAG_DMA_MAP
|
||||
|
||||
/*
|
||||
* Fast allocation side cache array/stack
|
||||
*
|
||||
* The cache size and refill watermark is related to the network
|
||||
* use-case. The NAPI budget is 64 packets. After a NAPI poll the RX
|
||||
* ring is usually refilled and the max consumed elements will be 64,
|
||||
* thus a natural max size of objects needed in the cache.
|
||||
*
|
||||
* Keeping room for more objects, is due to XDP_DROP use-case. As
|
||||
* XDP_DROP allows the opportunity to recycle objects directly into
|
||||
* this array, as it shares the same softirq/NAPI protection. If
|
||||
* cache is already full (or partly full) then the XDP_DROP recycles
|
||||
* would have to take a slower code path.
|
||||
*/
|
||||
#define PP_ALLOC_CACHE_SIZE 128
|
||||
#define PP_ALLOC_CACHE_REFILL 64
|
||||
struct pp_alloc_cache {
|
||||
u32 count;
|
||||
void *cache[PP_ALLOC_CACHE_SIZE];
|
||||
};
|
||||
|
||||
struct page_pool_params {
|
||||
unsigned int flags;
|
||||
unsigned int order;
|
||||
unsigned int pool_size;
|
||||
int nid; /* Numa node id to allocate from pages from */
|
||||
struct device *dev; /* device, for DMA pre-mapping purposes */
|
||||
enum dma_data_direction dma_dir; /* DMA mapping direction */
|
||||
};
|
||||
|
||||
struct page_pool {
|
||||
struct rcu_head rcu;
|
||||
struct page_pool_params p;
|
||||
|
||||
/*
|
||||
* Data structure for allocation side
|
||||
*
|
||||
* Drivers allocation side usually already perform some kind
|
||||
* of resource protection. Piggyback on this protection, and
|
||||
* require driver to protect allocation side.
|
||||
*
|
||||
* For NIC drivers this means, allocate a page_pool per
|
||||
* RX-queue. As the RX-queue is already protected by
|
||||
* Softirq/BH scheduling and napi_schedule. NAPI schedule
|
||||
* guarantee that a single napi_struct will only be scheduled
|
||||
* on a single CPU (see napi_schedule).
|
||||
*/
|
||||
struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
|
||||
|
||||
/* Data structure for storing recycled pages.
|
||||
*
|
||||
* Returning/freeing pages is more complicated synchronization
|
||||
* wise, because free's can happen on remote CPUs, with no
|
||||
* association with allocation resource.
|
||||
*
|
||||
* Use ptr_ring, as it separates consumer and producer
|
||||
* efficiently, in a way that doesn't bounce cache-lines.
|
||||
*
|
||||
* TODO: Implement bulk return pages into this structure.
|
||||
*/
|
||||
struct ptr_ring ring;
|
||||
};
|
||||
|
||||
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
|
||||
|
||||
static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
|
||||
{
|
||||
gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
|
||||
|
||||
return page_pool_alloc_pages(pool, gfp);
|
||||
}
|
||||
|
||||
struct page_pool *page_pool_create(const struct page_pool_params *params);
|
||||
|
||||
void page_pool_destroy(struct page_pool *pool);
|
||||
|
||||
/* Never call this directly, use helpers below */
|
||||
void __page_pool_put_page(struct page_pool *pool,
|
||||
struct page *page, bool allow_direct);
|
||||
|
||||
static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
|
||||
{
|
||||
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
|
||||
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
|
||||
*/
|
||||
#ifdef CONFIG_PAGE_POOL
|
||||
__page_pool_put_page(pool, page, false);
|
||||
#endif
|
||||
}
|
||||
/* Very limited use-cases allow recycle direct */
|
||||
static inline void page_pool_recycle_direct(struct page_pool *pool,
|
||||
struct page *page)
|
||||
{
|
||||
__page_pool_put_page(pool, page, true);
|
||||
}
|
||||
|
||||
static inline bool is_page_pool_compiled_in(void)
|
||||
{
|
||||
#ifdef CONFIG_PAGE_POOL
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* _NET_PAGE_POOL_H */
|
|
@ -33,16 +33,99 @@
|
|||
* also mandatory during RX-ring setup.
|
||||
*/
|
||||
|
||||
enum xdp_mem_type {
|
||||
MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
|
||||
MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */
|
||||
MEM_TYPE_PAGE_POOL,
|
||||
MEM_TYPE_MAX,
|
||||
};
|
||||
|
||||
struct xdp_mem_info {
|
||||
u32 type; /* enum xdp_mem_type, but known size type */
|
||||
u32 id;
|
||||
};
|
||||
|
||||
struct page_pool;
|
||||
|
||||
struct xdp_rxq_info {
|
||||
struct net_device *dev;
|
||||
u32 queue_index;
|
||||
u32 reg_state;
|
||||
struct xdp_mem_info mem;
|
||||
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
|
||||
|
||||
struct xdp_buff {
|
||||
void *data;
|
||||
void *data_end;
|
||||
void *data_meta;
|
||||
void *data_hard_start;
|
||||
struct xdp_rxq_info *rxq;
|
||||
};
|
||||
|
||||
struct xdp_frame {
|
||||
void *data;
|
||||
u16 len;
|
||||
u16 headroom;
|
||||
u16 metasize;
|
||||
/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
|
||||
* while mem info is valid on remote CPU.
|
||||
*/
|
||||
struct xdp_mem_info mem;
|
||||
struct net_device *dev_rx; /* used by cpumap */
|
||||
};
|
||||
|
||||
/* Convert xdp_buff to xdp_frame */
|
||||
static inline
|
||||
struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
|
||||
{
|
||||
struct xdp_frame *xdp_frame;
|
||||
int metasize;
|
||||
int headroom;
|
||||
|
||||
/* Assure headroom is available for storing info */
|
||||
headroom = xdp->data - xdp->data_hard_start;
|
||||
metasize = xdp->data - xdp->data_meta;
|
||||
metasize = metasize > 0 ? metasize : 0;
|
||||
if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
|
||||
return NULL;
|
||||
|
||||
/* Store info in top of packet */
|
||||
xdp_frame = xdp->data_hard_start;
|
||||
|
||||
xdp_frame->data = xdp->data;
|
||||
xdp_frame->len = xdp->data_end - xdp->data;
|
||||
xdp_frame->headroom = headroom - sizeof(*xdp_frame);
|
||||
xdp_frame->metasize = metasize;
|
||||
|
||||
/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
|
||||
xdp_frame->mem = xdp->rxq->mem;
|
||||
|
||||
return xdp_frame;
|
||||
}
|
||||
|
||||
void xdp_return_frame(struct xdp_frame *xdpf);
|
||||
|
||||
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
|
||||
struct net_device *dev, u32 queue_index);
|
||||
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
|
||||
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
|
||||
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
|
||||
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
|
||||
enum xdp_mem_type type, void *allocator);
|
||||
|
||||
/* Drivers not supporting XDP metadata can use this helper, which
|
||||
* rejects any room expansion for metadata as a result.
|
||||
*/
|
||||
static __always_inline void
|
||||
xdp_set_data_meta_invalid(struct xdp_buff *xdp)
|
||||
{
|
||||
xdp->data_meta = xdp->data + 1;
|
||||
}
|
||||
|
||||
static __always_inline bool
|
||||
xdp_data_meta_unsupported(const struct xdp_buff *xdp)
|
||||
{
|
||||
return unlikely(xdp->data_meta > xdp->data);
|
||||
}
|
||||
|
||||
#endif /* __LINUX_NET_XDP_H__ */
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <linux/bpf.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/ptr_ring.h>
|
||||
#include <net/xdp.h>
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
@ -137,27 +138,6 @@ free_cmap:
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void __cpu_map_queue_destructor(void *ptr)
|
||||
{
|
||||
/* The tear-down procedure should have made sure that queue is
|
||||
* empty. See __cpu_map_entry_replace() and work-queue
|
||||
* invoked cpu_map_kthread_stop(). Catch any broken behaviour
|
||||
* gracefully and warn once.
|
||||
*/
|
||||
if (WARN_ON_ONCE(ptr))
|
||||
page_frag_free(ptr);
|
||||
}
|
||||
|
||||
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcpu->refcnt)) {
|
||||
/* The queue should be empty at this point */
|
||||
ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor);
|
||||
kfree(rcpu->queue);
|
||||
kfree(rcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
|
||||
{
|
||||
atomic_inc(&rcpu->refcnt);
|
||||
|
@ -179,45 +159,8 @@ static void cpu_map_kthread_stop(struct work_struct *work)
|
|||
kthread_stop(rcpu->kthread);
|
||||
}
|
||||
|
||||
/* For now, xdp_pkt is a cpumap internal data structure, with info
|
||||
* carried between enqueue to dequeue. It is mapped into the top
|
||||
* headroom of the packet, to avoid allocating separate mem.
|
||||
*/
|
||||
struct xdp_pkt {
|
||||
void *data;
|
||||
u16 len;
|
||||
u16 headroom;
|
||||
u16 metasize;
|
||||
struct net_device *dev_rx;
|
||||
};
|
||||
|
||||
/* Convert xdp_buff to xdp_pkt */
|
||||
static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
|
||||
{
|
||||
struct xdp_pkt *xdp_pkt;
|
||||
int metasize;
|
||||
int headroom;
|
||||
|
||||
/* Assure headroom is available for storing info */
|
||||
headroom = xdp->data - xdp->data_hard_start;
|
||||
metasize = xdp->data - xdp->data_meta;
|
||||
metasize = metasize > 0 ? metasize : 0;
|
||||
if (unlikely((headroom - metasize) < sizeof(*xdp_pkt)))
|
||||
return NULL;
|
||||
|
||||
/* Store info in top of packet */
|
||||
xdp_pkt = xdp->data_hard_start;
|
||||
|
||||
xdp_pkt->data = xdp->data;
|
||||
xdp_pkt->len = xdp->data_end - xdp->data;
|
||||
xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
|
||||
xdp_pkt->metasize = metasize;
|
||||
|
||||
return xdp_pkt;
|
||||
}
|
||||
|
||||
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
|
||||
struct xdp_pkt *xdp_pkt)
|
||||
struct xdp_frame *xdpf)
|
||||
{
|
||||
unsigned int frame_size;
|
||||
void *pkt_data_start;
|
||||
|
@ -232,7 +175,7 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
|
|||
* would be preferred to set frame_size to 2048 or 4096
|
||||
* depending on the driver.
|
||||
* frame_size = 2048;
|
||||
* frame_len = frame_size - sizeof(*xdp_pkt);
|
||||
* frame_len = frame_size - sizeof(*xdp_frame);
|
||||
*
|
||||
* Instead, with info avail, skb_shared_info is placed after
|
||||
* packet len. This, unfortunately fakes the truesize.
|
||||
|
@ -240,21 +183,21 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
|
|||
* is not at a fixed memory location, with mixed length
|
||||
* packets, which is bad for cache-line hotness.
|
||||
*/
|
||||
frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom +
|
||||
frame_size = SKB_DATA_ALIGN(xdpf->len) + xdpf->headroom +
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
|
||||
pkt_data_start = xdp_pkt->data - xdp_pkt->headroom;
|
||||
pkt_data_start = xdpf->data - xdpf->headroom;
|
||||
skb = build_skb(pkt_data_start, frame_size);
|
||||
if (!skb)
|
||||
return NULL;
|
||||
|
||||
skb_reserve(skb, xdp_pkt->headroom);
|
||||
__skb_put(skb, xdp_pkt->len);
|
||||
if (xdp_pkt->metasize)
|
||||
skb_metadata_set(skb, xdp_pkt->metasize);
|
||||
skb_reserve(skb, xdpf->headroom);
|
||||
__skb_put(skb, xdpf->len);
|
||||
if (xdpf->metasize)
|
||||
skb_metadata_set(skb, xdpf->metasize);
|
||||
|
||||
/* Essential SKB info: protocol and skb->dev */
|
||||
skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx);
|
||||
skb->protocol = eth_type_trans(skb, xdpf->dev_rx);
|
||||
|
||||
/* Optional SKB info, currently missing:
|
||||
* - HW checksum info (skb->ip_summed)
|
||||
|
@ -265,6 +208,31 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
|
|||
return skb;
|
||||
}
|
||||
|
||||
static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
|
||||
{
|
||||
/* The tear-down procedure should have made sure that queue is
|
||||
* empty. See __cpu_map_entry_replace() and work-queue
|
||||
* invoked cpu_map_kthread_stop(). Catch any broken behaviour
|
||||
* gracefully and warn once.
|
||||
*/
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
while ((xdpf = ptr_ring_consume(ring)))
|
||||
if (WARN_ON_ONCE(xdpf))
|
||||
xdp_return_frame(xdpf);
|
||||
}
|
||||
|
||||
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcpu->refcnt)) {
|
||||
/* The queue should be empty at this point */
|
||||
__cpu_map_ring_cleanup(rcpu->queue);
|
||||
ptr_ring_cleanup(rcpu->queue, NULL);
|
||||
kfree(rcpu->queue);
|
||||
kfree(rcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static int cpu_map_kthread_run(void *data)
|
||||
{
|
||||
struct bpf_cpu_map_entry *rcpu = data;
|
||||
|
@ -278,7 +246,7 @@ static int cpu_map_kthread_run(void *data)
|
|||
*/
|
||||
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
|
||||
unsigned int processed = 0, drops = 0, sched = 0;
|
||||
struct xdp_pkt *xdp_pkt;
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
/* Release CPU reschedule checks */
|
||||
if (__ptr_ring_empty(rcpu->queue)) {
|
||||
|
@ -301,13 +269,13 @@ static int cpu_map_kthread_run(void *data)
|
|||
* kthread CPU pinned. Lockless access to ptr_ring
|
||||
* consume side valid as no-resize allowed of queue.
|
||||
*/
|
||||
while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) {
|
||||
while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
|
||||
struct sk_buff *skb;
|
||||
int ret;
|
||||
|
||||
skb = cpu_map_build_skb(rcpu, xdp_pkt);
|
||||
skb = cpu_map_build_skb(rcpu, xdpf);
|
||||
if (!skb) {
|
||||
page_frag_free(xdp_pkt);
|
||||
xdp_return_frame(xdpf);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -604,13 +572,13 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
|
|||
spin_lock(&q->producer_lock);
|
||||
|
||||
for (i = 0; i < bq->count; i++) {
|
||||
void *xdp_pkt = bq->q[i];
|
||||
struct xdp_frame *xdpf = bq->q[i];
|
||||
int err;
|
||||
|
||||
err = __ptr_ring_produce(q, xdp_pkt);
|
||||
err = __ptr_ring_produce(q, xdpf);
|
||||
if (err) {
|
||||
drops++;
|
||||
page_frag_free(xdp_pkt); /* Free xdp_pkt */
|
||||
xdp_return_frame(xdpf);
|
||||
}
|
||||
processed++;
|
||||
}
|
||||
|
@ -625,7 +593,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
|
|||
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
|
||||
* Thus, safe percpu variable access.
|
||||
*/
|
||||
static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
|
||||
static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
|
||||
{
|
||||
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
|
||||
|
||||
|
@ -636,28 +604,28 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
|
|||
* driver to code invoking us to finished, due to driver
|
||||
* (e.g. ixgbe) recycle tricks based on page-refcnt.
|
||||
*
|
||||
* Thus, incoming xdp_pkt is always queued here (else we race
|
||||
* Thus, incoming xdp_frame is always queued here (else we race
|
||||
* with another CPU on page-refcnt and remaining driver code).
|
||||
* Queue time is very short, as driver will invoke flush
|
||||
* operation, when completing napi->poll call.
|
||||
*/
|
||||
bq->q[bq->count++] = xdp_pkt;
|
||||
bq->q[bq->count++] = xdpf;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
struct xdp_pkt *xdp_pkt;
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
xdp_pkt = convert_to_xdp_pkt(xdp);
|
||||
if (unlikely(!xdp_pkt))
|
||||
xdpf = convert_to_xdp_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
|
||||
/* Info needed when constructing SKB on remote CPU */
|
||||
xdp_pkt->dev_rx = dev_rx;
|
||||
xdpf->dev_rx = dev_rx;
|
||||
|
||||
bq_enqueue(rcpu, xdp_pkt);
|
||||
bq_enqueue(rcpu, xdpf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -423,6 +423,9 @@ config MAY_USE_DEVLINK
|
|||
on MAY_USE_DEVLINK to ensure they do not cause link errors when
|
||||
devlink is a loadable module and the driver using it is built-in.
|
||||
|
||||
config PAGE_POOL
|
||||
bool
|
||||
|
||||
endif # if NET
|
||||
|
||||
# Used by archs to tell that they support BPF JIT compiler plus which flavour.
|
||||
|
|
|
@ -14,6 +14,7 @@ obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
|
|||
fib_notifier.o xdp.o
|
||||
|
||||
obj-y += net-sysfs.o
|
||||
obj-$(CONFIG_PAGE_POOL) += page_pool.o
|
||||
obj-$(CONFIG_PROC_FS) += net-procfs.o
|
||||
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
|
||||
obj-$(CONFIG_NETPOLL) += netpoll.o
|
||||
|
|
|
@ -2692,6 +2692,7 @@ static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
|
|||
|
||||
BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
|
||||
{
|
||||
void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
|
||||
unsigned long metalen = xdp_get_metalen(xdp);
|
||||
void *data_start = xdp->data_hard_start + metalen;
|
||||
void *data = xdp->data + offset;
|
||||
|
@ -2700,6 +2701,13 @@ BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
|
|||
data > xdp->data_end - ETH_HLEN))
|
||||
return -EINVAL;
|
||||
|
||||
/* Avoid info leak, when reusing area prev used by xdp_frame */
|
||||
if (data < xdp_frame_end) {
|
||||
unsigned long clearlen = xdp_frame_end - data;
|
||||
|
||||
memset(data, 0, clearlen);
|
||||
}
|
||||
|
||||
if (metalen)
|
||||
memmove(xdp->data_meta + offset,
|
||||
xdp->data_meta, metalen);
|
||||
|
@ -2749,13 +2757,18 @@ static int __bpf_tx_xdp(struct net_device *dev,
|
|||
struct xdp_buff *xdp,
|
||||
u32 index)
|
||||
{
|
||||
struct xdp_frame *xdpf;
|
||||
int err;
|
||||
|
||||
if (!dev->netdev_ops->ndo_xdp_xmit) {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
|
||||
xdpf = convert_to_xdp_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
|
||||
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
|
||||
if (err)
|
||||
return err;
|
||||
dev->netdev_ops->ndo_xdp_flush(dev);
|
||||
|
@ -2771,11 +2784,19 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
|
|||
|
||||
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
|
||||
struct net_device *dev = fwd;
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
if (!dev->netdev_ops->ndo_xdp_xmit)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
|
||||
xdpf = convert_to_xdp_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
|
||||
/* TODO: move to inside map code instead, for bulk support
|
||||
* err = dev_map_enqueue(dev, xdp);
|
||||
*/
|
||||
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
|
||||
if (err)
|
||||
return err;
|
||||
__dev_map_insert_ctx(map, index);
|
||||
|
|
|
@ -0,0 +1,317 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* page_pool.c
|
||||
* Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
|
||||
* Copyright (C) 2016 Red Hat, Inc.
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <net/page_pool.h>
|
||||
#include <linux/dma-direction.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/page-flags.h>
|
||||
#include <linux/mm.h> /* for __put_page() */
|
||||
|
||||
static int page_pool_init(struct page_pool *pool,
|
||||
const struct page_pool_params *params)
|
||||
{
|
||||
unsigned int ring_qsize = 1024; /* Default */
|
||||
|
||||
memcpy(&pool->p, params, sizeof(pool->p));
|
||||
|
||||
/* Validate only known flags were used */
|
||||
if (pool->p.flags & ~(PP_FLAG_ALL))
|
||||
return -EINVAL;
|
||||
|
||||
if (pool->p.pool_size)
|
||||
ring_qsize = pool->p.pool_size;
|
||||
|
||||
/* Sanity limit mem that can be pinned down */
|
||||
if (ring_qsize > 32768)
|
||||
return -E2BIG;
|
||||
|
||||
/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
|
||||
* DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
|
||||
* which is the XDP_TX use-case.
|
||||
*/
|
||||
if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
|
||||
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
|
||||
return -EINVAL;
|
||||
|
||||
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct page_pool *page_pool_create(const struct page_pool_params *params)
|
||||
{
|
||||
struct page_pool *pool;
|
||||
int err = 0;
|
||||
|
||||
pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
|
||||
if (!pool)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
err = page_pool_init(pool, params);
|
||||
if (err < 0) {
|
||||
pr_warn("%s() gave up with errno %d\n", __func__, err);
|
||||
kfree(pool);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
return pool;
|
||||
}
|
||||
EXPORT_SYMBOL(page_pool_create);
|
||||
|
||||
/* fast path */
|
||||
static struct page *__page_pool_get_cached(struct page_pool *pool)
|
||||
{
|
||||
struct ptr_ring *r = &pool->ring;
|
||||
struct page *page;
|
||||
|
||||
/* Quicker fallback, avoid locks when ring is empty */
|
||||
if (__ptr_ring_empty(r))
|
||||
return NULL;
|
||||
|
||||
/* Test for safe-context, caller should provide this guarantee */
|
||||
if (likely(in_serving_softirq())) {
|
||||
if (likely(pool->alloc.count)) {
|
||||
/* Fast-path */
|
||||
page = pool->alloc.cache[--pool->alloc.count];
|
||||
return page;
|
||||
}
|
||||
/* Slower-path: Alloc array empty, time to refill
|
||||
*
|
||||
* Open-coded bulk ptr_ring consumer.
|
||||
*
|
||||
* Discussion: the ring consumer lock is not really
|
||||
* needed due to the softirq/NAPI protection, but
|
||||
* later need the ability to reclaim pages on the
|
||||
* ring. Thus, keeping the locks.
|
||||
*/
|
||||
spin_lock(&r->consumer_lock);
|
||||
while ((page = __ptr_ring_consume(r))) {
|
||||
if (pool->alloc.count == PP_ALLOC_CACHE_REFILL)
|
||||
break;
|
||||
pool->alloc.cache[pool->alloc.count++] = page;
|
||||
}
|
||||
spin_unlock(&r->consumer_lock);
|
||||
return page;
|
||||
}
|
||||
|
||||
/* Slow-path: Get page from locked ring queue */
|
||||
page = ptr_ring_consume(&pool->ring);
|
||||
return page;
|
||||
}
|
||||
|
||||
/* slow path */
|
||||
noinline
|
||||
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
|
||||
gfp_t _gfp)
|
||||
{
|
||||
struct page *page;
|
||||
gfp_t gfp = _gfp;
|
||||
dma_addr_t dma;
|
||||
|
||||
/* We could always set __GFP_COMP, and avoid this branch, as
|
||||
* prep_new_page() can handle order-0 with __GFP_COMP.
|
||||
*/
|
||||
if (pool->p.order)
|
||||
gfp |= __GFP_COMP;
|
||||
|
||||
/* FUTURE development:
|
||||
*
|
||||
* Current slow-path essentially falls back to single page
|
||||
* allocations, which doesn't improve performance. This code
|
||||
* need bulk allocation support from the page allocator code.
|
||||
*/
|
||||
|
||||
/* Cache was empty, do real allocation */
|
||||
page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
|
||||
goto skip_dma_map;
|
||||
|
||||
/* Setup DMA mapping: use page->private for DMA-addr
|
||||
* This mapping is kept for lifetime of page, until leaving pool.
|
||||
*/
|
||||
dma = dma_map_page(pool->p.dev, page, 0,
|
||||
(PAGE_SIZE << pool->p.order),
|
||||
pool->p.dma_dir);
|
||||
if (dma_mapping_error(pool->p.dev, dma)) {
|
||||
put_page(page);
|
||||
return NULL;
|
||||
}
|
||||
set_page_private(page, dma); /* page->private = dma; */
|
||||
|
||||
skip_dma_map:
|
||||
/* When page just alloc'ed is should/must have refcnt 1. */
|
||||
return page;
|
||||
}
|
||||
|
||||
/* For using page_pool replace: alloc_pages() API calls, but provide
|
||||
* synchronization guarantee for allocation side.
|
||||
*/
|
||||
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
/* Fast-path: Get a page from cache */
|
||||
page = __page_pool_get_cached(pool);
|
||||
if (page)
|
||||
return page;
|
||||
|
||||
/* Slow-path: cache empty, do real allocation */
|
||||
page = __page_pool_alloc_pages_slow(pool, gfp);
|
||||
return page;
|
||||
}
|
||||
EXPORT_SYMBOL(page_pool_alloc_pages);
|
||||
|
||||
/* Cleanup page_pool state from page */
|
||||
static void __page_pool_clean_page(struct page_pool *pool,
|
||||
struct page *page)
|
||||
{
|
||||
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
|
||||
return;
|
||||
|
||||
/* DMA unmap */
|
||||
dma_unmap_page(pool->p.dev, page_private(page),
|
||||
PAGE_SIZE << pool->p.order, pool->p.dma_dir);
|
||||
set_page_private(page, 0);
|
||||
}
|
||||
|
||||
/* Return a page to the page allocator, cleaning up our state
 *
 * The pool's per-page state is removed first, then the page reference
 * is dropped.
 *
 * An optimization would be to call __free_pages(page, pool->p.order)
 * knowing page is not part of page-cache (thus avoiding a
 * __page_cache_release() call).
 */
static void __page_pool_return_page(struct page_pool *pool, struct page *page)
{
	__page_pool_clean_page(pool, page);
	put_page(page);
}
|
||||
|
||||
static bool __page_pool_recycle_into_ring(struct page_pool *pool,
|
||||
struct page *page)
|
||||
{
|
||||
int ret;
|
||||
/* BH protection not needed if current is serving softirq */
|
||||
if (in_serving_softirq())
|
||||
ret = ptr_ring_produce(&pool->ring, page);
|
||||
else
|
||||
ret = ptr_ring_produce_bh(&pool->ring, page);
|
||||
|
||||
return (ret == 0) ? true : false;
|
||||
}
|
||||
|
||||
/* Only allow direct recycling in special circumstances, into the
|
||||
* alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case.
|
||||
*
|
||||
* Caller must provide appropriate safe context.
|
||||
*/
|
||||
static bool __page_pool_recycle_direct(struct page *page,
|
||||
struct page_pool *pool)
|
||||
{
|
||||
if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
|
||||
return false;
|
||||
|
||||
/* Caller MUST have verified/know (page_ref_count(page) == 1) */
|
||||
pool->alloc.cache[pool->alloc.count++] = page;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Give @page back to @pool.
 *
 * This allocator is optimized for the XDP mode that uses
 * one-frame-per-page, but has fallbacks that act like the regular page
 * allocator APIs.
 *
 * refcnt == 1 means page_pool owns page, and can recycle it: directly
 * into the alloc cache when @allow_direct is set and we are serving a
 * softirq, otherwise into the ring, and as a last resort back to the
 * page allocator.
 */
void __page_pool_put_page(struct page_pool *pool,
			  struct page *page, bool allow_direct)
{
	/* Read barrier done in page_ref_count / READ_ONCE */
	if (unlikely(page_ref_count(page) != 1)) {
		/* Fallback/non-XDP mode: API user have elevated refcnt.
		 *
		 * Many drivers split up the page into fragments, and some
		 * want to keep doing this to save memory and do refcnt
		 * based recycling. Support this use case too, to ease
		 * drivers switching between XDP/non-XDP.
		 *
		 * In-case page_pool maintains the DMA mapping, API user
		 * must call page_pool_put_page once.  In this elevated
		 * refcnt case, the DMA is unmapped/released, as driver is
		 * likely doing refcnt based recycle tricks, meaning
		 * another process will be invoking put_page.
		 */
		__page_pool_clean_page(pool, page);
		put_page(page);
		return;
	}

	if (allow_direct && in_serving_softirq() &&
	    __page_pool_recycle_direct(page, pool))
		return;

	/* Ring refused the page, fallback to free pages */
	if (!__page_pool_recycle_into_ring(pool, page))
		__page_pool_return_page(pool, page);
}
EXPORT_SYMBOL(__page_pool_put_page);
|
||||
|
||||
static void __page_pool_empty_ring(struct page_pool *pool)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
/* Empty recycle ring */
|
||||
while ((page = ptr_ring_consume(&pool->ring))) {
|
||||
/* Verify the refcnt invariant of cached pages */
|
||||
if (!(page_ref_count(page) == 1))
|
||||
pr_crit("%s() page_pool refcnt %d violation\n",
|
||||
__func__, page_ref_count(page));
|
||||
|
||||
__page_pool_return_page(pool, page);
|
||||
}
|
||||
}
|
||||
|
||||
static void __page_pool_destroy_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct page_pool *pool;
|
||||
|
||||
pool = container_of(rcu, struct page_pool, rcu);
|
||||
|
||||
WARN(pool->alloc.count, "API usage violation");
|
||||
|
||||
__page_pool_empty_ring(pool);
|
||||
ptr_ring_cleanup(&pool->ring, NULL);
|
||||
kfree(pool);
|
||||
}
|
||||
|
||||
/* Cleanup and release resources */
|
||||
void page_pool_destroy(struct page_pool *pool)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
/* Empty alloc cache, assume caller made sure this is
|
||||
* no-longer in use, and page_pool_alloc_pages() cannot be
|
||||
* call concurrently.
|
||||
*/
|
||||
while (pool->alloc.count) {
|
||||
page = pool->alloc.cache[--pool->alloc.count];
|
||||
__page_pool_return_page(pool, page);
|
||||
}
|
||||
|
||||
/* No more consumers should exist, but producers could still
|
||||
* be in-flight.
|
||||
*/
|
||||
__page_pool_empty_ring(pool);
|
||||
|
||||
/* An xdp_mem_allocator can still ref page_pool pointer */
|
||||
call_rcu(&pool->rcu, __page_pool_destroy_rcu);
|
||||
}
|
||||
EXPORT_SYMBOL(page_pool_destroy);
|
269
net/core/xdp.c
269
net/core/xdp.c
|
@ -5,6 +5,10 @@
|
|||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/rhashtable.h>
|
||||
#include <net/page_pool.h>
|
||||
|
||||
#include <net/xdp.h>
|
||||
|
||||
|
@ -13,6 +17,104 @@
|
|||
#define REG_STATE_UNREGISTERED 0x2
|
||||
#define REG_STATE_UNUSED 0x3
|
||||
|
||||
static DEFINE_IDA(mem_id_pool);
|
||||
static DEFINE_MUTEX(mem_id_lock);
|
||||
#define MEM_ID_MAX 0xFFFE
|
||||
#define MEM_ID_MIN 1
|
||||
static int mem_id_next = MEM_ID_MIN;
|
||||
|
||||
static bool mem_id_init; /* false */
|
||||
static struct rhashtable *mem_id_ht;
|
||||
|
||||
struct xdp_mem_allocator {
|
||||
struct xdp_mem_info mem;
|
||||
union {
|
||||
void *allocator;
|
||||
struct page_pool *page_pool;
|
||||
};
|
||||
struct rhash_head node;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
/* rhashtable hash callback for mem_id_ht.
 *
 * @data: points at the u32 memory ID used as key
 * @len:  key length (unused)
 * @seed: hash seed (unused)
 *
 * The ID is not mixed at all; it is only shifted left by
 * RHT_HASH_RESERVED_SPACE so that the cyclic increasing ID acts as a
 * direct hash key, see rht_bucket_index.
 */
static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
	u32 mem_id;

	/* The direct-key scheme relies on the ID being exactly 32 bit */
	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
		     != sizeof(u32));

	mem_id = *(const u32 *)data;

	return mem_id << RHT_HASH_RESERVED_SPACE;
}
|
||||
|
||||
static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
|
||||
const void *ptr)
|
||||
{
|
||||
const struct xdp_mem_allocator *xa = ptr;
|
||||
u32 mem_id = *(u32 *)arg->key;
|
||||
|
||||
return xa->mem.id != mem_id;
|
||||
}
|
||||
|
||||
static const struct rhashtable_params mem_id_rht_params = {
|
||||
.nelem_hint = 64,
|
||||
.head_offset = offsetof(struct xdp_mem_allocator, node),
|
||||
.key_offset = offsetof(struct xdp_mem_allocator, mem.id),
|
||||
.key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
|
||||
.max_size = MEM_ID_MAX,
|
||||
.min_size = 8,
|
||||
.automatic_shrinking = true,
|
||||
.hashfn = xdp_mem_id_hashfn,
|
||||
.obj_cmpfn = xdp_mem_id_cmp,
|
||||
};
|
||||
|
||||
static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
|
||||
{
|
||||
struct xdp_mem_allocator *xa;
|
||||
|
||||
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
|
||||
|
||||
/* Allow this ID to be reused */
|
||||
ida_simple_remove(&mem_id_pool, xa->mem.id);
|
||||
|
||||
/* Notice, driver is expected to free the *allocator,
|
||||
* e.g. page_pool, and MUST also use RCU free.
|
||||
*/
|
||||
|
||||
/* Poison memory */
|
||||
xa->mem.id = 0xFFFF;
|
||||
xa->mem.type = 0xF0F0;
|
||||
xa->allocator = (void *)0xDEAD9001;
|
||||
|
||||
kfree(xa);
|
||||
}
|
||||
|
||||
static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
|
||||
{
|
||||
struct xdp_mem_allocator *xa;
|
||||
int id = xdp_rxq->mem.id;
|
||||
int err;
|
||||
|
||||
if (id == 0)
|
||||
return;
|
||||
|
||||
mutex_lock(&mem_id_lock);
|
||||
|
||||
xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
|
||||
if (!xa) {
|
||||
mutex_unlock(&mem_id_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
|
||||
WARN_ON(err);
|
||||
|
||||
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
|
||||
|
||||
mutex_unlock(&mem_id_lock);
|
||||
}
|
||||
|
||||
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
|
||||
{
|
||||
/* Simplify driver cleanup code paths, allow unreg "unused" */
|
||||
|
@ -21,8 +123,14 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
|
|||
|
||||
WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
|
||||
|
||||
__xdp_rxq_info_unreg_mem_model(xdp_rxq);
|
||||
|
||||
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
|
||||
xdp_rxq->dev = NULL;
|
||||
|
||||
/* Reset mem info to defaults */
|
||||
xdp_rxq->mem.id = 0;
|
||||
xdp_rxq->mem.type = 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
|
||||
|
||||
|
@ -71,3 +179,164 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
|
|||
return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
|
||||
|
||||
static int __mem_id_init_hash_table(void)
|
||||
{
|
||||
struct rhashtable *rht;
|
||||
int ret;
|
||||
|
||||
if (unlikely(mem_id_init))
|
||||
return 0;
|
||||
|
||||
rht = kzalloc(sizeof(*rht), GFP_KERNEL);
|
||||
if (!rht)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = rhashtable_init(rht, &mem_id_rht_params);
|
||||
if (ret < 0) {
|
||||
kfree(rht);
|
||||
return ret;
|
||||
}
|
||||
mem_id_ht = rht;
|
||||
smp_mb(); /* mutex lock should provide enough pairing */
|
||||
mem_id_init = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Allocate a cyclic ID that maps to allocator pointer.
|
||||
* See: https://www.kernel.org/doc/html/latest/core-api/idr.html
|
||||
*
|
||||
* Caller must lock mem_id_lock.
|
||||
*/
|
||||
static int __mem_id_cyclic_get(gfp_t gfp)
|
||||
{
|
||||
int retries = 1;
|
||||
int id;
|
||||
|
||||
again:
|
||||
id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
|
||||
if (id < 0) {
|
||||
if (id == -ENOSPC) {
|
||||
/* Cyclic allocator, reset next id */
|
||||
if (retries--) {
|
||||
mem_id_next = MEM_ID_MIN;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return id; /* errno */
|
||||
}
|
||||
mem_id_next = id + 1;
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
static bool __is_supported_mem_type(enum xdp_mem_type type)
|
||||
{
|
||||
if (type == MEM_TYPE_PAGE_POOL)
|
||||
return is_page_pool_compiled_in();
|
||||
|
||||
if (type >= MEM_TYPE_MAX)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
|
||||
enum xdp_mem_type type, void *allocator)
|
||||
{
|
||||
struct xdp_mem_allocator *xdp_alloc;
|
||||
gfp_t gfp = GFP_KERNEL;
|
||||
int id, errno, ret;
|
||||
void *ptr;
|
||||
|
||||
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
|
||||
WARN(1, "Missing register, driver bug");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (!__is_supported_mem_type(type))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
xdp_rxq->mem.type = type;
|
||||
|
||||
if (!allocator) {
|
||||
if (type == MEM_TYPE_PAGE_POOL)
|
||||
return -EINVAL; /* Setup time check page_pool req */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Delay init of rhashtable to save memory if feature isn't used */
|
||||
if (!mem_id_init) {
|
||||
mutex_lock(&mem_id_lock);
|
||||
ret = __mem_id_init_hash_table();
|
||||
mutex_unlock(&mem_id_lock);
|
||||
if (ret < 0) {
|
||||
WARN_ON(1);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
|
||||
if (!xdp_alloc)
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&mem_id_lock);
|
||||
id = __mem_id_cyclic_get(gfp);
|
||||
if (id < 0) {
|
||||
errno = id;
|
||||
goto err;
|
||||
}
|
||||
xdp_rxq->mem.id = id;
|
||||
xdp_alloc->mem = xdp_rxq->mem;
|
||||
xdp_alloc->allocator = allocator;
|
||||
|
||||
/* Insert allocator into ID lookup table */
|
||||
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
|
||||
if (IS_ERR(ptr)) {
|
||||
errno = PTR_ERR(ptr);
|
||||
goto err;
|
||||
}
|
||||
|
||||
mutex_unlock(&mem_id_lock);
|
||||
|
||||
return 0;
|
||||
err:
|
||||
mutex_unlock(&mem_id_lock);
|
||||
kfree(xdp_alloc);
|
||||
return errno;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
|
||||
|
||||
void xdp_return_frame(struct xdp_frame *xdpf)
|
||||
{
|
||||
struct xdp_mem_info *mem = &xdpf->mem;
|
||||
struct xdp_mem_allocator *xa;
|
||||
void *data = xdpf->data;
|
||||
struct page *page;
|
||||
|
||||
switch (mem->type) {
|
||||
case MEM_TYPE_PAGE_POOL:
|
||||
rcu_read_lock();
|
||||
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
|
||||
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
|
||||
page = virt_to_head_page(data);
|
||||
if (xa)
|
||||
page_pool_put_page(xa->page_pool, page);
|
||||
else
|
||||
put_page(page);
|
||||
rcu_read_unlock();
|
||||
break;
|
||||
case MEM_TYPE_PAGE_SHARED:
|
||||
page_frag_free(data);
|
||||
break;
|
||||
case MEM_TYPE_PAGE_ORDER0:
|
||||
page = virt_to_page(data); /* Assumes order0 page*/
|
||||
put_page(page);
|
||||
break;
|
||||
default:
|
||||
/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
|
||||
break;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_return_frame);
|
||||
|
|
Loading…
Reference in New Issue