tap: XDP support
This patch tries to implement XDP for tun. The implementation was split into two parts: - fast path: small and no gso packet. We try to do XDP at page level before build_skb(). For XDP_TX, since creating/destroying queues were completely under control of userspace, it was implemented through generic XDP helper after skb has been built. This could be optimized in the future. - slow path: big or gso packet. We try to do it after skb was created through generic XDP helpers. Test were done through pktgen with small packets. xdp1 test shows ~41.1% improvement: Before: ~1.7Mpps After: ~2.3Mpps xdp_redirect to ixgbe shows ~60% improvement: Before: ~0.8Mpps After: ~1.38Mpps Suggested-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
7c4974786f
commit
761876c857
|
@ -73,6 +73,8 @@
|
||||||
#include <linux/seq_file.h>
|
#include <linux/seq_file.h>
|
||||||
#include <linux/uio.h>
|
#include <linux/uio.h>
|
||||||
#include <linux/skb_array.h>
|
#include <linux/skb_array.h>
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
#include <linux/bpf_trace.h>
|
||||||
|
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
|
|
||||||
|
@ -105,7 +107,8 @@ do { \
|
||||||
} while (0)
|
} while (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
|
#define TUN_HEADROOM 256
|
||||||
|
#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD + TUN_HEADROOM)
|
||||||
|
|
||||||
/* TUN device flags */
|
/* TUN device flags */
|
||||||
|
|
||||||
|
@ -224,6 +227,7 @@ struct tun_struct {
|
||||||
u32 flow_count;
|
u32 flow_count;
|
||||||
u32 rx_batched;
|
u32 rx_batched;
|
||||||
struct tun_pcpu_stats __percpu *pcpu_stats;
|
struct tun_pcpu_stats __percpu *pcpu_stats;
|
||||||
|
struct bpf_prog __rcu *xdp_prog;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef CONFIG_TUN_VNET_CROSS_LE
|
#ifdef CONFIG_TUN_VNET_CROSS_LE
|
||||||
|
@ -590,6 +594,7 @@ static void tun_detach(struct tun_file *tfile, bool clean)
|
||||||
static void tun_detach_all(struct net_device *dev)
|
static void tun_detach_all(struct net_device *dev)
|
||||||
{
|
{
|
||||||
struct tun_struct *tun = netdev_priv(dev);
|
struct tun_struct *tun = netdev_priv(dev);
|
||||||
|
struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
|
||||||
struct tun_file *tfile, *tmp;
|
struct tun_file *tfile, *tmp;
|
||||||
int i, n = tun->numqueues;
|
int i, n = tun->numqueues;
|
||||||
|
|
||||||
|
@ -622,6 +627,9 @@ static void tun_detach_all(struct net_device *dev)
|
||||||
}
|
}
|
||||||
BUG_ON(tun->numdisabled != 0);
|
BUG_ON(tun->numdisabled != 0);
|
||||||
|
|
||||||
|
if (xdp_prog)
|
||||||
|
bpf_prog_put(xdp_prog);
|
||||||
|
|
||||||
if (tun->flags & IFF_PERSIST)
|
if (tun->flags & IFF_PERSIST)
|
||||||
module_put(THIS_MODULE);
|
module_put(THIS_MODULE);
|
||||||
}
|
}
|
||||||
|
@ -1008,6 +1016,46 @@ tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
|
||||||
stats->tx_dropped = tx_dropped;
|
stats->tx_dropped = tx_dropped;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
|
||||||
|
struct netlink_ext_ack *extack)
|
||||||
|
{
|
||||||
|
struct tun_struct *tun = netdev_priv(dev);
|
||||||
|
struct bpf_prog *old_prog;
|
||||||
|
|
||||||
|
old_prog = rtnl_dereference(tun->xdp_prog);
|
||||||
|
rcu_assign_pointer(tun->xdp_prog, prog);
|
||||||
|
if (old_prog)
|
||||||
|
bpf_prog_put(old_prog);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static u32 tun_xdp_query(struct net_device *dev)
|
||||||
|
{
|
||||||
|
struct tun_struct *tun = netdev_priv(dev);
|
||||||
|
const struct bpf_prog *xdp_prog;
|
||||||
|
|
||||||
|
xdp_prog = rtnl_dereference(tun->xdp_prog);
|
||||||
|
if (xdp_prog)
|
||||||
|
return xdp_prog->aux->id;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
|
||||||
|
{
|
||||||
|
switch (xdp->command) {
|
||||||
|
case XDP_SETUP_PROG:
|
||||||
|
return tun_xdp_set(dev, xdp->prog, xdp->extack);
|
||||||
|
case XDP_QUERY_PROG:
|
||||||
|
xdp->prog_id = tun_xdp_query(dev);
|
||||||
|
xdp->prog_attached = !!xdp->prog_id;
|
||||||
|
return 0;
|
||||||
|
default:
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static const struct net_device_ops tun_netdev_ops = {
|
static const struct net_device_ops tun_netdev_ops = {
|
||||||
.ndo_uninit = tun_net_uninit,
|
.ndo_uninit = tun_net_uninit,
|
||||||
.ndo_open = tun_net_open,
|
.ndo_open = tun_net_open,
|
||||||
|
@ -1038,6 +1086,7 @@ static const struct net_device_ops tap_netdev_ops = {
|
||||||
.ndo_features_check = passthru_features_check,
|
.ndo_features_check = passthru_features_check,
|
||||||
.ndo_set_rx_headroom = tun_set_headroom,
|
.ndo_set_rx_headroom = tun_set_headroom,
|
||||||
.ndo_get_stats64 = tun_net_get_stats64,
|
.ndo_get_stats64 = tun_net_get_stats64,
|
||||||
|
.ndo_xdp = tun_xdp,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void tun_flow_init(struct tun_struct *tun)
|
static void tun_flow_init(struct tun_struct *tun)
|
||||||
|
@ -1217,16 +1266,22 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct sk_buff *tun_build_skb(struct tun_file *tfile,
|
static struct sk_buff *tun_build_skb(struct tun_struct *tun,
|
||||||
|
struct tun_file *tfile,
|
||||||
struct iov_iter *from,
|
struct iov_iter *from,
|
||||||
int len)
|
struct virtio_net_hdr *hdr,
|
||||||
|
int len, int *generic_xdp)
|
||||||
{
|
{
|
||||||
struct page_frag *alloc_frag = &tfile->alloc_frag;
|
struct page_frag *alloc_frag = &tfile->alloc_frag;
|
||||||
struct sk_buff *skb;
|
struct sk_buff *skb;
|
||||||
|
struct bpf_prog *xdp_prog;
|
||||||
int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
|
int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
|
||||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||||
|
unsigned int delta = 0;
|
||||||
char *buf;
|
char *buf;
|
||||||
size_t copied;
|
size_t copied;
|
||||||
|
bool xdp_xmit = false;
|
||||||
|
int err;
|
||||||
|
|
||||||
if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
|
if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
|
||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
|
@ -1238,16 +1293,77 @@ static struct sk_buff *tun_build_skb(struct tun_file *tfile,
|
||||||
if (copied != len)
|
if (copied != len)
|
||||||
return ERR_PTR(-EFAULT);
|
return ERR_PTR(-EFAULT);
|
||||||
|
|
||||||
skb = build_skb(buf, buflen);
|
if (hdr->gso_type)
|
||||||
if (!skb)
|
*generic_xdp = 1;
|
||||||
return ERR_PTR(-ENOMEM);
|
else
|
||||||
|
*generic_xdp = 0;
|
||||||
|
|
||||||
skb_reserve(skb, TUN_RX_PAD);
|
rcu_read_lock();
|
||||||
skb_put(skb, len);
|
xdp_prog = rcu_dereference(tun->xdp_prog);
|
||||||
|
if (xdp_prog && !*generic_xdp) {
|
||||||
|
struct xdp_buff xdp;
|
||||||
|
void *orig_data;
|
||||||
|
u32 act;
|
||||||
|
|
||||||
|
xdp.data_hard_start = buf;
|
||||||
|
xdp.data = buf + TUN_RX_PAD;
|
||||||
|
xdp.data_end = xdp.data + len;
|
||||||
|
orig_data = xdp.data;
|
||||||
|
act = bpf_prog_run_xdp(xdp_prog, &xdp);
|
||||||
|
|
||||||
|
switch (act) {
|
||||||
|
case XDP_REDIRECT:
|
||||||
|
get_page(alloc_frag->page);
|
||||||
|
alloc_frag->offset += buflen;
|
||||||
|
err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
|
||||||
|
if (err)
|
||||||
|
goto err_redirect;
|
||||||
|
return NULL;
|
||||||
|
case XDP_TX:
|
||||||
|
xdp_xmit = true;
|
||||||
|
/* fall through */
|
||||||
|
case XDP_PASS:
|
||||||
|
delta = orig_data - xdp.data;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
bpf_warn_invalid_xdp_action(act);
|
||||||
|
/* fall through */
|
||||||
|
case XDP_ABORTED:
|
||||||
|
trace_xdp_exception(tun->dev, xdp_prog, act);
|
||||||
|
/* fall through */
|
||||||
|
case XDP_DROP:
|
||||||
|
goto err_xdp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
skb = build_skb(buf, buflen);
|
||||||
|
if (!skb) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
return ERR_PTR(-ENOMEM);
|
||||||
|
}
|
||||||
|
|
||||||
|
skb_reserve(skb, TUN_RX_PAD - delta);
|
||||||
|
skb_put(skb, len + delta);
|
||||||
get_page(alloc_frag->page);
|
get_page(alloc_frag->page);
|
||||||
alloc_frag->offset += buflen;
|
alloc_frag->offset += buflen;
|
||||||
|
|
||||||
|
if (xdp_xmit) {
|
||||||
|
skb->dev = tun->dev;
|
||||||
|
generic_xdp_tx(skb, xdp_prog);
|
||||||
|
rcu_read_lock();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
return skb;
|
return skb;
|
||||||
|
|
||||||
|
err_redirect:
|
||||||
|
put_page(alloc_frag->page);
|
||||||
|
err_xdp:
|
||||||
|
rcu_read_unlock();
|
||||||
|
this_cpu_inc(tun->pcpu_stats->rx_dropped);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get packet from user space buffer */
|
/* Get packet from user space buffer */
|
||||||
|
@ -1266,6 +1382,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
|
||||||
bool zerocopy = false;
|
bool zerocopy = false;
|
||||||
int err;
|
int err;
|
||||||
u32 rxhash;
|
u32 rxhash;
|
||||||
|
int generic_xdp = 1;
|
||||||
|
|
||||||
if (!(tun->dev->flags & IFF_UP))
|
if (!(tun->dev->flags & IFF_UP))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
@ -1324,11 +1441,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
|
if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
|
||||||
skb = tun_build_skb(tfile, from, len);
|
skb = tun_build_skb(tun, tfile, from, &gso, len, &generic_xdp);
|
||||||
if (IS_ERR(skb)) {
|
if (IS_ERR(skb)) {
|
||||||
this_cpu_inc(tun->pcpu_stats->rx_dropped);
|
this_cpu_inc(tun->pcpu_stats->rx_dropped);
|
||||||
return PTR_ERR(skb);
|
return PTR_ERR(skb);
|
||||||
}
|
}
|
||||||
|
if (!skb)
|
||||||
|
return total_len;
|
||||||
} else {
|
} else {
|
||||||
if (!zerocopy) {
|
if (!zerocopy) {
|
||||||
copylen = len;
|
copylen = len;
|
||||||
|
@ -1402,6 +1521,22 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
|
||||||
skb_reset_network_header(skb);
|
skb_reset_network_header(skb);
|
||||||
skb_probe_transport_header(skb, 0);
|
skb_probe_transport_header(skb, 0);
|
||||||
|
|
||||||
|
if (generic_xdp) {
|
||||||
|
struct bpf_prog *xdp_prog;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
xdp_prog = rcu_dereference(tun->xdp_prog);
|
||||||
|
if (xdp_prog) {
|
||||||
|
ret = do_xdp_generic(xdp_prog, skb);
|
||||||
|
if (ret != XDP_PASS) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
return total_len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
rxhash = __skb_get_hash_symmetric(skb);
|
rxhash = __skb_get_hash_symmetric(skb);
|
||||||
#ifndef CONFIG_4KSTACKS
|
#ifndef CONFIG_4KSTACKS
|
||||||
tun_rx_batched(tun, tfile, skb, more);
|
tun_rx_batched(tun, tfile, skb, more);
|
||||||
|
|
Loading…
Reference in New Issue