Merge branch 'gre-add-collect_md-mode-for-ERSPAN-tunnel'

William Tu says:

====================
gre: add collect_md mode for ERSPAN tunnel

This patch series provides collect_md mode for ERSPAN tunnel.  The first patch
refactors the existing gre_fb_xmit function by extracting the route cache
portion into a new function called prepare_fb_xmit.  The second patch
introduces the collect_md mode for ERSPAN tunnel, by calling the
prepare_fb_xmit function and adding ERSPAN specific logic.  The final patch
adds the test case using bpf_skb_{set,get}_tunnel_{key,opt}.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-08-28 15:04:52 -07:00
commit d5a915b978
4 changed files with 232 additions and 21 deletions

View File

@ -154,8 +154,10 @@ struct ip_tunnel {
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) #define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_NOCACHE __cpu_to_be16(0x2000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) #define TUNNEL_OPTIONS_PRESENT \
(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
struct tnl_ptk_info { struct tnl_ptk_info {
__be16 flags; __be16 flags;

View File

@ -113,6 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly; static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
__be32 id, u32 index, bool truncate);
static unsigned int ipgre_net_id __read_mostly; static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly; static unsigned int gre_tap_net_id __read_mostly;
@ -287,7 +289,33 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
false, false) < 0) false, false) < 0)
goto drop; goto drop;
tunnel->index = ntohl(index); if (tunnel->collect_md) {
struct ip_tunnel_info *info;
struct erspan_metadata *md;
__be64 tun_id;
__be16 flags;
tpi->flags |= TUNNEL_KEY;
flags = tpi->flags;
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ip_tun_rx_dst(skb, flags,
tun_id, sizeof(*md));
if (!tun_dst)
return PACKET_REJECT;
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
if (!md)
return PACKET_REJECT;
md->index = index;
info = &tun_dst->u.tun_info;
info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
info->options_len = sizeof(*md);
} else {
tunnel->index = ntohl(index);
}
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD; return PACKET_RCVD;
@ -432,39 +460,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
return ip_route_output_key(net, fl); return ip_route_output_key(net, fl);
} }
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
__be16 proto) struct net_device *dev,
struct flowi4 *fl,
int tunnel_hlen)
{ {
struct ip_tunnel_info *tun_info; struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key; const struct ip_tunnel_key *key;
struct rtable *rt = NULL; struct rtable *rt = NULL;
struct flowi4 fl;
int min_headroom; int min_headroom;
int tunnel_hlen;
__be16 df, flags;
bool use_cache; bool use_cache;
int err; int err;
tun_info = skb_tunnel_info(skb); tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET))
goto err_free_skb;
key = &tun_info->key; key = &tun_info->key;
use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
if (use_cache) if (use_cache)
rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr); rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
if (!rt) { if (!rt) {
rt = gre_get_rt(skb, dev, &fl, key); rt = gre_get_rt(skb, dev, fl, key);
if (IS_ERR(rt)) if (IS_ERR(rt))
goto err_free_skb; goto err_free_skb;
if (use_cache) if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl.saddr); fl->saddr);
} }
tunnel_hlen = gre_calc_hlen(key->tun_flags);
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr); + tunnel_hlen + sizeof(struct iphdr);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@ -476,6 +498,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err)) if (unlikely(err))
goto err_free_rt; goto err_free_rt;
} }
return rt;
err_free_rt:
ip_rt_put(rt);
err_free_skb:
kfree_skb(skb);
dev->stats.tx_dropped++;
return NULL;
}
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
struct flowi4 fl;
int tunnel_hlen;
__be16 df, flags;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET))
goto err_free_skb;
key = &tun_info->key;
tunnel_hlen = gre_calc_hlen(key->tun_flags);
rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
if (!rt)
return;
/* Push Tunnel header. */ /* Push Tunnel header. */
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
@ -498,6 +551,64 @@ err_free_skb:
dev->stats.tx_dropped++; dev->stats.tx_dropped++;
} }
/*
 * erspan_fb_xmit - transmit path for an ERSPAN device in collect_md mode.
 * @skb:   packet to encapsulate; always consumed (sent or freed)
 * @dev:   the ERSPAN net_device the packet was queued on
 * @proto: not referenced by this function
 *
 * The tunnel parameters come from the tunnel_info attached to @skb rather
 * than from the device configuration.  The tunnel key supplies the ERSPAN
 * session id, and the option area must hold a struct erspan_metadata whose
 * index field supplies the ERSPAN index.
 *
 * On any validation or offload failure the skb is freed and
 * dev->stats.tx_dropped is incremented.  When prepare_fb_xmit() returns
 * NULL it has already freed the skb and bumped tx_dropped itself, so this
 * function just returns.
 */
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	/* The receive side records the size of the ERSPAN option in
	 * options_len.  Honour the same field here: without a complete
	 * erspan_metadata attached, md->index below would be read from
	 * bytes the sender never provided.
	 */
	if (unlikely(tun_info->options_len < sizeof(*md)))
		goto err_free_skb;

	key = &tun_info->key;

	/* ERSPAN has fixed 8 byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	/* Route lookup and headroom expansion; frees the skb on failure. */
	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	/* Oversized frames are cut to the device MTU and flagged as
	 * truncated in the ERSPAN header.
	 */
	if (skb->len > dev->mtu) {
		pskb_trim(skb, dev->mtu);
		truncate = true;
	}

	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	/* md->index is carried big-endian; the header builder takes a u32. */
	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	/* 8-byte GRE header carrying only a sequence number. */
	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{ {
struct ip_tunnel_info *info = skb_tunnel_info(skb); struct ip_tunnel_info *info = skb_tunnel_info(skb);
@ -611,6 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
bool truncate = false; bool truncate = false;
if (tunnel->collect_md) {
erspan_fb_xmit(skb, dev, skb->protocol);
return NETDEV_TX_OK;
}
if (gre_handle_offloads(skb, false)) if (gre_handle_offloads(skb, false))
goto free_skb; goto free_skb;
@ -973,9 +1089,12 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
return ret; return ret;
/* ERSPAN should only have GRE sequence and key flag */ /* ERSPAN should only have GRE sequence and key flag */
flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); if (data[IFLA_GRE_OFLAGS])
flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
if (flags != (GRE_SEQ | GRE_KEY)) if (data[IFLA_GRE_IFLAGS])
flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
if (!data[IFLA_GRE_COLLECT_METADATA] &&
flags != (GRE_SEQ | GRE_KEY))
return -EINVAL; return -EINVAL;
/* ERSPAN Session ID only has 10-bit. Since we reuse /* ERSPAN Session ID only has 10-bit. Since we reuse

View File

@ -17,6 +17,7 @@
#include <uapi/linux/pkt_cls.h> #include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h> #include <net/ipv6.h>
#include "bpf_helpers.h" #include "bpf_helpers.h"
#include "bpf_endian.h"
#define _htonl __builtin_bswap32 #define _htonl __builtin_bswap32
#define ERROR(ret) do {\ #define ERROR(ret) do {\
@ -38,6 +39,10 @@ struct vxlan_metadata {
u32 gbp; u32 gbp;
}; };
/* Tunnel option blob that the ERSPAN programs in this file pass to
 * bpf_skb_set_tunnel_opt() and bpf_skb_get_tunnel_opt().
 */
struct erspan_metadata {
/* ERSPAN index, big-endian (__be32); the programs in this file convert
 * it to and from host byte order when writing and reading it.
 */
__be32 index;
};
SEC("gre_set_tunnel") SEC("gre_set_tunnel")
int _gre_set_tunnel(struct __sk_buff *skb) int _gre_set_tunnel(struct __sk_buff *skb)
{ {
@ -76,6 +81,63 @@ int _gre_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK; return TC_ACT_OK;
} }
/*
 * TC program: attach ERSPAN tunnel metadata to a packet.
 *
 * Sets the tunnel key (remote 172.16.1.100, tunnel id 2, TTL 64) and then
 * an erspan_metadata option with index 123.  Returns TC_ACT_SHOT if either
 * helper fails, TC_ACT_OK otherwise.
 */
SEC("erspan_set_tunnel")
int _erspan_set_tunnel(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key;
	struct erspan_metadata md;
	int ret;

	__builtin_memset(&key, 0x0, sizeof(key));
	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
	key.tunnel_id = 2;
	key.tunnel_tos = 0;
	key.tunnel_ttl = 64;

	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
	if (ret < 0) {
		ERROR(ret);
		return TC_ACT_SHOT;
	}

	/* Use the bpf_endian.h helper so the conversion matches the
	 * bpf_ntohl() done by _erspan_get_tunnel.
	 */
	md.index = bpf_htonl(123);
	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
	if (ret < 0) {
		ERROR(ret);
		return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}
/*
 * TC program: read back the tunnel key and the ERSPAN option attached to a
 * packet and log them with bpf_trace_printk().  Returns TC_ACT_SHOT if
 * either helper fails, TC_ACT_OK otherwise.
 */
SEC("erspan_get_tunnel")
int _erspan_get_tunnel(struct __sk_buff *skb)
{
	char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n";
	struct erspan_metadata opt;
	struct bpf_tunnel_key tkey;
	u32 erspan_index;
	int err;

	/* Tunnel key first: id and remote address. */
	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
	if (err < 0) {
		ERROR(err);
		return TC_ACT_SHOT;
	}

	/* Then the ERSPAN option blob. */
	err = bpf_skb_get_tunnel_opt(skb, &opt, sizeof(opt));
	if (err < 0) {
		ERROR(err);
		return TC_ACT_SHOT;
	}

	/* The index is stored big-endian; print it in host order. */
	erspan_index = bpf_ntohl(opt.index);
	bpf_trace_printk(fmt, sizeof(fmt),
			 tkey.tunnel_id, tkey.remote_ipv4, erspan_index);

	return TC_ACT_OK;
}
SEC("vxlan_set_tunnel") SEC("vxlan_set_tunnel")
int _vxlan_set_tunnel(struct __sk_buff *skb) int _vxlan_set_tunnel(struct __sk_buff *skb)
{ {
@ -378,5 +440,4 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK; return TC_ACT_OK;
} }
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";

View File

@ -32,6 +32,19 @@ function add_gre_tunnel {
ip addr add dev $DEV 10.1.1.200/24 ip addr add dev $DEV 10.1.1.200/24
} }
# Create the two ends of the ERSPAN test tunnel.
# Reads the globals TYPE, DEV_NS and DEV set by the calling test function.
function add_erspan_tunnel {
	local in_ns="ip netns exec at_ns0"

	# Inside at_ns0: endpoint with fixed key, addresses and ERSPAN index.
	$in_ns ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123
	$in_ns ip link set dev $DEV_NS up
	$in_ns ip addr add dev $DEV_NS 10.1.1.100/24

	# Outside the namespace: endpoint created with the "external" keyword
	# and no fixed tunnel parameters.
	ip link add dev $DEV type $TYPE external
	ip link set dev $DEV up
	ip addr add dev $DEV 10.1.1.200/24
}
function add_vxlan_tunnel { function add_vxlan_tunnel {
# Set static ARP entry here because iptables set-mark works # Set static ARP entry here because iptables set-mark works
# on L3 packet, as a result not applying to ARP packets, # on L3 packet, as a result not applying to ARP packets,
@ -99,6 +112,18 @@ function test_gre {
cleanup cleanup
} }
# ERSPAN test case: set up the tunnel pair, attach the ERSPAN BPF programs
# to the outer device, ping once in each direction, then tear down.
function test_erspan {
# Globals read by add_erspan_tunnel (and presumably by config_device and
# attach_bpf, which are defined elsewhere in this script).
TYPE=erspan
DEV_NS=erspan00
DEV=erspan11
config_device
add_erspan_tunnel
# Presumably loads the programs from the erspan_set_tunnel and
# erspan_get_tunnel sections onto $DEV -- confirm against attach_bpf.
attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
# 10.1.1.100 is the address add_erspan_tunnel assigns to the device
# inside at_ns0.
ping -c 1 10.1.1.100
# Reverse direction: from at_ns0 to 10.1.1.200, the address on $DEV.
ip netns exec at_ns0 ping -c 1 10.1.1.200
cleanup
}
function test_vxlan { function test_vxlan {
TYPE=vxlan TYPE=vxlan
DEV_NS=vxlan00 DEV_NS=vxlan00
@ -151,14 +176,18 @@ function cleanup {
ip link del gretap11 ip link del gretap11
ip link del vxlan11 ip link del vxlan11
ip link del geneve11 ip link del geneve11
ip link del erspan11
pkill tcpdump pkill tcpdump
pkill cat pkill cat
set -ex set -ex
} }
trap cleanup 0 2 3 6 9
cleanup cleanup
echo "Testing GRE tunnel..." echo "Testing GRE tunnel..."
test_gre test_gre
echo "Testing ERSPAN tunnel..."
test_erspan
echo "Testing VXLAN tunnel..." echo "Testing VXLAN tunnel..."
test_vxlan test_vxlan
echo "Testing GENEVE tunnel..." echo "Testing GENEVE tunnel..."