Merge branch 'bpf-fib-mtu-check'
David Ahern says:

====================
Packets that exceed the egress MTU cannot be forwarded in the fast path.
Add IPv4 and IPv6 MTU helpers that take a FIB lookup result (versus the
typical dst path) and add the calls to bpf_ipv{4,6}_fib_lookup.

v2
- add ip6_mtu_from_fib6 to ipv6_stub
- only call the new MTU helpers for FIB lookups in the XDP path; the skb
  path uses is_skb_forwardable to determine if the packet can be sent via
  the egress device from the FIB lookup
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
commit
3fb48d881d
|
@ -236,6 +236,8 @@ struct ipv6_stub {
|
|||
struct flowi6 *fl6, int oif,
|
||||
const struct sk_buff *skb,
|
||||
int strict);
|
||||
u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
|
||||
struct in6_addr *saddr);
|
||||
|
||||
void (*udpv6_encap_enable)(void);
|
||||
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
|
||||
|
|
|
@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
|
|||
return f6i->fib6_nh.nh_dev;
|
||||
}
|
||||
|
||||
static inline
|
||||
struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
|
||||
{
|
||||
return f6i->fib6_nh.nh_lwtstate;
|
||||
}
|
||||
|
||||
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
|
||||
unsigned int flags);
|
||||
|
||||
|
|
|
@ -300,6 +300,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
|
|||
return mtu;
|
||||
}
|
||||
|
||||
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
|
||||
struct in6_addr *saddr);
|
||||
|
||||
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
|
||||
struct net_device *dev, struct sk_buff *skb,
|
||||
const void *daddr);
|
||||
|
|
|
@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net)
|
|||
}
|
||||
#endif
|
||||
|
||||
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
|
||||
|
||||
#endif /* _NET_FIB_H */
|
||||
|
|
|
@ -4089,7 +4089,7 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
|
|||
|
||||
#if IS_ENABLED(CONFIG_INET)
|
||||
static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
||||
u32 flags)
|
||||
u32 flags, bool check_mtu)
|
||||
{
|
||||
struct in_device *in_dev;
|
||||
struct neighbour *neigh;
|
||||
|
@ -4098,6 +4098,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
|||
struct fib_nh *nh;
|
||||
struct flowi4 fl4;
|
||||
int err;
|
||||
u32 mtu;
|
||||
|
||||
dev = dev_get_by_index_rcu(net, params->ifindex);
|
||||
if (unlikely(!dev))
|
||||
|
@ -4149,6 +4150,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
|||
if (res.fi->fib_nhs > 1)
|
||||
fib_select_path(net, &res, &fl4, NULL);
|
||||
|
||||
if (check_mtu) {
|
||||
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
|
||||
if (params->tot_len > mtu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
nh = &res.fi->fib_nh[res.nh_sel];
|
||||
|
||||
/* do not handle lwt encaps right now */
|
||||
|
@ -4177,7 +4184,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
|||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
||||
u32 flags)
|
||||
u32 flags, bool check_mtu)
|
||||
{
|
||||
struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
|
||||
struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
|
||||
|
@ -4188,6 +4195,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
|||
struct flowi6 fl6;
|
||||
int strict = 0;
|
||||
int oif;
|
||||
u32 mtu;
|
||||
|
||||
/* link local addresses are never forwarded */
|
||||
if (rt6_need_strict(dst) || rt6_need_strict(src))
|
||||
|
@ -4250,6 +4258,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
|
|||
fl6.flowi6_oif, NULL,
|
||||
strict);
|
||||
|
||||
if (check_mtu) {
|
||||
mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
|
||||
if (params->tot_len > mtu)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (f6i->fib6_nh.nh_lwtstate)
|
||||
return 0;
|
||||
|
||||
|
@ -4282,12 +4296,12 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
|
|||
#if IS_ENABLED(CONFIG_INET)
|
||||
case AF_INET:
|
||||
return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
|
||||
flags);
|
||||
flags, true);
|
||||
#endif
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
case AF_INET6:
|
||||
return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
|
||||
flags);
|
||||
flags, true);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
|
@ -4306,20 +4320,34 @@ static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
|
|||
BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
|
||||
struct bpf_fib_lookup *, params, int, plen, u32, flags)
|
||||
{
|
||||
struct net *net = dev_net(skb->dev);
|
||||
int index = 0;
|
||||
|
||||
if (plen < sizeof(*params))
|
||||
return -EINVAL;
|
||||
|
||||
switch (params->family) {
|
||||
#if IS_ENABLED(CONFIG_INET)
|
||||
case AF_INET:
|
||||
return bpf_ipv4_fib_lookup(dev_net(skb->dev), params, flags);
|
||||
index = bpf_ipv4_fib_lookup(net, params, flags, false);
|
||||
break;
|
||||
#endif
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
case AF_INET6:
|
||||
return bpf_ipv6_fib_lookup(dev_net(skb->dev), params, flags);
|
||||
index = bpf_ipv6_fib_lookup(net, params, flags, false);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (index > 0) {
|
||||
struct net_device *dev;
|
||||
|
||||
dev = dev_get_by_index_rcu(net, index);
|
||||
if (!is_skb_forwardable(dev, skb))
|
||||
index = 0;
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
|
||||
|
|
|
@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* MTU selection:
|
||||
* 1. mtu on route is locked - use it
|
||||
* 2. mtu from nexthop exception
|
||||
* 3. mtu from egress device
|
||||
*/
|
||||
|
||||
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
|
||||
{
|
||||
struct fib_info *fi = res->fi;
|
||||
struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
|
||||
struct net_device *dev = nh->nh_dev;
|
||||
u32 mtu = 0;
|
||||
|
||||
if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
|
||||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
|
||||
mtu = fi->fib_mtu;
|
||||
|
||||
if (likely(!mtu)) {
|
||||
struct fib_nh_exception *fnhe;
|
||||
|
||||
fnhe = find_exception(nh, daddr);
|
||||
if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
|
||||
mtu = fnhe->fnhe_pmtu;
|
||||
}
|
||||
|
||||
if (likely(!mtu))
|
||||
mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
|
||||
|
||||
return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
|
||||
}
|
||||
|
||||
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
|
||||
__be32 daddr, const bool do_cache)
|
||||
{
|
||||
|
|
|
@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
|
|||
return f6i;
|
||||
}
|
||||
|
||||
static u32
|
||||
eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
|
||||
struct in6_addr *saddr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Default stub table: every hook set here points at an eafnosupport_*
 * handler defined in this file.
 */
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
	.ipv6_dst_lookup	= eafnosupport_ipv6_dst_lookup,
	.fib6_get_table		= eafnosupport_fib6_get_table,
	.fib6_table_lookup	= eafnosupport_fib6_table_lookup,
	.fib6_lookup		= eafnosupport_fib6_lookup,
	.fib6_multipath_select	= eafnosupport_fib6_multipath_select,
	.ip6_mtu_from_fib6	= eafnosupport_ip6_mtu_from_fib6,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
|
||||
|
||||
|
|
|
@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
|
|||
.fib6_table_lookup = fib6_table_lookup,
|
||||
.fib6_lookup = fib6_lookup,
|
||||
.fib6_multipath_select = fib6_multipath_select,
|
||||
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
|
||||
.udpv6_encap_enable = udpv6_encap_enable,
|
||||
.ndisc_send_na = ndisc_send_na,
|
||||
.nd_tbl = &nd_tbl,
|
||||
|
|
|
@ -2603,6 +2603,54 @@ out:
|
|||
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
|
||||
}
|
||||
|
||||
/* MTU selection:
|
||||
* 1. mtu on route is locked - use it
|
||||
* 2. mtu from nexthop exception
|
||||
* 3. mtu from egress device
|
||||
*
|
||||
* based on ip6_dst_mtu_forward and exception logic of
|
||||
* rt6_find_cached_rt; called with rcu_read_lock
|
||||
*/
|
||||
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
|
||||
struct in6_addr *saddr)
|
||||
{
|
||||
struct rt6_exception_bucket *bucket;
|
||||
struct rt6_exception *rt6_ex;
|
||||
struct in6_addr *src_key;
|
||||
struct inet6_dev *idev;
|
||||
u32 mtu = 0;
|
||||
|
||||
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
|
||||
mtu = f6i->fib6_pmtu;
|
||||
if (mtu)
|
||||
goto out;
|
||||
}
|
||||
|
||||
src_key = NULL;
|
||||
#ifdef CONFIG_IPV6_SUBTREES
|
||||
if (f6i->fib6_src.plen)
|
||||
src_key = saddr;
|
||||
#endif
|
||||
|
||||
bucket = rcu_dereference(f6i->rt6i_exception_bucket);
|
||||
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
|
||||
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
|
||||
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
|
||||
|
||||
if (likely(!mtu)) {
|
||||
struct net_device *dev = fib6_info_nh_dev(f6i);
|
||||
|
||||
mtu = IPV6_MIN_MTU;
|
||||
idev = __in6_dev_get(dev);
|
||||
if (idev && idev->cnf.mtu6 > mtu)
|
||||
mtu = idev->cnf.mtu6;
|
||||
}
|
||||
|
||||
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
|
||||
out:
|
||||
return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
|
||||
}
|
||||
|
||||
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
|
||||
struct flowi6 *fl6)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue