ipv4: reset rt_iif for recirculated mcast/bcast out pkts

Multicast or broadcast egress packets have rt_iif set to the oif. These
packets might be recirculated back as input and lookup to the raw
sockets may fail because they are bound to the incoming interface
(skb_iif). If rt_iif is not zero, during the lookup, inet_iif() function
returns rt_iif instead of skb_iif. Hence, the lookup fails.

v2: Make it non vrf specific (David Ahern). Reword the changelog to
    reflect it.
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Stephen Suryaputra 2019-06-26 02:21:16 -04:00 committed by David S. Miller
parent ee4297420d
commit 5b18f12898
3 changed files with 46 additions and 0 deletions

View File

@ -221,6 +221,7 @@ void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
bool nopolicy, bool noxfrm, bool will_cache);
struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
struct in_ifaddr;
void fib_add_ifaddr(struct in_ifaddr *);

View File

@ -318,6 +318,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
static int ip_mc_finish_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct rtable *new_rt;
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
@ -326,6 +327,17 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
return ret;
}
/* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting
* this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten,
* see ipv4_pktinfo_prepare().
*/
new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb));
if (new_rt) {
new_rt->rt_iif = 0;
skb_dst_drop(skb);
skb_dst_set(skb, &new_rt->dst);
}
return dev_loopback_xmit(net, sk, skb);
}

View File

@ -1647,6 +1647,39 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
}
EXPORT_SYMBOL(rt_dst_alloc);
struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
{
struct rtable *new_rt;
new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
rt->dst.flags);
if (new_rt) {
new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
new_rt->rt_flags = rt->rt_flags;
new_rt->rt_type = rt->rt_type;
new_rt->rt_is_input = rt->rt_is_input;
new_rt->rt_iif = rt->rt_iif;
new_rt->rt_pmtu = rt->rt_pmtu;
new_rt->rt_mtu_locked = rt->rt_mtu_locked;
new_rt->rt_gw_family = rt->rt_gw_family;
if (rt->rt_gw_family == AF_INET)
new_rt->rt_gw4 = rt->rt_gw4;
else if (rt->rt_gw_family == AF_INET6)
new_rt->rt_gw6 = rt->rt_gw6;
INIT_LIST_HEAD(&new_rt->rt_uncached);
new_rt->dst.flags |= DST_HOST;
new_rt->dst.input = rt->dst.input;
new_rt->dst.output = rt->dst.output;
new_rt->dst.error = rt->dst.error;
new_rt->dst.lastuse = jiffies;
new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
}
return new_rt;
}
EXPORT_SYMBOL(rt_dst_clone);
/* called in rcu_read_lock() section */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev,