diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 474396353e7f..64499766e00f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -36,11 +36,12 @@ #include #include +#define RT_FL_TOS(oldflp4) \ + ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) + #define DRV_NAME "vrf" #define DRV_VERSION "1.0" -#define vrf_is_slave(dev) ((dev)->flags & IFF_SLAVE) - #define vrf_master_get_rcu(dev) \ ((struct net_device *)rcu_dereference(dev->rx_handler_data)) @@ -208,7 +209,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_VRFSRC | + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; @@ -433,7 +434,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) if (ret < 0) goto out_unregister; - port_dev->flags |= IFF_SLAVE; + port_dev->priv_flags |= IFF_L3MDEV_SLAVE; __vrf_insert_slave(queue, slave); cycle_netdev(port_dev); @@ -448,7 +449,7 @@ out_fail: static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { - if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev)) + if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev); @@ -462,7 +463,7 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) struct slave *slave; netdev_upper_dev_unlink(port_dev, dev); - port_dev->flags &= ~IFF_SLAVE; + port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; netdev_rx_handler_unregister(port_dev); @@ -545,7 +546,7 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, { struct rtable *rth = NULL; - if (!(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) { + if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { struct net_vrf *vrf = netdev_priv(dev); rth = vrf->rth; @@ -555,9 +556,41 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, return rth; } +/* called under rcu_read_lock */ +static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) +{ + struct fib_result res = { .tclassid = 0 }; + struct net *net = dev_net(dev); + u32 orig_tos = fl4->flowi4_tos; + u8 flags = fl4->flowi4_flags; + u8 scope = fl4->flowi4_scope; + u8 tos = RT_FL_TOS(fl4); + + if (unlikely(!fl4->daddr)) + return; + + fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; + fl4->flowi4_iif = LOOPBACK_IFINDEX; + fl4->flowi4_tos = tos & IPTOS_RT_MASK; + fl4->flowi4_scope = ((tos & RTO_ONLINK) ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); + + if (!fib_lookup(net, fl4, &res, 0)) { + if (res.type == RTN_LOCAL) + fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr; + else + fib_select_path(net, &res, fl4, -1); + } + + fl4->flowi4_flags = flags; + fl4->flowi4_tos = orig_tos; + fl4->flowi4_scope = scope; +} + static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, + .l3mdev_get_saddr = vrf_get_saddr, }; static void vrf_get_drvinfo(struct net_device *dev, @@ -672,7 +705,7 @@ static int vrf_device_event(struct notifier_block *unused, if (event == NETDEV_UNREGISTER) { struct net_device *vrf_dev; - if (!vrf_is_slave(dev) || netif_is_l3_master(dev)) + if (!netif_is_l3_slave(dev)) goto out; vrf_dev = netdev_master_upper_dev_get(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b9450784ae06..b3374402c1ea 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1261,6 +1261,7 @@ struct net_device_ops { * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master + * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1286,6 +1287,7 @@ enum netdev_priv_flags { IFF_L3MDEV_MASTER = 1<<20, IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, + IFF_L3MDEV_SLAVE = 1<<23, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -3830,6 +3832,11 @@ static inline bool netif_is_l3_master(const struct net_device *dev) return dev->priv_flags & IFF_L3MDEV_MASTER; } +static inline bool netif_is_l3_slave(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_SLAVE; +} + static inline bool netif_is_bridge_master(const struct net_device *dev) { return dev->priv_flags & IFF_EBRIDGE; diff --git a/include/net/flow.h b/include/net/flow.h index 9b85db85f13c..83969eebebf3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -34,7 +34,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_VRFSRC 0x04 +#define FLOWI_FLAG_L3MDEV_SRC 0x04 #define FLOWI_FLAG_SKIP_NH_OIF 0x08 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 7a51fd8d99e4..ac5c6e80586a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -329,6 +329,8 @@ static inline int fib_multipath_hash(__be32 saddr, __be32 daddr) } void fib_select_multipath(struct fib_result *res, int hash); +void fib_select_path(struct net *net, struct fib_result *res, + struct flowi4 *fl4, int mp_hash); /* Exported by fib_trie.c */ void fib_trie_init(void); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 87cee05a0a17..44a19a171104 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -17,12 +17,16 @@ * @l3mdev_fib_table: Get FIB table id to use for lookups * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device + * + * @l3mdev_get_saddr: Get source address for a flow */ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, const struct flowi4 *fl4); + void (*l3mdev_get_saddr)(struct net_device *dev, + struct flowi4 *fl4); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -100,6 +104,25 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return rc; } +static inline void l3mdev_get_saddr(struct net *net, int ifindex, + struct flowi4 *fl4) +{ + struct net_device *dev; + + if (ifindex) { + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_get_saddr) { + dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); + } + + rcu_read_unlock(); + } +} + #else static inline int l3mdev_master_ifindex_rcu(struct net_device *dev) @@ -144,6 +167,10 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return false; } +static inline void l3mdev_get_saddr(struct net *net, int ifindex, + struct flowi4 *fl4) +{ +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/include/net/route.h b/include/net/route.h index d32cb76f5302..ee81307863d5 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -266,9 +266,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - if (netif_index_is_l3_master(sock_net(sk), oif)) - flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF; - flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, protocol, flow_flags, dst, src, dport, sport); } @@ -285,6 +282,10 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_route_connect_init(fl4, dst, src, tos, oif, protocol, sport, dport, sk); + if (!src && oif) { + l3mdev_get_saddr(net, oif, fl4); + src = fl4->saddr; + } if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 7bd698c3bd3a..af77298c8b4f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1557,3 +1557,24 @@ void fib_select_multipath(struct fib_result *res, int hash) res->nh_sel = 0; } #endif + +void fib_select_path(struct net *net, struct fib_result *res, + struct flowi4 *fl4, int mp_hash) +{ +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { + if (mp_hash < 0) + mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr); + fib_select_multipath(res, mp_hash); + } + else +#endif + if (!res->prefixlen && + res->table->tb_num_default > 1 && + res->type == RTN_UNICAST && !fl4->flowi4_oif) + fib_select_default(fl4, res); + + if (!fl4->saddr) + fl4->saddr = FIB_RES_PREFSRC(net, *res); +} +EXPORT_SYMBOL_GPL(fib_select_path); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 28ef8a913130..09a07e8b2f35 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -484,6 +484,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd, static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; @@ -543,7 +544,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); + err = ip_cmsg_send(net, msg, &ipc, false); if (err) goto out; if (ipc.opt) @@ -598,6 +599,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); + if (!saddr && ipc.oif) + l3mdev_get_saddr(net, ipc.oif, &fl4); + if (!inet->hdrincl) { rfv.msg = msg; rfv.hlen = 0; @@ -608,7 +612,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); - rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 54297d3a0559..54e6f456a760 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2238,21 +2238,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, goto make_route; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { - if (mp_hash < 0) - mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr); - fib_select_multipath(&res, mp_hash); - } - else -#endif - if (!res.prefixlen && - res.table->tb_num_default > 1 && - res.type == RTN_UNICAST && !fl4->flowi4_oif) - fib_select_default(fl4, &res); - - if (!fl4->saddr) - fl4->saddr = FIB_RES_PREFSRC(net, res); + fib_select_path(net, &res, fl4, mp_hash); dev_out = FIB_RES_DEV(res); fl4->flowi4_oif = dev_out->ifindex; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 156ba75b6000..e1fc129099ea 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1017,30 +1017,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &fl4_stack; - /* unconnected socket. If output device is enslaved to a VRF - * device lookup source address from VRF table. This mimics - * behavior of ip_route_connect{_init}. - */ - if (netif_index_is_l3_master(net, ipc.oif)) { - flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, - RT_SCOPE_UNIVERSE, sk->sk_protocol, - (flow_flags | FLOWI_FLAG_VRFSRC | - FLOWI_FLAG_SKIP_NH_OIF), - faddr, saddr, dport, - inet->inet_sport); - - rt = ip_route_output_flow(net, fl4, sk); - if (!IS_ERR(rt)) { - saddr = fl4->saddr; - ip_rt_put(rt); - } - } - flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, faddr, saddr, dport, inet->inet_sport); + if (!saddr && ipc.oif) + l3mdev_get_saddr(net, ipc.oif, fl4); + security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ddf75ad41713..8e5ead366e7f 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -26,11 +26,11 @@ int l3mdev_master_ifindex_rcu(struct net_device *dev) if (netif_is_l3_master(dev)) { ifindex = dev->ifindex; - } else if (dev->flags & IFF_SLAVE) { + } else if (netif_is_l3_slave(dev)) { struct net_device *master; master = netdev_master_upper_dev_get_rcu(dev); - if (master && netif_is_l3_master(master)) + if (master) ifindex = master->ifindex; } @@ -54,7 +54,7 @@ u32 l3mdev_fib_table_rcu(const struct net_device *dev) if (netif_is_l3_master(dev)) { if (dev->l3mdev_ops->l3mdev_fib_table) tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev); - } else if (dev->flags & IFF_SLAVE) { + } else if (netif_is_l3_slave(dev)) { /* Users of netdev_master_upper_dev_get_rcu need non-const, * but current inet_*type functions take a const */ @@ -62,7 +62,7 @@ u32 l3mdev_fib_table_rcu(const struct net_device *dev) const struct net_device *master; master = netdev_master_upper_dev_get_rcu(_dev); - if (master && netif_is_l3_master(master) && + if (master && master->l3mdev_ops->l3mdev_fib_table) tb_id = master->l3mdev_ops->l3mdev_fib_table(master); }