bonding: Fix stacked device detection in arp monitoring

Prior to commit fbd929f2dc
	bonding: support QinQ for bond arp interval

the arp monitoring code allowed for proper detection of devices
stacked on top of vlans.  Since the above commit, the
code can still detect a device stacked on top of a single
vlan, but not a device stacked on top of a Q-in-Q configuration.
The search will only set the inner vlan tag if the route
device is the vlan device.  However, this is not always the
case, as it is possible to extend the stacked configuration.

With this patch it is possible to provision devices on
top of a Q-in-Q vlan configuration that should be used as
a source of ARP monitoring information.

For example:
ip link add link bond0 vlan10 type vlan proto 802.1q id 10
ip link add link vlan10 vlan100 type vlan proto 802.1q id 100
ip link add link vlan100 type macvlan
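
With the above stack, ARP probes routed via the macvlan device
should now go out with both tags.  Illustratively (this layout is
an editorial note, not part of the original message):

    tags[0]: proto 802.1Q, vid 10   (vlan10  - outer tag on the wire)
    tags[1]: proto 802.1Q, vid 100  (vlan100 - inner tag)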

Note:  This patch limits the number of stacked VLANs to 2,
just like before.  The original, however, had another issue
in that if we had more than 2 levels of VLANs, we would end
up generating incorrectly tagged traffic.  This is no longer
possible.

Fixes: fbd929f2dc ("bonding: support QinQ for bond arp interval")
CC: Jay Vosburgh <j.vosburgh@gmail.com>
CC: Veaceslav Falico <vfalico@redhat.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: Ding Tianhong <dingtianhong@huawei.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 44a4085538
parent 6bd64ac0f9
Author:    Vlad Yasevich, 2014-05-16 17:20:38 -04:00
Committer: David S. Miller
5 changed files with 107 additions and 69 deletions

--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2126,10 +2126,10 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
  */
 static void bond_arp_send(struct net_device *slave_dev, int arp_op,
 			  __be32 dest_ip, __be32 src_ip,
-			  struct bond_vlan_tag *inner,
-			  struct bond_vlan_tag *outer)
+			  struct bond_vlan_tag *tags)
 {
 	struct sk_buff *skb;
+	int i;
 
 	pr_debug("arp %d on slave %s: dst %pI4 src %pI4\n",
 		 arp_op, slave_dev->name, &dest_ip, &src_ip);
@@ -2141,21 +2141,26 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op,
 		net_err_ratelimited("ARP packet allocation failed\n");
 		return;
 	}
-	if (outer->vlan_id) {
-		if (inner->vlan_id) {
-			pr_debug("inner tag: proto %X vid %X\n",
-				 ntohs(inner->vlan_proto), inner->vlan_id);
-			skb = __vlan_put_tag(skb, inner->vlan_proto,
-					     inner->vlan_id);
-			if (!skb) {
-				net_err_ratelimited("failed to insert inner VLAN tag\n");
-				return;
-			}
-		}
 
-		pr_debug("outer reg: proto %X vid %X\n",
-			 ntohs(outer->vlan_proto), outer->vlan_id);
-		skb = vlan_put_tag(skb, outer->vlan_proto, outer->vlan_id);
+	/* Go through all the tags backwards and add them to the packet */
+	for (i = BOND_MAX_VLAN_ENCAP - 1; i > 0; i--) {
+		if (!tags[i].vlan_id)
+			continue;
+
+		pr_debug("inner tag: proto %X vid %X\n",
+			 ntohs(tags[i].vlan_proto), tags[i].vlan_id);
+		skb = __vlan_put_tag(skb, tags[i].vlan_proto,
+				     tags[i].vlan_id);
+		if (!skb) {
+			net_err_ratelimited("failed to insert inner VLAN tag\n");
+			return;
+		}
+	}
+
+	/* Set the outer tag */
+	if (tags[0].vlan_id) {
+		pr_debug("outer tag: proto %X vid %X\n",
+			 ntohs(tags[0].vlan_proto), tags[0].vlan_id);
+		skb = vlan_put_tag(skb, tags[0].vlan_proto, tags[0].vlan_id);
 		if (!skb) {
 			net_err_ratelimited("failed to insert outer VLAN tag\n");
 			return;
@@ -2164,22 +2169,52 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op,
 	arp_xmit(skb);
 }
 
+/* Validate the device path between the @start_dev and the @end_dev.
+ * The path is valid if the @end_dev is reachable through device
+ * stacking.
+ * When the path is validated, collect any vlan information in the
+ * path.
+ */
+static bool bond_verify_device_path(struct net_device *start_dev,
+				    struct net_device *end_dev,
+				    struct bond_vlan_tag *tags)
+{
+	struct net_device *upper;
+	struct list_head *iter;
+	int idx;
+
+	if (start_dev == end_dev)
+		return true;
+
+	netdev_for_each_upper_dev_rcu(start_dev, upper, iter) {
+		if (bond_verify_device_path(upper, end_dev, tags)) {
+			if (is_vlan_dev(upper)) {
+				idx = vlan_get_encap_level(upper);
+				if (idx >= BOND_MAX_VLAN_ENCAP)
+					return false;
+				tags[idx].vlan_proto =
+						vlan_dev_vlan_proto(upper);
+				tags[idx].vlan_id = vlan_dev_vlan_id(upper);
+			}
+			return true;
+		}
+	}
+
+	return false;
+}
+
 static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 {
-	struct net_device *upper, *vlan_upper;
-	struct list_head *iter, *vlan_iter;
 	struct rtable *rt;
-	struct bond_vlan_tag inner, outer;
+	struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP];
 	__be32 *targets = bond->params.arp_targets, addr;
 	int i;
+	bool ret;
 
 	for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
 		pr_debug("basa: target %pI4\n", &targets[i]);
-		inner.vlan_proto = 0;
-		inner.vlan_id = 0;
-		outer.vlan_proto = 0;
-		outer.vlan_id = 0;
+		memset(tags, 0, sizeof(tags));
 
 		/* Find out through which dev should the packet go */
 		rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
				     RTO_ONLINK, 0);
@@ -2192,7 +2227,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 			net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n",
 					     bond->dev->name,
 					     &targets[i]);
-			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 0, &inner, &outer);
+			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
+				      0, tags);
 			continue;
 		}
 
@@ -2201,51 +2237,11 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 			goto found;
 
 		rcu_read_lock();
-		/* first we search only for vlan devices. for every vlan
-		 * found we verify its upper dev list, searching for the
-		 * rt->dst.dev. If found we save the tag of the vlan and
-		 * proceed to send the packet.
-		 */
-		netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper,
-						  vlan_iter) {
-			if (!is_vlan_dev(vlan_upper))
-				continue;
-
-			if (vlan_upper == rt->dst.dev) {
-				outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper);
-				outer.vlan_id = vlan_dev_vlan_id(vlan_upper);
-				rcu_read_unlock();
-				goto found;
-			}
-
-			netdev_for_each_all_upper_dev_rcu(vlan_upper, upper,
-							  iter) {
-				if (upper == rt->dst.dev) {
-					/* If the upper dev is a vlan dev too,
-					 * set the vlan tag to inner tag.
-					 */
-					if (is_vlan_dev(upper)) {
-						inner.vlan_proto = vlan_dev_vlan_proto(upper);
-						inner.vlan_id = vlan_dev_vlan_id(upper);
-					}
-					outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper);
-					outer.vlan_id = vlan_dev_vlan_id(vlan_upper);
-					rcu_read_unlock();
-					goto found;
-				}
-			}
-		}
-
-		/* if the device we're looking for is not on top of any of
-		 * our upper vlans, then just search for any dev that
-		 * matches, and in case it's a vlan - save the id
-		 */
-		netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
-			if (upper == rt->dst.dev) {
-				rcu_read_unlock();
-				goto found;
-			}
-		}
+		ret = bond_verify_device_path(bond->dev, rt->dst.dev, tags);
 		rcu_read_unlock();
 
+		if (ret)
+			goto found;
+
 		/* Not our device - skip */
 		pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
@@ -2259,7 +2255,7 @@ found:
 		addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);
 		ip_rt_put(rt);
 		bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
-			      addr, &inner, &outer);
+			      addr, tags);
 	}
 }
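
As an editorial aid, here is a minimal userspace sketch of the
recursion above (illustrative only; mock_dev and the single-upper
simplification are stand-ins for the kernel types, not part of the
commit).  Tags are recorded on the way back out of the recursion, so
each vlan lands at the slot matching its encapsulation level, and a
vlan deeper than the maximum makes the walk fail, mirroring the
"limited to 2" note in the commit message.

/* Userspace mock of bond_verify_device_path() -- illustrative only */
#include <stdbool.h>
#include <stdio.h>

#define MAX_VLAN_ENCAP 2		/* mirrors BOND_MAX_VLAN_ENCAP */

struct mock_dev {
	const char *name;
	struct mock_dev *upper;		/* single upper for simplicity */
	bool is_vlan;
	unsigned short vid;
	int encap_level;		/* mirrors vlan_get_encap_level() */
};

static bool verify_path(const struct mock_dev *start,
			const struct mock_dev *end, unsigned short *tags)
{
	if (start == end)
		return true;
	if (!start->upper)
		return false;
	if (verify_path(start->upper, end, tags)) {
		if (start->upper->is_vlan) {
			if (start->upper->encap_level >= MAX_VLAN_ENCAP)
				return false;	/* stack too deep: reject */
			tags[start->upper->encap_level] = start->upper->vid;
		}
		return true;
	}
	return false;
}

int main(void)
{
	/* bond0 <- vlan10 <- vlan100 <- macvlan0, as in the commit message */
	struct mock_dev macvlan0 = { "macvlan0", NULL,      false, 0,   0 };
	struct mock_dev vlan100  = { "vlan100",  &macvlan0, true,  100, 1 };
	struct mock_dev vlan10   = { "vlan10",   &vlan100,  true,  10,  0 };
	struct mock_dev bond0    = { "bond0",    &vlan10,   false, 0,   0 };
	unsigned short tags[MAX_VLAN_ENCAP] = { 0 };

	if (verify_path(&bond0, &macvlan0, tags))
		printf("outer vid %hu, inner vid %hu\n", tags[0], tags[1]);
	return 0;
}

Running this prints "outer vid 10, inner vid 100": the vlan closest
to the bond supplies the outer tag, which bond_arp_send() then pushes
last after walking the tags array backwards.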

--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -36,6 +36,7 @@
 
 #define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"
 
+#define BOND_MAX_VLAN_ENCAP	2
 #define BOND_MAX_ARP_TARGETS	16
 
 #define BOND_DEFAULT_MIIMON	100

--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -484,4 +484,10 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 		 */
 		skb->protocol = htons(ETH_P_802_2);
 }
+
+static inline int vlan_get_encap_level(struct net_device *dev)
+{
+	BUG_ON(!is_vlan_dev(dev));
+	return vlan_dev_priv(dev)->nest_level;
+}
 #endif /* !(_LINUX_IF_VLAN_H_) */
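
Editorial note: for the devices created in the commit message, the
encap level is expected to match the index that
bond_verify_device_path() stores each tag at.  Illustratively
(vlan10_dev and vlan100_dev are hypothetical struct net_device
pointers, not from the patch):

	vlan_get_encap_level(vlan10_dev);	/* 0: vlan directly above bond0, outer slot */
	vlan_get_encap_level(vlan100_dev);	/* 1: vlan stacked on vlan10, inner slot */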

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3056,9 +3056,18 @@ extern int		weight_p;
 extern int		bpf_jit_enable;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+						 struct list_head **iter);
 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 						     struct list_head **iter);
 
+/* iterate through upper list, must be called under RCU read lock */
+#define netdev_for_each_upper_dev_rcu(dev, updev, iter) \
+	for (iter = &(dev)->adj_list.upper, \
+	     updev = netdev_upper_get_next_dev_rcu(dev, &(iter)); \
+	     updev; \
+	     updev = netdev_upper_get_next_dev_rcu(dev, &(iter)))
+
 /* iterate through upper list, must be called under RCU read lock */
 #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
 	for (iter = &(dev)->all_adj_list.upper, \
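
A hedged usage sketch of the new iterator (the wrapper function is
illustrative, not from the patch).  Unlike
netdev_for_each_all_upper_dev_rcu(), which walks the full transitive
set of uppers, the new macro visits only directly adjacent uppers,
which is what lets bond_verify_device_path() recover the exact
stacking order by recursing level by level:

	/* Illustrative only: print each direct upper device of @dev */
	static void example_print_uppers(struct net_device *dev)
	{
		struct net_device *upper;
		struct list_head *iter;

		rcu_read_lock();
		netdev_for_each_upper_dev_rcu(dev, upper, iter)
			pr_info("%s: direct upper %s\n", dev->name, upper->name);
		rcu_read_unlock();
	}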

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4541,6 +4541,32 @@ void *netdev_adjacent_get_private(struct list_head *adj_list)
 }
 EXPORT_SYMBOL(netdev_adjacent_get_private);
 
+/**
+ * netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+						 struct list_head **iter)
+{
+	struct netdev_adjacent *upper;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
+
+	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+	if (&upper->list == &dev->adj_list.upper)
+		return NULL;
+
+	*iter = &upper->list;
+
+	return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
 /**
  * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device