Merge branch 'vxlan-fix-default-fdb-entry-user-space-notify-ordering-race'
Roopa Prabhu says:
====================
vxlan: fix default fdb entry user-space notify ordering/race
Problem:
In vxlan_newlink, a default fdb entry is added before register_netdev.
The default fdb creation function notifies user-space of the fdb entry
on a vxlan device that user-space does not yet know about
(RTM_NEWNEIGH is sent before RTM_NEWLINK for the same ifindex).
This series fixes the user-space netlink notification ordering issue
with the following changes:
- Decouple fdb notify from fdb create.
- Move fdb notify after register_netdev.
- Modify rtnl_configure_link to allow configuring a link early.
- Call rtnl_configure_link in the vxlan newlink handler to notify
user-space about the new link before the fdb notify,
thereby avoiding the user-space race.
====================
Fixes: afbd8bae9c
("vxlan: add implicit fdb entry for default destination")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
This commit is contained in:
commit
a2f1483b91
|
@ -637,8 +637,61 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
|
|||
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
|
||||
}
|
||||
|
||||
/* Allocate and initialise a new fdb entry for @mac.
 *
 * The entry is allocated with GFP_ATOMIC and filled in from @state,
 * @ndm_flags and @src_vni; both timestamps are set to the current jiffies
 * and the remotes list starts out empty.  This helper does not link the
 * entry into the hash table and does not use @vxlan.
 *
 * Returns the new entry, or NULL if the allocation fails.
 *
 * NOTE(review): the comment previously at this position read "Add new
 * entry to forwarding table -- assumes lock held"; this helper itself
 * touches no shared state -- confirm which function that note belongs to.
 */
|
||||
static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
|
||||
const u8 *mac, __u16 state,
|
||||
__be32 src_vni, __u8 ndm_flags)
|
||||
{
|
||||
struct vxlan_fdb *f;
|
||||
|
||||
f = kmalloc(sizeof(*f), GFP_ATOMIC);
|
||||
if (!f)
|
||||
return NULL;
|
||||
f->state = state;
|
||||
f->flags = ndm_flags;
|
||||
f->updated = f->used = jiffies;
|
||||
f->vni = src_vni;
|
||||
/* no remote destinations yet; the caller appends them */
INIT_LIST_HEAD(&f->remotes);
|
||||
memcpy(f->eth_addr, mac, ETH_ALEN);
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
/* Create an fdb entry for @mac with one remote and link it into the table.
 *
 * Allocates the entry via vxlan_fdb_alloc(), attaches the remote described
 * by @ip/@port/@vni/@ifindex via vxlan_fdb_append(), bumps vxlan->addrcnt
 * and inserts the entry at the head of its hash bucket.  On success the new
 * entry is handed back through @fdb.
 *
 * This function itself does not call vxlan_fdb_notify(); callers that want
 * user-space to see the entry must send the notification themselves.
 *
 * Returns 0 on success, -ENOSPC when cfg.addrmax is set and already
 * reached, -ENOMEM when allocation fails, or the negative error returned
 * by vxlan_fdb_append().
 */
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
|
||||
const u8 *mac, union vxlan_addr *ip,
|
||||
__u16 state, __be16 port, __be32 src_vni,
|
||||
__be32 vni, __u32 ifindex, __u8 ndm_flags,
|
||||
struct vxlan_fdb **fdb)
|
||||
{
|
||||
struct vxlan_rdst *rd = NULL;
|
||||
struct vxlan_fdb *f;
|
||||
int rc;
|
||||
|
||||
/* an addrmax of zero means no limit is enforced */
if (vxlan->cfg.addrmax &&
|
||||
vxlan->addrcnt >= vxlan->cfg.addrmax)
|
||||
return -ENOSPC;
|
||||
|
||||
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
|
||||
f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
|
||||
if (!f)
|
||||
return -ENOMEM;
|
||||
|
||||
rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
|
||||
/* entry is not yet linked anywhere, so a plain kfree() is enough */
if (rc < 0) {
|
||||
kfree(f);
|
||||
return rc;
|
||||
}
|
||||
|
||||
++vxlan->addrcnt;
|
||||
hlist_add_head_rcu(&f->hlist,
|
||||
vxlan_fdb_head(vxlan, mac, src_vni));
|
||||
|
||||
*fdb = f;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Add new entry to forwarding table -- assumes lock held */
|
||||
static int vxlan_fdb_update(struct vxlan_dev *vxlan,
|
||||
const u8 *mac, union vxlan_addr *ip,
|
||||
__u16 state, __u16 flags,
|
||||
__be16 port, __be32 src_vni, __be32 vni,
|
||||
|
@ -688,37 +741,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
|
|||
if (!(flags & NLM_F_CREATE))
|
||||
return -ENOENT;
|
||||
|
||||
if (vxlan->cfg.addrmax &&
|
||||
vxlan->addrcnt >= vxlan->cfg.addrmax)
|
||||
return -ENOSPC;
|
||||
|
||||
/* Disallow replace to add a multicast entry */
|
||||
if ((flags & NLM_F_REPLACE) &&
|
||||
(is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
|
||||
f = kmalloc(sizeof(*f), GFP_ATOMIC);
|
||||
if (!f)
|
||||
return -ENOMEM;
|
||||
|
||||
notify = 1;
|
||||
f->state = state;
|
||||
f->flags = ndm_flags;
|
||||
f->updated = f->used = jiffies;
|
||||
f->vni = src_vni;
|
||||
INIT_LIST_HEAD(&f->remotes);
|
||||
memcpy(f->eth_addr, mac, ETH_ALEN);
|
||||
|
||||
rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
|
||||
if (rc < 0) {
|
||||
kfree(f);
|
||||
rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
|
||||
vni, ifindex, ndm_flags, &f);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
}
|
||||
|
||||
++vxlan->addrcnt;
|
||||
hlist_add_head_rcu(&f->hlist,
|
||||
vxlan_fdb_head(vxlan, mac, src_vni));
|
||||
notify = 1;
|
||||
}
|
||||
|
||||
if (notify) {
|
||||
|
@ -742,13 +775,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
|
|||
kfree(f);
|
||||
}
|
||||
|
||||
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
|
||||
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
|
||||
bool do_notify)
|
||||
{
|
||||
netdev_dbg(vxlan->dev,
|
||||
"delete %pM\n", f->eth_addr);
|
||||
|
||||
--vxlan->addrcnt;
|
||||
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
|
||||
if (do_notify)
|
||||
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
|
||||
|
||||
hlist_del_rcu(&f->hlist);
|
||||
call_rcu(&f->rcu, vxlan_fdb_free);
|
||||
|
@ -864,7 +899,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
|
|||
return -EAFNOSUPPORT;
|
||||
|
||||
spin_lock_bh(&vxlan->hash_lock);
|
||||
err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
|
||||
err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
|
||||
port, src_vni, vni, ifindex, ndm->ndm_flags);
|
||||
spin_unlock_bh(&vxlan->hash_lock);
|
||||
|
||||
|
@ -898,7 +933,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
|
|||
goto out;
|
||||
}
|
||||
|
||||
vxlan_fdb_destroy(vxlan, f);
|
||||
vxlan_fdb_destroy(vxlan, f, true);
|
||||
|
||||
out:
|
||||
return 0;
|
||||
|
@ -1007,7 +1042,7 @@ static bool vxlan_snoop(struct net_device *dev,
|
|||
|
||||
/* close off race between vxlan_flush and incoming packets */
|
||||
if (netif_running(dev))
|
||||
vxlan_fdb_create(vxlan, src_mac, src_ip,
|
||||
vxlan_fdb_update(vxlan, src_mac, src_ip,
|
||||
NUD_REACHABLE,
|
||||
NLM_F_EXCL|NLM_F_CREATE,
|
||||
vxlan->cfg.dst_port,
|
||||
|
@ -2366,7 +2401,7 @@ static void vxlan_cleanup(struct timer_list *t)
|
|||
"garbage collect %pM\n",
|
||||
f->eth_addr);
|
||||
f->state = NUD_STALE;
|
||||
vxlan_fdb_destroy(vxlan, f);
|
||||
vxlan_fdb_destroy(vxlan, f, true);
|
||||
} else if (time_before(timeout, next_timer))
|
||||
next_timer = timeout;
|
||||
}
|
||||
|
@ -2417,7 +2452,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
|
|||
spin_lock_bh(&vxlan->hash_lock);
|
||||
f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
|
||||
if (f)
|
||||
vxlan_fdb_destroy(vxlan, f);
|
||||
vxlan_fdb_destroy(vxlan, f, true);
|
||||
spin_unlock_bh(&vxlan->hash_lock);
|
||||
}
|
||||
|
||||
|
@ -2471,7 +2506,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
|
|||
continue;
|
||||
/* the all_zeros_mac entry is deleted at vxlan_uninit */
|
||||
if (!is_zero_ether_addr(f->eth_addr))
|
||||
vxlan_fdb_destroy(vxlan, f);
|
||||
vxlan_fdb_destroy(vxlan, f, true);
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&vxlan->hash_lock);
|
||||
|
@ -3162,6 +3197,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
|
|||
{
|
||||
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
||||
struct vxlan_dev *vxlan = netdev_priv(dev);
|
||||
struct vxlan_fdb *f = NULL;
|
||||
int err;
|
||||
|
||||
err = vxlan_dev_configure(net, dev, conf, false, extack);
|
||||
|
@ -3175,24 +3211,35 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
|
|||
err = vxlan_fdb_create(vxlan, all_zeros_mac,
|
||||
&vxlan->default_dst.remote_ip,
|
||||
NUD_REACHABLE | NUD_PERMANENT,
|
||||
NLM_F_EXCL | NLM_F_CREATE,
|
||||
vxlan->cfg.dst_port,
|
||||
vxlan->default_dst.remote_vni,
|
||||
vxlan->default_dst.remote_vni,
|
||||
vxlan->default_dst.remote_ifindex,
|
||||
NTF_SELF);
|
||||
NTF_SELF, &f);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
err = register_netdevice(dev);
|
||||
if (err)
|
||||
goto errout;
|
||||
|
||||
err = rtnl_configure_link(dev, NULL);
|
||||
if (err) {
|
||||
vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
|
||||
return err;
|
||||
unregister_netdevice(dev);
|
||||
goto errout;
|
||||
}
|
||||
|
||||
/* notify default fdb entry */
|
||||
if (f)
|
||||
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
|
||||
|
||||
list_add(&vxlan->next, &vn->vxlan_list);
|
||||
return 0;
|
||||
errout:
|
||||
if (f)
|
||||
vxlan_fdb_destroy(vxlan, f, false);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
|
||||
|
@ -3427,6 +3474,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
|
|||
struct vxlan_rdst *dst = &vxlan->default_dst;
|
||||
struct vxlan_rdst old_dst;
|
||||
struct vxlan_config conf;
|
||||
struct vxlan_fdb *f = NULL;
|
||||
int err;
|
||||
|
||||
err = vxlan_nl2conf(tb, data,
|
||||
|
@ -3455,16 +3503,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
|
|||
err = vxlan_fdb_create(vxlan, all_zeros_mac,
|
||||
&dst->remote_ip,
|
||||
NUD_REACHABLE | NUD_PERMANENT,
|
||||
NLM_F_CREATE | NLM_F_APPEND,
|
||||
vxlan->cfg.dst_port,
|
||||
dst->remote_vni,
|
||||
dst->remote_vni,
|
||||
dst->remote_ifindex,
|
||||
NTF_SELF);
|
||||
NTF_SELF, &f);
|
||||
if (err) {
|
||||
spin_unlock_bh(&vxlan->hash_lock);
|
||||
return err;
|
||||
}
|
||||
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
|
||||
}
|
||||
spin_unlock_bh(&vxlan->hash_lock);
|
||||
}
|
||||
|
|
|
@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
|
|||
return err;
|
||||
}
|
||||
|
||||
dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
|
||||
|
||||
__dev_notify_flags(dev, old_flags, ~0U);
|
||||
if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
|
||||
__dev_notify_flags(dev, old_flags, 0U);
|
||||
} else {
|
||||
dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
|
||||
__dev_notify_flags(dev, old_flags, ~0U);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(rtnl_configure_link);
|
||||
|
|
Loading…
Reference in New Issue