OpenCloudOS-Kernel/net/bridge/br_mdb.c

493 lines
11 KiB
C
Raw Normal View History

#include <linux/err.h>
#include <linux/igmp.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <net/ip.h>
#include <net/netlink.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#endif
#include "br_private.h"
static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *p;
struct nlattr *nest;
if (!br->multicast_router || hlist_empty(&br->router_list))
return 0;
nest = nla_nest_start(skb, MDBA_ROUTER);
if (nest == NULL)
return -EMSGSIZE;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 09:06:00 +08:00
hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex))
goto fail;
}
nla_nest_end(skb, nest);
return 0;
fail:
nla_nest_cancel(skb, nest);
return -EMSGSIZE;
}
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_mdb_htable *mdb;
struct nlattr *nest, *nest2;
int i, err = 0;
int idx = 0, s_idx = cb->args[1];
if (br->multicast_disabled)
return 0;
mdb = rcu_dereference(br->mdb);
if (!mdb)
return 0;
nest = nla_nest_start(skb, MDBA_MDB);
if (nest == NULL)
return -EMSGSIZE;
for (i = 0; i < mdb->max; i++) {
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p, **pp;
struct net_bridge_port *port;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 09:06:00 +08:00
hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
if (idx < s_idx)
goto skip;
nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
if (nest2 == NULL) {
err = -EMSGSIZE;
goto out;
}
for (pp = &mp->ports;
(p = rcu_dereference(*pp)) != NULL;
pp = &p->next) {
port = p->port;
if (port) {
struct br_mdb_entry e;
memset(&e, 0, sizeof(e));
e.ifindex = port->dev->ifindex;
e.state = p->state;
if (p->addr.proto == htons(ETH_P_IP))
e.addr.u.ip4 = p->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
if (p->addr.proto == htons(ETH_P_IPV6))
e.addr.u.ip6 = p->addr.u.ip6;
#endif
e.addr.proto = p->addr.proto;
if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) {
nla_nest_cancel(skb, nest2);
err = -EMSGSIZE;
goto out;
}
}
}
nla_nest_end(skb, nest2);
skip:
idx++;
}
}
out:
cb->args[1] = idx;
nla_nest_end(skb, nest);
return err;
}
static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net_device *dev;
struct net *net = sock_net(skb->sk);
struct nlmsghdr *nlh = NULL;
int idx = 0, s_idx;
s_idx = cb->args[0];
rcu_read_lock();
/* In theory this could be wrapped to 0... */
cb->seq = net->dev_base_seq + br_mdb_rehash_seq;
for_each_netdev_rcu(net, dev) {
if (dev->priv_flags & IFF_EBRIDGE) {
struct br_port_msg *bpm;
if (idx < s_idx)
goto skip;
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_GETMDB,
sizeof(*bpm), NLM_F_MULTI);
if (nlh == NULL)
break;
bpm = nlmsg_data(nlh);
memset(bpm, 0, sizeof(*bpm));
bpm->ifindex = dev->ifindex;
if (br_mdb_fill_info(skb, cb, dev) < 0)
goto out;
if (br_rports_fill_info(skb, cb, dev) < 0)
goto out;
cb->args[1] = 0;
nlmsg_end(skb, nlh);
skip:
idx++;
}
}
out:
if (nlh)
nlmsg_end(skb, nlh);
rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
}
static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
struct net_device *dev,
struct br_mdb_entry *entry, u32 pid,
u32 seq, int type, unsigned int flags)
{
struct nlmsghdr *nlh;
struct br_port_msg *bpm;
struct nlattr *nest, *nest2;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI);
if (!nlh)
return -EMSGSIZE;
bpm = nlmsg_data(nlh);
memset(bpm, 0, sizeof(*bpm));
bpm->family = AF_BRIDGE;
bpm->ifindex = dev->ifindex;
nest = nla_nest_start(skb, MDBA_MDB);
if (nest == NULL)
goto cancel;
nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
if (nest2 == NULL)
goto end;
if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry))
goto end;
nla_nest_end(skb, nest2);
nla_nest_end(skb, nest);
return nlmsg_end(skb, nlh);
end:
nla_nest_end(skb, nest);
cancel:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
static inline size_t rtnl_mdb_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct br_port_msg))
+ nla_total_size(sizeof(struct br_mdb_entry));
}
static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
int type)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
if (!skb)
goto errout;
err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF);
if (err < 0) {
kfree_skb(skb);
goto errout;
}
rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC);
return;
errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
struct br_ip *group, int type)
{
struct br_mdb_entry entry;
memset(&entry, 0, sizeof(entry));
entry.ifindex = port->dev->ifindex;
entry.addr.proto = group->proto;
entry.addr.u.ip4 = group->u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
entry.addr.u.ip6 = group->u.ip6;
#endif
__br_mdb_notify(dev, &entry, type);
}
static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
{
if (entry->ifindex == 0)
return false;
if (entry->addr.proto == htons(ETH_P_IP)) {
if (!ipv4_is_multicast(entry->addr.u.ip4))
return false;
if (ipv4_is_local_multicast(entry->addr.u.ip4))
return false;
#if IS_ENABLED(CONFIG_IPV6)
} else if (entry->addr.proto == htons(ETH_P_IPV6)) {
if (!ipv6_is_transient_multicast(&entry->addr.u.ip6))
return false;
#endif
} else
return false;
if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
return false;
return true;
}
static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device **pdev, struct br_mdb_entry **pentry)
{
struct net *net = sock_net(skb->sk);
struct br_mdb_entry *entry;
struct br_port_msg *bpm;
struct nlattr *tb[MDBA_SET_ENTRY_MAX+1];
struct net_device *dev;
int err;
err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL);
if (err < 0)
return err;
bpm = nlmsg_data(nlh);
if (bpm->ifindex == 0) {
pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n");
return -EINVAL;
}
dev = __dev_get_by_index(net, bpm->ifindex);
if (dev == NULL) {
pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n");
return -ENODEV;
}
if (!(dev->priv_flags & IFF_EBRIDGE)) {
pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n");
return -EOPNOTSUPP;
}
*pdev = dev;
if (!tb[MDBA_SET_ENTRY] ||
nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n");
return -EINVAL;
}
entry = nla_data(tb[MDBA_SET_ENTRY]);
if (!is_valid_mdb_entry(entry)) {
pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n");
return -EINVAL;
}
*pentry = entry;
return 0;
}
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
struct br_ip *group, unsigned char state)
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_mdb_htable *mdb;
int err;
mdb = mlock_dereference(br->mdb, br);
mp = br_mdb_ip_get(mdb, group);
if (!mp) {
mp = br_multicast_new_group(br, port, group);
err = PTR_ERR(mp);
if (IS_ERR(mp))
return err;
}
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->port == port)
return -EEXIST;
if ((unsigned long)p->port < (unsigned long)port)
break;
}
p = br_multicast_new_port_group(port, group, *pp, state);
if (unlikely(!p))
return -ENOMEM;
rcu_assign_pointer(*pp, p);
br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
return 0;
}
static int __br_mdb_add(struct net *net, struct net_bridge *br,
struct br_mdb_entry *entry)
{
struct br_ip ip;
struct net_device *dev;
struct net_bridge_port *p;
int ret;
if (!netif_running(br->dev) || br->multicast_disabled)
return -EINVAL;
dev = __dev_get_by_index(net, entry->ifindex);
if (!dev)
return -ENODEV;
p = br_port_get_rtnl(dev);
if (!p || p->br != br || p->state == BR_STATE_DISABLED)
return -EINVAL;
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
else
ip.u.ip6 = entry->addr.u.ip6;
#endif
spin_lock_bh(&br->multicast_lock);
ret = br_mdb_add_group(br, p, &ip, entry->state);
spin_unlock_bh(&br->multicast_lock);
return ret;
}
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct br_mdb_entry *entry;
struct net_device *dev;
struct net_bridge *br;
int err;
err = br_mdb_parse(skb, nlh, &dev, &entry);
if (err < 0)
return err;
br = netdev_priv(dev);
err = __br_mdb_add(net, br, entry);
if (!err)
__br_mdb_notify(dev, entry, RTM_NEWMDB);
return err;
}
static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
{
struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct br_ip ip;
int err = -EINVAL;
if (!netif_running(br->dev) || br->multicast_disabled)
return -EINVAL;
if (timer_pending(&br->multicast_querier_timer))
return -EBUSY;
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
else
ip.u.ip6 = entry->addr.u.ip6;
#endif
spin_lock_bh(&br->multicast_lock);
mdb = mlock_dereference(br->mdb, br);
mp = br_mdb_ip_get(mdb, &ip);
if (!mp)
goto unlock;
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (!p->port || p->port->dev->ifindex != entry->ifindex)
continue;
if (p->port->state == BR_STATE_DISABLED)
goto unlock;
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
err = 0;
if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
break;
}
unlock:
spin_unlock_bh(&br->multicast_lock);
return err;
}
static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net_device *dev;
struct br_mdb_entry *entry;
struct net_bridge *br;
int err;
err = br_mdb_parse(skb, nlh, &dev, &entry);
if (err < 0)
return err;
br = netdev_priv(dev);
err = __br_mdb_del(br, entry);
if (!err)
__br_mdb_notify(dev, entry, RTM_DELMDB);
return err;
}
void br_mdb_init(void)
{
rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
}
void br_mdb_uninit(void)
{
rtnl_unregister(PF_BRIDGE, RTM_GETMDB);
rtnl_unregister(PF_BRIDGE, RTM_NEWMDB);
rtnl_unregister(PF_BRIDGE, RTM_DELMDB);
}