From 5d5eacb34c9e1fdc0a47b885d832eaa4de860dc7 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 10 Jul 2014 07:01:58 -0400 Subject: [PATCH 1/2] bridge: fdb dumping takes a filter device Dumping a bridge fdb dumps every fdb entry held. With this change we are going to filter on selected bridge port. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 7 ++++--- drivers/net/vxlan.c | 3 ++- include/linux/netdevice.h | 4 +++- include/linux/rtnetlink.h | 1 + net/bridge/br_fdb.c | 5 +++++ net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 9 ++++++--- 8 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e49352d68ede..2899f783ee1d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7095,13 +7095,14 @@ static int i40e_ndo_fdb_del(struct ndmsg *ndm, static int i40e_ndo_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_pf *pf = np->vsi->back; if (pf->flags & I40E_FLAG_SRIOV_ENABLED) - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + idx = ndo_dflt_fdb_dump(skb, cb, dev, filter_dev, idx); return idx; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index f8de2ae01a5a..0fdbcc8319f7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -427,16 +427,17 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb, - struct net_device *netdev, int idx) + struct net_device *netdev, + struct net_device *filter_dev, int idx) { struct qlcnic_adapter 
*adapter = netdev_priv(netdev); if (!adapter->fdb_mac_learn) - return ndo_dflt_fdb_dump(skb, ncb, netdev, idx); + return ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) || qlcnic_sriov_check(adapter)) - idx = ndo_dflt_fdb_dump(skb, ncb, netdev, idx); + idx = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); return idx; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c2d360150804..e6808f7e4e32 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -933,7 +933,8 @@ out: /* Dump forwarding table */ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, int idx) + struct net_device *dev, + struct net_device *filter_dev, int idx) { struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8b43a28ee0bc..3a320db96180 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -943,7 +943,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * const unsigned char *addr) * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, - * struct net_device *dev, int idx) + * struct net_device *dev, struct net_device *filter_dev, + * int idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. 
* @@ -1114,6 +1115,7 @@ struct net_device_ops { int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx); int (*ndo_bridge_setlink)(struct net_device *dev, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 953937ea5233..167bae7bdfa4 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -78,6 +78,7 @@ extern void __rtnl_unlock(void); extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 7be33667a839..6edecd11ecf0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -676,6 +676,7 @@ errout: int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { struct net_bridge *br = netdev_priv(dev); @@ -691,6 +692,10 @@ int br_fdb_dump(struct sk_buff *skb, if (idx < cb->args[0]) goto skip; + if (filter_dev && (!f->dst || !f->dst->dev || + f->dst->dev != filter_dev)) + goto skip; + if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 23caf5b0309e..62a7fa2e3569 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -399,7 +399,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, int idx); + struct net_device *dev, struct net_device *fdev, int idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); diff --git a/net/core/rtnetlink.c 
b/net/core/rtnetlink.c index 27acaf7ff6d7..90a906e7ac26 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2517,6 +2517,7 @@ skip: int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, + struct net_device *filter_dev, int idx) { int err; @@ -2547,13 +2548,15 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) br_dev = netdev_master_upper_dev_get(dev); ops = br_dev->netdev_ops; if (ops->ndo_fdb_dump) - idx = ops->ndo_fdb_dump(skb, cb, dev, idx); + idx = ops->ndo_fdb_dump(skb, cb, dev, NULL, + idx); } if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, + idx); else - idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); } rcu_read_unlock(); From 5e6d243587990a588143b9da3974833649595587 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 10 Jul 2014 07:01:59 -0400 Subject: [PATCH 2/2] bridge: netlink dump interface at par with brctl Actually better than brctl showmacs because we can filter by bridge port in the kernel. The current bridge netlink interface doesn't scale when you have many bridges, each with large fdbs, or even bridges with many bridge ports. And now for the science non-fiction novel you have all been waiting for.. // let's see what bridge ports we have root@moja-1:/configs/may30-iprt/bridge# ./bridge link show 8: eth1 state DOWN : mtu 1500 master br0 state disabled priority 32 cost 19 17: sw1-p1 state DOWN : mtu 1500 master br0 state disabled priority 32 cost 100 // show all.. 
root@moja-1:/configs/may30-iprt/bridge# ./bridge fdb show 33:33:00:00:00:01 dev bond0 self permanent 33:33:00:00:00:01 dev dummy0 self permanent 33:33:00:00:00:01 dev ifb0 self permanent 33:33:00:00:00:01 dev ifb1 self permanent 33:33:00:00:00:01 dev eth0 self permanent 01:00:5e:00:00:01 dev eth0 self permanent 33:33:ff:22:01:01 dev eth0 self permanent 02:00:00:12:01:02 dev eth1 vlan 0 master br0 permanent 00:17:42:8a:b4:05 dev eth1 vlan 0 master br0 permanent 00:17:42:8a:b4:07 dev eth1 self permanent 33:33:00:00:00:01 dev eth1 self permanent 33:33:00:00:00:01 dev gretap0 self permanent da:ac:46:27:d9:53 dev sw1-p1 vlan 0 master br0 permanent 33:33:00:00:00:01 dev sw1-p1 self permanent //filter by bridge root@moja-1:/configs/may30-iprt/bridge# ./bridge fdb show br br0 02:00:00:12:01:02 dev eth1 vlan 0 master br0 permanent 00:17:42:8a:b4:05 dev eth1 vlan 0 master br0 permanent 00:17:42:8a:b4:07 dev eth1 self permanent 33:33:00:00:00:01 dev eth1 self permanent da:ac:46:27:d9:53 dev sw1-p1 vlan 0 master br0 permanent 33:33:00:00:00:01 dev sw1-p1 self permanent // bridge sw1 has no ports attached.. root@moja-1:/configs/may30-iprt/bridge# ./bridge fdb show br sw1 //filter by port root@moja-1:/configs/may30-iprt/bridge# ./bridge fdb show brport eth1 02:00:00:12:01:02 vlan 0 master br0 permanent 00:17:42:8a:b4:05 vlan 0 master br0 permanent 00:17:42:8a:b4:07 self permanent 33:33:00:00:00:01 self permanent // filter by port + bridge root@moja-1:/configs/may30-iprt/bridge# ./bridge fdb show br br0 brport sw1-p1 da:ac:46:27:d9:53 vlan 0 master br0 permanent 33:33:00:00:00:01 self permanent // for shits and giggles (as they say in New Brunswick), lets // change the mac that br0 uses // Note: a magical fdb entry with no brport is added ... root@moja-1:/configs/may30-iprt/bridge# ip link set dev br0 address 02:00:00:12:01:04 // lets see if we can see the unicorn .. 
root@moja-1:/configs/may30-iprt/bridge# ./bridge fdb show 33:33:00:00:00:01 dev bond0 self permanent 33:33:00:00:00:01 dev dummy0 self permanent 33:33:00:00:00:01 dev ifb0 self permanent 33:33:00:00:00:01 dev ifb1 self permanent 33:33:00:00:00:01 dev eth0 self permanent 01:00:5e:00:00:01 dev eth0 self permanent 33:33:ff:22:01:01 dev eth0 self permanent 02:00:00:12:01:02 dev eth1 vlan 0 master br0 permanent 00:17:42:8a:b4:05 dev eth1 vlan 0 master br0 permanent 00:17:42:8a:b4:07 dev eth1 self permanent 33:33:00:00:00:01 dev eth1 self permanent 33:33:00:00:00:01 dev gretap0 self permanent 02:00:00:12:01:04 dev br0 vlan 0 master br0 permanent <=== there it is da:ac:46:27:d9:53 dev sw1-p1 vlan 0 master br0 permanent 33:33:00:00:00:01 dev sw1-p1 self permanent //can we see it if we filter by bridge? root@moja-1:/configs/may30-iprt/bridge# ./bridge fdb show br br0 02:00:00:12:01:02 dev eth1 vlan 0 master br0 permanent 00:17:42:8a:b4:05 dev eth1 vlan 0 master br0 permanent 00:17:42:8a:b4:07 dev eth1 self permanent 33:33:00:00:00:01 dev eth1 self permanent 02:00:00:12:01:04 dev br0 vlan 0 master br0 permanent <=== there it is da:ac:46:27:d9:53 dev sw1-p1 vlan 0 master br0 permanent 33:33:00:00:00:01 dev sw1-p1 self permanent Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/bridge/br_fdb.c | 15 +++++++-- net/core/rtnetlink.c | 74 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 70 insertions(+), 19 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6edecd11ecf0..0bb9d8b63dd2 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -692,9 +692,18 @@ int br_fdb_dump(struct sk_buff *skb, if (idx < cb->args[0]) goto skip; - if (filter_dev && (!f->dst || !f->dst->dev || - f->dst->dev != filter_dev)) - goto skip; + if (filter_dev && + (!f->dst || f->dst->dev != filter_dev)) { + if (filter_dev != dev) + goto skip; + /* !f->dst is a special case for bridge + * It means the MAC belongs to the bridge + * Therefore need a little more filtering + * we only want to dump the !f->dst case + */ + if (f->dst) + goto skip; + } if (fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 90a906e7ac26..1f8a59e02c48 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2535,30 +2535,72 @@ EXPORT_SYMBOL(ndo_dflt_fdb_dump); static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; - struct net *net = sock_net(skb->sk); struct net_device *dev; + struct nlattr *tb[IFLA_MAX+1]; + struct net_device *bdev = NULL; + struct net_device *br_dev = NULL; + const struct net_device_ops *ops = NULL; + const struct net_device_ops *cops = NULL; + struct ifinfomsg *ifm = nlmsg_data(cb->nlh); + struct net *net = sock_net(skb->sk); + int brport_idx = 0; + int br_idx = 0; + int idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->priv_flags & IFF_BRIDGE_PORT) { - struct net_device *br_dev; - const struct net_device_ops *ops; + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, + ifla_policy) == 0) { + if (tb[IFLA_MASTER]) + br_idx = nla_get_u32(tb[IFLA_MASTER]); + } - br_dev = netdev_master_upper_dev_get(dev); - ops = br_dev->netdev_ops; - if (ops->ndo_fdb_dump) - idx = 
ops->ndo_fdb_dump(skb, cb, dev, NULL, - idx); + brport_idx = ifm->ifi_index; + + if (br_idx) { + br_dev = __dev_get_by_index(net, br_idx); + if (!br_dev) + return -ENODEV; + + ops = br_dev->netdev_ops; + bdev = br_dev; + } + + for_each_netdev(net, dev) { + if (brport_idx && (dev->ifindex != brport_idx)) + continue; + + if (!br_idx) { /* user did not specify a specific bridge */ + if (dev->priv_flags & IFF_BRIDGE_PORT) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + + bdev = dev; + } else { + if (dev != br_dev && + !(dev->priv_flags & IFF_BRIDGE_PORT)) + continue; + + if (br_dev != netdev_master_upper_dev_get(dev) && + !(dev->priv_flags & IFF_EBRIDGE)) + continue; + + bdev = br_dev; + cops = ops; } + if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (cops && cops->ndo_fdb_dump) + idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, + idx); + } + + idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, + idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev, idx); - else - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + + cops = NULL; } - rcu_read_unlock(); cb->args[0] = idx; return skb->len;