linux-sg2042/net/bridge/br_multicast.c

2616 lines
63 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Bridge multicast support.
*
* Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
*/
#include <linux/err.h>
#include <linux/export.h>
#include <linux/if_ether.h>
#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/netdevice.h>
#include <linux/netfilter_bridge.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/inetdevice.h>
#include <linux/mroute.h>
#include <net/ip.h>
#include <net/switchdev.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/icmpv6.h>
#include <net/ipv6.h>
#include <net/mld.h>
#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#endif
#include "br_private.h"
static const struct rhashtable_params br_mdb_rht_params = {
.head_offset = offsetof(struct net_bridge_mdb_entry, rhnode),
.key_offset = offsetof(struct net_bridge_mdb_entry, addr),
.key_len = sizeof(struct br_ip),
.automatic_shrinking = true,
};
static void br_multicast_start_querier(struct net_bridge *br,
struct bridge_mcast_own_query *query);
static void br_multicast_add_router(struct net_bridge *br,
struct net_bridge_port *port);
static void br_ip4_multicast_leave_group(struct net_bridge *br,
struct net_bridge_port *port,
__be32 group,
__u16 vid,
const unsigned char *src);
static void __del_port_router(struct net_bridge_port *p);
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_leave_group(struct net_bridge *br,
struct net_bridge_port *port,
const struct in6_addr *group,
__u16 vid, const unsigned char *src);
#endif
static struct net_bridge_mdb_entry *br_mdb_ip_get_rcu(struct net_bridge *br,
struct br_ip *dst)
{
return rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params);
}
struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge *br,
struct br_ip *dst)
{
struct net_bridge_mdb_entry *ent;
lockdep_assert_held_once(&br->multicast_lock);
rcu_read_lock();
ent = rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params);
rcu_read_unlock();
return ent;
}
static struct net_bridge_mdb_entry *br_mdb_ip4_get(struct net_bridge *br,
__be32 dst, __u16 vid)
{
struct br_ip br_dst;
memset(&br_dst, 0, sizeof(br_dst));
br_dst.u.ip4 = dst;
br_dst.proto = htons(ETH_P_IP);
br_dst.vid = vid;
return br_mdb_ip_get(br, &br_dst);
}
#if IS_ENABLED(CONFIG_IPV6)
static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
const struct in6_addr *dst,
__u16 vid)
{
struct br_ip br_dst;
memset(&br_dst, 0, sizeof(br_dst));
br_dst.u.ip6 = *dst;
br_dst.proto = htons(ETH_P_IPV6);
br_dst.vid = vid;
return br_mdb_ip_get(br, &br_dst);
}
#endif
struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct sk_buff *skb, u16 vid)
{
struct br_ip ip;
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return NULL;
if (BR_INPUT_SKB_CB(skb)->igmp)
return NULL;
memset(&ip, 0, sizeof(ip));
ip.proto = skb->protocol;
ip.vid = vid;
switch (skb->protocol) {
case htons(ETH_P_IP):
ip.u.ip4 = ip_hdr(skb)->daddr;
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
ip.u.ip6 = ipv6_hdr(skb)->daddr;
break;
#endif
default:
return NULL;
}
return br_mdb_ip_get_rcu(br, &ip);
}
static void br_multicast_group_expired(struct timer_list *t)
{
struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
struct net_bridge *br = mp->br;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || timer_pending(&mp->timer))
goto out;
br_multicast_host_leave(mp, true);
if (mp->ports)
goto out;
rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
br_mdb_rht_params);
hlist_del_rcu(&mp->mdb_node);
kfree_rcu(mp, rcu);
out:
spin_unlock(&br->multicast_lock);
}
static void br_multicast_del_group_src(struct net_bridge_group_src *src)
{
struct net_bridge *br = src->pg->port->br;
hlist_del_init_rcu(&src->node);
src->pg->src_ents--;
hlist_add_head(&src->del_node, &br->src_gc_list);
queue_work(system_long_wq, &br->src_gc_work);
}
void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg,
struct net_bridge_port_group __rcu **pp)
{
struct net_bridge *br = pg->port->br;
struct net_bridge_group_src *ent;
struct hlist_node *tmp;
rcu_assign_pointer(*pp, pg->next);
hlist_del_init(&pg->mglist);
del_timer(&pg->timer);
hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
br_multicast_del_group_src(ent);
br_mdb_notify(br->dev, pg->port, &pg->addr, RTM_DELMDB, pg->flags);
kfree_rcu(pg, rcu);
if (!mp->ports && !mp->host_joined && netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
}
static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg)
{
struct net_bridge_port_group __rcu **pp;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
mp = br_mdb_ip_get(br, &pg->addr);
if (WARN_ON(!mp))
return;
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p != pg)
continue;
br_multicast_del_pg(mp, pg, pp);
return;
}
WARN_ON(1);
}
static void br_multicast_port_group_expired(struct timer_list *t)
{
struct net_bridge_port_group *pg = from_timer(pg, t, timer);
struct net_bridge *br = pg->port->br;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT)
goto out;
br_multicast_find_del_pg(br, pg);
out:
spin_unlock(&br->multicast_lock);
}
static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
__be32 group,
u8 *igmp_type)
{
struct igmpv3_query *ihv3;
size_t igmp_hdr_size;
struct sk_buff *skb;
struct igmphdr *ih;
struct ethhdr *eth;
struct iphdr *iph;
igmp_hdr_size = sizeof(*ih);
if (br->multicast_igmp_version == 3)
igmp_hdr_size = sizeof(*ihv3);
skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) +
igmp_hdr_size + 4);
if (!skb)
goto out;
skb->protocol = htons(ETH_P_IP);
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
ether_addr_copy(eth->h_source, br->dev->dev_addr);
eth->h_dest[0] = 1;
eth->h_dest[1] = 0;
eth->h_dest[2] = 0x5e;
eth->h_dest[3] = 0;
eth->h_dest[4] = 0;
eth->h_dest[5] = 1;
eth->h_proto = htons(ETH_P_IP);
skb_put(skb, sizeof(*eth));
skb_set_network_header(skb, skb->len);
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 6;
iph->tos = 0xc0;
iph->tot_len = htons(sizeof(*iph) + igmp_hdr_size + 4);
iph->id = 0;
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->protocol = IPPROTO_IGMP;
iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
((u8 *)&iph[1])[3] = 0;
ip_send_check(iph);
skb_put(skb, 24);
skb_set_transport_header(skb, skb->len);
*igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
switch (br->multicast_igmp_version) {
case 2:
ih = igmp_hdr(skb);
ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
ih->code = (group ? br->multicast_last_member_interval :
br->multicast_query_response_interval) /
(HZ / IGMP_TIMER_SCALE);
ih->group = group;
ih->csum = 0;
ih->csum = ip_compute_csum((void *)ih, sizeof(*ih));
break;
case 3:
ihv3 = igmpv3_query_hdr(skb);
ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
ihv3->code = (group ? br->multicast_last_member_interval :
br->multicast_query_response_interval) /
(HZ / IGMP_TIMER_SCALE);
ihv3->group = group;
ihv3->qqic = br->multicast_query_interval / HZ;
ihv3->nsrcs = 0;
ihv3->resv = 0;
ihv3->suppress = 0;
ihv3->qrv = 2;
ihv3->csum = 0;
ihv3->csum = ip_compute_csum((void *)ihv3, sizeof(*ihv3));
break;
}
skb_put(skb, igmp_hdr_size);
__skb_pull(skb, sizeof(*eth));
out:
return skb;
}
#if IS_ENABLED(CONFIG_IPV6)
static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
const struct in6_addr *grp,
u8 *igmp_type)
{
struct mld2_query *mld2q;
unsigned long interval;
struct ipv6hdr *ip6h;
struct mld_msg *mldq;
size_t mld_hdr_size;
struct sk_buff *skb;
struct ethhdr *eth;
u8 *hopopt;
mld_hdr_size = sizeof(*mldq);
if (br->multicast_mld_version == 2)
mld_hdr_size = sizeof(*mld2q);
skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
8 + mld_hdr_size);
if (!skb)
goto out;
skb->protocol = htons(ETH_P_IPV6);
/* Ethernet header */
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
ether_addr_copy(eth->h_source, br->dev->dev_addr);
eth->h_proto = htons(ETH_P_IPV6);
skb_put(skb, sizeof(*eth));
/* IPv6 header + HbH option */
skb_set_network_header(skb, skb->len);
ip6h = ipv6_hdr(skb);
*(__force __be32 *)ip6h = htonl(0x60000000);
ip6h->payload_len = htons(8 + mld_hdr_size);
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
&ip6h->saddr)) {
kfree_skb(skb);
br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false);
return NULL;
}
br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
hopopt = (u8 *)(ip6h + 1);
hopopt[0] = IPPROTO_ICMPV6; /* next hdr */
hopopt[1] = 0; /* length of HbH */
hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */
hopopt[3] = 2; /* Length of RA Option */
hopopt[4] = 0; /* Type = 0x0000 (MLD) */
hopopt[5] = 0;
hopopt[6] = IPV6_TLV_PAD1; /* Pad1 */
hopopt[7] = IPV6_TLV_PAD1; /* Pad1 */
skb_put(skb, sizeof(*ip6h) + 8);
/* ICMPv6 */
skb_set_transport_header(skb, skb->len);
interval = ipv6_addr_any(grp) ?
br->multicast_query_response_interval :
br->multicast_last_member_interval;
*igmp_type = ICMPV6_MGM_QUERY;
switch (br->multicast_mld_version) {
case 1:
mldq = (struct mld_msg *)icmp6_hdr(skb);
mldq->mld_type = ICMPV6_MGM_QUERY;
mldq->mld_code = 0;
mldq->mld_cksum = 0;
mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
mldq->mld_reserved = 0;
mldq->mld_mca = *grp;
mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
sizeof(*mldq), IPPROTO_ICMPV6,
csum_partial(mldq,
sizeof(*mldq),
0));
break;
case 2:
mld2q = (struct mld2_query *)icmp6_hdr(skb);
mld2q->mld2q_mrc = htons((u16)jiffies_to_msecs(interval));
mld2q->mld2q_type = ICMPV6_MGM_QUERY;
mld2q->mld2q_code = 0;
mld2q->mld2q_cksum = 0;
mld2q->mld2q_resv1 = 0;
mld2q->mld2q_resv2 = 0;
mld2q->mld2q_suppress = 0;
mld2q->mld2q_qrv = 2;
mld2q->mld2q_nsrcs = 0;
mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
mld2q->mld2q_mca = *grp;
mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
sizeof(*mld2q),
IPPROTO_ICMPV6,
csum_partial(mld2q,
sizeof(*mld2q),
0));
break;
}
skb_put(skb, mld_hdr_size);
__skb_pull(skb, sizeof(*eth));
out:
return skb;
}
#endif
static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
struct br_ip *addr,
u8 *igmp_type)
{
switch (addr->proto) {
case htons(ETH_P_IP):
return br_ip4_multicast_alloc_query(br, addr->u.ip4, igmp_type);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
return br_ip6_multicast_alloc_query(br, &addr->u.ip6,
igmp_type);
#endif
}
return NULL;
}
struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
struct br_ip *group)
{
struct net_bridge_mdb_entry *mp;
int err;
mp = br_mdb_ip_get(br, group);
if (mp)
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
}
mp = kzalloc(sizeof(*mp), GFP_ATOMIC);
if (unlikely(!mp))
return ERR_PTR(-ENOMEM);
mp->br = br;
mp->addr = *group;
timer_setup(&mp->timer, br_multicast_group_expired, 0);
err = rhashtable_lookup_insert_fast(&br->mdb_hash_tbl, &mp->rhnode,
br_mdb_rht_params);
if (err) {
kfree(mp);
mp = ERR_PTR(err);
} else {
hlist_add_head_rcu(&mp->mdb_node, &br->mdb_list);
}
return mp;
}
static void br_multicast_group_src_expired(struct timer_list *t)
{
struct net_bridge_group_src *src = from_timer(src, t, timer);
struct net_bridge_port_group *pg;
struct net_bridge *br = src->br;
spin_lock(&br->multicast_lock);
if (hlist_unhashed(&src->node) || !netif_running(br->dev) ||
timer_pending(&src->timer))
goto out;
pg = src->pg;
if (pg->filter_mode == MCAST_INCLUDE) {
br_multicast_del_group_src(src);
if (!hlist_empty(&pg->src_list))
goto out;
br_multicast_find_del_pg(br, pg);
}
out:
spin_unlock(&br->multicast_lock);
}
static struct net_bridge_group_src *
br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip)
{
struct net_bridge_group_src *ent;
switch (ip->proto) {
case htons(ETH_P_IP):
hlist_for_each_entry(ent, &pg->src_list, node)
if (ip->u.ip4 == ent->addr.u.ip4)
return ent;
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
hlist_for_each_entry(ent, &pg->src_list, node)
if (!ipv6_addr_cmp(&ent->addr.u.ip6, &ip->u.ip6))
return ent;
break;
#endif
}
return NULL;
}
static struct net_bridge_group_src *
br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_ip)
{
struct net_bridge_group_src *grp_src;
if (unlikely(pg->src_ents >= PG_SRC_ENT_LIMIT))
return NULL;
switch (src_ip->proto) {
case htons(ETH_P_IP):
if (ipv4_is_zeronet(src_ip->u.ip4) ||
ipv4_is_multicast(src_ip->u.ip4))
return NULL;
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
if (ipv6_addr_any(&src_ip->u.ip6) ||
ipv6_addr_is_multicast(&src_ip->u.ip6))
return NULL;
break;
#endif
}
grp_src = kzalloc(sizeof(*grp_src), GFP_ATOMIC);
if (unlikely(!grp_src))
return NULL;
grp_src->pg = pg;
grp_src->br = pg->port->br;
grp_src->addr = *src_ip;
timer_setup(&grp_src->timer, br_multicast_group_src_expired, 0);
hlist_add_head_rcu(&grp_src->node, &pg->src_list);
pg->src_ents++;
return grp_src;
}
struct net_bridge_port_group *br_multicast_new_port_group(
struct net_bridge_port *port,
struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags,
const unsigned char *src,
u8 filter_mode)
{
struct net_bridge_port_group *p;
p = kzalloc(sizeof(*p), GFP_ATOMIC);
if (unlikely(!p))
return NULL;
p->addr = *group;
p->port = port;
p->flags = flags;
p->filter_mode = filter_mode;
INIT_HLIST_HEAD(&p->src_list);
rcu_assign_pointer(p->next, next);
hlist_add_head(&p->mglist, &port->mglist);
timer_setup(&p->timer, br_multicast_port_group_expired, 0);
if (src)
memcpy(p->eth_addr, src, ETH_ALEN);
else
eth_broadcast_addr(p->eth_addr);
return p;
}
static bool br_port_group_equal(struct net_bridge_port_group *p,
struct net_bridge_port *port,
const unsigned char *src)
{
if (p->port != port)
return false;
if (!(port->flags & BR_MULTICAST_TO_UNICAST))
return true;
return ether_addr_equal(src, p->eth_addr);
}
void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
{
if (!mp->host_joined) {
mp->host_joined = true;
if (notify)
br_mdb_notify(mp->br->dev, NULL, &mp->addr,
RTM_NEWMDB, 0);
}
mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
}
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
{
if (!mp->host_joined)
return;
mp->host_joined = false;
if (notify)
br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
}
static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_port *port,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode)
{
struct net_bridge_port_group __rcu **pp;
struct net_bridge_port_group *p;
struct net_bridge_mdb_entry *mp;
Revert "bridge: only expire the mdb entry when query is received" While this commit was a good attempt to fix issues occuring when no multicast querier is present, this commit still has two more issues: 1) There are cases where mdb entries do not expire even if there is a querier present. The bridge will unnecessarily continue flooding multicast packets on the according ports. 2) Never removing an mdb entry could be exploited for a Denial of Service by an attacker on the local link, slowly, but steadily eating up all memory. Actually, this commit became obsolete with "bridge: disable snooping if there is no querier" (b00589af3b) which included fixes for a few more cases. Therefore reverting the following commits (the commit stated in the commit message plus three of its follow up fixes): ==================== Revert "bridge: update mdb expiration timer upon reports." This reverts commit f144febd93d5ee534fdf23505ab091b2b9088edc. Revert "bridge: do not call setup_timer() multiple times" This reverts commit 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1. Revert "bridge: fix some kernel warning in multicast timer" This reverts commit c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1. Revert "bridge: only expire the mdb entry when query is received" This reverts commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b. ==================== CC: Cong Wang <amwang@redhat.com> Signed-off-by: Linus Lüssing <linus.luessing@web.de> Reviewed-by: Vlad Yasevich <vyasevich@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-20 06:58:57 +08:00
unsigned long now = jiffies;
int err;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
(port && port->state == BR_STATE_DISABLED))
goto out;
mp = br_multicast_new_group(br, group);
err = PTR_ERR(mp);
if (IS_ERR(mp))
goto err;
if (!port) {
br_multicast_host_join(mp, true);
goto out;
}
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (br_port_group_equal(p, port, src))
Revert "bridge: only expire the mdb entry when query is received" While this commit was a good attempt to fix issues occuring when no multicast querier is present, this commit still has two more issues: 1) There are cases where mdb entries do not expire even if there is a querier present. The bridge will unnecessarily continue flooding multicast packets on the according ports. 2) Never removing an mdb entry could be exploited for a Denial of Service by an attacker on the local link, slowly, but steadily eating up all memory. Actually, this commit became obsolete with "bridge: disable snooping if there is no querier" (b00589af3b) which included fixes for a few more cases. Therefore reverting the following commits (the commit stated in the commit message plus three of its follow up fixes): ==================== Revert "bridge: update mdb expiration timer upon reports." This reverts commit f144febd93d5ee534fdf23505ab091b2b9088edc. Revert "bridge: do not call setup_timer() multiple times" This reverts commit 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1. Revert "bridge: fix some kernel warning in multicast timer" This reverts commit c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1. Revert "bridge: only expire the mdb entry when query is received" This reverts commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b. ==================== CC: Cong Wang <amwang@redhat.com> Signed-off-by: Linus Lüssing <linus.luessing@web.de> Reviewed-by: Vlad Yasevich <vyasevich@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-20 06:58:57 +08:00
goto found;
if ((unsigned long)p->port < (unsigned long)port)
break;
}
p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode);
if (unlikely(!p))
goto err;
rcu_assign_pointer(*pp, p);
br_mdb_notify(br->dev, port, group, RTM_NEWMDB, 0);
Revert "bridge: only expire the mdb entry when query is received" While this commit was a good attempt to fix issues occuring when no multicast querier is present, this commit still has two more issues: 1) There are cases where mdb entries do not expire even if there is a querier present. The bridge will unnecessarily continue flooding multicast packets on the according ports. 2) Never removing an mdb entry could be exploited for a Denial of Service by an attacker on the local link, slowly, but steadily eating up all memory. Actually, this commit became obsolete with "bridge: disable snooping if there is no querier" (b00589af3b) which included fixes for a few more cases. Therefore reverting the following commits (the commit stated in the commit message plus three of its follow up fixes): ==================== Revert "bridge: update mdb expiration timer upon reports." This reverts commit f144febd93d5ee534fdf23505ab091b2b9088edc. Revert "bridge: do not call setup_timer() multiple times" This reverts commit 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1. Revert "bridge: fix some kernel warning in multicast timer" This reverts commit c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1. Revert "bridge: only expire the mdb entry when query is received" This reverts commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b. ==================== CC: Cong Wang <amwang@redhat.com> Signed-off-by: Linus Lüssing <linus.luessing@web.de> Reviewed-by: Vlad Yasevich <vyasevich@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-20 06:58:57 +08:00
found:
mod_timer(&p->timer, now + br->multicast_membership_interval);
out:
err = 0;
err:
spin_unlock(&br->multicast_lock);
return err;
}
static int br_ip4_multicast_add_group(struct net_bridge *br,
struct net_bridge_port *port,
__be32 group,
__u16 vid,
const unsigned char *src,
bool igmpv2)
{
struct br_ip br_group;
u8 filter_mode;
if (ipv4_is_local_multicast(group))
return 0;
memset(&br_group, 0, sizeof(br_group));
br_group.u.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE;
return br_multicast_add_group(br, port, &br_group, src, filter_mode);
}
#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_add_group(struct net_bridge *br,
struct net_bridge_port *port,
const struct in6_addr *group,
__u16 vid,
const unsigned char *src,
bool mldv1)
{
struct br_ip br_group;
u8 filter_mode;
if (ipv6_addr_is_ll_all_nodes(group))
return 0;
memset(&br_group, 0, sizeof(br_group));
br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE;
return br_multicast_add_group(br, port, &br_group, src, filter_mode);
}
#endif
static void br_multicast_router_expired(struct timer_list *t)
{
struct net_bridge_port *port =
from_timer(port, t, multicast_router_timer);
struct net_bridge *br = port->br;
spin_lock(&br->multicast_lock);
if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
port->multicast_router == MDB_RTR_TYPE_PERM ||
timer_pending(&port->multicast_router_timer))
goto out;
__del_port_router(port);
out:
spin_unlock(&br->multicast_lock);
}
static void br_mc_router_state_change(struct net_bridge *p,
bool is_mc_router)
{
struct switchdev_attr attr = {
.orig_dev = p->dev,
.id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
.flags = SWITCHDEV_F_DEFER,
.u.mrouter = is_mc_router,
};
switchdev_port_attr_set(p->dev, &attr);
}
static void br_multicast_local_router_expired(struct timer_list *t)
{
struct net_bridge *br = from_timer(br, t, multicast_router_timer);
spin_lock(&br->multicast_lock);
if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
br->multicast_router == MDB_RTR_TYPE_PERM ||
timer_pending(&br->multicast_router_timer))
goto out;
br_mc_router_state_change(br, false);
out:
spin_unlock(&br->multicast_lock);
}
static void br_multicast_querier_expired(struct net_bridge *br,
struct bridge_mcast_own_query *query)
{
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
goto out;
br_multicast_start_querier(br, query);
out:
spin_unlock(&br->multicast_lock);
}
static void br_ip4_multicast_querier_expired(struct timer_list *t)
{
struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
br_multicast_querier_expired(br, &br->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_querier_expired(struct timer_list *t)
{
struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
br_multicast_querier_expired(br, &br->ip6_own_query);
}
#endif
static void br_multicast_select_own_querier(struct net_bridge *br,
struct br_ip *ip,
struct sk_buff *skb)
{
if (ip->proto == htons(ETH_P_IP))
br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr;
#if IS_ENABLED(CONFIG_IPV6)
else
br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr;
#endif
}
static void __br_multicast_send_query(struct net_bridge *br,
struct net_bridge_port *port,
struct br_ip *ip)
{
struct sk_buff *skb;
u8 igmp_type;
skb = br_multicast_alloc_query(br, ip, &igmp_type);
if (!skb)
return;
if (port) {
skb->dev = port->dev;
br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_TX);
2015-09-16 09:04:16 +08:00
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
dev_net(port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
} else {
br_multicast_select_own_querier(br, ip, skb);
br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_RX);
netif_rx(skb);
}
}
static void br_multicast_send_query(struct net_bridge *br,
struct net_bridge_port *port,
struct bridge_mcast_own_query *own_query)
{
struct bridge_mcast_other_query *other_query = NULL;
struct br_ip br_group;
unsigned long time;
if (!netif_running(br->dev) ||
!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
!br_opt_get(br, BROPT_MULTICAST_QUERIER))
return;
memset(&br_group.u, 0, sizeof(br_group.u));
if (port ? (own_query == &port->ip4_own_query) :
(own_query == &br->ip4_own_query)) {
other_query = &br->ip4_other_query;
br_group.proto = htons(ETH_P_IP);
#if IS_ENABLED(CONFIG_IPV6)
} else {
other_query = &br->ip6_other_query;
br_group.proto = htons(ETH_P_IPV6);
#endif
}
if (!other_query || timer_pending(&other_query->timer))
return;
__br_multicast_send_query(br, port, &br_group);
time = jiffies;
time += own_query->startup_sent < br->multicast_startup_query_count ?
br->multicast_startup_query_interval :
br->multicast_query_interval;
mod_timer(&own_query->timer, time);
}
static void
br_multicast_port_query_expired(struct net_bridge_port *port,
struct bridge_mcast_own_query *query)
{
struct net_bridge *br = port->br;
spin_lock(&br->multicast_lock);
if (port->state == BR_STATE_DISABLED ||
port->state == BR_STATE_BLOCKING)
goto out;
if (query->startup_sent < br->multicast_startup_query_count)
query->startup_sent++;
br_multicast_send_query(port->br, port, query);
out:
spin_unlock(&br->multicast_lock);
}
static void br_ip4_multicast_port_query_expired(struct timer_list *t)
{
struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
br_multicast_port_query_expired(port, &port->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_port_query_expired(struct timer_list *t)
{
struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
br_multicast_port_query_expired(port, &port->ip6_own_query);
}
#endif
static void br_mc_disabled_update(struct net_device *dev, bool value)
{
struct switchdev_attr attr = {
.orig_dev = dev,
.id = SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
.flags = SWITCHDEV_F_DEFER,
.u.mc_disabled = !value,
};
switchdev_port_attr_set(dev, &attr);
}
int br_multicast_add_port(struct net_bridge_port *port)
{
port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
timer_setup(&port->multicast_router_timer,
br_multicast_router_expired, 0);
timer_setup(&port->ip4_own_query.timer,
br_ip4_multicast_port_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
timer_setup(&port->ip6_own_query.timer,
br_ip6_multicast_port_query_expired, 0);
#endif
br_mc_disabled_update(port->dev,
br_opt_get(port->br, BROPT_MULTICAST_ENABLED));
port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
if (!port->mcast_stats)
return -ENOMEM;
return 0;
}
void br_multicast_del_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
struct net_bridge_port_group *pg;
struct hlist_node *n;
/* Take care of the remaining groups, only perm ones should be left */
spin_lock_bh(&br->multicast_lock);
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
br_multicast_find_del_pg(br, pg);
spin_unlock_bh(&br->multicast_lock);
del_timer_sync(&port->multicast_router_timer);
free_percpu(port->mcast_stats);
}
static void br_multicast_enable(struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
if (try_to_del_timer_sync(&query->timer) >= 0 ||
del_timer(&query->timer))
mod_timer(&query->timer, jiffies);
}
bridge: multicast: restore perm router ports on multicast enable Satish reported a problem with the perm multicast router ports not getting reenabled after some series of events, in particular if it happens that the multicast snooping has been disabled and the port goes to disabled state then it will be deleted from the router port list, but if it moves into non-disabled state it will not be re-added because the mcast snooping is still disabled, and enabling snooping later does nothing. Here are the steps to reproduce, setup br0 with snooping enabled and eth1 added as a perm router (multicast_router = 2): 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show <empty> At this point we have mcast enabled and eth1 as a perm router (value = 2) but it is not in the router list which is incorrect. After this change: 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show router ports on br0: eth1 Note: we can directly do br_multicast_enable_port for all because the querier timer already has checks for the port state and will simply expire if it's in blocking/disabled. See the comment added by commit 9aa66382163e7 ("bridge: multicast: add a comment to br_port_state_selection about blocking state") Fixes: 561f1103a2b7 ("bridge: Add multicast_snooping sysfs toggle") Reported-by: Satish Ashok <sashok@cumulusnetworks.com> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-19 00:09:48 +08:00
static void __br_multicast_enable_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev))
bridge: multicast: restore perm router ports on multicast enable Satish reported a problem with the perm multicast router ports not getting reenabled after some series of events, in particular if it happens that the multicast snooping has been disabled and the port goes to disabled state then it will be deleted from the router port list, but if it moves into non-disabled state it will not be re-added because the mcast snooping is still disabled, and enabling snooping later does nothing. Here are the steps to reproduce, setup br0 with snooping enabled and eth1 added as a perm router (multicast_router = 2): 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show <empty> At this point we have mcast enabled and eth1 as a perm router (value = 2) but it is not in the router list which is incorrect. After this change: 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show router ports on br0: eth1 Note: we can directly do br_multicast_enable_port for all because the querier timer already has checks for the port state and will simply expire if it's in blocking/disabled. See the comment added by commit 9aa66382163e7 ("bridge: multicast: add a comment to br_port_state_selection about blocking state") Fixes: 561f1103a2b7 ("bridge: Add multicast_snooping sysfs toggle") Reported-by: Satish Ashok <sashok@cumulusnetworks.com> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-19 00:09:48 +08:00
return;
br_multicast_enable(&port->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
br_multicast_enable(&port->ip6_own_query);
#endif
if (port->multicast_router == MDB_RTR_TYPE_PERM &&
hlist_unhashed(&port->rlist))
br_multicast_add_router(br, port);
bridge: multicast: restore perm router ports on multicast enable Satish reported a problem with the perm multicast router ports not getting reenabled after some series of events, in particular if it happens that the multicast snooping has been disabled and the port goes to disabled state then it will be deleted from the router port list, but if it moves into non-disabled state it will not be re-added because the mcast snooping is still disabled, and enabling snooping later does nothing. Here are the steps to reproduce, setup br0 with snooping enabled and eth1 added as a perm router (multicast_router = 2): 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show <empty> At this point we have mcast enabled and eth1 as a perm router (value = 2) but it is not in the router list which is incorrect. After this change: 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show router ports on br0: eth1 Note: we can directly do br_multicast_enable_port for all because the querier timer already has checks for the port state and will simply expire if it's in blocking/disabled. See the comment added by commit 9aa66382163e7 ("bridge: multicast: add a comment to br_port_state_selection about blocking state") Fixes: 561f1103a2b7 ("bridge: Add multicast_snooping sysfs toggle") Reported-by: Satish Ashok <sashok@cumulusnetworks.com> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-19 00:09:48 +08:00
}
bridge: multicast: restore perm router ports on multicast enable Satish reported a problem with the perm multicast router ports not getting reenabled after some series of events, in particular if it happens that the multicast snooping has been disabled and the port goes to disabled state then it will be deleted from the router port list, but if it moves into non-disabled state it will not be re-added because the mcast snooping is still disabled, and enabling snooping later does nothing. Here are the steps to reproduce, setup br0 with snooping enabled and eth1 added as a perm router (multicast_router = 2): 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show <empty> At this point we have mcast enabled and eth1 as a perm router (value = 2) but it is not in the router list which is incorrect. After this change: 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show router ports on br0: eth1 Note: we can directly do br_multicast_enable_port for all because the querier timer already has checks for the port state and will simply expire if it's in blocking/disabled. See the comment added by commit 9aa66382163e7 ("bridge: multicast: add a comment to br_port_state_selection about blocking state") Fixes: 561f1103a2b7 ("bridge: Add multicast_snooping sysfs toggle") Reported-by: Satish Ashok <sashok@cumulusnetworks.com> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-19 00:09:48 +08:00
void br_multicast_enable_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
spin_lock(&br->multicast_lock);
__br_multicast_enable_port(port);
spin_unlock(&br->multicast_lock);
}
void br_multicast_disable_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
struct net_bridge_port_group *pg;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 09:06:00 +08:00
struct hlist_node *n;
spin_lock(&br->multicast_lock);
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 09:06:00 +08:00
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
br_multicast_find_del_pg(br, pg);
__del_port_router(port);
del_timer(&port->multicast_router_timer);
del_timer(&port->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
del_timer(&port->ip6_own_query.timer);
#endif
spin_unlock(&br->multicast_lock);
}
static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
u16 vid)
{
const unsigned char *src;
struct igmpv3_report *ih;
struct igmpv3_grec *grec;
int i;
int len;
int num;
int type;
int err = 0;
__be32 group;
net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling We take a pointer to grec prior to calling pskb_may_pull and use it afterwards to get nsrcs so record nsrcs before the pull when handling igmp3 and we get a pointer to nsrcs and call pskb_may_pull when handling mld2 which again could lead to reading 2 bytes out-of-bounds. ================================================================== BUG: KASAN: use-after-free in br_multicast_rcv+0x480c/0x4ad0 [bridge] Read of size 2 at addr ffff8880421302b4 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G OE 5.2.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x280 ? br_multicast_rcv+0x480c/0x4ad0 [bridge] __kasan_report+0x152/0x1aa ? br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_rcv+0x480c/0x4ad0 [bridge] kasan_report+0xe/0x20 br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_disable_port+0x150/0x150 [bridge] ? ktime_get_with_offset+0xb4/0x150 ? __kasan_kmalloc.constprop.6+0xa6/0xf0 ? __netif_receive_skb+0x1b0/0x1b0 ? br_fdb_update+0x10e/0x6e0 [bridge] ? br_handle_frame_finish+0x3c6/0x11d0 [bridge] br_handle_frame_finish+0x3c6/0x11d0 [bridge] ? br_pass_frame_up+0x3a0/0x3a0 [bridge] ? virtnet_probe+0x1c80/0x1c80 [virtio_net] br_handle_frame+0x731/0xd90 [bridge] ? select_idle_sibling+0x25/0x7d0 ? br_handle_frame_finish+0x11d0/0x11d0 [bridge] __netif_receive_skb_core+0xced/0x2d70 ? virtqueue_get_buf_ctx+0x230/0x1130 [virtio_ring] ? do_xdp_generic+0x20/0x20 ? virtqueue_napi_complete+0x39/0x70 [virtio_net] ? virtnet_poll+0x94d/0xc78 [virtio_net] ? receive_buf+0x5120/0x5120 [virtio_net] ? __netif_receive_skb_one_core+0x97/0x1d0 __netif_receive_skb_one_core+0x97/0x1d0 ? __netif_receive_skb_core+0x2d70/0x2d70 ? _raw_write_trylock+0x100/0x100 ? __queue_work+0x41e/0xbe0 process_backlog+0x19c/0x650 ? _raw_read_lock_irq+0x40/0x40 net_rx_action+0x71e/0xbc0 ? __switch_to_asm+0x40/0x70 ? napi_complete_done+0x360/0x360 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x85e/0x14d0 __do_softirq+0x1db/0x5f9 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x26/0x40 smpboot_thread_fn+0x443/0x680 ? sort_range+0x20/0x20 ? schedule+0x94/0x210 ? __kthread_parkme+0x78/0xf0 ? sort_range+0x20/0x20 kthread+0x2ae/0x3a0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0001084c00 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0xffffc000000000() raw: 00ffffc000000000 ffffea0000cfca08 ffffea0001098608 0000000000000000 raw: 0000000000000000 0000000000000003 00000000ffffff7f 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888042130180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff888042130280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888042130300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Reported-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Tested-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-02 20:00:18 +08:00
u16 nsrcs;
ih = igmpv3_report_hdr(skb);
num = ntohs(ih->ngrec);
len = skb_transport_offset(skb) + sizeof(*ih);
for (i = 0; i < num; i++) {
len += sizeof(*grec);
if (!ip_mc_may_pull(skb, len))
return -EINVAL;
grec = (void *)(skb->data + len - sizeof(*grec));
group = grec->grec_mca;
type = grec->grec_type;
net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling We take a pointer to grec prior to calling pskb_may_pull and use it afterwards to get nsrcs so record nsrcs before the pull when handling igmp3 and we get a pointer to nsrcs and call pskb_may_pull when handling mld2 which again could lead to reading 2 bytes out-of-bounds. ================================================================== BUG: KASAN: use-after-free in br_multicast_rcv+0x480c/0x4ad0 [bridge] Read of size 2 at addr ffff8880421302b4 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G OE 5.2.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x280 ? br_multicast_rcv+0x480c/0x4ad0 [bridge] __kasan_report+0x152/0x1aa ? br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_rcv+0x480c/0x4ad0 [bridge] kasan_report+0xe/0x20 br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_disable_port+0x150/0x150 [bridge] ? ktime_get_with_offset+0xb4/0x150 ? __kasan_kmalloc.constprop.6+0xa6/0xf0 ? __netif_receive_skb+0x1b0/0x1b0 ? br_fdb_update+0x10e/0x6e0 [bridge] ? br_handle_frame_finish+0x3c6/0x11d0 [bridge] br_handle_frame_finish+0x3c6/0x11d0 [bridge] ? br_pass_frame_up+0x3a0/0x3a0 [bridge] ? virtnet_probe+0x1c80/0x1c80 [virtio_net] br_handle_frame+0x731/0xd90 [bridge] ? select_idle_sibling+0x25/0x7d0 ? br_handle_frame_finish+0x11d0/0x11d0 [bridge] __netif_receive_skb_core+0xced/0x2d70 ? virtqueue_get_buf_ctx+0x230/0x1130 [virtio_ring] ? do_xdp_generic+0x20/0x20 ? virtqueue_napi_complete+0x39/0x70 [virtio_net] ? virtnet_poll+0x94d/0xc78 [virtio_net] ? receive_buf+0x5120/0x5120 [virtio_net] ? __netif_receive_skb_one_core+0x97/0x1d0 __netif_receive_skb_one_core+0x97/0x1d0 ? __netif_receive_skb_core+0x2d70/0x2d70 ? _raw_write_trylock+0x100/0x100 ? __queue_work+0x41e/0xbe0 process_backlog+0x19c/0x650 ? _raw_read_lock_irq+0x40/0x40 net_rx_action+0x71e/0xbc0 ? __switch_to_asm+0x40/0x70 ? napi_complete_done+0x360/0x360 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x85e/0x14d0 __do_softirq+0x1db/0x5f9 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x26/0x40 smpboot_thread_fn+0x443/0x680 ? sort_range+0x20/0x20 ? schedule+0x94/0x210 ? __kthread_parkme+0x78/0xf0 ? sort_range+0x20/0x20 kthread+0x2ae/0x3a0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0001084c00 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0xffffc000000000() raw: 00ffffc000000000 ffffea0000cfca08 ffffea0001098608 0000000000000000 raw: 0000000000000000 0000000000000003 00000000ffffff7f 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888042130180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff888042130280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888042130300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Reported-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Tested-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-02 20:00:18 +08:00
nsrcs = ntohs(grec->grec_nsrcs);
net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling We take a pointer to grec prior to calling pskb_may_pull and use it afterwards to get nsrcs so record nsrcs before the pull when handling igmp3 and we get a pointer to nsrcs and call pskb_may_pull when handling mld2 which again could lead to reading 2 bytes out-of-bounds. ================================================================== BUG: KASAN: use-after-free in br_multicast_rcv+0x480c/0x4ad0 [bridge] Read of size 2 at addr ffff8880421302b4 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G OE 5.2.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x280 ? br_multicast_rcv+0x480c/0x4ad0 [bridge] __kasan_report+0x152/0x1aa ? br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_rcv+0x480c/0x4ad0 [bridge] kasan_report+0xe/0x20 br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_disable_port+0x150/0x150 [bridge] ? ktime_get_with_offset+0xb4/0x150 ? __kasan_kmalloc.constprop.6+0xa6/0xf0 ? __netif_receive_skb+0x1b0/0x1b0 ? br_fdb_update+0x10e/0x6e0 [bridge] ? br_handle_frame_finish+0x3c6/0x11d0 [bridge] br_handle_frame_finish+0x3c6/0x11d0 [bridge] ? br_pass_frame_up+0x3a0/0x3a0 [bridge] ? virtnet_probe+0x1c80/0x1c80 [virtio_net] br_handle_frame+0x731/0xd90 [bridge] ? select_idle_sibling+0x25/0x7d0 ? br_handle_frame_finish+0x11d0/0x11d0 [bridge] __netif_receive_skb_core+0xced/0x2d70 ? virtqueue_get_buf_ctx+0x230/0x1130 [virtio_ring] ? do_xdp_generic+0x20/0x20 ? virtqueue_napi_complete+0x39/0x70 [virtio_net] ? virtnet_poll+0x94d/0xc78 [virtio_net] ? receive_buf+0x5120/0x5120 [virtio_net] ? __netif_receive_skb_one_core+0x97/0x1d0 __netif_receive_skb_one_core+0x97/0x1d0 ? __netif_receive_skb_core+0x2d70/0x2d70 ? _raw_write_trylock+0x100/0x100 ? __queue_work+0x41e/0xbe0 process_backlog+0x19c/0x650 ? _raw_read_lock_irq+0x40/0x40 net_rx_action+0x71e/0xbc0 ? __switch_to_asm+0x40/0x70 ? napi_complete_done+0x360/0x360 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x85e/0x14d0 __do_softirq+0x1db/0x5f9 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x26/0x40 smpboot_thread_fn+0x443/0x680 ? sort_range+0x20/0x20 ? schedule+0x94/0x210 ? __kthread_parkme+0x78/0xf0 ? sort_range+0x20/0x20 kthread+0x2ae/0x3a0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0001084c00 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0xffffc000000000() raw: 00ffffc000000000 ffffea0000cfca08 ffffea0001098608 0000000000000000 raw: 0000000000000000 0000000000000003 00000000ffffff7f 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888042130180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff888042130280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888042130300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Reported-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Tested-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-02 20:00:18 +08:00
len += nsrcs * 4;
if (!ip_mc_may_pull(skb, len))
return -EINVAL;
/* We treat this as an IGMPv2 report for now. */
switch (type) {
case IGMPV3_MODE_IS_INCLUDE:
case IGMPV3_MODE_IS_EXCLUDE:
case IGMPV3_CHANGE_TO_INCLUDE:
case IGMPV3_CHANGE_TO_EXCLUDE:
case IGMPV3_ALLOW_NEW_SOURCES:
case IGMPV3_BLOCK_OLD_SOURCES:
break;
default:
continue;
}
src = eth_hdr(skb)->h_source;
if ((type == IGMPV3_CHANGE_TO_INCLUDE ||
type == IGMPV3_MODE_IS_INCLUDE) &&
net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling We take a pointer to grec prior to calling pskb_may_pull and use it afterwards to get nsrcs so record nsrcs before the pull when handling igmp3 and we get a pointer to nsrcs and call pskb_may_pull when handling mld2 which again could lead to reading 2 bytes out-of-bounds. ================================================================== BUG: KASAN: use-after-free in br_multicast_rcv+0x480c/0x4ad0 [bridge] Read of size 2 at addr ffff8880421302b4 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G OE 5.2.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x280 ? br_multicast_rcv+0x480c/0x4ad0 [bridge] __kasan_report+0x152/0x1aa ? br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_rcv+0x480c/0x4ad0 [bridge] kasan_report+0xe/0x20 br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_disable_port+0x150/0x150 [bridge] ? ktime_get_with_offset+0xb4/0x150 ? __kasan_kmalloc.constprop.6+0xa6/0xf0 ? __netif_receive_skb+0x1b0/0x1b0 ? br_fdb_update+0x10e/0x6e0 [bridge] ? br_handle_frame_finish+0x3c6/0x11d0 [bridge] br_handle_frame_finish+0x3c6/0x11d0 [bridge] ? br_pass_frame_up+0x3a0/0x3a0 [bridge] ? virtnet_probe+0x1c80/0x1c80 [virtio_net] br_handle_frame+0x731/0xd90 [bridge] ? select_idle_sibling+0x25/0x7d0 ? br_handle_frame_finish+0x11d0/0x11d0 [bridge] __netif_receive_skb_core+0xced/0x2d70 ? virtqueue_get_buf_ctx+0x230/0x1130 [virtio_ring] ? do_xdp_generic+0x20/0x20 ? virtqueue_napi_complete+0x39/0x70 [virtio_net] ? virtnet_poll+0x94d/0xc78 [virtio_net] ? receive_buf+0x5120/0x5120 [virtio_net] ? __netif_receive_skb_one_core+0x97/0x1d0 __netif_receive_skb_one_core+0x97/0x1d0 ? __netif_receive_skb_core+0x2d70/0x2d70 ? _raw_write_trylock+0x100/0x100 ? __queue_work+0x41e/0xbe0 process_backlog+0x19c/0x650 ? _raw_read_lock_irq+0x40/0x40 net_rx_action+0x71e/0xbc0 ? __switch_to_asm+0x40/0x70 ? napi_complete_done+0x360/0x360 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x85e/0x14d0 __do_softirq+0x1db/0x5f9 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x26/0x40 smpboot_thread_fn+0x443/0x680 ? sort_range+0x20/0x20 ? schedule+0x94/0x210 ? __kthread_parkme+0x78/0xf0 ? sort_range+0x20/0x20 kthread+0x2ae/0x3a0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0001084c00 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0xffffc000000000() raw: 00ffffc000000000 ffffea0000cfca08 ffffea0001098608 0000000000000000 raw: 0000000000000000 0000000000000003 00000000ffffff7f 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888042130180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff888042130280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888042130300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Reported-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Tested-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-02 20:00:18 +08:00
nsrcs == 0) {
br_ip4_multicast_leave_group(br, port, group, vid, src);
} else {
err = br_ip4_multicast_add_group(br, port, group, vid,
src, true);
if (err)
break;
}
}
return err;
}
#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_mld2_report(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
u16 vid)
{
unsigned int nsrcs_offset;
const unsigned char *src;
struct icmp6hdr *icmp6h;
struct mld2_grec *grec;
unsigned int grec_len;
int i;
int len;
int num;
int err = 0;
if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h)))
return -EINVAL;
icmp6h = icmp6_hdr(skb);
num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
len = skb_transport_offset(skb) + sizeof(*icmp6h);
for (i = 0; i < num; i++) {
net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling We take a pointer to grec prior to calling pskb_may_pull and use it afterwards to get nsrcs so record nsrcs before the pull when handling igmp3 and we get a pointer to nsrcs and call pskb_may_pull when handling mld2 which again could lead to reading 2 bytes out-of-bounds. ================================================================== BUG: KASAN: use-after-free in br_multicast_rcv+0x480c/0x4ad0 [bridge] Read of size 2 at addr ffff8880421302b4 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G OE 5.2.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x280 ? br_multicast_rcv+0x480c/0x4ad0 [bridge] __kasan_report+0x152/0x1aa ? br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_rcv+0x480c/0x4ad0 [bridge] kasan_report+0xe/0x20 br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_disable_port+0x150/0x150 [bridge] ? ktime_get_with_offset+0xb4/0x150 ? __kasan_kmalloc.constprop.6+0xa6/0xf0 ? __netif_receive_skb+0x1b0/0x1b0 ? br_fdb_update+0x10e/0x6e0 [bridge] ? br_handle_frame_finish+0x3c6/0x11d0 [bridge] br_handle_frame_finish+0x3c6/0x11d0 [bridge] ? br_pass_frame_up+0x3a0/0x3a0 [bridge] ? virtnet_probe+0x1c80/0x1c80 [virtio_net] br_handle_frame+0x731/0xd90 [bridge] ? select_idle_sibling+0x25/0x7d0 ? br_handle_frame_finish+0x11d0/0x11d0 [bridge] __netif_receive_skb_core+0xced/0x2d70 ? virtqueue_get_buf_ctx+0x230/0x1130 [virtio_ring] ? do_xdp_generic+0x20/0x20 ? virtqueue_napi_complete+0x39/0x70 [virtio_net] ? virtnet_poll+0x94d/0xc78 [virtio_net] ? receive_buf+0x5120/0x5120 [virtio_net] ? __netif_receive_skb_one_core+0x97/0x1d0 __netif_receive_skb_one_core+0x97/0x1d0 ? __netif_receive_skb_core+0x2d70/0x2d70 ? _raw_write_trylock+0x100/0x100 ? __queue_work+0x41e/0xbe0 process_backlog+0x19c/0x650 ? _raw_read_lock_irq+0x40/0x40 net_rx_action+0x71e/0xbc0 ? __switch_to_asm+0x40/0x70 ? napi_complete_done+0x360/0x360 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x85e/0x14d0 __do_softirq+0x1db/0x5f9 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x26/0x40 smpboot_thread_fn+0x443/0x680 ? sort_range+0x20/0x20 ? schedule+0x94/0x210 ? __kthread_parkme+0x78/0xf0 ? sort_range+0x20/0x20 kthread+0x2ae/0x3a0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0001084c00 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0xffffc000000000() raw: 00ffffc000000000 ffffea0000cfca08 ffffea0001098608 0000000000000000 raw: 0000000000000000 0000000000000003 00000000ffffff7f 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888042130180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff888042130280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888042130300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Reported-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Tested-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-02 20:00:18 +08:00
__be16 *_nsrcs, __nsrcs;
u16 nsrcs;
nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs);
if (skb_transport_offset(skb) + ipv6_transport_len(skb) <
nsrcs_offset + sizeof(__nsrcs))
return -EINVAL;
net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling We take a pointer to grec prior to calling pskb_may_pull and use it afterwards to get nsrcs so record nsrcs before the pull when handling igmp3 and we get a pointer to nsrcs and call pskb_may_pull when handling mld2 which again could lead to reading 2 bytes out-of-bounds. ================================================================== BUG: KASAN: use-after-free in br_multicast_rcv+0x480c/0x4ad0 [bridge] Read of size 2 at addr ffff8880421302b4 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G OE 5.2.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x280 ? br_multicast_rcv+0x480c/0x4ad0 [bridge] __kasan_report+0x152/0x1aa ? br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_rcv+0x480c/0x4ad0 [bridge] kasan_report+0xe/0x20 br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_disable_port+0x150/0x150 [bridge] ? ktime_get_with_offset+0xb4/0x150 ? __kasan_kmalloc.constprop.6+0xa6/0xf0 ? __netif_receive_skb+0x1b0/0x1b0 ? br_fdb_update+0x10e/0x6e0 [bridge] ? br_handle_frame_finish+0x3c6/0x11d0 [bridge] br_handle_frame_finish+0x3c6/0x11d0 [bridge] ? br_pass_frame_up+0x3a0/0x3a0 [bridge] ? virtnet_probe+0x1c80/0x1c80 [virtio_net] br_handle_frame+0x731/0xd90 [bridge] ? select_idle_sibling+0x25/0x7d0 ? br_handle_frame_finish+0x11d0/0x11d0 [bridge] __netif_receive_skb_core+0xced/0x2d70 ? virtqueue_get_buf_ctx+0x230/0x1130 [virtio_ring] ? do_xdp_generic+0x20/0x20 ? virtqueue_napi_complete+0x39/0x70 [virtio_net] ? virtnet_poll+0x94d/0xc78 [virtio_net] ? receive_buf+0x5120/0x5120 [virtio_net] ? __netif_receive_skb_one_core+0x97/0x1d0 __netif_receive_skb_one_core+0x97/0x1d0 ? __netif_receive_skb_core+0x2d70/0x2d70 ? _raw_write_trylock+0x100/0x100 ? __queue_work+0x41e/0xbe0 process_backlog+0x19c/0x650 ? _raw_read_lock_irq+0x40/0x40 net_rx_action+0x71e/0xbc0 ? __switch_to_asm+0x40/0x70 ? napi_complete_done+0x360/0x360 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x85e/0x14d0 __do_softirq+0x1db/0x5f9 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x26/0x40 smpboot_thread_fn+0x443/0x680 ? sort_range+0x20/0x20 ? schedule+0x94/0x210 ? __kthread_parkme+0x78/0xf0 ? sort_range+0x20/0x20 kthread+0x2ae/0x3a0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0001084c00 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0xffffc000000000() raw: 00ffffc000000000 ffffea0000cfca08 ffffea0001098608 0000000000000000 raw: 0000000000000000 0000000000000003 00000000ffffff7f 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888042130180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff888042130280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888042130300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Reported-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Tested-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-02 20:00:18 +08:00
_nsrcs = skb_header_pointer(skb, nsrcs_offset,
sizeof(__nsrcs), &__nsrcs);
if (!_nsrcs)
return -EINVAL;
net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling We take a pointer to grec prior to calling pskb_may_pull and use it afterwards to get nsrcs so record nsrcs before the pull when handling igmp3 and we get a pointer to nsrcs and call pskb_may_pull when handling mld2 which again could lead to reading 2 bytes out-of-bounds. ================================================================== BUG: KASAN: use-after-free in br_multicast_rcv+0x480c/0x4ad0 [bridge] Read of size 2 at addr ffff8880421302b4 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G OE 5.2.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x280 ? br_multicast_rcv+0x480c/0x4ad0 [bridge] __kasan_report+0x152/0x1aa ? br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_rcv+0x480c/0x4ad0 [bridge] kasan_report+0xe/0x20 br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_disable_port+0x150/0x150 [bridge] ? ktime_get_with_offset+0xb4/0x150 ? __kasan_kmalloc.constprop.6+0xa6/0xf0 ? __netif_receive_skb+0x1b0/0x1b0 ? br_fdb_update+0x10e/0x6e0 [bridge] ? br_handle_frame_finish+0x3c6/0x11d0 [bridge] br_handle_frame_finish+0x3c6/0x11d0 [bridge] ? br_pass_frame_up+0x3a0/0x3a0 [bridge] ? virtnet_probe+0x1c80/0x1c80 [virtio_net] br_handle_frame+0x731/0xd90 [bridge] ? select_idle_sibling+0x25/0x7d0 ? br_handle_frame_finish+0x11d0/0x11d0 [bridge] __netif_receive_skb_core+0xced/0x2d70 ? virtqueue_get_buf_ctx+0x230/0x1130 [virtio_ring] ? do_xdp_generic+0x20/0x20 ? virtqueue_napi_complete+0x39/0x70 [virtio_net] ? virtnet_poll+0x94d/0xc78 [virtio_net] ? receive_buf+0x5120/0x5120 [virtio_net] ? __netif_receive_skb_one_core+0x97/0x1d0 __netif_receive_skb_one_core+0x97/0x1d0 ? __netif_receive_skb_core+0x2d70/0x2d70 ? _raw_write_trylock+0x100/0x100 ? __queue_work+0x41e/0xbe0 process_backlog+0x19c/0x650 ? _raw_read_lock_irq+0x40/0x40 net_rx_action+0x71e/0xbc0 ? __switch_to_asm+0x40/0x70 ? napi_complete_done+0x360/0x360 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x85e/0x14d0 __do_softirq+0x1db/0x5f9 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x26/0x40 smpboot_thread_fn+0x443/0x680 ? sort_range+0x20/0x20 ? schedule+0x94/0x210 ? __kthread_parkme+0x78/0xf0 ? sort_range+0x20/0x20 kthread+0x2ae/0x3a0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0001084c00 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0xffffc000000000() raw: 00ffffc000000000 ffffea0000cfca08 ffffea0001098608 0000000000000000 raw: 0000000000000000 0000000000000003 00000000ffffff7f 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888042130180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff888042130280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888042130300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Reported-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Tested-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-02 20:00:18 +08:00
nsrcs = ntohs(*_nsrcs);
grec_len = struct_size(grec, grec_src, nsrcs);
if (!ipv6_mc_may_pull(skb, len + grec_len))
return -EINVAL;
grec = (struct mld2_grec *)(skb->data + len);
len += grec_len;
/* We treat these as MLDv1 reports for now. */
switch (grec->grec_type) {
case MLD2_MODE_IS_INCLUDE:
case MLD2_MODE_IS_EXCLUDE:
case MLD2_CHANGE_TO_INCLUDE:
case MLD2_CHANGE_TO_EXCLUDE:
case MLD2_ALLOW_NEW_SOURCES:
case MLD2_BLOCK_OLD_SOURCES:
break;
default:
continue;
}
src = eth_hdr(skb)->h_source;
if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling We take a pointer to grec prior to calling pskb_may_pull and use it afterwards to get nsrcs so record nsrcs before the pull when handling igmp3 and we get a pointer to nsrcs and call pskb_may_pull when handling mld2 which again could lead to reading 2 bytes out-of-bounds. ================================================================== BUG: KASAN: use-after-free in br_multicast_rcv+0x480c/0x4ad0 [bridge] Read of size 2 at addr ffff8880421302b4 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G OE 5.2.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Call Trace: dump_stack+0x71/0xab print_address_description+0x6a/0x280 ? br_multicast_rcv+0x480c/0x4ad0 [bridge] __kasan_report+0x152/0x1aa ? br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_rcv+0x480c/0x4ad0 [bridge] kasan_report+0xe/0x20 br_multicast_rcv+0x480c/0x4ad0 [bridge] ? br_multicast_disable_port+0x150/0x150 [bridge] ? ktime_get_with_offset+0xb4/0x150 ? __kasan_kmalloc.constprop.6+0xa6/0xf0 ? __netif_receive_skb+0x1b0/0x1b0 ? br_fdb_update+0x10e/0x6e0 [bridge] ? br_handle_frame_finish+0x3c6/0x11d0 [bridge] br_handle_frame_finish+0x3c6/0x11d0 [bridge] ? br_pass_frame_up+0x3a0/0x3a0 [bridge] ? virtnet_probe+0x1c80/0x1c80 [virtio_net] br_handle_frame+0x731/0xd90 [bridge] ? select_idle_sibling+0x25/0x7d0 ? br_handle_frame_finish+0x11d0/0x11d0 [bridge] __netif_receive_skb_core+0xced/0x2d70 ? virtqueue_get_buf_ctx+0x230/0x1130 [virtio_ring] ? do_xdp_generic+0x20/0x20 ? virtqueue_napi_complete+0x39/0x70 [virtio_net] ? virtnet_poll+0x94d/0xc78 [virtio_net] ? receive_buf+0x5120/0x5120 [virtio_net] ? __netif_receive_skb_one_core+0x97/0x1d0 __netif_receive_skb_one_core+0x97/0x1d0 ? __netif_receive_skb_core+0x2d70/0x2d70 ? _raw_write_trylock+0x100/0x100 ? __queue_work+0x41e/0xbe0 process_backlog+0x19c/0x650 ? _raw_read_lock_irq+0x40/0x40 net_rx_action+0x71e/0xbc0 ? __switch_to_asm+0x40/0x70 ? napi_complete_done+0x360/0x360 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x85e/0x14d0 __do_softirq+0x1db/0x5f9 ? takeover_tasklets+0x5f0/0x5f0 run_ksoftirqd+0x26/0x40 smpboot_thread_fn+0x443/0x680 ? sort_range+0x20/0x20 ? schedule+0x94/0x210 ? __kthread_parkme+0x78/0xf0 ? sort_range+0x20/0x20 kthread+0x2ae/0x3a0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0001084c00 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 flags: 0xffffc000000000() raw: 00ffffc000000000 ffffea0000cfca08 ffffea0001098608 0000000000000000 raw: 0000000000000000 0000000000000003 00000000ffffff7f 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888042130180: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130200: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff888042130280: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888042130300: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888042130380: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") Reported-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Tested-by: Martin Weinelt <martin@linuxlounge.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-07-02 20:00:18 +08:00
nsrcs == 0) {
br_ip6_multicast_leave_group(br, port, &grec->grec_mca,
vid, src);
} else {
err = br_ip6_multicast_add_group(br, port,
&grec->grec_mca, vid,
src, true);
if (err)
break;
}
}
return err;
}
#endif
static bool br_ip4_multicast_select_querier(struct net_bridge *br,
struct net_bridge_port *port,
__be32 saddr)
{
if (!timer_pending(&br->ip4_own_query.timer) &&
!timer_pending(&br->ip4_other_query.timer))
goto update;
if (!br->ip4_querier.addr.u.ip4)
goto update;
if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4))
goto update;
return false;
update:
br->ip4_querier.addr.u.ip4 = saddr;
/* update protected by general multicast_lock by caller */
rcu_assign_pointer(br->ip4_querier.port, port);
return true;
}
#if IS_ENABLED(CONFIG_IPV6)
static bool br_ip6_multicast_select_querier(struct net_bridge *br,
struct net_bridge_port *port,
struct in6_addr *saddr)
{
if (!timer_pending(&br->ip6_own_query.timer) &&
!timer_pending(&br->ip6_other_query.timer))
goto update;
if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0)
goto update;
return false;
update:
br->ip6_querier.addr.u.ip6 = *saddr;
/* update protected by general multicast_lock by caller */
rcu_assign_pointer(br->ip6_querier.port, port);
return true;
}
#endif
static bool br_multicast_select_querier(struct net_bridge *br,
struct net_bridge_port *port,
struct br_ip *saddr)
{
switch (saddr->proto) {
case htons(ETH_P_IP):
return br_ip4_multicast_select_querier(br, port, saddr->u.ip4);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6);
#endif
}
return false;
}
static void
br_multicast_update_query_timer(struct net_bridge *br,
struct bridge_mcast_other_query *query,
unsigned long max_delay)
{
if (!timer_pending(&query->timer))
query->delay_time = jiffies + max_delay;
mod_timer(&query->timer, jiffies + br->multicast_querier_interval);
}
static void br_port_mc_router_state_change(struct net_bridge_port *p,
bool is_mc_router)
{
struct switchdev_attr attr = {
.orig_dev = p->dev,
.id = SWITCHDEV_ATTR_ID_PORT_MROUTER,
.flags = SWITCHDEV_F_DEFER,
.u.mrouter = is_mc_router,
};
switchdev_port_attr_set(p->dev, &attr);
}
/*
* Add port to router_list
* list is maintained ordered by pointer value
* and locked by br->multicast_lock and RCU
*/
static void br_multicast_add_router(struct net_bridge *br,
struct net_bridge_port *port)
{
struct net_bridge_port *p;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 09:06:00 +08:00
struct hlist_node *slot = NULL;
if (!hlist_unhashed(&port->rlist))
return;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 09:06:00 +08:00
hlist_for_each_entry(p, &br->router_list, rlist) {
if ((unsigned long) port >= (unsigned long) p)
break;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 09:06:00 +08:00
slot = &p->rlist;
}
if (slot)
hlist_add_behind_rcu(&port->rlist, slot);
else
hlist_add_head_rcu(&port->rlist, &br->router_list);
br_rtr_notify(br->dev, port, RTM_NEWMDB);
br_port_mc_router_state_change(port, true);
}
static void br_multicast_mark_router(struct net_bridge *br,
struct net_bridge_port *port)
{
unsigned long now = jiffies;
if (!port) {
if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
if (!timer_pending(&br->multicast_router_timer))
br_mc_router_state_change(br, true);
mod_timer(&br->multicast_router_timer,
now + br->multicast_querier_interval);
}
return;
}
if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
port->multicast_router == MDB_RTR_TYPE_PERM)
return;
br_multicast_add_router(br, port);
mod_timer(&port->multicast_router_timer,
now + br->multicast_querier_interval);
}
static void br_multicast_query_received(struct net_bridge *br,
struct net_bridge_port *port,
struct bridge_mcast_other_query *query,
struct br_ip *saddr,
unsigned long max_delay)
{
if (!br_multicast_select_querier(br, port, saddr))
return;
br_multicast_update_query_timer(br, query, max_delay);
br_multicast_mark_router(br, port);
}
static void br_ip4_multicast_query(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
u16 vid)
{
unsigned int transport_len = ip_transport_len(skb);
const struct iphdr *iph = ip_hdr(skb);
struct igmphdr *ih = igmp_hdr(skb);
struct net_bridge_mdb_entry *mp;
struct igmpv3_query *ih3;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
__be32 group;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
(port && port->state == BR_STATE_DISABLED))
goto out;
group = ih->group;
if (transport_len == sizeof(*ih)) {
max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
if (!max_delay) {
max_delay = 10 * HZ;
group = 0;
}
} else if (transport_len >= sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs)
goto out;
max_delay = ih3->code ?
IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
} else {
goto out;
}
if (!group) {
saddr.proto = htons(ETH_P_IP);
saddr.u.ip4 = iph->saddr;
br_multicast_query_received(br, port, &br->ip4_other_query,
&saddr, max_delay);
goto out;
}
mp = br_mdb_ip4_get(br, group, vid);
if (!mp)
goto out;
max_delay *= br->multicast_last_member_count;
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0)
mod_timer(&p->timer, now + max_delay);
}
out:
spin_unlock(&br->multicast_lock);
}
#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_query(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
u16 vid)
{
unsigned int transport_len = ipv6_transport_len(skb);
struct mld_msg *mld;
struct net_bridge_mdb_entry *mp;
struct mld2_query *mld2q;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct br_ip saddr;
unsigned long max_delay;
unsigned long now = jiffies;
unsigned int offset = skb_transport_offset(skb);
const struct in6_addr *group = NULL;
bool is_general_query;
int err = 0;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
(port && port->state == BR_STATE_DISABLED))
goto out;
if (transport_len == sizeof(*mld)) {
if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
err = -EINVAL;
goto out;
}
mld = (struct mld_msg *) icmp6_hdr(skb);
max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay));
if (max_delay)
group = &mld->mld_mca;
} else {
if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) {
err = -EINVAL;
goto out;
}
mld2q = (struct mld2_query *)icmp6_hdr(skb);
if (!mld2q->mld2q_nsrcs)
group = &mld2q->mld2q_mca;
max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL);
}
is_general_query = group && ipv6_addr_any(group);
if (is_general_query) {
saddr.proto = htons(ETH_P_IPV6);
saddr.u.ip6 = ipv6_hdr(skb)->saddr;
br_multicast_query_received(br, port, &br->ip6_other_query,
&saddr, max_delay);
goto out;
} else if (!group) {
goto out;
}
mp = br_mdb_ip6_get(br, group, vid);
if (!mp)
goto out;
max_delay *= br->multicast_last_member_count;
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0)
mod_timer(&p->timer, now + max_delay);
}
out:
spin_unlock(&br->multicast_lock);
return err;
}
#endif
static void
br_multicast_leave_group(struct net_bridge *br,
struct net_bridge_port *port,
struct br_ip *group,
struct bridge_mcast_other_query *other_query,
struct bridge_mcast_own_query *own_query,
const unsigned char *src)
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
unsigned long now;
unsigned long time;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
(port && port->state == BR_STATE_DISABLED))
goto out;
mp = br_mdb_ip_get(br, group);
if (!mp)
goto out;
if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
struct net_bridge_port_group __rcu **pp;
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (!br_port_group_equal(p, port, src))
continue;
if (p->flags & MDB_PG_FLAGS_PERMANENT)
break;
p->flags |= MDB_PG_FLAGS_FAST_LEAVE;
br_multicast_del_pg(mp, p, pp);
}
goto out;
}
if (timer_pending(&other_query->timer))
goto out;
if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
__br_multicast_send_query(br, port, &mp->addr);
time = jiffies + br->multicast_last_member_count *
br->multicast_last_member_interval;
mod_timer(&own_query->timer, time);
for (p = mlock_dereference(mp->ports, br);
p != NULL;
p = mlock_dereference(p->next, br)) {
if (!br_port_group_equal(p, port, src))
continue;
if (!hlist_unhashed(&p->mglist) &&
(timer_pending(&p->timer) ?
time_after(p->timer.expires, time) :
try_to_del_timer_sync(&p->timer) >= 0)) {
mod_timer(&p->timer, time);
}
break;
}
}
now = jiffies;
time = now + br->multicast_last_member_count *
br->multicast_last_member_interval;
if (!port) {
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
try_to_del_timer_sync(&mp->timer) >= 0)) {
mod_timer(&mp->timer, time);
}
Revert "bridge: only expire the mdb entry when query is received" While this commit was a good attempt to fix issues occuring when no multicast querier is present, this commit still has two more issues: 1) There are cases where mdb entries do not expire even if there is a querier present. The bridge will unnecessarily continue flooding multicast packets on the according ports. 2) Never removing an mdb entry could be exploited for a Denial of Service by an attacker on the local link, slowly, but steadily eating up all memory. Actually, this commit became obsolete with "bridge: disable snooping if there is no querier" (b00589af3b) which included fixes for a few more cases. Therefore reverting the following commits (the commit stated in the commit message plus three of its follow up fixes): ==================== Revert "bridge: update mdb expiration timer upon reports." This reverts commit f144febd93d5ee534fdf23505ab091b2b9088edc. Revert "bridge: do not call setup_timer() multiple times" This reverts commit 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1. Revert "bridge: fix some kernel warning in multicast timer" This reverts commit c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1. Revert "bridge: only expire the mdb entry when query is received" This reverts commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b. ==================== CC: Cong Wang <amwang@redhat.com> Signed-off-by: Linus Lüssing <linus.luessing@web.de> Reviewed-by: Vlad Yasevich <vyasevich@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-10-20 06:58:57 +08:00
goto out;
}
for (p = mlock_dereference(mp->ports, br);
p != NULL;
p = mlock_dereference(p->next, br)) {
if (p->port != port)
continue;
if (!hlist_unhashed(&p->mglist) &&
(timer_pending(&p->timer) ?
time_after(p->timer.expires, time) :
try_to_del_timer_sync(&p->timer) >= 0)) {
mod_timer(&p->timer, time);
}
break;
}
out:
spin_unlock(&br->multicast_lock);
}
static void br_ip4_multicast_leave_group(struct net_bridge *br,
struct net_bridge_port *port,
__be32 group,
__u16 vid,
const unsigned char *src)
{
struct br_ip br_group;
struct bridge_mcast_own_query *own_query;
if (ipv4_is_local_multicast(group))
return;
own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
memset(&br_group, 0, sizeof(br_group));
br_group.u.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query,
own_query, src);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_leave_group(struct net_bridge *br,
struct net_bridge_port *port,
const struct in6_addr *group,
__u16 vid,
const unsigned char *src)
{
struct br_ip br_group;
struct bridge_mcast_own_query *own_query;
if (ipv6_addr_is_ll_all_nodes(group))
return;
own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
memset(&br_group, 0, sizeof(br_group));
br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query,
own_query, src);
}
#endif
static void br_multicast_err_count(const struct net_bridge *br,
const struct net_bridge_port *p,
__be16 proto)
{
struct bridge_mcast_stats __percpu *stats;
struct bridge_mcast_stats *pstats;
if (!br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED))
return;
if (p)
stats = p->mcast_stats;
else
stats = br->mcast_stats;
if (WARN_ON(!stats))
return;
pstats = this_cpu_ptr(stats);
u64_stats_update_begin(&pstats->syncp);
switch (proto) {
case htons(ETH_P_IP):
pstats->mstats.igmp_parse_errors++;
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
pstats->mstats.mld_parse_errors++;
break;
#endif
}
u64_stats_update_end(&pstats->syncp);
}
static void br_multicast_pim(struct net_bridge *br,
struct net_bridge_port *port,
const struct sk_buff *skb)
{
unsigned int offset = skb_transport_offset(skb);
struct pimhdr *pimhdr, _pimhdr;
pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr);
if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION ||
pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
return;
br_multicast_mark_router(br, port);
}
static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb)
{
if (ip_hdr(skb)->protocol != IPPROTO_IGMP ||
igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
return -ENOMSG;
br_multicast_mark_router(br, port);
return 0;
}
static int br_multicast_ipv4_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
u16 vid)
{
const unsigned char *src;
struct igmphdr *ih;
int err;
err = ip_mc_check_igmp(skb);
if (err == -ENOMSG) {
if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) {
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
} else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
if (ip_hdr(skb)->protocol == IPPROTO_PIM)
br_multicast_pim(br, port, skb);
} else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) {
br_ip4_multicast_mrd_rcv(br, port, skb);
}
return 0;
} else if (err < 0) {
br_multicast_err_count(br, port, skb->protocol);
return err;
}
ih = igmp_hdr(skb);
src = eth_hdr(skb)->h_source;
BR_INPUT_SKB_CB(skb)->igmp = ih->type;
switch (ih->type) {
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMPV2_HOST_MEMBERSHIP_REPORT:
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
err = br_ip4_multicast_add_group(br, port, ih->group, vid, src,
true);
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
err = br_ip4_multicast_igmp3_report(br, port, skb, vid);
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
br_ip4_multicast_query(br, port, skb, vid);
break;
case IGMP_HOST_LEAVE_MESSAGE:
br_ip4_multicast_leave_group(br, port, ih->group, vid, src);
break;
}
br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
}
#if IS_ENABLED(CONFIG_IPV6)
static int br_ip6_multicast_mrd_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb)
{
int ret;
if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
return -ENOMSG;
ret = ipv6_mc_check_icmpv6(skb);
if (ret < 0)
return ret;
if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
return -ENOMSG;
br_multicast_mark_router(br, port);
return 0;
}
static int br_multicast_ipv6_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
u16 vid)
{
const unsigned char *src;
struct mld_msg *mld;
int err;
err = ipv6_mc_check_mld(skb);
if (err == -ENOMSG) {
if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
if (ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) {
err = br_ip6_multicast_mrd_rcv(br, port, skb);
if (err < 0 && err != -ENOMSG) {
br_multicast_err_count(br, port, skb->protocol);
return err;
}
}
return 0;
} else if (err < 0) {
br_multicast_err_count(br, port, skb->protocol);
return err;
}
mld = (struct mld_msg *)skb_transport_header(skb);
BR_INPUT_SKB_CB(skb)->igmp = mld->mld_type;
switch (mld->mld_type) {
case ICMPV6_MGM_REPORT:
src = eth_hdr(skb)->h_source;
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid,
src, true);
break;
case ICMPV6_MLD2_REPORT:
err = br_ip6_multicast_mld2_report(br, port, skb, vid);
break;
case ICMPV6_MGM_QUERY:
err = br_ip6_multicast_query(br, port, skb, vid);
break;
case ICMPV6_MGM_REDUCTION:
src = eth_hdr(skb)->h_source;
br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src);
break;
}
br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
}
#endif
int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
struct sk_buff *skb, u16 vid)
{
int ret = 0;
BR_INPUT_SKB_CB(skb)->igmp = 0;
BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
switch (skb->protocol) {
case htons(ETH_P_IP):
ret = br_multicast_ipv4_rcv(br, port, skb, vid);
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
ret = br_multicast_ipv6_rcv(br, port, skb, vid);
break;
#endif
}
return ret;
}
static void br_multicast_query_expired(struct net_bridge *br,
struct bridge_mcast_own_query *query,
struct bridge_mcast_querier *querier)
{
spin_lock(&br->multicast_lock);
if (query->startup_sent < br->multicast_startup_query_count)
query->startup_sent++;
RCU_INIT_POINTER(querier->port, NULL);
br_multicast_send_query(br, NULL, query);
spin_unlock(&br->multicast_lock);
}
static void br_ip4_multicast_query_expired(struct timer_list *t)
{
struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_query_expired(struct timer_list *t)
{
struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
}
#endif
static void __grp_src_gc(struct hlist_head *head)
{
struct net_bridge_group_src *ent;
struct hlist_node *tmp;
hlist_for_each_entry_safe(ent, tmp, head, del_node) {
hlist_del_init(&ent->del_node);
del_timer_sync(&ent->timer);
kfree_rcu(ent, rcu);
}
}
static void br_multicast_src_gc(struct work_struct *work)
{
struct net_bridge *br = container_of(work, struct net_bridge,
src_gc_work);
HLIST_HEAD(deleted_head);
spin_lock_bh(&br->multicast_lock);
hlist_move_list(&br->src_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
__grp_src_gc(&deleted_head);
}
void br_multicast_init(struct net_bridge *br)
{
br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
br->multicast_last_member_count = 2;
br->multicast_startup_query_count = 2;
br->multicast_last_member_interval = HZ;
br->multicast_query_response_interval = 10 * HZ;
br->multicast_startup_query_interval = 125 * HZ / 4;
br->multicast_query_interval = 125 * HZ;
br->multicast_querier_interval = 255 * HZ;
br->multicast_membership_interval = 260 * HZ;
br->ip4_other_query.delay_time = 0;
br->ip4_querier.port = NULL;
br->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
br->multicast_mld_version = 1;
br->ip6_other_query.delay_time = 0;
br->ip6_querier.port = NULL;
#endif
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
spin_lock_init(&br->multicast_lock);
timer_setup(&br->multicast_router_timer,
br_multicast_local_router_expired, 0);
timer_setup(&br->ip4_other_query.timer,
br_ip4_multicast_querier_expired, 0);
timer_setup(&br->ip4_own_query.timer,
br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
timer_setup(&br->ip6_other_query.timer,
br_ip6_multicast_querier_expired, 0);
timer_setup(&br->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
INIT_HLIST_HEAD(&br->mdb_list);
INIT_HLIST_HEAD(&br->src_gc_list);
INIT_WORK(&br->src_gc_work, br_multicast_src_gc);
}
static void br_ip4_multicast_join_snoopers(struct net_bridge *br)
{
struct in_device *in_dev = in_dev_get(br->dev);
if (!in_dev)
return;
net: Fix ip_mc_{dec,inc}_group allocation context After 4effd28c1245 ("bridge: join all-snoopers multicast address"), I started seeing the following sleep in atomic warnings: [ 26.763893] BUG: sleeping function called from invalid context at mm/slab.h:421 [ 26.771425] in_atomic(): 1, irqs_disabled(): 0, pid: 1658, name: sh [ 26.777855] INFO: lockdep is turned off. [ 26.781916] CPU: 0 PID: 1658 Comm: sh Not tainted 5.0.0-rc4 #20 [ 26.787943] Hardware name: BCM97278SV (DT) [ 26.792118] Call trace: [ 26.794645] dump_backtrace+0x0/0x170 [ 26.798391] show_stack+0x24/0x30 [ 26.801787] dump_stack+0xa4/0xe4 [ 26.805182] ___might_sleep+0x208/0x218 [ 26.809102] __might_sleep+0x78/0x88 [ 26.812762] kmem_cache_alloc_trace+0x64/0x28c [ 26.817301] igmp_group_dropped+0x150/0x230 [ 26.821573] ip_mc_dec_group+0x1b0/0x1f8 [ 26.825585] br_ip4_multicast_leave_snoopers.isra.11+0x174/0x190 [ 26.831704] br_multicast_toggle+0x78/0xcc [ 26.835887] store_bridge_parm+0xc4/0xfc [ 26.839894] multicast_snooping_store+0x3c/0x4c [ 26.844517] dev_attr_store+0x44/0x5c [ 26.848262] sysfs_kf_write+0x50/0x68 [ 26.852006] kernfs_fop_write+0x14c/0x1b4 [ 26.856102] __vfs_write+0x60/0x190 [ 26.859668] vfs_write+0xc8/0x168 [ 26.863059] ksys_write+0x70/0xc8 [ 26.866449] __arm64_sys_write+0x24/0x30 [ 26.870458] el0_svc_common+0xa0/0x11c [ 26.874291] el0_svc_handler+0x38/0x70 [ 26.878120] el0_svc+0x8/0xc while toggling the bridge's multicast_snooping attribute dynamically. Pass a gfp_t down to igmpv3_add_delrec(), introduce __igmp_group_dropped() and introduce __ip_mc_dec_group() to take a gfp_t argument. Similarly introduce ____ip_mc_inc_group() and __ip_mc_inc_group() to allow caller to specify gfp_t. IPv6 part of the patch appears fine. Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address") Signed-off-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-02 12:20:52 +08:00
__ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC);
in_dev_put(in_dev);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_join_snoopers(struct net_bridge *br)
{
struct in6_addr addr;
ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a));
ipv6_dev_mc_inc(br->dev, &addr);
}
#else
static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br)
{
}
#endif
static void br_multicast_join_snoopers(struct net_bridge *br)
{
br_ip4_multicast_join_snoopers(br);
br_ip6_multicast_join_snoopers(br);
}
static void br_ip4_multicast_leave_snoopers(struct net_bridge *br)
{
struct in_device *in_dev = in_dev_get(br->dev);
if (WARN_ON(!in_dev))
return;
net: Fix ip_mc_{dec,inc}_group allocation context After 4effd28c1245 ("bridge: join all-snoopers multicast address"), I started seeing the following sleep in atomic warnings: [ 26.763893] BUG: sleeping function called from invalid context at mm/slab.h:421 [ 26.771425] in_atomic(): 1, irqs_disabled(): 0, pid: 1658, name: sh [ 26.777855] INFO: lockdep is turned off. [ 26.781916] CPU: 0 PID: 1658 Comm: sh Not tainted 5.0.0-rc4 #20 [ 26.787943] Hardware name: BCM97278SV (DT) [ 26.792118] Call trace: [ 26.794645] dump_backtrace+0x0/0x170 [ 26.798391] show_stack+0x24/0x30 [ 26.801787] dump_stack+0xa4/0xe4 [ 26.805182] ___might_sleep+0x208/0x218 [ 26.809102] __might_sleep+0x78/0x88 [ 26.812762] kmem_cache_alloc_trace+0x64/0x28c [ 26.817301] igmp_group_dropped+0x150/0x230 [ 26.821573] ip_mc_dec_group+0x1b0/0x1f8 [ 26.825585] br_ip4_multicast_leave_snoopers.isra.11+0x174/0x190 [ 26.831704] br_multicast_toggle+0x78/0xcc [ 26.835887] store_bridge_parm+0xc4/0xfc [ 26.839894] multicast_snooping_store+0x3c/0x4c [ 26.844517] dev_attr_store+0x44/0x5c [ 26.848262] sysfs_kf_write+0x50/0x68 [ 26.852006] kernfs_fop_write+0x14c/0x1b4 [ 26.856102] __vfs_write+0x60/0x190 [ 26.859668] vfs_write+0xc8/0x168 [ 26.863059] ksys_write+0x70/0xc8 [ 26.866449] __arm64_sys_write+0x24/0x30 [ 26.870458] el0_svc_common+0xa0/0x11c [ 26.874291] el0_svc_handler+0x38/0x70 [ 26.878120] el0_svc+0x8/0xc while toggling the bridge's multicast_snooping attribute dynamically. Pass a gfp_t down to igmpv3_add_delrec(), introduce __igmp_group_dropped() and introduce __ip_mc_dec_group() to take a gfp_t argument. Similarly introduce ____ip_mc_inc_group() and __ip_mc_inc_group() to allow caller to specify gfp_t. IPv6 part of the patch appears fine. Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address") Signed-off-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-02 12:20:52 +08:00
__ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC);
in_dev_put(in_dev);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_leave_snoopers(struct net_bridge *br)
{
struct in6_addr addr;
ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a));
ipv6_dev_mc_dec(br->dev, &addr);
}
#else
static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br)
{
}
#endif
static void br_multicast_leave_snoopers(struct net_bridge *br)
{
br_ip4_multicast_leave_snoopers(br);
br_ip6_multicast_leave_snoopers(br);
}
static void __br_multicast_open(struct net_bridge *br,
struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return;
mod_timer(&query->timer, jiffies);
}
void br_multicast_open(struct net_bridge *br)
{
if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
br_multicast_join_snoopers(br);
__br_multicast_open(br, &br->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
__br_multicast_open(br, &br->ip6_own_query);
#endif
}
void br_multicast_stop(struct net_bridge *br)
{
del_timer_sync(&br->multicast_router_timer);
del_timer_sync(&br->ip4_other_query.timer);
del_timer_sync(&br->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
del_timer_sync(&br->ip6_other_query.timer);
del_timer_sync(&br->ip6_own_query.timer);
#endif
if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
br_multicast_leave_snoopers(br);
}
void br_multicast_dev_del(struct net_bridge *br)
{
struct net_bridge_mdb_entry *mp;
HLIST_HEAD(deleted_head);
struct hlist_node *tmp;
spin_lock_bh(&br->multicast_lock);
hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) {
del_timer(&mp->timer);
rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
br_mdb_rht_params);
hlist_del_rcu(&mp->mdb_node);
kfree_rcu(mp, rcu);
}
hlist_move_list(&br->src_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
__grp_src_gc(&deleted_head);
cancel_work_sync(&br->src_gc_work);
rcu_barrier();
}
int br_multicast_set_router(struct net_bridge *br, unsigned long val)
{
int err = -EINVAL;
spin_lock_bh(&br->multicast_lock);
switch (val) {
case MDB_RTR_TYPE_DISABLED:
case MDB_RTR_TYPE_PERM:
br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
del_timer(&br->multicast_router_timer);
br->multicast_router = val;
err = 0;
break;
case MDB_RTR_TYPE_TEMP_QUERY:
if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
br_mc_router_state_change(br, false);
br->multicast_router = val;
err = 0;
break;
}
spin_unlock_bh(&br->multicast_lock);
return err;
}
static void __del_port_router(struct net_bridge_port *p)
{
if (hlist_unhashed(&p->rlist))
return;
hlist_del_init_rcu(&p->rlist);
br_rtr_notify(p->br->dev, p, RTM_DELMDB);
br_port_mc_router_state_change(p, false);
/* don't allow timer refresh */
if (p->multicast_router == MDB_RTR_TYPE_TEMP)
p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
}
int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
{
struct net_bridge *br = p->br;
unsigned long now = jiffies;
int err = -EINVAL;
spin_lock(&br->multicast_lock);
if (p->multicast_router == val) {
/* Refresh the temp router port timer */
if (p->multicast_router == MDB_RTR_TYPE_TEMP)
mod_timer(&p->multicast_router_timer,
now + br->multicast_querier_interval);
err = 0;
goto unlock;
}
switch (val) {
case MDB_RTR_TYPE_DISABLED:
p->multicast_router = MDB_RTR_TYPE_DISABLED;
__del_port_router(p);
del_timer(&p->multicast_router_timer);
break;
case MDB_RTR_TYPE_TEMP_QUERY:
p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
__del_port_router(p);
break;
case MDB_RTR_TYPE_PERM:
p->multicast_router = MDB_RTR_TYPE_PERM;
del_timer(&p->multicast_router_timer);
br_multicast_add_router(br, p);
break;
case MDB_RTR_TYPE_TEMP:
p->multicast_router = MDB_RTR_TYPE_TEMP;
br_multicast_mark_router(br, p);
break;
default:
goto unlock;
}
err = 0;
unlock:
spin_unlock(&br->multicast_lock);
return err;
}
static void br_multicast_start_querier(struct net_bridge *br,
struct bridge_mcast_own_query *query)
{
struct net_bridge_port *port;
__br_multicast_open(br, query);
rcu_read_lock();
list_for_each_entry_rcu(port, &br->port_list, list) {
if (port->state == BR_STATE_DISABLED ||
port->state == BR_STATE_BLOCKING)
continue;
if (query == &br->ip4_own_query)
br_multicast_enable(&port->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
else
br_multicast_enable(&port->ip6_own_query);
#endif
}
rcu_read_unlock();
}
int br_multicast_toggle(struct net_bridge *br, unsigned long val)
{
bridge: multicast: restore perm router ports on multicast enable Satish reported a problem with the perm multicast router ports not getting reenabled after some series of events, in particular if it happens that the multicast snooping has been disabled and the port goes to disabled state then it will be deleted from the router port list, but if it moves into non-disabled state it will not be re-added because the mcast snooping is still disabled, and enabling snooping later does nothing. Here are the steps to reproduce, setup br0 with snooping enabled and eth1 added as a perm router (multicast_router = 2): 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show <empty> At this point we have mcast enabled and eth1 as a perm router (value = 2) but it is not in the router list which is incorrect. After this change: 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show router ports on br0: eth1 Note: we can directly do br_multicast_enable_port for all because the querier timer already has checks for the port state and will simply expire if it's in blocking/disabled. See the comment added by commit 9aa66382163e7 ("bridge: multicast: add a comment to br_port_state_selection about blocking state") Fixes: 561f1103a2b7 ("bridge: Add multicast_snooping sysfs toggle") Reported-by: Satish Ashok <sashok@cumulusnetworks.com> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-19 00:09:48 +08:00
struct net_bridge_port *port;
spin_lock_bh(&br->multicast_lock);
if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val)
goto unlock;
br_mc_disabled_update(br->dev, val);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val);
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
br_multicast_leave_snoopers(br);
goto unlock;
}
if (!netif_running(br->dev))
goto unlock;
bridge: multicast: restore perm router ports on multicast enable Satish reported a problem with the perm multicast router ports not getting reenabled after some series of events, in particular if it happens that the multicast snooping has been disabled and the port goes to disabled state then it will be deleted from the router port list, but if it moves into non-disabled state it will not be re-added because the mcast snooping is still disabled, and enabling snooping later does nothing. Here are the steps to reproduce, setup br0 with snooping enabled and eth1 added as a perm router (multicast_router = 2): 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show <empty> At this point we have mcast enabled and eth1 as a perm router (value = 2) but it is not in the router list which is incorrect. After this change: 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show router ports on br0: eth1 Note: we can directly do br_multicast_enable_port for all because the querier timer already has checks for the port state and will simply expire if it's in blocking/disabled. See the comment added by commit 9aa66382163e7 ("bridge: multicast: add a comment to br_port_state_selection about blocking state") Fixes: 561f1103a2b7 ("bridge: Add multicast_snooping sysfs toggle") Reported-by: Satish Ashok <sashok@cumulusnetworks.com> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-19 00:09:48 +08:00
br_multicast_open(br);
list_for_each_entry(port, &br->port_list, list)
__br_multicast_enable_port(port);
unlock:
spin_unlock_bh(&br->multicast_lock);
return 0;
}
bool br_multicast_enabled(const struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
return !!br_opt_get(br, BROPT_MULTICAST_ENABLED);
}
EXPORT_SYMBOL_GPL(br_multicast_enabled);
bool br_multicast_router(const struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
bool is_router;
spin_lock_bh(&br->multicast_lock);
is_router = br_multicast_is_router(br);
spin_unlock_bh(&br->multicast_lock);
return is_router;
}
EXPORT_SYMBOL_GPL(br_multicast_router);
int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
{
unsigned long max_delay;
val = !!val;
spin_lock_bh(&br->multicast_lock);
if (br_opt_get(br, BROPT_MULTICAST_QUERIER) == val)
goto unlock;
br_opt_toggle(br, BROPT_MULTICAST_QUERIER, !!val);
if (!val)
goto unlock;
max_delay = br->multicast_query_response_interval;
if (!timer_pending(&br->ip4_other_query.timer))
br->ip4_other_query.delay_time = jiffies + max_delay;
br_multicast_start_querier(br, &br->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
if (!timer_pending(&br->ip6_other_query.timer))
br->ip6_other_query.delay_time = jiffies + max_delay;
br_multicast_start_querier(br, &br->ip6_own_query);
#endif
unlock:
spin_unlock_bh(&br->multicast_lock);
return 0;
}
int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
{
/* Currently we support only version 2 and 3 */
switch (val) {
case 2:
case 3:
break;
default:
return -EINVAL;
}
spin_lock_bh(&br->multicast_lock);
br->multicast_igmp_version = val;
spin_unlock_bh(&br->multicast_lock);
return 0;
}
#if IS_ENABLED(CONFIG_IPV6)
int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
{
/* Currently we support version 1 and 2 */
switch (val) {
case 1:
case 2:
break;
default:
return -EINVAL;
}
spin_lock_bh(&br->multicast_lock);
br->multicast_mld_version = val;
spin_unlock_bh(&br->multicast_lock);
return 0;
}
#endif
/**
* br_multicast_list_adjacent - Returns snooped multicast addresses
* @dev: The bridge port adjacent to which to retrieve addresses
* @br_ip_list: The list to store found, snooped multicast IP addresses in
*
* Creates a list of IP addresses (struct br_ip_list) sensed by the multicast
* snooping feature on all bridge ports of dev's bridge device, excluding
* the addresses from dev itself.
*
* Returns the number of items added to br_ip_list.
*
* Notes:
* - br_ip_list needs to be initialized by caller
* - br_ip_list might contain duplicates in the end
* (needs to be taken care of by caller)
* - br_ip_list needs to be freed by caller
*/
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list)
{
struct net_bridge *br;
struct net_bridge_port *port;
struct net_bridge_port_group *group;
struct br_ip_list *entry;
int count = 0;
rcu_read_lock();
if (!br_ip_list || !netif_is_bridge_port(dev))
goto unlock;
port = br_port_get_rcu(dev);
if (!port || !port->br)
goto unlock;
br = port->br;
list_for_each_entry_rcu(port, &br->port_list, list) {
if (!port->dev || port->dev == dev)
continue;
hlist_for_each_entry_rcu(group, &port->mglist, mglist) {
entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
goto unlock;
entry->addr = group->addr;
list_add(&entry->list, br_ip_list);
count++;
}
}
unlock:
rcu_read_unlock();
return count;
}
EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
/**
* br_multicast_has_querier_anywhere - Checks for a querier on a bridge
* @dev: The bridge port providing the bridge on which to check for a querier
* @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
*
* Checks whether the given interface has a bridge on top and if so returns
* true if a valid querier exists anywhere on the bridged link layer.
* Otherwise returns false.
*/
bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto)
{
struct net_bridge *br;
struct net_bridge_port *port;
struct ethhdr eth;
bool ret = false;
rcu_read_lock();
if (!netif_is_bridge_port(dev))
goto unlock;
port = br_port_get_rcu(dev);
if (!port || !port->br)
goto unlock;
br = port->br;
memset(&eth, 0, sizeof(eth));
eth.h_proto = htons(proto);
ret = br_multicast_querier_exists(br, &eth);
unlock:
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere);
/**
* br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port
* @dev: The bridge port adjacent to which to check for a querier
* @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
*
* Checks whether the given interface has a bridge on top and if so returns
* true if a selected querier is behind one of the other ports of this
* bridge. Otherwise returns false.
*/
bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
{
struct net_bridge *br;
struct net_bridge_port *port;
bool ret = false;
rcu_read_lock();
if (!netif_is_bridge_port(dev))
goto unlock;
port = br_port_get_rcu(dev);
if (!port || !port->br)
goto unlock;
br = port->br;
switch (proto) {
case ETH_P_IP:
if (!timer_pending(&br->ip4_other_query.timer) ||
rcu_dereference(br->ip4_querier.port) == port)
goto unlock;
break;
#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
if (!timer_pending(&br->ip6_other_query.timer) ||
rcu_dereference(br->ip6_querier.port) == port)
goto unlock;
break;
#endif
default:
goto unlock;
}
ret = true;
unlock:
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats *pstats = this_cpu_ptr(stats);
__be16 proto = skb->protocol;
unsigned int t_len;
u64_stats_update_begin(&pstats->syncp);
switch (proto) {
case htons(ETH_P_IP):
t_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
switch (type) {
case IGMP_HOST_MEMBERSHIP_REPORT:
pstats->mstats.igmp_v1reports[dir]++;
break;
case IGMPV2_HOST_MEMBERSHIP_REPORT:
pstats->mstats.igmp_v2reports[dir]++;
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
pstats->mstats.igmp_v3reports[dir]++;
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
if (t_len != sizeof(struct igmphdr)) {
pstats->mstats.igmp_v3queries[dir]++;
} else {
unsigned int offset = skb_transport_offset(skb);
struct igmphdr *ih, _ihdr;
ih = skb_header_pointer(skb, offset,
sizeof(_ihdr), &_ihdr);
if (!ih)
break;
if (!ih->code)
pstats->mstats.igmp_v1queries[dir]++;
else
pstats->mstats.igmp_v2queries[dir]++;
}
break;
case IGMP_HOST_LEAVE_MESSAGE:
pstats->mstats.igmp_leaves[dir]++;
break;
}
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
t_len = ntohs(ipv6_hdr(skb)->payload_len) +
sizeof(struct ipv6hdr);
t_len -= skb_network_header_len(skb);
switch (type) {
case ICMPV6_MGM_REPORT:
pstats->mstats.mld_v1reports[dir]++;
break;
case ICMPV6_MLD2_REPORT:
pstats->mstats.mld_v2reports[dir]++;
break;
case ICMPV6_MGM_QUERY:
if (t_len != sizeof(struct mld_msg))
pstats->mstats.mld_v2queries[dir]++;
else
pstats->mstats.mld_v1queries[dir]++;
break;
case ICMPV6_MGM_REDUCTION:
pstats->mstats.mld_leaves[dir]++;
break;
}
break;
#endif /* CONFIG_IPV6 */
}
u64_stats_update_end(&pstats->syncp);
}
void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats __percpu *stats;
/* if multicast_disabled is true then igmp type can't be set */
if (!type || !br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED))
return;
if (p)
stats = p->mcast_stats;
else
stats = br->mcast_stats;
if (WARN_ON(!stats))
return;
br_mcast_stats_add(stats, skb, type, dir);
}
int br_multicast_init_stats(struct net_bridge *br)
{
br->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
if (!br->mcast_stats)
return -ENOMEM;
return 0;
}
void br_multicast_uninit_stats(struct net_bridge *br)
{
free_percpu(br->mcast_stats);
}
/* noinline for https://bugs.llvm.org/show_bug.cgi?id=45802#c9 */
static noinline_for_stack void mcast_stats_add_dir(u64 *dst, u64 *src)
{
dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX];
dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX];
}
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
struct br_mcast_stats *dest)
{
struct bridge_mcast_stats __percpu *stats;
struct br_mcast_stats tdst;
int i;
memset(dest, 0, sizeof(*dest));
if (p)
stats = p->mcast_stats;
else
stats = br->mcast_stats;
if (WARN_ON(!stats))
return;
memset(&tdst, 0, sizeof(tdst));
for_each_possible_cpu(i) {
struct bridge_mcast_stats *cpu_stats = per_cpu_ptr(stats, i);
struct br_mcast_stats temp;
unsigned int start;
do {
start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
memcpy(&temp, &cpu_stats->mstats, sizeof(temp));
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries);
mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries);
mcast_stats_add_dir(tdst.igmp_v3queries, temp.igmp_v3queries);
mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves);
mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports);
mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports);
mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports);
tdst.igmp_parse_errors += temp.igmp_parse_errors;
mcast_stats_add_dir(tdst.mld_v1queries, temp.mld_v1queries);
mcast_stats_add_dir(tdst.mld_v2queries, temp.mld_v2queries);
mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves);
mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports);
mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports);
tdst.mld_parse_errors += temp.mld_parse_errors;
}
memcpy(dest, &tdst, sizeof(*dest));
}
int br_mdb_hash_init(struct net_bridge *br)
{
return rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params);
}
void br_mdb_hash_fini(struct net_bridge *br)
{
rhashtable_destroy(&br->mdb_hash_tbl);
}