Merge branch 'bond_lock_removal'

Nikolay Aleksandrov says:

====================
bonding: get rid of bond->lock

This patch-set removes the last users of bond->lock and converts the places
that needed it for sync to use curr_slave_lock or RCU as appropriate.
I've run this with lockdep and have stress-tested it via loading/unloading
and enslaving/releasing in parallel while outputting bond's proc, I didn't
see any issues. Please pay special attention to the procfs change, I've
done about an hour of stress-testing on it and have checked that the event
that causes the bonding to delete its proc entry (NETDEV_UNREGISTER) is
called before ndo_uninit() and the freeing of the dev so any readers will
sync with that. Also ran sparse checks and there were no splats.

v2: Add patch 0001/cxgb4 bond->lock removal, RTNL should be held in the
    notifier call, the other patches are the same. Also tested with
    allmodconfig to make sure there're no more users of bond->lock.
Changes from the RFC:
 use RCU in procfs instead of RTNL since RTNL might lead to a deadlock with
 unloading and also is much slower. The bond destruction syncs with proc
 via the proc locks. There's one new patch that converts primary_slave to
 use RCU as it was necessary to fix a longstanding bugs in sysfs and
 procfs and to make it easy to migrate bond's procfs to RCU. And of course
 rebased on top of net-next current.

This is the first patch-set in a series that should simplify the bond's
locking requirements and will make it easier to define the locking
conditions necessary for the various paths. The goal is to rely on RTNL
and rcu alone, an extra lock would be needed in a few special cases that
would be documented very well.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-09-09 17:31:43 -07:00
commit 29c10a817f
9 changed files with 62 additions and 131 deletions

View File

@ -2057,7 +2057,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
struct port *port;
bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
read_lock(&bond->lock);
read_lock(&bond->curr_slave_lock);
rcu_read_lock();
/* check if there are any slaves */
@ -2120,7 +2120,7 @@ re_arm:
}
}
rcu_read_unlock();
read_unlock(&bond->lock);
read_unlock(&bond->curr_slave_lock);
if (should_notify_rtnl && rtnl_trylock()) {
bond_slave_state_notify(bond);
@ -2395,7 +2395,6 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
return 0;
}
/* Wrapper used to hold bond->lock so no slave manipulation can occur */
int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
{
int ret;
@ -2487,9 +2486,9 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
if (!lacpdu)
return ret;
read_lock(&bond->lock);
read_lock(&bond->curr_slave_lock);
ret = bond_3ad_rx_indication(lacpdu, slave, skb->len);
read_unlock(&bond->lock);
read_unlock(&bond->curr_slave_lock);
return ret;
}

View File

@ -1775,8 +1775,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
* Set the bond->curr_active_slave to @new_slave and handle
* mac address swapping and promiscuity changes as needed.
*
* If new_slave is NULL, caller must hold curr_slave_lock or
* bond->lock for write.
* If new_slave is NULL, caller must hold curr_slave_lock for write
*
* If new_slave is not NULL, caller must hold RTNL, curr_slave_lock
* for write. Processing here may sleep, so no other locks may be held.
@ -1857,12 +1856,8 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
write_lock_bh(&bond->curr_slave_lock);
}
/*
* Called with RTNL
*/
/* Called with RTNL */
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
__acquires(&bond->lock)
__releases(&bond->lock)
{
struct bonding *bond = netdev_priv(bond_dev);
struct sockaddr *sa = addr;
@ -1895,14 +1890,12 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
} else {
alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr);
read_lock(&bond->lock);
alb_send_learning_packets(curr_active,
bond_dev->dev_addr, false);
if (bond->alb_info.rlb_enabled) {
/* inform clients mac address has changed */
rlb_req_update_slave_clients(bond, curr_active);
}
read_unlock(&bond->lock);
}
return 0;

View File

@ -708,7 +708,7 @@ out:
static bool bond_should_change_active(struct bonding *bond)
{
struct slave *prim = bond->primary_slave;
struct slave *prim = rtnl_dereference(bond->primary_slave);
struct slave *curr = bond_deref_active_protected(bond);
if (!prim || !curr || curr->link != BOND_LINK_UP)
@ -732,13 +732,14 @@ static bool bond_should_change_active(struct bonding *bond)
*/
static struct slave *bond_find_best_slave(struct bonding *bond)
{
struct slave *slave, *bestslave = NULL;
struct slave *slave, *bestslave = NULL, *primary;
struct list_head *iter;
int mintime = bond->params.updelay;
if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP &&
primary = rtnl_dereference(bond->primary_slave);
if (primary && primary->link == BOND_LINK_UP &&
bond_should_change_active(bond))
return bond->primary_slave;
return primary;
bond_for_each_slave(bond, slave, iter) {
if (slave->link == BOND_LINK_UP)
@ -1482,7 +1483,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
if (bond_uses_primary(bond) && bond->params.primary[0]) {
/* if there is a primary slave, remember it */
if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
bond->primary_slave = new_slave;
rcu_assign_pointer(bond->primary_slave, new_slave);
bond->force_primary = true;
}
}
@ -1596,8 +1597,8 @@ err_detach:
bond_hw_addr_flush(bond_dev, slave_dev);
vlan_vids_del_by_dev(slave_dev, bond_dev);
if (bond->primary_slave == new_slave)
bond->primary_slave = NULL;
if (rcu_access_pointer(bond->primary_slave) == new_slave)
RCU_INIT_POINTER(bond->primary_slave, NULL);
if (rcu_access_pointer(bond->curr_active_slave) == new_slave) {
block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
@ -1606,6 +1607,8 @@ err_detach:
write_unlock_bh(&bond->curr_slave_lock);
unblock_netpoll_tx();
}
/* either primary_slave or curr_active_slave might've changed */
synchronize_rcu();
slave_disable_netpoll(new_slave);
err_close:
@ -1687,13 +1690,15 @@ static int __bond_release_one(struct net_device *bond_dev,
* for this slave anymore.
*/
netdev_rx_handler_unregister(slave_dev);
write_lock_bh(&bond->lock);
/* Inform AD package of unbinding of slave. */
if (BOND_MODE(bond) == BOND_MODE_8023AD)
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
/* Sync against bond_3ad_rx_indication and
* bond_3ad_state_machine_handler
*/
write_lock_bh(&bond->curr_slave_lock);
bond_3ad_unbind_slave(slave);
write_unlock_bh(&bond->lock);
write_unlock_bh(&bond->curr_slave_lock);
}
netdev_info(bond_dev, "Releasing %s interface %s\n",
bond_is_active_slave(slave) ? "active" : "backup",
@ -1712,8 +1717,8 @@ static int __bond_release_one(struct net_device *bond_dev,
bond_dev->name, slave_dev->name);
}
if (bond->primary_slave == slave)
bond->primary_slave = NULL;
if (rtnl_dereference(bond->primary_slave) == slave)
RCU_INIT_POINTER(bond->primary_slave, NULL);
if (oldcurrent == slave) {
write_lock_bh(&bond->curr_slave_lock);
@ -1974,7 +1979,7 @@ static int bond_miimon_inspect(struct bonding *bond)
static void bond_miimon_commit(struct bonding *bond)
{
struct list_head *iter;
struct slave *slave;
struct slave *slave, *primary;
bond_for_each_slave(bond, slave, iter) {
switch (slave->new_link) {
@ -1985,13 +1990,14 @@ static void bond_miimon_commit(struct bonding *bond)
slave->link = BOND_LINK_UP;
slave->last_link_up = jiffies;
primary = rtnl_dereference(bond->primary_slave);
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
/* prevent it from being the active one */
bond_set_backup_slave(slave);
} else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
/* make it immediately active */
bond_set_active_slave(slave);
} else if (slave != bond->primary_slave) {
} else if (slave != primary) {
/* prevent it from being the active one */
bond_set_backup_slave(slave);
}
@ -2009,8 +2015,7 @@ static void bond_miimon_commit(struct bonding *bond)
bond_alb_handle_link_change(bond, slave,
BOND_LINK_UP);
if (!bond->curr_active_slave ||
(slave == bond->primary_slave))
if (!bond->curr_active_slave || slave == primary)
goto do_failover;
continue;
@ -2631,7 +2636,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
slave->dev->name);
if (!rtnl_dereference(bond->curr_active_slave) ||
(slave == bond->primary_slave))
slave == rtnl_dereference(bond->primary_slave))
goto do_failover;
}
@ -2858,7 +2863,7 @@ static int bond_master_netdev_event(unsigned long event,
static int bond_slave_netdev_event(unsigned long event,
struct net_device *slave_dev)
{
struct slave *slave = bond_slave_get_rtnl(slave_dev);
struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary;
struct bonding *bond;
struct net_device *bond_dev;
u32 old_speed;
@ -2872,6 +2877,7 @@ static int bond_slave_netdev_event(unsigned long event,
return NOTIFY_DONE;
bond_dev = slave->bond->dev;
bond = slave->bond;
primary = rtnl_dereference(bond->primary_slave);
switch (event) {
case NETDEV_UNREGISTER:
@ -2919,18 +2925,18 @@ static int bond_slave_netdev_event(unsigned long event,
!bond->params.primary[0])
break;
if (slave == bond->primary_slave) {
if (slave == primary) {
/* slave's name changed - he's no longer primary */
bond->primary_slave = NULL;
RCU_INIT_POINTER(bond->primary_slave, NULL);
} else if (!strcmp(slave_dev->name, bond->params.primary)) {
/* we have a new primary slave */
bond->primary_slave = slave;
rcu_assign_pointer(bond->primary_slave, slave);
} else { /* we didn't change primary - exit */
break;
}
netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n",
bond->primary_slave ? slave_dev->name : "none");
primary ? slave_dev->name : "none");
block_netpoll_tx();
write_lock_bh(&bond->curr_slave_lock);
@ -3099,7 +3105,6 @@ static int bond_open(struct net_device *bond_dev)
struct slave *slave;
/* reset slave->backup and slave->inactive */
read_lock(&bond->lock);
if (bond_has_slaves(bond)) {
read_lock(&bond->curr_slave_lock);
bond_for_each_slave(bond, slave, iter) {
@ -3114,7 +3119,6 @@ static int bond_open(struct net_device *bond_dev)
}
read_unlock(&bond->curr_slave_lock);
}
read_unlock(&bond->lock);
bond_work_init_all(bond);
@ -3169,7 +3173,6 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
memset(stats, 0, sizeof(*stats));
read_lock_bh(&bond->lock);
bond_for_each_slave(bond, slave, iter) {
const struct rtnl_link_stats64 *sstats =
dev_get_stats(slave->dev, &temp);
@ -3200,7 +3203,6 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
stats->tx_window_errors += sstats->tx_window_errors;
}
read_unlock_bh(&bond->lock);
return stats;
}
@ -3240,13 +3242,11 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
if (mii->reg_num == 1) {
mii->val_out = 0;
read_lock(&bond->lock);
read_lock(&bond->curr_slave_lock);
if (netif_carrier_ok(bond->dev))
mii->val_out = BMSR_LSTATUS;
read_unlock(&bond->curr_slave_lock);
read_unlock(&bond->lock);
}
return 0;
@ -3422,21 +3422,6 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu);
/* Can't hold bond->lock with bh disabled here since
* some base drivers panic. On the other hand we can't
* hold bond->lock without bh disabled because we'll
* deadlock. The only solution is to rely on the fact
* that we're under rtnl_lock here, and the slaves
* list won't change. This doesn't solve the problem
* of setting the slave's MTU while it is
* transmitting, but the assumption is that the base
* driver can handle that.
*
* TODO: figure out a way to safely iterate the slaves
* list, but without holding a lock around the actual
* call to the base driver.
*/
bond_for_each_slave(bond, slave, iter) {
netdev_dbg(bond_dev, "s %p c_m %p\n",
slave, slave->dev->netdev_ops->ndo_change_mtu);
@ -3511,21 +3496,6 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
if (!is_valid_ether_addr(sa->sa_data))
return -EADDRNOTAVAIL;
/* Can't hold bond->lock with bh disabled here since
* some base drivers panic. On the other hand we can't
* hold bond->lock without bh disabled because we'll
* deadlock. The only solution is to rely on the fact
* that we're under rtnl_lock here, and the slaves
* list won't change. This doesn't solve the problem
* of setting the slave's hw address while it is
* transmitting, but the assumption is that the base
* driver can handle that.
*
* TODO: figure out a way to safely iterate the slaves
* list, but without holding a lock around the actual
* call to the base driver.
*/
bond_for_each_slave(bond, slave, iter) {
netdev_dbg(bond_dev, "slave %p %s\n", slave, slave->dev->name);
res = dev_set_mac_address(slave->dev, addr);
@ -3851,7 +3821,6 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev,
* the true receive or transmit bandwidth (not all modes are symmetric)
* this is an accurate maximum.
*/
read_lock(&bond->lock);
bond_for_each_slave(bond, slave, iter) {
if (bond_slave_can_tx(slave)) {
if (slave->speed != SPEED_UNKNOWN)
@ -3862,7 +3831,6 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev,
}
}
ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN);
read_unlock(&bond->lock);
return 0;
}
@ -3925,7 +3893,6 @@ void bond_setup(struct net_device *bond_dev)
struct bonding *bond = netdev_priv(bond_dev);
/* initialize rwlocks */
rwlock_init(&bond->lock);
rwlock_init(&bond->curr_slave_lock);
bond->params = bonding_defaults;

View File

@ -443,6 +443,7 @@ static int bond_fill_info(struct sk_buff *skb,
unsigned int packets_per_slave;
int ifindex, i, targets_added;
struct nlattr *targets;
struct slave *primary;
if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond)))
goto nla_put_failure;
@ -492,9 +493,9 @@ static int bond_fill_info(struct sk_buff *skb,
bond->params.arp_all_targets))
goto nla_put_failure;
if (bond->primary_slave &&
nla_put_u32(skb, IFLA_BOND_PRIMARY,
bond->primary_slave->dev->ifindex))
primary = rtnl_dereference(bond->primary_slave);
if (primary &&
nla_put_u32(skb, IFLA_BOND_PRIMARY, primary->dev->ifindex))
goto nla_put_failure;
if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT,

View File

@ -955,14 +955,7 @@ static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
{
int ret;
/* not to race with bond_arp_rcv */
write_lock_bh(&bond->lock);
ret = _bond_option_arp_ip_target_add(bond, target);
write_unlock_bh(&bond->lock);
return ret;
return _bond_option_arp_ip_target_add(bond, target);
}
static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
@ -991,9 +984,6 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
netdev_info(bond->dev, "Removing ARP target %pI4\n", &target);
/* not to race with bond_arp_rcv */
write_lock_bh(&bond->lock);
bond_for_each_slave(bond, slave, iter) {
targets_rx = slave->target_last_arp_rx;
for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++)
@ -1004,8 +994,6 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
targets[i] = targets[i+1];
targets[i] = 0;
write_unlock_bh(&bond->lock);
return 0;
}
@ -1013,11 +1001,8 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond)
{
int i;
/* not to race with bond_arp_rcv */
write_lock_bh(&bond->lock);
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
_bond_options_arp_ip_target_set(bond, i, 0, 0);
write_unlock_bh(&bond->lock);
}
static int bond_option_arp_ip_targets_set(struct bonding *bond,
@ -1081,7 +1066,6 @@ static int bond_option_primary_set(struct bonding *bond,
struct slave *slave;
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
p = strchr(primary, '\n');
@ -1090,7 +1074,7 @@ static int bond_option_primary_set(struct bonding *bond,
/* check to see if we are clearing primary */
if (!strlen(primary)) {
netdev_info(bond->dev, "Setting primary slave to None\n");
bond->primary_slave = NULL;
RCU_INIT_POINTER(bond->primary_slave, NULL);
memset(bond->params.primary, 0, sizeof(bond->params.primary));
bond_select_active_slave(bond);
goto out;
@ -1100,16 +1084,16 @@ static int bond_option_primary_set(struct bonding *bond,
if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) {
netdev_info(bond->dev, "Setting %s as primary slave\n",
slave->dev->name);
bond->primary_slave = slave;
rcu_assign_pointer(bond->primary_slave, slave);
strcpy(bond->params.primary, slave->dev->name);
bond_select_active_slave(bond);
goto out;
}
}
if (bond->primary_slave) {
if (rtnl_dereference(bond->primary_slave)) {
netdev_info(bond->dev, "Setting primary slave to None\n");
bond->primary_slave = NULL;
RCU_INIT_POINTER(bond->primary_slave, NULL);
bond_select_active_slave(bond);
}
strncpy(bond->params.primary, primary, IFNAMSIZ);
@ -1120,7 +1104,6 @@ static int bond_option_primary_set(struct bonding *bond,
out:
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
return 0;

View File

@ -7,21 +7,18 @@
static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
__acquires(&bond->lock)
{
struct bonding *bond = seq->private;
struct list_head *iter;
struct slave *slave;
loff_t off = 0;
/* make sure the bond won't be taken away */
rcu_read_lock();
read_lock(&bond->lock);
if (*pos == 0)
return SEQ_START_TOKEN;
bond_for_each_slave(bond, slave, iter)
bond_for_each_slave_rcu(bond, slave, iter)
if (++off == *pos)
return slave;
@ -37,12 +34,9 @@ static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
if (v == SEQ_START_TOKEN)
return bond_first_slave(bond);
return bond_first_slave_rcu(bond);
if (bond_is_last_slave(bond, v))
return NULL;
bond_for_each_slave(bond, slave, iter) {
bond_for_each_slave_rcu(bond, slave, iter) {
if (found)
return slave;
if (slave == v)
@ -53,12 +47,8 @@ static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void bond_info_seq_stop(struct seq_file *seq, void *v)
__releases(&bond->lock)
__releases(RCU)
{
struct bonding *bond = seq->private;
read_unlock(&bond->lock);
rcu_read_unlock();
}
@ -66,7 +56,7 @@ static void bond_info_show_master(struct seq_file *seq)
{
struct bonding *bond = seq->private;
const struct bond_opt_value *optval;
struct slave *curr;
struct slave *curr, *primary;
int i;
curr = rcu_dereference(bond->curr_active_slave);
@ -92,10 +82,10 @@ static void bond_info_show_master(struct seq_file *seq)
}
if (bond_uses_primary(bond)) {
primary = rcu_dereference(bond->primary_slave);
seq_printf(seq, "Primary Slave: %s",
(bond->primary_slave) ?
bond->primary_slave->dev->name : "None");
if (bond->primary_slave) {
primary ? primary->dev->name : "None");
if (primary) {
optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT,
bond->params.primary_reselect);
seq_printf(seq, " (primary_reselect %s)",

View File

@ -425,11 +425,15 @@ static ssize_t bonding_show_primary(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
struct slave *primary;
int count = 0;
if (bond->primary_slave)
count = sprintf(buf, "%s\n", bond->primary_slave->dev->name);
rcu_read_lock();
primary = rcu_dereference(bond->primary_slave);
if (primary)
count = sprintf(buf, "%s\n", primary->dev->name);
rcu_read_unlock();
return count;
}

View File

@ -83,7 +83,7 @@
* @pos: current slave
* @iter: list_head * iterator
*
* Caller must hold bond->lock
* Caller must hold RTNL
*/
#define bond_for_each_slave(bond, pos, iter) \
netdev_for_each_lower_private((bond)->dev, pos, iter)
@ -185,22 +185,18 @@ struct slave {
/*
* Here are the locking policies for the two bonding locks:
*
* 1) Get bond->lock when reading/writing slave list.
* 1) Get rcu_read_lock when reading or RTNL when writing slave list.
* 2) Get bond->curr_slave_lock when reading/writing bond->curr_active_slave.
* (It is unnecessary when the write-lock is put with bond->lock.)
* 3) When we lock with bond->curr_slave_lock, we must lock with bond->lock
* beforehand.
*/
struct bonding {
struct net_device *dev; /* first - useful for panic debug */
struct slave __rcu *curr_active_slave;
struct slave __rcu *current_arp_slave;
struct slave *primary_slave;
struct slave __rcu *primary_slave;
bool force_primary;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *,
struct slave *);
rwlock_t lock;
rwlock_t curr_slave_lock;
u8 send_peer_notif;
u8 igmp_retrans;

View File

@ -4390,7 +4390,6 @@ static int cxgb4_inet6addr_handler(struct notifier_block *this,
* bond. We need to find such different adapters and add clip
* in all of them only once.
*/
read_lock(&bond->lock);
bond_for_each_slave(bond, slave, iter) {
if (!first_pdev) {
ret = clip_add(slave->dev, ifa, event);
@ -4404,7 +4403,6 @@ static int cxgb4_inet6addr_handler(struct notifier_block *this,
to_pci_dev(slave->dev->dev.parent))
ret = clip_add(slave->dev, ifa, event);
}
read_unlock(&bond->lock);
} else
ret = clip_add(ifa->idev->dev, ifa, event);