linux-sg2042/drivers/net/bonding/bond_sysfs.c

1832 lines
47 KiB
C
Raw Normal View History

/*
* Copyright(c) 2004-2005 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called LICENSE.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/in.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/inet.h>
#include <linux/rtnetlink.h>
#include <linux/etherdevice.h>
[NET]: Make the device list and device lookups per namespace. This patch makes most of the generic device layer network namespace safe. This patch makes dev_base_head a network namespace variable, and then it picks up a few associated variables. The functions: dev_getbyhwaddr dev_getfirsthwbytype dev_get_by_flags dev_get_by_name __dev_get_by_name dev_get_by_index __dev_get_by_index dev_ioctl dev_ethtool dev_load wireless_process_ioctl were modified to take a network namespace argument, and deal with it. vlan_ioctl_set and brioctl_set were modified so their hooks will receive a network namespace argument. So basically anthing in the core of the network stack that was affected to by the change of dev_base was modified to handle multiple network namespaces. The rest of the network stack was simply modified to explicitly use &init_net the initial network namespace. This can be fixed when those components of the network stack are modified to handle multiple network namespaces. For now the ifindex generator is left global. Fundametally ifindex numbers are per namespace, or else we will have corner case problems with migration when we get that far. At the same time there are assumptions in the network stack that the ifindex of a network device won't change. Making the ifindex number global seems a good compromise until the network stack can cope with ifindex changes when you change namespaces, and the like. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2007-09-18 02:56:21 +08:00
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/nsproxy.h>
#include "bonding.h"
#define to_dev(obj) container_of(obj, struct device, kobj)
#define to_bond(cd) ((struct bonding *)(netdev_priv(to_net_dev(cd))))
/*
* "show" function for the bond_masters attribute.
* The class parameter is ignored.
*/
static ssize_t bonding_show_bonds(struct class *cls,
struct class_attribute *attr,
char *buf)
{
struct bond_net *bn =
container_of(attr, struct bond_net, class_attr_bonding_masters);
int res = 0;
struct bonding *bond;
rtnl_lock();
list_for_each_entry(bond, &bn->dev_list, bond_list) {
if (res > (PAGE_SIZE - IFNAMSIZ)) {
/* not enough space for another interface name */
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s ", bond->dev->name);
}
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
rtnl_unlock();
return res;
}
static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifname)
{
struct bonding *bond;
list_for_each_entry(bond, &bn->dev_list, bond_list) {
if (strncmp(bond->dev->name, ifname, IFNAMSIZ) == 0)
return bond->dev;
}
return NULL;
}
/*
* "store" function for the bond_masters attribute. This is what
* creates and deletes entire bonds.
*
* The class parameter is ignored.
*
*/
static ssize_t bonding_store_bonds(struct class *cls,
struct class_attribute *attr,
const char *buffer, size_t count)
{
struct bond_net *bn =
container_of(attr, struct bond_net, class_attr_bonding_masters);
char command[IFNAMSIZ + 1] = {0, };
char *ifname;
int rv, res = count;
sscanf(buffer, "%16s", command); /* IFNAMSIZ*/
ifname = command + 1;
if ((strlen(command) <= 1) ||
!dev_valid_name(ifname))
goto err_no_cmd;
if (command[0] == '+') {
pr_info("%s is being created...\n", ifname);
rv = bond_create(bn->net, ifname);
if (rv) {
if (rv == -EEXIST)
pr_info("%s already exists.\n", ifname);
else
pr_info("%s creation failed.\n", ifname);
res = rv;
}
} else if (command[0] == '-') {
struct net_device *bond_dev;
rtnl_lock();
bond_dev = bond_get_by_name(bn, ifname);
if (bond_dev) {
pr_info("%s is being deleted...\n", ifname);
unregister_netdevice(bond_dev);
} else {
pr_err("unable to delete non-existent %s\n", ifname);
res = -ENODEV;
}
rtnl_unlock();
} else
goto err_no_cmd;
/* Always return either count or an error. If you return 0, you'll
* get called forever, which is bad.
*/
return res;
err_no_cmd:
pr_err("no command found in bonding_masters. Use +ifname or -ifname.\n");
return -EPERM;
}
static const void *bonding_namespace(struct class *cls,
const struct class_attribute *attr)
{
const struct bond_net *bn =
container_of(attr, struct bond_net, class_attr_bonding_masters);
return bn->net;
}
/* class attribute for bond_masters file. This ends up in /sys/class/net */
static const struct class_attribute class_attr_bonding_masters = {
.attr = {
.name = "bonding_masters",
.mode = S_IWUSR | S_IRUGO,
},
.show = bonding_show_bonds,
.store = bonding_store_bonds,
.namespace = bonding_namespace,
};
int bond_create_slave_symlinks(struct net_device *master,
struct net_device *slave)
{
char linkname[IFNAMSIZ+7];
int ret = 0;
/* first, create a link from the slave back to the master */
ret = sysfs_create_link(&(slave->dev.kobj), &(master->dev.kobj),
"master");
if (ret)
return ret;
/* next, create a link from the master to the slave */
sprintf(linkname, "slave_%s", slave->name);
ret = sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj),
linkname);
/* free the master link created earlier in case of error */
if (ret)
sysfs_remove_link(&(slave->dev.kobj), "master");
return ret;
}
void bond_destroy_slave_symlinks(struct net_device *master,
struct net_device *slave)
{
char linkname[IFNAMSIZ+7];
sysfs_remove_link(&(slave->dev.kobj), "master");
sprintf(linkname, "slave_%s", slave->name);
sysfs_remove_link(&(master->dev.kobj), linkname);
}
/*
* Show the slaves in the current bond.
*/
static ssize_t bonding_show_slaves(struct device *d,
struct device_attribute *attr, char *buf)
{
struct bonding *bond = to_bond(d);
struct slave *slave;
int res = 0;
read_lock(&bond->lock);
bond_for_each_slave(bond, slave) {
if (res > (PAGE_SIZE - IFNAMSIZ)) {
/* not enough space for another interface name */
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s ", slave->dev->name);
}
read_unlock(&bond->lock);
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
return res;
}
/*
* Set the slaves in the current bond.
* This is supposed to be only thin wrapper for bond_enslave and bond_release.
* All hard work should be done there.
*/
static ssize_t bonding_store_slaves(struct device *d,
struct device_attribute *attr,
const char *buffer, size_t count)
{
char command[IFNAMSIZ + 1] = { 0, };
char *ifname;
int res, ret = count;
struct net_device *dev;
struct bonding *bond = to_bond(d);
if (!rtnl_trylock())
return restart_syscall();
sscanf(buffer, "%16s", command); /* IFNAMSIZ*/
ifname = command + 1;
if ((strlen(command) <= 1) ||
!dev_valid_name(ifname))
goto err_no_cmd;
dev = __dev_get_by_name(dev_net(bond->dev), ifname);
if (!dev) {
pr_info("%s: Interface %s does not exist!\n",
bond->dev->name, ifname);
ret = -ENODEV;
goto out;
}
switch (command[0]) {
case '+':
pr_info("%s: Adding slave %s.\n", bond->dev->name, dev->name);
res = bond_enslave(bond->dev, dev);
break;
case '-':
pr_info("%s: Removing slave %s.\n", bond->dev->name, dev->name);
res = bond_release(bond->dev, dev);
break;
default:
goto err_no_cmd;
}
if (res)
ret = res;
goto out;
err_no_cmd:
pr_err("no command found in slaves file for bond %s. Use +ifname or -ifname.\n",
bond->dev->name);
ret = -EPERM;
out:
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(slaves, S_IRUGO | S_IWUSR, bonding_show_slaves,
bonding_store_slaves);
/*
* Show and set the bonding mode. The bond interface must be down to
* change the mode.
*/
static ssize_t bonding_show_mode(struct device *d,
struct device_attribute *attr, char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
bond_mode_tbl[bond->params.mode].modename,
bond->params.mode);
}
static ssize_t bonding_store_mode(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (!rtnl_trylock())
return restart_syscall();
if (bond->dev->flags & IFF_UP) {
pr_err("unable to update mode of %s because interface is up.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
if (!list_empty(&bond->slave_list)) {
pr_err("unable to update mode of %s because it has slaves.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
new_value = bond_parse_parm(buf, bond_mode_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid mode value %.*s.\n",
bond->dev->name, (int)strlen(buf) - 1, buf);
ret = -EINVAL;
goto out;
}
if ((new_value == BOND_MODE_ALB ||
new_value == BOND_MODE_TLB) &&
bond->params.arp_interval) {
pr_err("%s: %s mode is incompatible with arp monitoring.\n",
bond->dev->name, bond_mode_tbl[new_value].modename);
ret = -EINVAL;
goto out;
}
/* don't cache arp_validate between modes */
bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
bond->params.mode = new_value;
bond_set_mode_ops(bond, bond->params.mode);
pr_info("%s: setting mode to %s (%d).\n",
bond->dev->name, bond_mode_tbl[new_value].modename,
new_value);
out:
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(mode, S_IRUGO | S_IWUSR,
bonding_show_mode, bonding_store_mode);
/*
* Show and set the bonding transmit hash method.
*/
static ssize_t bonding_show_xmit_hash(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
xmit_hashtype_tbl[bond->params.xmit_policy].modename,
bond->params.xmit_policy);
}
static ssize_t bonding_store_xmit_hash(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
new_value = bond_parse_parm(buf, xmit_hashtype_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid xmit hash policy value %.*s.\n",
bond->dev->name,
(int)strlen(buf) - 1, buf);
ret = -EINVAL;
} else {
bond->params.xmit_policy = new_value;
bond_set_mode_ops(bond, bond->params.mode);
pr_info("%s: setting xmit hash policy to %s (%d).\n",
bond->dev->name,
xmit_hashtype_tbl[new_value].modename, new_value);
}
return ret;
}
static DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR,
bonding_show_xmit_hash, bonding_store_xmit_hash);
/*
* Show and set arp_validate.
*/
static ssize_t bonding_show_arp_validate(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
arp_validate_tbl[bond->params.arp_validate].modename,
bond->params.arp_validate);
}
static ssize_t bonding_store_arp_validate(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret = count;
if (!rtnl_trylock())
return restart_syscall();
new_value = bond_parse_parm(buf, arp_validate_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid arp_validate value %s\n",
bond->dev->name, buf);
ret = -EINVAL;
goto out;
}
if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
pr_err("%s: arp_validate only supported in active-backup mode.\n",
bond->dev->name);
ret = -EINVAL;
goto out;
}
pr_info("%s: setting arp_validate to %s (%d).\n",
bond->dev->name, arp_validate_tbl[new_value].modename,
new_value);
if (bond->dev->flags & IFF_UP) {
if (!new_value)
bond->recv_probe = NULL;
else if (bond->params.arp_interval)
bond->recv_probe = bond_arp_rcv;
}
bond->params.arp_validate = new_value;
out:
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate,
bonding_store_arp_validate);
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
/*
* Show and set arp_all_targets.
*/
static ssize_t bonding_show_arp_all_targets(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
int value = bond->params.arp_all_targets;
return sprintf(buf, "%s %d\n", arp_all_targets_tbl[value].modename,
value);
}
static ssize_t bonding_store_arp_all_targets(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value;
new_value = bond_parse_parm(buf, arp_all_targets_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid arp_all_targets value %s\n",
bond->dev->name, buf);
return -EINVAL;
}
pr_info("%s: setting arp_all_targets to %s (%d).\n",
bond->dev->name, arp_all_targets_tbl[new_value].modename,
new_value);
bond->params.arp_all_targets = new_value;
return count;
}
static DEVICE_ATTR(arp_all_targets, S_IRUGO | S_IWUSR,
bonding_show_arp_all_targets, bonding_store_arp_all_targets);
/*
* Show and store fail_over_mac. User only allowed to change the
* value when there are no slaves.
*/
static ssize_t bonding_show_fail_over_mac(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
fail_over_mac_tbl[bond->params.fail_over_mac].modename,
bond->params.fail_over_mac);
}
static ssize_t bonding_store_fail_over_mac(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (!rtnl_trylock())
return restart_syscall();
if (!list_empty(&bond->slave_list)) {
pr_err("%s: Can't alter fail_over_mac with slaves in bond.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
new_value = bond_parse_parm(buf, fail_over_mac_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid fail_over_mac value %s.\n",
bond->dev->name, buf);
ret = -EINVAL;
goto out;
}
bond->params.fail_over_mac = new_value;
pr_info("%s: Setting fail_over_mac to %s (%d).\n",
bond->dev->name, fail_over_mac_tbl[new_value].modename,
new_value);
out:
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR,
bonding_show_fail_over_mac, bonding_store_fail_over_mac);
/*
* Show and set the arp timer interval. There are two tricky bits
* here. First, if ARP monitoring is activated, then we must disable
* MII monitoring. Second, if the ARP timer isn't running, we must
* start it.
*/
static ssize_t bonding_show_arp_interval(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.arp_interval);
}
static ssize_t bonding_store_arp_interval(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret = count;
bonding: fix miimon and arp_interval delayed work race conditions First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-29 09:31:31 +08:00
if (!rtnl_trylock())
return restart_syscall();
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no arp_interval value specified.\n",
bond->dev->name);
ret = -EINVAL;
goto out;
}
if (new_value < 0) {
pr_err("%s: Invalid arp_interval value %d not in range 0-%d; rejected.\n",
bond->dev->name, new_value, INT_MAX);
ret = -EINVAL;
goto out;
}
if (bond->params.mode == BOND_MODE_ALB ||
bond->params.mode == BOND_MODE_TLB) {
pr_info("%s: ARP monitoring cannot be used with ALB/TLB. Only MII monitoring is supported on %s.\n",
bond->dev->name, bond->dev->name);
ret = -EINVAL;
goto out;
}
pr_info("%s: Setting ARP monitoring interval to %d.\n",
bond->dev->name, new_value);
bond->params.arp_interval = new_value;
if (new_value) {
if (bond->params.miimon) {
pr_info("%s: ARP monitoring cannot be used with MII monitoring. %s Disabling MII monitoring.\n",
bond->dev->name, bond->dev->name);
bond->params.miimon = 0;
}
if (!bond->params.arp_targets[0])
pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified.\n",
bond->dev->name);
}
if (bond->dev->flags & IFF_UP) {
/* If the interface is up, we may need to fire off
* the ARP timer. If the interface is down, the
* timer will get fired off when the open function
* is called.
*/
if (!new_value) {
if (bond->params.arp_validate)
bond->recv_probe = NULL;
cancel_delayed_work_sync(&bond->arp_work);
} else {
/* arp_validate can be set only in active-backup mode */
if (bond->params.arp_validate)
bond->recv_probe = bond_arp_rcv;
cancel_delayed_work_sync(&bond->mii_work);
queue_delayed_work(bond->wq, &bond->arp_work, 0);
}
}
out:
bonding: fix miimon and arp_interval delayed work race conditions First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-29 09:31:31 +08:00
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR,
bonding_show_arp_interval, bonding_store_arp_interval);
/*
* Show and set the arp targets.
*/
static ssize_t bonding_show_arp_targets(struct device *d,
struct device_attribute *attr,
char *buf)
{
int i, res = 0;
struct bonding *bond = to_bond(d);
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) {
if (bond->params.arp_targets[i])
res += sprintf(buf + res, "%pI4 ",
&bond->params.arp_targets[i]);
}
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
return res;
}
static ssize_t bonding_store_arp_targets(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
struct slave *slave;
__be32 newtarget, *targets;
unsigned long *targets_rx;
int ind, i, j, ret = -EINVAL;
targets = bond->params.arp_targets;
newtarget = in_aton(buf + 1);
/* look for adds */
if (buf[0] == '+') {
if ((newtarget == 0) || (newtarget == htonl(INADDR_BROADCAST))) {
pr_err("%s: invalid ARP target %pI4 specified for addition\n",
bond->dev->name, &newtarget);
goto out;
}
if (bond_get_targets_ip(targets, newtarget) != -1) { /* dup */
pr_err("%s: ARP target %pI4 is already present\n",
bond->dev->name, &newtarget);
goto out;
}
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
ind = bond_get_targets_ip(targets, 0); /* first free slot */
if (ind == -1) {
pr_err("%s: ARP target table is full!\n",
bond->dev->name);
goto out;
}
pr_info("%s: adding ARP target %pI4.\n", bond->dev->name,
&newtarget);
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
/* not to race with bond_arp_rcv */
write_lock_bh(&bond->lock);
bond_for_each_slave(bond, slave)
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
slave->target_last_arp_rx[ind] = jiffies;
targets[ind] = newtarget;
write_unlock_bh(&bond->lock);
} else if (buf[0] == '-') {
if ((newtarget == 0) || (newtarget == htonl(INADDR_BROADCAST))) {
pr_err("%s: invalid ARP target %pI4 specified for removal\n",
bond->dev->name, &newtarget);
goto out;
}
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
ind = bond_get_targets_ip(targets, newtarget);
if (ind == -1) {
pr_err("%s: unable to remove nonexistent ARP target %pI4.\n",
bond->dev->name, &newtarget);
goto out;
}
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
if (ind == 0 && !targets[1] && bond->params.arp_interval)
pr_warn("%s: removing last arp target with arp_interval on\n",
bond->dev->name);
pr_info("%s: removing ARP target %pI4.\n", bond->dev->name,
&newtarget);
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
write_lock_bh(&bond->lock);
bond_for_each_slave(bond, slave) {
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
targets_rx = slave->target_last_arp_rx;
j = ind;
for (; (j < BOND_MAX_ARP_TARGETS-1) && targets[j+1]; j++)
targets_rx[j] = targets_rx[j+1];
targets_rx[j] = 0;
}
for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++)
targets[i] = targets[i+1];
targets[i] = 0;
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
write_unlock_bh(&bond->lock);
} else {
pr_err("no command found in arp_ip_targets file for bond %s. Use +<addr> or -<addr>.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
ret = count;
out:
return ret;
}
static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets);
/*
* Show and set the up and down delays. These must be multiples of the
* MII monitoring value, and are stored internally as the multiplier.
* Thus, we must translate to MS for the real world.
*/
static ssize_t bonding_show_downdelay(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.downdelay * bond->params.miimon);
}
static ssize_t bonding_store_downdelay(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (!(bond->params.miimon)) {
pr_err("%s: Unable to set down delay as MII monitoring is disabled\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no down delay value specified.\n", bond->dev->name);
ret = -EINVAL;
goto out;
}
if (new_value < 0) {
pr_err("%s: Invalid down delay value %d not in range %d-%d; rejected.\n",
bond->dev->name, new_value, 0, INT_MAX);
ret = -EINVAL;
goto out;
} else {
if ((new_value % bond->params.miimon) != 0) {
pr_warning("%s: Warning: down delay (%d) is not a multiple of miimon (%d), delay rounded to %d ms\n",
bond->dev->name, new_value,
bond->params.miimon,
(new_value / bond->params.miimon) *
bond->params.miimon);
}
bond->params.downdelay = new_value / bond->params.miimon;
pr_info("%s: Setting down delay to %d.\n",
bond->dev->name,
bond->params.downdelay * bond->params.miimon);
}
out:
return ret;
}
static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR,
bonding_show_downdelay, bonding_store_downdelay);
static ssize_t bonding_show_updelay(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.updelay * bond->params.miimon);
}
static ssize_t bonding_store_updelay(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (!(bond->params.miimon)) {
pr_err("%s: Unable to set up delay as MII monitoring is disabled\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no up delay value specified.\n",
bond->dev->name);
ret = -EINVAL;
goto out;
}
if (new_value < 0) {
pr_err("%s: Invalid up delay value %d not in range %d-%d; rejected.\n",
bond->dev->name, new_value, 0, INT_MAX);
ret = -EINVAL;
goto out;
} else {
if ((new_value % bond->params.miimon) != 0) {
pr_warning("%s: Warning: up delay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n",
bond->dev->name, new_value,
bond->params.miimon,
(new_value / bond->params.miimon) *
bond->params.miimon);
}
bond->params.updelay = new_value / bond->params.miimon;
pr_info("%s: Setting up delay to %d.\n",
bond->dev->name,
bond->params.updelay * bond->params.miimon);
}
out:
return ret;
}
static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR,
bonding_show_updelay, bonding_store_updelay);
/*
* Show and set the LACP interval. Interface must be down, and the mode
* must be set to 802.3ad mode.
*/
static ssize_t bonding_show_lacp(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
bond_lacp_tbl[bond->params.lacp_fast].modename,
bond->params.lacp_fast);
}
static ssize_t bonding_store_lacp(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret = count;
if (!rtnl_trylock())
return restart_syscall();
if (bond->dev->flags & IFF_UP) {
pr_err("%s: Unable to update LACP rate because interface is up.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
if (bond->params.mode != BOND_MODE_8023AD) {
pr_err("%s: Unable to update LACP rate because bond is not in 802.3ad mode.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
new_value = bond_parse_parm(buf, bond_lacp_tbl);
if ((new_value == 1) || (new_value == 0)) {
bond->params.lacp_fast = new_value;
bond_3ad_update_lacp_rate(bond);
pr_info("%s: Setting LACP rate to %s (%d).\n",
bond->dev->name, bond_lacp_tbl[new_value].modename,
new_value);
} else {
pr_err("%s: Ignoring invalid LACP rate value %.*s.\n",
bond->dev->name, (int)strlen(buf) - 1, buf);
ret = -EINVAL;
}
out:
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR,
bonding_show_lacp, bonding_store_lacp);
static ssize_t bonding_show_min_links(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.min_links);
}
static ssize_t bonding_store_min_links(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int ret;
unsigned int new_value;
ret = kstrtouint(buf, 0, &new_value);
if (ret < 0) {
pr_err("%s: Ignoring invalid min links value %s.\n",
bond->dev->name, buf);
return ret;
}
pr_info("%s: Setting min links value to %u\n",
bond->dev->name, new_value);
bond->params.min_links = new_value;
return count;
}
static DEVICE_ATTR(min_links, S_IRUGO | S_IWUSR,
bonding_show_min_links, bonding_store_min_links);
static ssize_t bonding_show_ad_select(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
ad_select_tbl[bond->params.ad_select].modename,
bond->params.ad_select);
}
static ssize_t bonding_store_ad_select(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (bond->dev->flags & IFF_UP) {
pr_err("%s: Unable to update ad_select because interface is up.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
new_value = bond_parse_parm(buf, ad_select_tbl);
if (new_value != -1) {
bond->params.ad_select = new_value;
pr_info("%s: Setting ad_select to %s (%d).\n",
bond->dev->name, ad_select_tbl[new_value].modename,
new_value);
} else {
pr_err("%s: Ignoring invalid ad_select value %.*s.\n",
bond->dev->name, (int)strlen(buf) - 1, buf);
ret = -EINVAL;
}
out:
return ret;
}
static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
bonding_show_ad_select, bonding_store_ad_select);
/*
* Show and set the number of peer notifications to send after a failover event.
*/
static ssize_t bonding_show_num_peer_notif(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.num_peer_notif);
}
static ssize_t bonding_store_num_peer_notif(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int err = kstrtou8(buf, 10, &bond->params.num_peer_notif);
return err ? err : count;
}
static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
bonding_show_num_peer_notif, bonding_store_num_peer_notif);
static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
bonding_show_num_peer_notif, bonding_store_num_peer_notif);
/*
* Show and set the MII monitor interval. There are two tricky bits
* here. First, if MII monitoring is activated, then we must disable
* ARP monitoring. Second, if the timer isn't running, we must
* start it.
*/
static ssize_t bonding_show_miimon(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.miimon);
}
static ssize_t bonding_store_miimon(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
bonding: fix miimon and arp_interval delayed work race conditions First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-29 09:31:31 +08:00
if (!rtnl_trylock())
return restart_syscall();
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no miimon value specified.\n",
bond->dev->name);
ret = -EINVAL;
goto out;
}
if (new_value < 0) {
pr_err("%s: Invalid miimon value %d not in range %d-%d; rejected.\n",
bond->dev->name, new_value, 0, INT_MAX);
ret = -EINVAL;
goto out;
}
pr_info("%s: Setting MII monitoring interval to %d.\n",
bond->dev->name, new_value);
bond->params.miimon = new_value;
if (bond->params.updelay)
pr_info("%s: Note: Updating updelay (to %d) since it is a multiple of the miimon value.\n",
bond->dev->name,
bond->params.updelay * bond->params.miimon);
if (bond->params.downdelay)
pr_info("%s: Note: Updating downdelay (to %d) since it is a multiple of the miimon value.\n",
bond->dev->name,
bond->params.downdelay * bond->params.miimon);
if (new_value && bond->params.arp_interval) {
pr_info("%s: MII monitoring cannot be used with ARP monitoring. Disabling ARP monitoring...\n",
bond->dev->name);
bond->params.arp_interval = 0;
if (bond->params.arp_validate)
bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
}
if (bond->dev->flags & IFF_UP) {
/* If the interface is up, we may need to fire off
* the MII timer. If the interface is down, the
* timer will get fired off when the open function
* is called.
*/
if (!new_value) {
cancel_delayed_work_sync(&bond->mii_work);
} else {
bonding: fix miimon and arp_interval delayed work race conditions First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-29 09:31:31 +08:00
cancel_delayed_work_sync(&bond->arp_work);
queue_delayed_work(bond->wq, &bond->mii_work, 0);
}
}
out:
bonding: fix miimon and arp_interval delayed work race conditions First I would give three observations which will be used later. Observation 1: if (delayed_work_pending(wq)) cancel_delayed_work(wq) This usage is wrong because the pending bit is cleared just before the work's fn is executed and if the function re-arms itself we might end up with the work still running. It's safe to call cancel_delayed_work_sync() even if the work is not queued at all. Observation 2: Use of INIT_DELAYED_WORK() Work needs to be initialized only once prior to (de/en)queueing. Observation 3: IFF_UP is set only after ndo_open is called Related race conditions: 1. Race between bonding_store_miimon() and bonding_store_arp_interval() Because of Obs.1 we can end up having both works enqueued. 2. Multiple races with INIT_DELAYED_WORK() Since the works are not protected by anything between INIT_DELAYED_WORK() and calls to (en/de)queue it is possible for races between the following functions: (races are also possible between the calls to INIT_DELAYED_WORK() and workqueue code) bonding_store_miimon() - bonding_store_arp_interval(), bond_close(), bond_open(), enqueued functions bonding_store_arp_interval() - bonding_store_miimon(), bond_close(), bond_open(), enqueued functions 3. By Obs.1 we need to change bond_cancel_all() Bugs 1 and 2 are fixed by moving all work initializations in bond_open which by Obs. 2 and Obs. 3 and the fact that we make sure that all works are cancelled in bond_close(), is guaranteed not to have any work enqueued. Also RTNL lock is now acquired in bonding_store_miimon/arp_interval so they can't race with bond_close and bond_open. The opposing work is cancelled only if the IFF_UP flag is set and it is cancelled unconditionally. The opposing work is already cancelled if the interface is down so no need to cancel it again. This way we don't need new synchronizations for the bonding workqueue. These bugs (and fixes) are tied together and belong in the same patch. Note: I have left 1 line intentionally over 80 characters (84) because I didn't like how it looks broken down. If you'd prefer it otherwise, then simply break it. v2: Make description text < 75 columns Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: Jay Vosburgh <fubar@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-29 09:31:31 +08:00
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR,
bonding_show_miimon, bonding_store_miimon);
/*
* Show and set the primary slave. The store function is much
* simpler than bonding_store_slaves function because it only needs to
* handle one interface name.
* The bond must be a mode that supports a primary for this be
* set.
*/
static ssize_t bonding_show_primary(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->primary_slave)
count = sprintf(buf, "%s\n", bond->primary_slave->dev->name);
return count;
}
static ssize_t bonding_store_primary(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
char ifname[IFNAMSIZ];
struct slave *slave;
if (!rtnl_trylock())
return restart_syscall();
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
if (!USES_PRIMARY(bond->params.mode)) {
pr_info("%s: Unable to set primary slave; %s is in mode %d\n",
bond->dev->name, bond->dev->name, bond->params.mode);
goto out;
}
sscanf(buf, "%15s", ifname); /* IFNAMSIZ */
/* check to see if we are clearing primary */
if (!strlen(ifname) || buf[0] == '\n') {
pr_info("%s: Setting primary slave to None.\n",
bond->dev->name);
bond->primary_slave = NULL;
memset(bond->params.primary, 0, sizeof(bond->params.primary));
bond_select_active_slave(bond);
goto out;
}
bond_for_each_slave(bond, slave) {
if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
pr_info("%s: Setting %s as primary slave.\n",
bond->dev->name, slave->dev->name);
bond->primary_slave = slave;
strcpy(bond->params.primary, slave->dev->name);
bond_select_active_slave(bond);
goto out;
}
}
strncpy(bond->params.primary, ifname, IFNAMSIZ);
bond->params.primary[IFNAMSIZ - 1] = 0;
pr_info("%s: Recording %s as primary, "
"but it has not been enslaved to %s yet.\n",
bond->dev->name, ifname, bond->dev->name);
out:
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
rtnl_unlock();
return count;
}
static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
bonding_show_primary, bonding_store_primary);
/*
* Show and set the primary_reselect flag.
*/
static ssize_t bonding_show_primary_reselect(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%s %d\n",
pri_reselect_tbl[bond->params.primary_reselect].modename,
bond->params.primary_reselect);
}
static ssize_t bonding_store_primary_reselect(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (!rtnl_trylock())
return restart_syscall();
new_value = bond_parse_parm(buf, pri_reselect_tbl);
if (new_value < 0) {
pr_err("%s: Ignoring invalid primary_reselect value %.*s.\n",
bond->dev->name,
(int) strlen(buf) - 1, buf);
ret = -EINVAL;
goto out;
}
bond->params.primary_reselect = new_value;
pr_info("%s: setting primary_reselect to %s (%d).\n",
bond->dev->name, pri_reselect_tbl[new_value].modename,
new_value);
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
bond_select_active_slave(bond);
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
out:
rtnl_unlock();
return ret;
}
static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR,
bonding_show_primary_reselect,
bonding_store_primary_reselect);
/*
* Show and set the use_carrier flag.
*/
static ssize_t bonding_show_carrier(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.use_carrier);
}
static ssize_t bonding_store_carrier(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no use_carrier value specified.\n",
bond->dev->name);
ret = -EINVAL;
goto out;
}
if ((new_value == 0) || (new_value == 1)) {
bond->params.use_carrier = new_value;
pr_info("%s: Setting use_carrier to %d.\n",
bond->dev->name, new_value);
} else {
pr_info("%s: Ignoring invalid use_carrier value %d.\n",
bond->dev->name, new_value);
}
out:
return ret;
}
static DEVICE_ATTR(use_carrier, S_IRUGO | S_IWUSR,
bonding_show_carrier, bonding_store_carrier);
/*
* Show and set currently active_slave.
*/
static ssize_t bonding_show_active_slave(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 22:54:51 +08:00
struct slave *curr;
int count = 0;
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 22:54:51 +08:00
rcu_read_lock();
curr = rcu_dereference(bond->curr_active_slave);
if (USES_PRIMARY(bond->params.mode) && curr)
count = sprintf(buf, "%s\n", curr->dev->name);
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 22:54:51 +08:00
rcu_read_unlock();
return count;
}
static ssize_t bonding_store_active_slave(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct slave *slave, *old_active, *new_active;
struct bonding *bond = to_bond(d);
char ifname[IFNAMSIZ];
if (!rtnl_trylock())
return restart_syscall();
old_active = new_active = NULL;
block_netpoll_tx();
read_lock(&bond->lock);
write_lock_bh(&bond->curr_slave_lock);
if (!USES_PRIMARY(bond->params.mode)) {
pr_info("%s: Unable to change active slave; %s is in mode %d\n",
bond->dev->name, bond->dev->name, bond->params.mode);
goto out;
}
sscanf(buf, "%15s", ifname); /* IFNAMSIZ */
/* check to see if we are clearing active */
if (!strlen(ifname) || buf[0] == '\n') {
pr_info("%s: Clearing current active slave.\n",
bond->dev->name);
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 22:54:51 +08:00
rcu_assign_pointer(bond->curr_active_slave, NULL);
bond_select_active_slave(bond);
goto out;
}
bond_for_each_slave(bond, slave) {
if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) {
old_active = bond->curr_active_slave;
new_active = slave;
if (new_active == old_active) {
/* do nothing */
pr_info("%s: %s is already the current"
" active slave.\n",
bond->dev->name,
slave->dev->name);
goto out;
} else {
if ((new_active) &&
(old_active) &&
(new_active->link == BOND_LINK_UP) &&
IS_UP(new_active->dev)) {
pr_info("%s: Setting %s as active"
" slave.\n",
bond->dev->name,
slave->dev->name);
bond_change_active_slave(bond,
new_active);
} else {
pr_info("%s: Could not set %s as"
" active slave; either %s is"
" down or the link is down.\n",
bond->dev->name,
slave->dev->name,
slave->dev->name);
}
goto out;
}
}
}
pr_info("%s: Unable to set %.*s as active slave.\n",
bond->dev->name, (int)strlen(buf) - 1, buf);
out:
write_unlock_bh(&bond->curr_slave_lock);
read_unlock(&bond->lock);
unblock_netpoll_tx();
rtnl_unlock();
return count;
}
static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR,
bonding_show_active_slave, bonding_store_active_slave);
/*
* Show link status of the bond interface.
*/
static ssize_t bonding_show_mii_status(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
bonding: initial RCU conversion This patch does the initial bonding conversion to RCU. After it the following modes are protected by RCU alone: roundrobin, active-backup, broadcast and xor. Modes ALB/TLB and 3ad still acquire bond->lock for reading, and will be dealt with later. curr_active_slave needs to be dereferenced via rcu in the converted modes because the only thing protecting the slave after this patch is rcu_read_lock, so we need the proper barrier for weakly ordered archs and to make sure we don't have stale pointer. It's not tagged with __rcu yet because there's still work to be done to remove the curr_slave_lock, so sparse will complain when rcu_assign_pointer and rcu_dereference are used, but the alternative to use rcu_dereference_protected would've created much bigger code churn which is more difficult to test and review. That will be converted in time. 1. Active-backup mode 1.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.55% in bonding, system spent 0.29% CPU in bonding - new bonding: iperf spent 0.29% in bonding, system spent 0.15% CPU in bonding 1.2. Bandwidth measurements - old bonding: 16.1 gbps consistently - new bonding: 17.5 gbps consistently 2. Round-robin mode 2.1 Perf recording while doing iperf -P 4 - old bonding: iperf spent 0.51% in bonding, system spent 0.24% CPU in bonding - new bonding: iperf spent 0.16% in bonding, system spent 0.11% CPU in bonding 2.2 Bandwidth measurements - old bonding: 8 gbps (variable due to packet reorderings) - new bonding: 10 gbps (variable due to packet reorderings) Of course the latency has improved in all converted modes, and moreover while doing enslave/release (since it doesn't affect tx anymore). Also I've stress tested all modes doing enslave/release in a loop while transmitting traffic. Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-08-01 22:54:51 +08:00
return sprintf(buf, "%s\n", bond->curr_active_slave ? "up" : "down");
}
static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL);
/*
* Show current 802.3ad aggregator ID.
*/
static ssize_t bonding_show_ad_aggregator(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.aggregator_id);
}
return count;
}
static DEVICE_ATTR(ad_aggregator, S_IRUGO, bonding_show_ad_aggregator, NULL);
/*
* Show number of active 802.3ad ports.
*/
static ssize_t bonding_show_ad_num_ports(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.ports);
}
return count;
}
static DEVICE_ATTR(ad_num_ports, S_IRUGO, bonding_show_ad_num_ports, NULL);
/*
* Show current 802.3ad actor key.
*/
static ssize_t bonding_show_ad_actor_key(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.actor_key);
}
return count;
}
static DEVICE_ATTR(ad_actor_key, S_IRUGO, bonding_show_ad_actor_key, NULL);
/*
* Show current 802.3ad partner key.
*/
static ssize_t bonding_show_ad_partner_key(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
count = sprintf(buf, "%d\n",
bond_3ad_get_active_agg_info(bond, &ad_info)
? 0 : ad_info.partner_key);
}
return count;
}
static DEVICE_ATTR(ad_partner_key, S_IRUGO, bonding_show_ad_partner_key, NULL);
/*
* Show current 802.3ad partner mac.
*/
static ssize_t bonding_show_ad_partner_mac(struct device *d,
struct device_attribute *attr,
char *buf)
{
int count = 0;
struct bonding *bond = to_bond(d);
if (bond->params.mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
if (!bond_3ad_get_active_agg_info(bond, &ad_info))
count = sprintf(buf, "%pM\n", ad_info.partner_system);
}
return count;
}
static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL);
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 16:40:18 +08:00
/*
* Show the queue_ids of the slaves in the current bond.
*/
static ssize_t bonding_show_queue_id(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
struct slave *slave;
int res = 0;
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 16:40:18 +08:00
if (!rtnl_trylock())
return restart_syscall();
read_lock(&bond->lock);
bond_for_each_slave(bond, slave) {
if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {
/* not enough space for another interface_name:queue_id pair */
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 16:40:18 +08:00
if ((PAGE_SIZE - res) > 10)
res = PAGE_SIZE - 10;
res += sprintf(buf + res, "++more++ ");
break;
}
res += sprintf(buf + res, "%s:%d ",
slave->dev->name, slave->queue_id);
}
read_unlock(&bond->lock);
if (res)
buf[res-1] = '\n'; /* eat the leftover space */
rtnl_unlock();
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 16:40:18 +08:00
return res;
}
/*
* Set the queue_ids of the slaves in the current bond. The bond
* interface must be enslaved for this to work.
*/
static ssize_t bonding_store_queue_id(struct device *d,
struct device_attribute *attr,
const char *buffer, size_t count)
{
struct slave *slave, *update_slave;
struct bonding *bond = to_bond(d);
u16 qid;
int ret = count;
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 16:40:18 +08:00
char *delim;
struct net_device *sdev = NULL;
if (!rtnl_trylock())
return restart_syscall();
/* delim will point to queue id if successful */
delim = strchr(buffer, ':');
if (!delim)
goto err_no_cmd;
/*
* Terminate string that points to device name and bump it
* up one, so we can read the queue id there.
*/
*delim = '\0';
if (sscanf(++delim, "%hd\n", &qid) != 1)
goto err_no_cmd;
/* Check buffer length, valid ifname and queue id */
if (strlen(buffer) > IFNAMSIZ ||
!dev_valid_name(buffer) ||
qid > bond->dev->real_num_tx_queues)
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 16:40:18 +08:00
goto err_no_cmd;
/* Get the pointer to that interface if it exists */
sdev = __dev_get_by_name(dev_net(bond->dev), buffer);
if (!sdev)
goto err_no_cmd;
read_lock(&bond->lock);
/* Search for thes slave and check for duplicate qids */
update_slave = NULL;
bond_for_each_slave(bond, slave) {
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 16:40:18 +08:00
if (sdev == slave->dev)
/*
* We don't need to check the matching
* slave for dups, since we're overwriting it
*/
update_slave = slave;
else if (qid && qid == slave->queue_id) {
goto err_no_cmd_unlock;
}
}
if (!update_slave)
goto err_no_cmd_unlock;
/* Actually set the qids for the slave */
update_slave->queue_id = qid;
read_unlock(&bond->lock);
out:
rtnl_unlock();
return ret;
err_no_cmd_unlock:
read_unlock(&bond->lock);
err_no_cmd:
pr_info("invalid input for queue_id set for %s.\n",
bond->dev->name);
ret = -EPERM;
goto out;
}
static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id,
bonding_store_queue_id);
/*
* Show and set the all_slaves_active flag.
*/
static ssize_t bonding_show_slaves_active(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.all_slaves_active);
}
static ssize_t bonding_store_slaves_active(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret = count;
struct slave *slave;
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no all_slaves_active value specified.\n",
bond->dev->name);
ret = -EINVAL;
goto out;
}
if (new_value == bond->params.all_slaves_active)
goto out;
if ((new_value == 0) || (new_value == 1)) {
bond->params.all_slaves_active = new_value;
} else {
pr_info("%s: Ignoring invalid all_slaves_active value %d.\n",
bond->dev->name, new_value);
ret = -EINVAL;
goto out;
}
read_lock(&bond->lock);
bond_for_each_slave(bond, slave) {
if (!bond_is_active_slave(slave)) {
if (new_value)
slave->inactive = 0;
else
slave->inactive = 1;
}
}
read_unlock(&bond->lock);
out:
return ret;
}
static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
bonding_show_slaves_active, bonding_store_slaves_active);
/*
* Show and set the number of IGMP membership reports to send on link failure
*/
static ssize_t bonding_show_resend_igmp(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.resend_igmp);
}
static ssize_t bonding_store_resend_igmp(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
int new_value, ret = count;
struct bonding *bond = to_bond(d);
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no resend_igmp value specified.\n",
bond->dev->name);
ret = -EINVAL;
goto out;
}
if (new_value < 0 || new_value > 255) {
pr_err("%s: Invalid resend_igmp value %d not in range 0-255; rejected.\n",
bond->dev->name, new_value);
ret = -EINVAL;
goto out;
}
pr_info("%s: Setting resend_igmp to %d.\n",
bond->dev->name, new_value);
bond->params.resend_igmp = new_value;
out:
return ret;
}
static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,
bonding_show_resend_igmp, bonding_store_resend_igmp);
static ssize_t bonding_show_lp_interval(struct device *d,
struct device_attribute *attr,
char *buf)
{
struct bonding *bond = to_bond(d);
return sprintf(buf, "%d\n", bond->params.lp_interval);
}
static ssize_t bonding_store_lp_interval(struct device *d,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct bonding *bond = to_bond(d);
int new_value, ret = count;
if (sscanf(buf, "%d", &new_value) != 1) {
pr_err("%s: no lp interval value specified.\n",
bond->dev->name);
ret = -EINVAL;
goto out;
}
if (new_value <= 0) {
pr_err ("%s: lp_interval must be between 1 and %d\n",
bond->dev->name, INT_MAX);
ret = -EINVAL;
goto out;
}
bond->params.lp_interval = new_value;
out:
return ret;
}
static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR,
bonding_show_lp_interval, bonding_store_lp_interval);
static struct attribute *per_bond_attrs[] = {
&dev_attr_slaves.attr,
&dev_attr_mode.attr,
&dev_attr_fail_over_mac.attr,
&dev_attr_arp_validate.attr,
bonding: add an option to fail when any of arp_ip_target is inaccessible Currently, we fail only when all of the ips in arp_ip_target are gone. However, in some situations we might need to fail if even one host from arp_ip_target becomes unavailable. All situations, obviously, rely on the idea that we need *completely* functional network, with all interfaces/addresses working correctly. One real world example might be: vlans on top on bond (hybrid port). If bond and vlans have ips assigned and we have their peers monitored via arp_ip_target - in case of switch misconfiguration (trunk/access port), slave driver malfunction or tagged/untagged traffic dropped on the way - we will be able to switch to another slave. Though any other configuration needs that if we need to have access to all arp_ip_targets. This patch adds this possibility by adding a new parameter - arp_all_targets (both as a module parameter and as a sysfs knob). It can be set to: 0 or any (the default) - which works exactly as it's working now - the slave is up if any of the arp_ip_targets are up. 1 or all - the slave is up if all of the arp_ip_targets are up. This parameter can be changed on the fly (via sysfs), and requires the mode to be active-backup and arp_validate to be enabled (it obeys the arp_validate config on which slaves to validate). Internally it's done through: 1) Add target_last_arp_rx[BOND_MAX_ARP_TARGETS] array to slave struct. It's an array of jiffies, meaning that slave->target_last_arp_rx[i] is the last time we've received arp from bond->params.arp_targets[i] on this slave. 2) If we successfully validate an arp from bond->params.arp_targets[i] in bond_validate_arp() - update the slave->target_last_arp_rx[i] with the current jiffies value. 3) When getting slave's last_rx via slave_last_rx(), we return the oldest time when we've received an arp from any address in bond->params.arp_targets[]. If the value of arp_all_targets == 0 - we still work the same way as before. Also, update the documentation to reflect the new parameter. v3->v4: Kill the forgotten rtnl_unlock(), rephrase the documentation part to be more clear, don't fail setting arp_all_targets if arp_validate is not set - it has no effect anyway but can be easier to set up. Also, print a warning if the last arp_ip_target is removed while the arp_interval is on, but not the arp_validate. v2->v3: Use _bh spinlock, remove useless rtnl_lock() and use jiffies for new arp_ip_target last arp, instead of slave_last_rx(). On bond_enslave(), use the same initialization value for target_last_arp_rx[] as is used for the default last_arp_rx, to avoid useless interface flaps. Also, instead of failing to remove the last arp_ip_target just print a warning - otherwise it might break existing scripts. v1->v2: Correctly handle adding/removing hosts in arp_ip_target - we need to shift/initialize all slave's target_last_arp_rx. Also, don't fail module loading on arp_all_targets misconfiguration, just disable it, and some minor style fixes. Signed-off-by: Veaceslav Falico <vfalico@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-24 17:49:34 +08:00
&dev_attr_arp_all_targets.attr,
&dev_attr_arp_interval.attr,
&dev_attr_arp_ip_target.attr,
&dev_attr_downdelay.attr,
&dev_attr_updelay.attr,
&dev_attr_lacp_rate.attr,
&dev_attr_ad_select.attr,
&dev_attr_xmit_hash_policy.attr,
&dev_attr_num_grat_arp.attr,
&dev_attr_num_unsol_na.attr,
&dev_attr_miimon.attr,
&dev_attr_primary.attr,
&dev_attr_primary_reselect.attr,
&dev_attr_use_carrier.attr,
&dev_attr_active_slave.attr,
&dev_attr_mii_status.attr,
&dev_attr_ad_aggregator.attr,
&dev_attr_ad_num_ports.attr,
&dev_attr_ad_actor_key.attr,
&dev_attr_ad_partner_key.attr,
&dev_attr_ad_partner_mac.attr,
bonding: allow user-controlled output slave selection v2: changed bonding module version, modified to apply on top of changes from previous patch in series, and updated documentation to elaborate on multiqueue awareness that now exists in bonding driver. This patch give the user the ability to control the output slave for round-robin and active-backup bonding. Similar functionality was discussed in the past, but Jay Vosburgh indicated he would rather see a feature like this added to existing modes rather than creating a completely new mode. Jay's thoughts as well as Neil's input surrounding some of the issues with the first implementation pushed us toward a design that relied on the queue_mapping rather than skb marks. Round-robin and active-backup modes were chosen as the first users of this slave selection as they seemed like the most logical choices when considering a multi-switch environment. Round-robin mode works without any modification, but active-backup does require inclusion of the first patch in this series and setting the 'all_slaves_active' flag. This will allow reception of unicast traffic on any of the backup interfaces. This was tested with IPv4-based filters as well as VLAN-based filters with good results. More information as well as a configuration example is available in the patch to Documentation/networking/bonding.txt. Signed-off-by: Andy Gospodarek <andy@greyhouse.net> Signed-off-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2010-06-02 16:40:18 +08:00
&dev_attr_queue_id.attr,
&dev_attr_all_slaves_active.attr,
&dev_attr_resend_igmp.attr,
&dev_attr_min_links.attr,
&dev_attr_lp_interval.attr,
NULL,
};
static struct attribute_group bonding_group = {
.name = "bonding",
.attrs = per_bond_attrs,
};
/*
* Initialize sysfs. This sets up the bonding_masters file in
* /sys/class/net.
*/
int bond_create_sysfs(struct bond_net *bn)
{
int ret;
bn->class_attr_bonding_masters = class_attr_bonding_masters;
bonding: Add a forgetten sysfs_attr_init on class_attr_bonding_masters When I made class_attr_bonding_matters per network namespace and dynamically allocated I overlooked the need for calling sysfs_attr_init. Oops. This fixes the following lockdep splat: [ 5.749651] bonding: Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011) [ 5.749655] bonding: MII link monitoring set to 100 ms [ 5.749676] BUG: key f49a831c not in .data! [ 5.749677] ------------[ cut here ]------------ [ 5.749752] WARNING: at kernel/lockdep.c:2897 lockdep_init_map+0x1c3/0x460() [ 5.749809] Hardware name: ProLiant BL460c G1 [ 5.749862] Modules linked in: bonding(+) [ 5.749978] Pid: 3177, comm: modprobe Not tainted 3.1.0-rc9-02177-gf2d1a4e-dirty #1157 [ 5.750066] Call Trace: [ 5.750120] [<c1352c2f>] ? printk+0x18/0x21 [ 5.750176] [<c103112d>] warn_slowpath_common+0x6d/0xa0 [ 5.750231] [<c1060133>] ? lockdep_init_map+0x1c3/0x460 [ 5.750287] [<c1060133>] ? lockdep_init_map+0x1c3/0x460 [ 5.750342] [<c103117d>] warn_slowpath_null+0x1d/0x20 [ 5.750398] [<c1060133>] lockdep_init_map+0x1c3/0x460 [ 5.750453] [<c1355ddd>] ? _raw_spin_unlock+0x1d/0x20 [ 5.750510] [<c11255c8>] ? sysfs_new_dirent+0x68/0x110 [ 5.750565] [<c1124d4b>] sysfs_add_file_mode+0x8b/0xe0 [ 5.750621] [<c1124db3>] sysfs_add_file+0x13/0x20 [ 5.750675] [<c1124e7c>] sysfs_create_file+0x1c/0x20 [ 5.750737] [<c1208f09>] class_create_file+0x19/0x20 [ 5.750794] [<c12c186f>] netdev_class_create_file+0xf/0x20 [ 5.750853] [<f85deaf4>] bond_create_sysfs+0x44/0x90 [bonding] [ 5.750911] [<f8410947>] ? bond_create_proc_dir+0x1e/0x3e [bonding] [ 5.750970] [<f841007e>] bond_net_init+0x7e/0x87 [bonding] [ 5.751026] [<f8410000>] ? 0xf840ffff [ 5.751080] [<c12abc7a>] ops_init.clone.4+0xba/0x100 [ 5.751135] [<c12abdb2>] ? register_pernet_subsys+0x12/0x30 [ 5.751191] [<c12abd03>] register_pernet_operations.clone.3+0x43/0x80 [ 5.751249] [<c12abdb9>] register_pernet_subsys+0x19/0x30 [ 5.751306] [<f84108b9>] bonding_init+0x832/0x8a2 [bonding] [ 5.751363] [<c10011f0>] do_one_initcall+0x30/0x160 [ 5.751420] [<f8410087>] ? bond_net_init+0x87/0x87 [bonding] [ 5.751477] [<c106d5cf>] sys_init_module+0xef/0x1890 [ 5.751533] [<c1356490>] sysenter_do_call+0x12/0x36 [ 5.751588] ---[ end trace 89f492d83a7f5006 ]--- Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Reported-by: Eric Dumazet <eric.dumazet@gmail.com> Tested-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-10-22 06:43:07 +08:00
sysfs_attr_init(&bn->class_attr_bonding_masters.attr);
ret = netdev_class_create_file(&bn->class_attr_bonding_masters);
2007-01-20 10:15:47 +08:00
/*
* Permit multiple loads of the module by ignoring failures to
* create the bonding_masters sysfs file. Bonding devices
* created by second or subsequent loads of the module will
* not be listed in, or controllable by, bonding_masters, but
* will have the usual "bonding" sysfs directory.
*
* This is done to preserve backwards compatibility for
* initscripts/sysconfig, which load bonding multiple times to
* configure multiple bonding devices.
*/
if (ret == -EEXIST) {
/* Is someone being kinky and naming a device bonding_master? */
if (__dev_get_by_name(bn->net,
class_attr_bonding_masters.attr.name))
pr_err("network device named %s already exists in sysfs",
class_attr_bonding_masters.attr.name);
ret = 0;
2007-01-20 10:15:47 +08:00
}
return ret;
}
/*
* Remove /sys/class/net/bonding_masters.
*/
void bond_destroy_sysfs(struct bond_net *bn)
{
netdev_class_remove_file(&bn->class_attr_bonding_masters);
}
/*
* Initialize sysfs for each bond. This sets up and registers
* the 'bondctl' directory for each individual bond under /sys/class/net.
*/
void bond_prepare_sysfs_group(struct bonding *bond)
{
bond->dev->sysfs_groups[0] = &bonding_group;
}