2019-05-27 14:55:01 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2014-11-28 21:34:17 +08:00
|
|
|
/*
|
|
|
|
* net/switchdev/switchdev.c - Switch device API
|
2015-09-24 16:02:41 +08:00
|
|
|
* Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
|
2015-03-10 04:59:09 +08:00
|
|
|
* Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
|
2014-11-28 21:34:17 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/init.h>
|
2015-01-16 06:49:36 +08:00
|
|
|
#include <linux/mutex.h>
|
|
|
|
#include <linux/notifier.h>
|
2014-11-28 21:34:17 +08:00
|
|
|
#include <linux/netdevice.h>
|
2015-10-15 01:40:51 +08:00
|
|
|
#include <linux/etherdevice.h>
|
2015-05-11 00:47:56 +08:00
|
|
|
#include <linux/if_bridge.h>
|
2015-09-24 16:02:41 +08:00
|
|
|
#include <linux/list.h>
|
2015-10-15 01:40:48 +08:00
|
|
|
#include <linux/workqueue.h>
|
2015-10-12 20:31:01 +08:00
|
|
|
#include <linux/if_vlan.h>
|
2016-01-27 22:16:43 +08:00
|
|
|
#include <linux/rtnetlink.h>
|
2014-11-28 21:34:17 +08:00
|
|
|
#include <net/switchdev.h>
|
|
|
|
|
2015-10-15 01:40:48 +08:00
|
|
|
/* Queue of pending deferred operations; accessed under deferred_lock. */
static LIST_HEAD(deferred);
|
|
|
|
/* Taken with spin_lock_bh() around every access to the deferred list. */
static DEFINE_SPINLOCK(deferred_lock);
|
|
|
|
|
|
|
|
typedef void switchdev_deferred_func_t(struct net_device *dev,
|
|
|
|
const void *data);
|
|
|
|
|
|
|
|
struct switchdev_deferred_item {
|
|
|
|
struct list_head list;
|
|
|
|
struct net_device *dev;
|
|
|
|
switchdev_deferred_func_t *func;
|
2020-02-18 04:02:36 +08:00
|
|
|
unsigned long data[];
|
2015-10-15 01:40:48 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
|
|
|
|
{
|
|
|
|
struct switchdev_deferred_item *dfitem;
|
|
|
|
|
|
|
|
spin_lock_bh(&deferred_lock);
|
|
|
|
if (list_empty(&deferred)) {
|
|
|
|
dfitem = NULL;
|
|
|
|
goto unlock;
|
|
|
|
}
|
|
|
|
dfitem = list_first_entry(&deferred,
|
|
|
|
struct switchdev_deferred_item, list);
|
|
|
|
list_del(&dfitem->list);
|
|
|
|
unlock:
|
|
|
|
spin_unlock_bh(&deferred_lock);
|
|
|
|
return dfitem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* switchdev_deferred_process - Process ops in deferred queue
|
|
|
|
*
|
|
|
|
* Called to flush the ops currently queued in deferred ops queue.
|
|
|
|
* rtnl_lock must be held.
|
|
|
|
*/
|
|
|
|
void switchdev_deferred_process(void)
|
|
|
|
{
|
|
|
|
struct switchdev_deferred_item *dfitem;
|
|
|
|
|
|
|
|
ASSERT_RTNL();
|
|
|
|
|
|
|
|
while ((dfitem = switchdev_deferred_dequeue())) {
|
|
|
|
dfitem->func(dfitem->dev, dfitem->data);
|
|
|
|
dev_put(dfitem->dev);
|
|
|
|
kfree(dfitem);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(switchdev_deferred_process);
|
|
|
|
|
|
|
|
/* Work handler: flush the deferred queue with rtnl_lock held, as required
 * by switchdev_deferred_process().
 */
static void switchdev_deferred_process_work(struct work_struct *work)
{
	rtnl_lock();
	switchdev_deferred_process();
	rtnl_unlock();
}

/* Work item scheduled each time a new deferred item is enqueued. */
static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
|
|
|
|
|
|
|
|
static int switchdev_deferred_enqueue(struct net_device *dev,
|
|
|
|
const void *data, size_t data_len,
|
|
|
|
switchdev_deferred_func_t *func)
|
|
|
|
{
|
|
|
|
struct switchdev_deferred_item *dfitem;
|
|
|
|
|
|
|
|
dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
|
|
|
|
if (!dfitem)
|
|
|
|
return -ENOMEM;
|
|
|
|
dfitem->dev = dev;
|
|
|
|
dfitem->func = func;
|
|
|
|
memcpy(dfitem->data, data, data_len);
|
|
|
|
dev_hold(dev);
|
|
|
|
spin_lock_bh(&deferred_lock);
|
|
|
|
list_add_tail(&dfitem->list, &deferred);
|
|
|
|
spin_unlock_bh(&deferred_lock);
|
|
|
|
schedule_work(&deferred_process_work);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-02-28 03:44:31 +08:00
|
|
|
static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
|
|
|
|
struct net_device *dev,
|
2021-02-14 04:43:17 +08:00
|
|
|
const struct switchdev_attr *attr,
|
|
|
|
struct netlink_ext_ack *extack)
|
switchdev: introduce get/set attrs ops
Add two new swdev ops for get/set switch port attributes. Most swdev
interactions on a port are gets or sets on port attributes, so rather than
adding ops for each attribute, let's define clean get/set ops for all
attributes, and then we can have clear, consistent rules on how attributes
propagate on stacked devs.
Add the basic algorithms for get/set attr ops. Use the same recusive algo
to walk lower devs we've used for STP updates, for example. For get,
compare attr value for each lower dev and only return success if attr
values match across all lower devs. For sets, set the same attr value for
all lower devs. We'll use a two-phase prepare-commit transaction model for
sets. In the first phase, the driver(s) are asked if attr set is OK. If
all OK, the commit attr set in second phase. A driver would NACK the
prepare phase if it can't set the attr due to lack of resources or support,
within it's control. RTNL lock must be held across both phases because
we'll recurse all lower devs first in prepare phase, and then recurse all
lower devs again in commit phase. If any lower dev fails the prepare
phase, we need to abort the transaction for all lower devs.
If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to
indicate get/set only work on port (lowest) device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-11 00:47:48 +08:00
|
|
|
{
|
2019-02-28 03:44:31 +08:00
|
|
|
int err;
|
|
|
|
int rc;
|
switchdev: introduce get/set attrs ops
Add two new swdev ops for get/set switch port attributes. Most swdev
interactions on a port are gets or sets on port attributes, so rather than
adding ops for each attribute, let's define clean get/set ops for all
attributes, and then we can have clear, consistent rules on how attributes
propagate on stacked devs.
Add the basic algorithms for get/set attr ops. Use the same recusive algo
to walk lower devs we've used for STP updates, for example. For get,
compare attr value for each lower dev and only return success if attr
values match across all lower devs. For sets, set the same attr value for
all lower devs. We'll use a two-phase prepare-commit transaction model for
sets. In the first phase, the driver(s) are asked if attr set is OK. If
all OK, the commit attr set in second phase. A driver would NACK the
prepare phase if it can't set the attr due to lack of resources or support,
within it's control. RTNL lock must be held across both phases because
we'll recurse all lower devs first in prepare phase, and then recurse all
lower devs again in commit phase. If any lower dev fails the prepare
phase, we need to abort the transaction for all lower devs.
If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to
indicate get/set only work on port (lowest) device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-11 00:47:48 +08:00
|
|
|
|
2019-02-28 03:44:31 +08:00
|
|
|
struct switchdev_notifier_port_attr_info attr_info = {
|
|
|
|
.attr = attr,
|
|
|
|
.handled = false,
|
|
|
|
};
|
switchdev: introduce get/set attrs ops
Add two new swdev ops for get/set switch port attributes. Most swdev
interactions on a port are gets or sets on port attributes, so rather than
adding ops for each attribute, let's define clean get/set ops for all
attributes, and then we can have clear, consistent rules on how attributes
propagate on stacked devs.
Add the basic algorithms for get/set attr ops. Use the same recusive algo
to walk lower devs we've used for STP updates, for example. For get,
compare attr value for each lower dev and only return success if attr
values match across all lower devs. For sets, set the same attr value for
all lower devs. We'll use a two-phase prepare-commit transaction model for
sets. In the first phase, the driver(s) are asked if attr set is OK. If
all OK, the commit attr set in second phase. A driver would NACK the
prepare phase if it can't set the attr due to lack of resources or support,
within it's control. RTNL lock must be held across both phases because
we'll recurse all lower devs first in prepare phase, and then recurse all
lower devs again in commit phase. If any lower dev fails the prepare
phase, we need to abort the transaction for all lower devs.
If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to
indicate get/set only work on port (lowest) device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-11 00:47:48 +08:00
|
|
|
|
2019-02-28 03:44:31 +08:00
|
|
|
rc = call_switchdev_blocking_notifiers(nt, dev,
|
2021-02-14 04:43:17 +08:00
|
|
|
&attr_info.info, extack);
|
2019-02-28 03:44:31 +08:00
|
|
|
err = notifier_to_errno(rc);
|
|
|
|
if (err) {
|
|
|
|
WARN_ON(!attr_info.handled);
|
|
|
|
return err;
|
switchdev: introduce get/set attrs ops
Add two new swdev ops for get/set switch port attributes. Most swdev
interactions on a port are gets or sets on port attributes, so rather than
adding ops for each attribute, let's define clean get/set ops for all
attributes, and then we can have clear, consistent rules on how attributes
propagate on stacked devs.
Add the basic algorithms for get/set attr ops. Use the same recusive algo
to walk lower devs we've used for STP updates, for example. For get,
compare attr value for each lower dev and only return success if attr
values match across all lower devs. For sets, set the same attr value for
all lower devs. We'll use a two-phase prepare-commit transaction model for
sets. In the first phase, the driver(s) are asked if attr set is OK. If
all OK, the commit attr set in second phase. A driver would NACK the
prepare phase if it can't set the attr due to lack of resources or support,
within it's control. RTNL lock must be held across both phases because
we'll recurse all lower devs first in prepare phase, and then recurse all
lower devs again in commit phase. If any lower dev fails the prepare
phase, we need to abort the transaction for all lower devs.
If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to
indicate get/set only work on port (lowest) device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-11 00:47:48 +08:00
|
|
|
}
|
|
|
|
|
2019-02-28 03:44:31 +08:00
|
|
|
if (!attr_info.handled)
|
|
|
|
return -EOPNOTSUPP;
|
2015-10-09 10:23:18 +08:00
|
|
|
|
2019-02-28 03:44:31 +08:00
|
|
|
return 0;
|
switchdev: introduce get/set attrs ops
Add two new swdev ops for get/set switch port attributes. Most swdev
interactions on a port are gets or sets on port attributes, so rather than
adding ops for each attribute, let's define clean get/set ops for all
attributes, and then we can have clear, consistent rules on how attributes
propagate on stacked devs.
Add the basic algorithms for get/set attr ops. Use the same recusive algo
to walk lower devs we've used for STP updates, for example. For get,
compare attr value for each lower dev and only return success if attr
values match across all lower devs. For sets, set the same attr value for
all lower devs. We'll use a two-phase prepare-commit transaction model for
sets. In the first phase, the driver(s) are asked if attr set is OK. If
all OK, the commit attr set in second phase. A driver would NACK the
prepare phase if it can't set the attr due to lack of resources or support,
within it's control. RTNL lock must be held across both phases because
we'll recurse all lower devs first in prepare phase, and then recurse all
lower devs again in commit phase. If any lower dev fails the prepare
phase, we need to abort the transaction for all lower devs.
If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to
indicate get/set only work on port (lowest) device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-11 00:47:48 +08:00
|
|
|
}
|
|
|
|
|
2015-10-15 01:40:50 +08:00
|
|
|
static int switchdev_port_attr_set_now(struct net_device *dev,
|
2021-02-14 04:43:17 +08:00
|
|
|
const struct switchdev_attr *attr,
|
|
|
|
struct netlink_ext_ack *extack)
|
switchdev: introduce get/set attrs ops
Add two new swdev ops for get/set switch port attributes. Most swdev
interactions on a port are gets or sets on port attributes, so rather than
adding ops for each attribute, let's define clean get/set ops for all
attributes, and then we can have clear, consistent rules on how attributes
propagate on stacked devs.
Add the basic algorithms for get/set attr ops. Use the same recusive algo
to walk lower devs we've used for STP updates, for example. For get,
compare attr value for each lower dev and only return success if attr
values match across all lower devs. For sets, set the same attr value for
all lower devs. We'll use a two-phase prepare-commit transaction model for
sets. In the first phase, the driver(s) are asked if attr set is OK. If
all OK, the commit attr set in second phase. A driver would NACK the
prepare phase if it can't set the attr due to lack of resources or support,
within it's control. RTNL lock must be held across both phases because
we'll recurse all lower devs first in prepare phase, and then recurse all
lower devs again in commit phase. If any lower dev fails the prepare
phase, we need to abort the transaction for all lower devs.
If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to
indicate get/set only work on port (lowest) device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-11 00:47:48 +08:00
|
|
|
{
|
2021-02-14 04:43:17 +08:00
|
|
|
return switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
|
|
|
|
extack);
|
switchdev: introduce get/set attrs ops
Add two new swdev ops for get/set switch port attributes. Most swdev
interactions on a port are gets or sets on port attributes, so rather than
adding ops for each attribute, let's define clean get/set ops for all
attributes, and then we can have clear, consistent rules on how attributes
propagate on stacked devs.
Add the basic algorithms for get/set attr ops. Use the same recusive algo
to walk lower devs we've used for STP updates, for example. For get,
compare attr value for each lower dev and only return success if attr
values match across all lower devs. For sets, set the same attr value for
all lower devs. We'll use a two-phase prepare-commit transaction model for
sets. In the first phase, the driver(s) are asked if attr set is OK. If
all OK, the commit attr set in second phase. A driver would NACK the
prepare phase if it can't set the attr due to lack of resources or support,
within it's control. RTNL lock must be held across both phases because
we'll recurse all lower devs first in prepare phase, and then recurse all
lower devs again in commit phase. If any lower dev fails the prepare
phase, we need to abort the transaction for all lower devs.
If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to
indicate get/set only work on port (lowest) device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-11 00:47:48 +08:00
|
|
|
}
|
2015-10-15 01:40:50 +08:00
|
|
|
|
|
|
|
static void switchdev_port_attr_set_deferred(struct net_device *dev,
|
|
|
|
const void *data)
|
|
|
|
{
|
|
|
|
const struct switchdev_attr *attr = data;
|
|
|
|
int err;
|
|
|
|
|
2021-02-14 04:43:17 +08:00
|
|
|
err = switchdev_port_attr_set_now(dev, attr, NULL);
|
2015-10-15 01:40:50 +08:00
|
|
|
if (err && err != -EOPNOTSUPP)
|
|
|
|
netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
|
|
|
|
err, attr->id);
|
2016-04-21 18:52:43 +08:00
|
|
|
if (attr->complete)
|
|
|
|
attr->complete(dev, err, attr->complete_priv);
|
2015-10-15 01:40:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int switchdev_port_attr_set_defer(struct net_device *dev,
|
|
|
|
const struct switchdev_attr *attr)
|
|
|
|
{
|
|
|
|
return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
|
|
|
|
switchdev_port_attr_set_deferred);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* switchdev_port_attr_set - Set port attribute
|
|
|
|
*
|
|
|
|
* @dev: port device
|
|
|
|
* @attr: attribute to set
|
2021-02-14 04:43:17 +08:00
|
|
|
* @extack: netlink extended ack, for error message propagation
|
2015-10-15 01:40:50 +08:00
|
|
|
*
|
|
|
|
* rtnl_lock must be held and must not be in atomic section,
|
|
|
|
* in case SWITCHDEV_F_DEFER flag is not set.
|
|
|
|
*/
|
|
|
|
int switchdev_port_attr_set(struct net_device *dev,
|
2021-02-14 04:43:17 +08:00
|
|
|
const struct switchdev_attr *attr,
|
|
|
|
struct netlink_ext_ack *extack)
|
2015-10-15 01:40:50 +08:00
|
|
|
{
|
|
|
|
if (attr->flags & SWITCHDEV_F_DEFER)
|
|
|
|
return switchdev_port_attr_set_defer(dev, attr);
|
|
|
|
ASSERT_RTNL();
|
2021-02-14 04:43:17 +08:00
|
|
|
return switchdev_port_attr_set_now(dev, attr, extack);
|
2015-10-15 01:40:50 +08:00
|
|
|
}
|
switchdev: introduce get/set attrs ops
Add two new swdev ops for get/set switch port attributes. Most swdev
interactions on a port are gets or sets on port attributes, so rather than
adding ops for each attribute, let's define clean get/set ops for all
attributes, and then we can have clear, consistent rules on how attributes
propagate on stacked devs.
Add the basic algorithms for get/set attr ops. Use the same recusive algo
to walk lower devs we've used for STP updates, for example. For get,
compare attr value for each lower dev and only return success if attr
values match across all lower devs. For sets, set the same attr value for
all lower devs. We'll use a two-phase prepare-commit transaction model for
sets. In the first phase, the driver(s) are asked if attr set is OK. If
all OK, the commit attr set in second phase. A driver would NACK the
prepare phase if it can't set the attr due to lack of resources or support,
within it's control. RTNL lock must be held across both phases because
we'll recurse all lower devs first in prepare phase, and then recurse all
lower devs again in commit phase. If any lower dev fails the prepare
phase, we need to abort the transaction for all lower devs.
If lower dev recusion isn't desired, allow a flag SWITCHDEV_F_NO_RECURSE to
indicate get/set only work on port (lowest) device.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-11 00:47:48 +08:00
|
|
|
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
|
|
|
|
|
2015-10-29 14:17:31 +08:00
|
|
|
static size_t switchdev_obj_size(const struct switchdev_obj *obj)
|
|
|
|
{
|
|
|
|
switch (obj->id) {
|
|
|
|
case SWITCHDEV_OBJ_ID_PORT_VLAN:
|
|
|
|
return sizeof(struct switchdev_obj_port_vlan);
|
2016-01-11 04:06:22 +08:00
|
|
|
case SWITCHDEV_OBJ_ID_PORT_MDB:
|
|
|
|
return sizeof(struct switchdev_obj_port_mdb);
|
2017-11-10 06:10:59 +08:00
|
|
|
case SWITCHDEV_OBJ_ID_HOST_MDB:
|
|
|
|
return sizeof(struct switchdev_obj_port_mdb);
|
2015-10-29 14:17:31 +08:00
|
|
|
default:
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-11-23 07:32:57 +08:00
|
|
|
static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
|
|
|
|
struct net_device *dev,
|
|
|
|
const struct switchdev_obj *obj,
|
2018-12-13 01:02:52 +08:00
|
|
|
struct netlink_ext_ack *extack)
|
2015-05-11 00:47:52 +08:00
|
|
|
{
|
2018-11-23 07:32:57 +08:00
|
|
|
int rc;
|
|
|
|
int err;
|
2015-05-11 00:47:52 +08:00
|
|
|
|
2018-11-23 07:32:57 +08:00
|
|
|
struct switchdev_notifier_port_obj_info obj_info = {
|
|
|
|
.obj = obj,
|
|
|
|
.handled = false,
|
|
|
|
};
|
2015-05-11 00:47:52 +08:00
|
|
|
|
2018-12-13 01:02:54 +08:00
|
|
|
rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
|
2018-11-23 07:32:57 +08:00
|
|
|
err = notifier_to_errno(rc);
|
|
|
|
if (err) {
|
|
|
|
WARN_ON(!obj_info.handled);
|
|
|
|
return err;
|
2015-05-11 00:47:52 +08:00
|
|
|
}
|
2018-11-23 07:32:57 +08:00
|
|
|
if (!obj_info.handled)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
return 0;
|
2015-05-11 00:47:52 +08:00
|
|
|
}
|
|
|
|
|
2015-10-15 01:40:52 +08:00
|
|
|
static void switchdev_port_obj_add_deferred(struct net_device *dev,
|
|
|
|
const void *data)
|
|
|
|
{
|
|
|
|
const struct switchdev_obj *obj = data;
|
|
|
|
int err;
|
|
|
|
|
2021-01-09 08:01:49 +08:00
|
|
|
ASSERT_RTNL();
|
|
|
|
err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
|
|
|
|
dev, obj, NULL);
|
2015-10-15 01:40:52 +08:00
|
|
|
if (err && err != -EOPNOTSUPP)
|
|
|
|
netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
|
|
|
|
err, obj->id);
|
2016-04-21 18:52:43 +08:00
|
|
|
if (obj->complete)
|
|
|
|
obj->complete(dev, err, obj->complete_priv);
|
2015-10-15 01:40:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int switchdev_port_obj_add_defer(struct net_device *dev,
|
|
|
|
const struct switchdev_obj *obj)
|
|
|
|
{
|
2015-10-29 14:17:31 +08:00
|
|
|
return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
|
2015-10-15 01:40:52 +08:00
|
|
|
switchdev_port_obj_add_deferred);
|
|
|
|
}
|
2015-05-11 00:47:52 +08:00
|
|
|
|
|
|
|
/**
|
2015-10-15 01:40:52 +08:00
|
|
|
* switchdev_port_obj_add - Add port object
|
2015-05-11 00:47:52 +08:00
|
|
|
*
|
|
|
|
* @dev: port device
|
2015-10-15 01:40:52 +08:00
|
|
|
* @obj: object to add
|
2020-07-13 07:15:13 +08:00
|
|
|
* @extack: netlink extended ack
|
2015-10-15 01:40:52 +08:00
|
|
|
*
|
|
|
|
* rtnl_lock must be held and must not be in atomic section,
|
|
|
|
* in case SWITCHDEV_F_DEFER flag is not set.
|
2015-05-11 00:47:52 +08:00
|
|
|
*/
|
2015-10-15 01:40:52 +08:00
|
|
|
int switchdev_port_obj_add(struct net_device *dev,
|
2018-12-13 01:02:52 +08:00
|
|
|
const struct switchdev_obj *obj,
|
|
|
|
struct netlink_ext_ack *extack)
|
2015-10-15 01:40:52 +08:00
|
|
|
{
|
|
|
|
if (obj->flags & SWITCHDEV_F_DEFER)
|
|
|
|
return switchdev_port_obj_add_defer(dev, obj);
|
|
|
|
ASSERT_RTNL();
|
2021-01-09 08:01:49 +08:00
|
|
|
return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
|
|
|
|
dev, obj, extack);
|
2015-10-15 01:40:52 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
|
|
|
|
|
|
|
|
static int switchdev_port_obj_del_now(struct net_device *dev,
|
|
|
|
const struct switchdev_obj *obj)
|
2015-05-11 00:47:52 +08:00
|
|
|
{
|
2018-11-23 07:32:57 +08:00
|
|
|
return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
|
net: switchdev: remove the transaction structure from port object notifiers
Since the introduction of the switchdev API, port objects were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
object notifier structures, and converts drivers to not look at this
member.
Where driver conversion is trivial (like in the case of the Marvell
Prestera driver, NXP DPAA2 switch, TI CPSW, and Rocker drivers), it is
done in this patch.
Where driver conversion needs more attention (DSA, Mellanox Spectrum),
the conversion is left for subsequent patches and here we only fake the
prepare/commit phases at a lower level, just not in the switchdev
notifier itself.
Where the code has a natural structure that is best left alone as a
preparation and a commit phase (as in the case of the Ocelot switch),
that structure is left in place, just made to not depend upon the
switchdev transactional model.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:48 +08:00
|
|
|
dev, obj, NULL);
|
2015-05-11 00:47:52 +08:00
|
|
|
}
|
2015-10-15 01:40:52 +08:00
|
|
|
|
|
|
|
static void switchdev_port_obj_del_deferred(struct net_device *dev,
|
|
|
|
const void *data)
|
|
|
|
{
|
|
|
|
const struct switchdev_obj *obj = data;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = switchdev_port_obj_del_now(dev, obj);
|
|
|
|
if (err && err != -EOPNOTSUPP)
|
|
|
|
netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
|
|
|
|
err, obj->id);
|
2016-04-21 18:52:43 +08:00
|
|
|
if (obj->complete)
|
|
|
|
obj->complete(dev, err, obj->complete_priv);
|
2015-10-15 01:40:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int switchdev_port_obj_del_defer(struct net_device *dev,
|
|
|
|
const struct switchdev_obj *obj)
|
|
|
|
{
|
2015-10-29 14:17:31 +08:00
|
|
|
return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
|
2015-10-15 01:40:52 +08:00
|
|
|
switchdev_port_obj_del_deferred);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* switchdev_port_obj_del - Delete port object
|
|
|
|
*
|
|
|
|
* @dev: port device
|
|
|
|
* @obj: object to delete
|
|
|
|
*
|
|
|
|
* rtnl_lock must be held and must not be in atomic section,
|
|
|
|
* in case SWITCHDEV_F_DEFER flag is not set.
|
|
|
|
*/
|
|
|
|
int switchdev_port_obj_del(struct net_device *dev,
|
|
|
|
const struct switchdev_obj *obj)
|
|
|
|
{
|
|
|
|
if (obj->flags & SWITCHDEV_F_DEFER)
|
|
|
|
return switchdev_port_obj_del_defer(dev, obj);
|
|
|
|
ASSERT_RTNL();
|
|
|
|
return switchdev_port_obj_del_now(dev, obj);
|
|
|
|
}
|
2015-05-11 00:47:52 +08:00
|
|
|
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
|
|
|
|
|
2017-06-08 14:44:13 +08:00
|
|
|
/* Atomic chain behind {register,unregister,call}_switchdev_notifier(s)(). */
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
|
2018-11-23 07:28:25 +08:00
|
|
|
/* Blocking chain behind the *_switchdev_blocking_notifier(s)() helpers. */
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
|
2015-01-16 06:49:36 +08:00
|
|
|
|
|
|
|
/**
|
2015-05-11 00:47:46 +08:00
|
|
|
* register_switchdev_notifier - Register notifier
|
2015-01-16 06:49:36 +08:00
|
|
|
* @nb: notifier_block
|
|
|
|
*
|
2017-06-08 14:44:13 +08:00
|
|
|
* Register switch device notifier.
|
2015-01-16 06:49:36 +08:00
|
|
|
*/
|
2015-05-11 00:47:46 +08:00
|
|
|
int register_switchdev_notifier(struct notifier_block *nb)
|
2015-01-16 06:49:36 +08:00
|
|
|
{
|
2017-06-08 14:44:13 +08:00
|
|
|
return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
|
2015-01-16 06:49:36 +08:00
|
|
|
}
|
2015-05-11 00:47:46 +08:00
|
|
|
EXPORT_SYMBOL_GPL(register_switchdev_notifier);
|
2015-01-16 06:49:36 +08:00
|
|
|
|
|
|
|
/**
|
2015-05-11 00:47:46 +08:00
|
|
|
* unregister_switchdev_notifier - Unregister notifier
|
2015-01-16 06:49:36 +08:00
|
|
|
* @nb: notifier_block
|
|
|
|
*
|
|
|
|
* Unregister switch device notifier.
|
|
|
|
*/
|
2015-05-11 00:47:46 +08:00
|
|
|
int unregister_switchdev_notifier(struct notifier_block *nb)
|
2015-01-16 06:49:36 +08:00
|
|
|
{
|
2017-06-08 14:44:13 +08:00
|
|
|
return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
|
2015-01-16 06:49:36 +08:00
|
|
|
}
|
2015-05-11 00:47:46 +08:00
|
|
|
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
|
2015-01-16 06:49:36 +08:00
|
|
|
|
|
|
|
/**
|
2015-05-11 00:47:46 +08:00
|
|
|
* call_switchdev_notifiers - Call notifiers
|
2015-01-16 06:49:36 +08:00
|
|
|
* @val: value passed unmodified to notifier function
|
|
|
|
* @dev: port device
|
|
|
|
* @info: notifier information data
|
2020-09-22 21:32:19 +08:00
|
|
|
* @extack: netlink extended ack
|
2017-06-08 14:44:13 +08:00
|
|
|
* Call all network notifier blocks.
|
2015-01-16 06:49:36 +08:00
|
|
|
*/
|
2015-05-11 00:47:46 +08:00
|
|
|
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
|
2019-01-17 07:06:56 +08:00
|
|
|
struct switchdev_notifier_info *info,
|
|
|
|
struct netlink_ext_ack *extack)
|
2015-01-16 06:49:36 +08:00
|
|
|
{
|
|
|
|
info->dev = dev;
|
2019-01-17 07:06:56 +08:00
|
|
|
info->extack = extack;
|
2017-06-08 14:44:13 +08:00
|
|
|
return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
|
2015-01-16 06:49:36 +08:00
|
|
|
}
|
2015-05-11 00:47:46 +08:00
|
|
|
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
|
2015-01-30 14:40:13 +08:00
|
|
|
|
2018-11-23 07:28:25 +08:00
|
|
|
int register_switchdev_blocking_notifier(struct notifier_block *nb)
|
|
|
|
{
|
|
|
|
struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
|
|
|
|
|
|
|
|
return blocking_notifier_chain_register(chain, nb);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
|
|
|
|
|
|
|
|
int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
|
|
|
|
{
|
|
|
|
struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
|
|
|
|
|
|
|
|
return blocking_notifier_chain_unregister(chain, nb);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
|
|
|
|
|
|
|
|
int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
|
2018-12-13 01:02:54 +08:00
|
|
|
struct switchdev_notifier_info *info,
|
|
|
|
struct netlink_ext_ack *extack)
|
2018-11-23 07:28:25 +08:00
|
|
|
{
|
|
|
|
info->dev = dev;
|
2018-12-13 01:02:54 +08:00
|
|
|
info->extack = extack;
|
2018-11-23 07:28:25 +08:00
|
|
|
return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
|
|
|
|
val, info);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
|
|
|
|
|
net: switchdev: introduce a fanout helper for SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE
Currently DSA has an issue with FDB entries pointing towards the bridge
in the presence of br_fdb_replay() being called at port join and leave
time.
In particular, each bridge port will ask for a replay for the FDB
entries pointing towards the bridge when it joins, and for another
replay when it leaves.
This means that for example, a bridge with 4 switch ports will notify
DSA 4 times of the bridge MAC address.
But if the MAC address of the bridge changes during the normal runtime
of the system, the bridge notifies switchdev [ once ] of the deletion of
the old MAC address as a local FDB towards the bridge, and of the
insertion [ again once ] of the new MAC address as a local FDB.
This is a problem, because DSA keeps the old MAC address as a host FDB
entry with refcount 4 (4 ports asked for it using br_fdb_replay). So the
old MAC address will not be deleted. Additionally, the new MAC address
will only be installed with refcount 1, and when the first switch port
leaves the bridge (leaving 3 others as still members), it will delete
with it the new MAC address of the bridge from the local FDB entries
kept by DSA (because the br_fdb_replay call on deletion will bring the
entry's refcount from 1 to 0).
So the problem, really, is that the number of br_fdb_replay() calls is
not matched with the refcount that a host FDB is offloaded to DSA during
normal runtime.
An elegant way to solve the problem would be to make the switchdev
notification emitted by br_fdb_change_mac_address() result in a host FDB
kept by DSA which has a refcount exactly equal to the number of ports
under that bridge. Then, no matter how many DSA ports join or leave that
bridge, the host FDB entry will always be deleted when there are exactly
zero remaining DSA switch ports members of the bridge.
To implement the proposed solution, we remember that the switchdev
objects and port attributes have some helpers provided by switchdev,
which can be optionally called by drivers:
switchdev_handle_port_obj_{add,del} and switchdev_handle_port_attr_set.
These helpers:
- fan out a switchdev object/attribute emitted for the bridge towards
all the lower interfaces that pass the check_cb().
- fan out a switchdev object/attribute emitted for a bridge port that is
a LAG towards all the lower interfaces that pass the check_cb().
In other words, this is the model we need for the FDB events too:
something that will keep an FDB entry emitted towards a physical port as
it is, but translate an FDB entry emitted towards the bridge into N FDB
entries, one per physical port.
Of course, there are many differences between fanning out a switchdev
object (VLAN) on 3 lower interfaces of a LAG and fanning out an FDB
entry on 3 lower interfaces of a LAG. Intuitively, an FDB entry towards
a LAG should be treated specially, because FDB entries are unicast, we
can't just install the same address towards 3 destinations. It is
imaginable that drivers might want to treat this case specifically, so
create some methods for this case and do not recurse into the LAG lower
ports, just the bridge ports.
DSA also listens for FDB entries on "foreign" interfaces, aka interfaces
bridged with us which are not part of our hardware domain: think an
Ethernet switch bridged with a Wi-Fi AP. For those addresses, DSA
installs host FDB entries. However, there we have the same problem
(those host FDB entries are installed with a refcount of only 1) and an
even bigger one which we did not have with FDB entries towards the
bridge:
br_fdb_replay() is currently not called for FDB entries on foreign
interfaces, just for the physical port and for the bridge itself.
So when DSA sniffs an address learned by the software bridge towards a
foreign interface like an e1000 port, and then that e1000 leaves the
bridge, DSA remains with the dangling host FDB address. That will be
fixed separately by replaying all FDB entries and not just the ones
towards the port and the bridge.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-19 21:51:39 +08:00
|
|
|
static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
|
|
|
|
const struct net_device *orig_dev,
|
|
|
|
const struct switchdev_notifier_fdb_info *fdb_info,
|
|
|
|
bool (*check_cb)(const struct net_device *dev),
|
|
|
|
bool (*foreign_dev_check_cb)(const struct net_device *dev,
|
|
|
|
const struct net_device *foreign_dev),
|
|
|
|
int (*add_cb)(struct net_device *dev,
|
|
|
|
const struct net_device *orig_dev, const void *ctx,
|
|
|
|
const struct switchdev_notifier_fdb_info *fdb_info),
|
|
|
|
int (*lag_add_cb)(struct net_device *dev,
|
|
|
|
const struct net_device *orig_dev, const void *ctx,
|
|
|
|
const struct switchdev_notifier_fdb_info *fdb_info))
|
|
|
|
{
|
|
|
|
const struct switchdev_notifier_info *info = &fdb_info->info;
|
|
|
|
struct net_device *lower_dev;
|
|
|
|
struct list_head *iter;
|
|
|
|
int err = -EOPNOTSUPP;
|
|
|
|
|
|
|
|
if (check_cb(dev)) {
|
|
|
|
/* Handle FDB entries on foreign interfaces as FDB entries
|
|
|
|
* towards the software bridge.
|
|
|
|
*/
|
|
|
|
if (foreign_dev_check_cb && foreign_dev_check_cb(dev, orig_dev)) {
|
|
|
|
struct net_device *br = netdev_master_upper_dev_get_rcu(dev);
|
|
|
|
|
|
|
|
if (!br || !netif_is_bridge_master(br))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* No point in handling FDB entries on a foreign bridge */
|
|
|
|
if (foreign_dev_check_cb(dev, br))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return __switchdev_handle_fdb_add_to_device(br, orig_dev,
|
|
|
|
fdb_info, check_cb,
|
|
|
|
foreign_dev_check_cb,
|
|
|
|
add_cb, lag_add_cb);
|
|
|
|
}
|
|
|
|
|
|
|
|
return add_cb(dev, orig_dev, info->ctx, fdb_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we passed over the foreign check, it means that the LAG interface
|
|
|
|
* is offloaded.
|
|
|
|
*/
|
|
|
|
if (netif_is_lag_master(dev)) {
|
|
|
|
if (!lag_add_cb)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
return lag_add_cb(dev, orig_dev, info->ctx, fdb_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Recurse through lower interfaces in case the FDB entry is pointing
|
|
|
|
* towards a bridge device.
|
|
|
|
*/
|
|
|
|
netdev_for_each_lower_dev(dev, lower_dev, iter) {
|
|
|
|
/* Do not propagate FDB entries across bridges */
|
|
|
|
if (netif_is_bridge_master(lower_dev))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev,
|
|
|
|
fdb_info, check_cb,
|
|
|
|
foreign_dev_check_cb,
|
|
|
|
add_cb, lag_add_cb);
|
|
|
|
if (err && err != -EOPNOTSUPP)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Public entry point for fanning out an FDB "add" notification.
 *
 * Starts the recursive helper with @dev acting both as the device to examine
 * and as the originating device. A result of -EOPNOTSUPP from the helper is
 * reported to the caller as 0; every other value is returned unchanged.
 */
int switchdev_handle_fdb_add_to_device(struct net_device *dev,
		const struct switchdev_notifier_fdb_info *fdb_info,
		bool (*check_cb)(const struct net_device *dev),
		bool (*foreign_dev_check_cb)(const struct net_device *dev,
					     const struct net_device *foreign_dev),
		int (*add_cb)(struct net_device *dev,
			      const struct net_device *orig_dev, const void *ctx,
			      const struct switchdev_notifier_fdb_info *fdb_info),
		int (*lag_add_cb)(struct net_device *dev,
				  const struct net_device *orig_dev, const void *ctx,
				  const struct switchdev_notifier_fdb_info *fdb_info))
{
	int ret = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info,
						       check_cb,
						       foreign_dev_check_cb,
						       add_cb, lag_add_cb);

	return ret == -EOPNOTSUPP ? 0 : ret;
}
|
|
|
|
EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device);
|
|
|
|
|
|
|
|
static int __switchdev_handle_fdb_del_to_device(struct net_device *dev,
|
|
|
|
const struct net_device *orig_dev,
|
|
|
|
const struct switchdev_notifier_fdb_info *fdb_info,
|
|
|
|
bool (*check_cb)(const struct net_device *dev),
|
|
|
|
bool (*foreign_dev_check_cb)(const struct net_device *dev,
|
|
|
|
const struct net_device *foreign_dev),
|
|
|
|
int (*del_cb)(struct net_device *dev,
|
|
|
|
const struct net_device *orig_dev, const void *ctx,
|
|
|
|
const struct switchdev_notifier_fdb_info *fdb_info),
|
|
|
|
int (*lag_del_cb)(struct net_device *dev,
|
|
|
|
const struct net_device *orig_dev, const void *ctx,
|
|
|
|
const struct switchdev_notifier_fdb_info *fdb_info))
|
|
|
|
{
|
|
|
|
const struct switchdev_notifier_info *info = &fdb_info->info;
|
|
|
|
struct net_device *lower_dev;
|
|
|
|
struct list_head *iter;
|
|
|
|
int err = -EOPNOTSUPP;
|
|
|
|
|
|
|
|
if (check_cb(dev)) {
|
|
|
|
/* Handle FDB entries on foreign interfaces as FDB entries
|
|
|
|
* towards the software bridge.
|
|
|
|
*/
|
|
|
|
if (foreign_dev_check_cb && foreign_dev_check_cb(dev, orig_dev)) {
|
|
|
|
struct net_device *br = netdev_master_upper_dev_get_rcu(dev);
|
|
|
|
|
|
|
|
if (!br || !netif_is_bridge_master(br))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* No point in handling FDB entries on a foreign bridge */
|
|
|
|
if (foreign_dev_check_cb(dev, br))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return __switchdev_handle_fdb_del_to_device(br, orig_dev,
|
|
|
|
fdb_info, check_cb,
|
|
|
|
foreign_dev_check_cb,
|
|
|
|
del_cb, lag_del_cb);
|
|
|
|
}
|
|
|
|
|
|
|
|
return del_cb(dev, orig_dev, info->ctx, fdb_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we passed over the foreign check, it means that the LAG interface
|
|
|
|
* is offloaded.
|
|
|
|
*/
|
|
|
|
if (netif_is_lag_master(dev)) {
|
|
|
|
if (!lag_del_cb)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
return lag_del_cb(dev, orig_dev, info->ctx, fdb_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Recurse through lower interfaces in case the FDB entry is pointing
|
|
|
|
* towards a bridge device.
|
|
|
|
*/
|
|
|
|
netdev_for_each_lower_dev(dev, lower_dev, iter) {
|
|
|
|
/* Do not propagate FDB entries across bridges */
|
|
|
|
if (netif_is_bridge_master(lower_dev))
|
|
|
|
continue;
|
|
|
|
|
2021-07-21 01:35:57 +08:00
|
|
|
err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev,
|
|
|
|
fdb_info, check_cb,
|
|
|
|
foreign_dev_check_cb,
|
|
|
|
del_cb, lag_del_cb);
|
net: switchdev: introduce a fanout helper for SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE
Currently DSA has an issue with FDB entries pointing towards the bridge
in the presence of br_fdb_replay() being called at port join and leave
time.
In particular, each bridge port will ask for a replay for the FDB
entries pointing towards the bridge when it joins, and for another
replay when it leaves.
This means that for example, a bridge with 4 switch ports will notify
DSA 4 times of the bridge MAC address.
But if the MAC address of the bridge changes during the normal runtime
of the system, the bridge notifies switchdev [ once ] of the deletion of
the old MAC address as a local FDB towards the bridge, and of the
insertion [ again once ] of the new MAC address as a local FDB.
This is a problem, because DSA keeps the old MAC address as a host FDB
entry with refcount 4 (4 ports asked for it using br_fdb_replay). So the
old MAC address will not be deleted. Additionally, the new MAC address
will only be installed with refcount 1, and when the first switch port
leaves the bridge (leaving 3 others as still members), it will delete
with it the new MAC address of the bridge from the local FDB entries
kept by DSA (because the br_fdb_replay call on deletion will bring the
entry's refcount from 1 to 0).
So the problem, really, is that the number of br_fdb_replay() calls is
not matched with the refcount that a host FDB is offloaded to DSA during
normal runtime.
An elegant way to solve the problem would be to make the switchdev
notification emitted by br_fdb_change_mac_address() result in a host FDB
kept by DSA which has a refcount exactly equal to the number of ports
under that bridge. Then, no matter how many DSA ports join or leave that
bridge, the host FDB entry will always be deleted when there are exactly
zero remaining DSA switch ports members of the bridge.
To implement the proposed solution, we remember that the switchdev
objects and port attributes have some helpers provided by switchdev,
which can be optionally called by drivers:
switchdev_handle_port_obj_{add,del} and switchdev_handle_port_attr_set.
These helpers:
- fan out a switchdev object/attribute emitted for the bridge towards
all the lower interfaces that pass the check_cb().
- fan out a switchdev object/attribute emitted for a bridge port that is
a LAG towards all the lower interfaces that pass the check_cb().
In other words, this is the model we need for the FDB events too:
something that will keep an FDB entry emitted towards a physical port as
it is, but translate an FDB entry emitted towards the bridge into N FDB
entries, one per physical port.
Of course, there are many differences between fanning out a switchdev
object (VLAN) on 3 lower interfaces of a LAG and fanning out an FDB
entry on 3 lower interfaces of a LAG. Intuitively, an FDB entry towards
a LAG should be treated specially, because FDB entries are unicast, we
can't just install the same address towards 3 destinations. It is
imaginable that drivers might want to treat this case specifically, so
create some methods for this case and do not recurse into the LAG lower
ports, just the bridge ports.
DSA also listens for FDB entries on "foreign" interfaces, aka interfaces
bridged with us which are not part of our hardware domain: think an
Ethernet switch bridged with a Wi-Fi AP. For those addresses, DSA
installs host FDB entries. However, there we have the same problem
(those host FDB entries are installed with a refcount of only 1) and an
even bigger one which we did not have with FDB entries towards the
bridge:
br_fdb_replay() is currently not called for FDB entries on foreign
interfaces, just for the physical port and for the bridge itself.
So when DSA sniffs an address learned by the software bridge towards a
foreign interface like an e1000 port, and then that e1000 leaves the
bridge, DSA remains with the dangling host FDB address. That will be
fixed separately by replaying all FDB entries and not just the ones
towards the port and the bridge.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-19 21:51:39 +08:00
|
|
|
if (err && err != -EOPNOTSUPP)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Public entry point for fanning out an FDB "del" notification.
 *
 * Starts the recursive helper with @dev acting both as the device to examine
 * and as the originating device. A result of -EOPNOTSUPP from the helper is
 * reported to the caller as 0; every other value is returned unchanged.
 */
int switchdev_handle_fdb_del_to_device(struct net_device *dev,
		const struct switchdev_notifier_fdb_info *fdb_info,
		bool (*check_cb)(const struct net_device *dev),
		bool (*foreign_dev_check_cb)(const struct net_device *dev,
					     const struct net_device *foreign_dev),
		int (*del_cb)(struct net_device *dev,
			      const struct net_device *orig_dev, const void *ctx,
			      const struct switchdev_notifier_fdb_info *fdb_info),
		int (*lag_del_cb)(struct net_device *dev,
				  const struct net_device *orig_dev, const void *ctx,
				  const struct switchdev_notifier_fdb_info *fdb_info))
{
	int ret = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info,
						       check_cb,
						       foreign_dev_check_cb,
						       del_cb, lag_del_cb);

	return ret == -EOPNOTSUPP ? 0 : ret;
}
|
|
|
|
EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device);
|
|
|
|
|
2018-11-23 07:29:44 +08:00
|
|
|
static int __switchdev_handle_port_obj_add(struct net_device *dev,
|
|
|
|
struct switchdev_notifier_port_obj_info *port_obj_info,
|
|
|
|
bool (*check_cb)(const struct net_device *dev),
|
2021-06-27 19:54:24 +08:00
|
|
|
int (*add_cb)(struct net_device *dev, const void *ctx,
|
2018-11-23 07:29:44 +08:00
|
|
|
const struct switchdev_obj *obj,
|
2018-12-13 01:02:56 +08:00
|
|
|
struct netlink_ext_ack *extack))
|
2018-11-23 07:29:44 +08:00
|
|
|
{
|
2021-06-27 19:54:24 +08:00
|
|
|
struct switchdev_notifier_info *info = &port_obj_info->info;
|
2018-12-13 01:02:56 +08:00
|
|
|
struct netlink_ext_ack *extack;
|
2018-11-23 07:29:44 +08:00
|
|
|
struct net_device *lower_dev;
|
|
|
|
struct list_head *iter;
|
|
|
|
int err = -EOPNOTSUPP;
|
|
|
|
|
2021-06-27 19:54:24 +08:00
|
|
|
extack = switchdev_notifier_info_to_extack(info);
|
2018-12-13 01:02:56 +08:00
|
|
|
|
2018-11-23 07:29:44 +08:00
|
|
|
if (check_cb(dev)) {
|
2021-06-27 19:54:24 +08:00
|
|
|
err = add_cb(dev, info->ctx, port_obj_info->obj, extack);
|
net: switchdev: don't set port_obj_info->handled true when -EOPNOTSUPP
It's not true that switchdev_port_obj_notify() only inspects the
->handled field of "struct switchdev_notifier_port_obj_info" if
call_switchdev_blocking_notifiers() returns 0 - there's a WARN_ON()
triggering for a non-zero return combined with ->handled not being
true. But the real problem here is that -EOPNOTSUPP is not being
properly handled.
The wrapper functions switchdev_handle_port_obj_add() et al change a
return value of -EOPNOTSUPP to 0, and the treatment of ->handled in
switchdev_port_obj_notify() seems to be designed to change that back
to -EOPNOTSUPP in case nobody actually acted on the notifier (i.e.,
everybody returned -EOPNOTSUPP).
Currently, as soon as some device down the stack passes the check_cb()
check, ->handled gets set to true, which means that
switchdev_port_obj_notify() cannot actually ever return -EOPNOTSUPP.
This, for example, means that the detection of hardware offload
support in the MRP code is broken: switchdev_port_obj_add() used by
br_mrp_switchdev_send_ring_test() always returns 0, so since the MRP
code thinks the generation of MRP test frames has been offloaded, no
such frames are actually put on the wire. Similarly,
br_mrp_switchdev_set_ring_role() also always returns 0, causing
mrp->ring_role_offloaded to be set to 1.
To fix this, continue to set ->handled true if any callback returns
success or any error distinct from -EOPNOTSUPP. But if all the
callbacks return -EOPNOTSUPP, make sure that ->handled stays false, so
the logic in switchdev_port_obj_notify() can propagate that
information.
Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API")
Fixes: f30f0601eb93 ("switchdev: Add helpers to aid traversal through lower devices")
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Link: https://lore.kernel.org/r/20210125124116.102928-1-rasmus.villemoes@prevas.dk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-25 20:41:16 +08:00
|
|
|
if (err != -EOPNOTSUPP)
|
|
|
|
port_obj_info->handled = true;
|
|
|
|
return err;
|
2018-11-23 07:29:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Switch ports might be stacked under e.g. a LAG. Ignore the
|
|
|
|
* unsupported devices, another driver might be able to handle them. But
|
|
|
|
* propagate to the callers any hard errors.
|
|
|
|
*
|
|
|
|
* If the driver does its own bookkeeping of stacked ports, it's not
|
|
|
|
* necessary to go through this helper.
|
|
|
|
*/
|
|
|
|
netdev_for_each_lower_dev(dev, lower_dev, iter) {
|
2020-02-27 01:14:21 +08:00
|
|
|
if (netif_is_bridge_master(lower_dev))
|
|
|
|
continue;
|
|
|
|
|
2018-11-23 07:29:44 +08:00
|
|
|
err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
|
|
|
|
check_cb, add_cb);
|
|
|
|
if (err && err != -EOPNOTSUPP)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Public entry point for offering a port object "add" notification to @dev
 * and the devices below it.
 *
 * A result of -EOPNOTSUPP from the recursive helper is reported to the caller
 * as 0; every other value is returned unchanged.
 */
int switchdev_handle_port_obj_add(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*add_cb)(struct net_device *dev, const void *ctx,
				      const struct switchdev_obj *obj,
				      struct netlink_ext_ack *extack))
{
	int ret = __switchdev_handle_port_obj_add(dev, port_obj_info,
						  check_cb, add_cb);

	return ret == -EOPNOTSUPP ? 0 : ret;
}
|
|
|
|
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
|
|
|
|
|
|
|
|
static int __switchdev_handle_port_obj_del(struct net_device *dev,
|
|
|
|
struct switchdev_notifier_port_obj_info *port_obj_info,
|
|
|
|
bool (*check_cb)(const struct net_device *dev),
|
2021-06-27 19:54:24 +08:00
|
|
|
int (*del_cb)(struct net_device *dev, const void *ctx,
|
2018-11-23 07:29:44 +08:00
|
|
|
const struct switchdev_obj *obj))
|
|
|
|
{
|
2021-06-27 19:54:24 +08:00
|
|
|
struct switchdev_notifier_info *info = &port_obj_info->info;
|
2018-11-23 07:29:44 +08:00
|
|
|
struct net_device *lower_dev;
|
|
|
|
struct list_head *iter;
|
|
|
|
int err = -EOPNOTSUPP;
|
|
|
|
|
|
|
|
if (check_cb(dev)) {
|
2021-06-27 19:54:24 +08:00
|
|
|
err = del_cb(dev, info->ctx, port_obj_info->obj);
|
net: switchdev: don't set port_obj_info->handled true when -EOPNOTSUPP
It's not true that switchdev_port_obj_notify() only inspects the
->handled field of "struct switchdev_notifier_port_obj_info" if
call_switchdev_blocking_notifiers() returns 0 - there's a WARN_ON()
triggering for a non-zero return combined with ->handled not being
true. But the real problem here is that -EOPNOTSUPP is not being
properly handled.
The wrapper functions switchdev_handle_port_obj_add() et al change a
return value of -EOPNOTSUPP to 0, and the treatment of ->handled in
switchdev_port_obj_notify() seems to be designed to change that back
to -EOPNOTSUPP in case nobody actually acted on the notifier (i.e.,
everybody returned -EOPNOTSUPP).
Currently, as soon as some device down the stack passes the check_cb()
check, ->handled gets set to true, which means that
switchdev_port_obj_notify() cannot actually ever return -EOPNOTSUPP.
This, for example, means that the detection of hardware offload
support in the MRP code is broken: switchdev_port_obj_add() used by
br_mrp_switchdev_send_ring_test() always returns 0, so since the MRP
code thinks the generation of MRP test frames has been offloaded, no
such frames are actually put on the wire. Similarly,
br_mrp_switchdev_set_ring_role() also always returns 0, causing
mrp->ring_role_offloaded to be set to 1.
To fix this, continue to set ->handled true if any callback returns
success or any error distinct from -EOPNOTSUPP. But if all the
callbacks return -EOPNOTSUPP, make sure that ->handled stays false, so
the logic in switchdev_port_obj_notify() can propagate that
information.
Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API")
Fixes: f30f0601eb93 ("switchdev: Add helpers to aid traversal through lower devices")
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Link: https://lore.kernel.org/r/20210125124116.102928-1-rasmus.villemoes@prevas.dk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-25 20:41:16 +08:00
|
|
|
if (err != -EOPNOTSUPP)
|
|
|
|
port_obj_info->handled = true;
|
|
|
|
return err;
|
2018-11-23 07:29:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Switch ports might be stacked under e.g. a LAG. Ignore the
|
|
|
|
* unsupported devices, another driver might be able to handle them. But
|
|
|
|
* propagate to the callers any hard errors.
|
|
|
|
*
|
|
|
|
* If the driver does its own bookkeeping of stacked ports, it's not
|
|
|
|
* necessary to go through this helper.
|
|
|
|
*/
|
|
|
|
netdev_for_each_lower_dev(dev, lower_dev, iter) {
|
2020-02-27 01:14:21 +08:00
|
|
|
if (netif_is_bridge_master(lower_dev))
|
|
|
|
continue;
|
|
|
|
|
2018-11-23 07:29:44 +08:00
|
|
|
err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
|
|
|
|
check_cb, del_cb);
|
|
|
|
if (err && err != -EOPNOTSUPP)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Public entry point for offering a port object "del" notification to @dev
 * and the devices below it.
 *
 * A result of -EOPNOTSUPP from the recursive helper is reported to the caller
 * as 0; every other value is returned unchanged.
 */
int switchdev_handle_port_obj_del(struct net_device *dev,
			struct switchdev_notifier_port_obj_info *port_obj_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*del_cb)(struct net_device *dev, const void *ctx,
				      const struct switchdev_obj *obj))
{
	int ret = __switchdev_handle_port_obj_del(dev, port_obj_info,
						  check_cb, del_cb);

	return ret == -EOPNOTSUPP ? 0 : ret;
}
|
|
|
|
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
|
2019-02-28 03:44:25 +08:00
|
|
|
|
|
|
|
static int __switchdev_handle_port_attr_set(struct net_device *dev,
|
|
|
|
struct switchdev_notifier_port_attr_info *port_attr_info,
|
|
|
|
bool (*check_cb)(const struct net_device *dev),
|
2021-06-27 19:54:24 +08:00
|
|
|
int (*set_cb)(struct net_device *dev, const void *ctx,
|
2021-02-12 23:15:51 +08:00
|
|
|
const struct switchdev_attr *attr,
|
|
|
|
struct netlink_ext_ack *extack))
|
2019-02-28 03:44:25 +08:00
|
|
|
{
|
2021-06-27 19:54:24 +08:00
|
|
|
struct switchdev_notifier_info *info = &port_attr_info->info;
|
2021-02-12 23:15:51 +08:00
|
|
|
struct netlink_ext_ack *extack;
|
2019-02-28 03:44:25 +08:00
|
|
|
struct net_device *lower_dev;
|
|
|
|
struct list_head *iter;
|
|
|
|
int err = -EOPNOTSUPP;
|
|
|
|
|
2021-06-27 19:54:24 +08:00
|
|
|
extack = switchdev_notifier_info_to_extack(info);
|
2021-02-12 23:15:51 +08:00
|
|
|
|
2019-02-28 03:44:25 +08:00
|
|
|
if (check_cb(dev)) {
|
2021-06-27 19:54:24 +08:00
|
|
|
err = set_cb(dev, info->ctx, port_attr_info->attr, extack);
|
net: switchdev: don't set port_obj_info->handled true when -EOPNOTSUPP
It's not true that switchdev_port_obj_notify() only inspects the
->handled field of "struct switchdev_notifier_port_obj_info" if
call_switchdev_blocking_notifiers() returns 0 - there's a WARN_ON()
triggering for a non-zero return combined with ->handled not being
true. But the real problem here is that -EOPNOTSUPP is not being
properly handled.
The wrapper functions switchdev_handle_port_obj_add() et al change a
return value of -EOPNOTSUPP to 0, and the treatment of ->handled in
switchdev_port_obj_notify() seems to be designed to change that back
to -EOPNOTSUPP in case nobody actually acted on the notifier (i.e.,
everybody returned -EOPNOTSUPP).
Currently, as soon as some device down the stack passes the check_cb()
check, ->handled gets set to true, which means that
switchdev_port_obj_notify() cannot actually ever return -EOPNOTSUPP.
This, for example, means that the detection of hardware offload
support in the MRP code is broken: switchdev_port_obj_add() used by
br_mrp_switchdev_send_ring_test() always returns 0, so since the MRP
code thinks the generation of MRP test frames has been offloaded, no
such frames are actually put on the wire. Similarly,
br_mrp_switchdev_set_ring_role() also always returns 0, causing
mrp->ring_role_offloaded to be set to 1.
To fix this, continue to set ->handled true if any callback returns
success or any error distinct from -EOPNOTSUPP. But if all the
callbacks return -EOPNOTSUPP, make sure that ->handled stays false, so
the logic in switchdev_port_obj_notify() can propagate that
information.
Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API")
Fixes: f30f0601eb93 ("switchdev: Add helpers to aid traversal through lower devices")
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Link: https://lore.kernel.org/r/20210125124116.102928-1-rasmus.villemoes@prevas.dk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-25 20:41:16 +08:00
|
|
|
if (err != -EOPNOTSUPP)
|
|
|
|
port_attr_info->handled = true;
|
|
|
|
return err;
|
2019-02-28 03:44:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Switch ports might be stacked under e.g. a LAG. Ignore the
|
|
|
|
* unsupported devices, another driver might be able to handle them. But
|
|
|
|
* propagate to the callers any hard errors.
|
|
|
|
*
|
|
|
|
* If the driver does its own bookkeeping of stacked ports, it's not
|
|
|
|
* necessary to go through this helper.
|
|
|
|
*/
|
|
|
|
netdev_for_each_lower_dev(dev, lower_dev, iter) {
|
2020-02-27 01:14:21 +08:00
|
|
|
if (netif_is_bridge_master(lower_dev))
|
|
|
|
continue;
|
|
|
|
|
2019-02-28 03:44:25 +08:00
|
|
|
err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
|
|
|
|
check_cb, set_cb);
|
|
|
|
if (err && err != -EOPNOTSUPP)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Public entry point for offering a port attribute "set" notification to @dev
 * and the devices below it.
 *
 * A result of -EOPNOTSUPP from the recursive helper is reported to the caller
 * as 0; every other value is returned unchanged.
 */
int switchdev_handle_port_attr_set(struct net_device *dev,
			struct switchdev_notifier_port_attr_info *port_attr_info,
			bool (*check_cb)(const struct net_device *dev),
			int (*set_cb)(struct net_device *dev, const void *ctx,
				      const struct switchdev_attr *attr,
				      struct netlink_ext_ack *extack))
{
	int ret = __switchdev_handle_port_attr_set(dev, port_attr_info,
						   check_cb, set_cb);

	return ret == -EOPNOTSUPP ? 0 : ret;
}
|
|
|
|
EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
|