OpenCloudOS-Kernel/net/switchdev/switchdev.c

539 lines
14 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/switchdev/switchdev.c - Switch device API
* Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
* Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
#include <linux/rtnetlink.h>
#include <net/switchdev.h>
static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);
typedef void switchdev_deferred_func_t(struct net_device *dev,
const void *data);
struct switchdev_deferred_item {
struct list_head list;
struct net_device *dev;
switchdev_deferred_func_t *func;
unsigned long data[];
};
static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
{
struct switchdev_deferred_item *dfitem;
spin_lock_bh(&deferred_lock);
if (list_empty(&deferred)) {
dfitem = NULL;
goto unlock;
}
dfitem = list_first_entry(&deferred,
struct switchdev_deferred_item, list);
list_del(&dfitem->list);
unlock:
spin_unlock_bh(&deferred_lock);
return dfitem;
}
/**
* switchdev_deferred_process - Process ops in deferred queue
*
* Called to flush the ops currently queued in deferred ops queue.
* rtnl_lock must be held.
*/
void switchdev_deferred_process(void)
{
struct switchdev_deferred_item *dfitem;
ASSERT_RTNL();
while ((dfitem = switchdev_deferred_dequeue())) {
dfitem->func(dfitem->dev, dfitem->data);
dev_put(dfitem->dev);
kfree(dfitem);
}
}
EXPORT_SYMBOL_GPL(switchdev_deferred_process);
static void switchdev_deferred_process_work(struct work_struct *work)
{
rtnl_lock();
switchdev_deferred_process();
rtnl_unlock();
}
static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
static int switchdev_deferred_enqueue(struct net_device *dev,
const void *data, size_t data_len,
switchdev_deferred_func_t *func)
{
struct switchdev_deferred_item *dfitem;
dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
if (!dfitem)
return -ENOMEM;
dfitem->dev = dev;
dfitem->func = func;
memcpy(dfitem->data, data, data_len);
dev_hold(dev);
spin_lock_bh(&deferred_lock);
list_add_tail(&dfitem->list, &deferred);
spin_unlock_bh(&deferred_lock);
schedule_work(&deferred_process_work);
return 0;
}
static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
struct net_device *dev,
net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
const struct switchdev_attr *attr)
2015-05-11 00:47:48 +08:00
{
int err;
int rc;
2015-05-11 00:47:48 +08:00
struct switchdev_notifier_port_attr_info attr_info = {
.attr = attr,
.handled = false,
};
2015-05-11 00:47:48 +08:00
rc = call_switchdev_blocking_notifiers(nt, dev,
&attr_info.info, NULL);
err = notifier_to_errno(rc);
if (err) {
WARN_ON(!attr_info.handled);
return err;
2015-05-11 00:47:48 +08:00
}
if (!attr_info.handled)
return -EOPNOTSUPP;
return 0;
2015-05-11 00:47:48 +08:00
}
static int switchdev_port_attr_set_now(struct net_device *dev,
const struct switchdev_attr *attr)
2015-05-11 00:47:48 +08:00
{
net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
return switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr);
2015-05-11 00:47:48 +08:00
}
static void switchdev_port_attr_set_deferred(struct net_device *dev,
const void *data)
{
const struct switchdev_attr *attr = data;
int err;
err = switchdev_port_attr_set_now(dev, attr);
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
err, attr->id);
if (attr->complete)
attr->complete(dev, err, attr->complete_priv);
}
static int switchdev_port_attr_set_defer(struct net_device *dev,
const struct switchdev_attr *attr)
{
return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
switchdev_port_attr_set_deferred);
}
/**
* switchdev_port_attr_set - Set port attribute
*
* @dev: port device
* @attr: attribute to set
*
* rtnl_lock must be held and must not be in atomic section,
* in case SWITCHDEV_F_DEFER flag is not set.
*/
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr)
{
if (attr->flags & SWITCHDEV_F_DEFER)
return switchdev_port_attr_set_defer(dev, attr);
ASSERT_RTNL();
return switchdev_port_attr_set_now(dev, attr);
}
2015-05-11 00:47:48 +08:00
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
static size_t switchdev_obj_size(const struct switchdev_obj *obj)
{
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
return sizeof(struct switchdev_obj_port_vlan);
case SWITCHDEV_OBJ_ID_PORT_MDB:
return sizeof(struct switchdev_obj_port_mdb);
case SWITCHDEV_OBJ_ID_HOST_MDB:
return sizeof(struct switchdev_obj_port_mdb);
default:
BUG();
}
return 0;
}
static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack)
{
int rc;
int err;
struct switchdev_notifier_port_obj_info obj_info = {
.obj = obj,
.handled = false,
};
rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
err = notifier_to_errno(rc);
if (err) {
WARN_ON(!obj_info.handled);
return err;
}
if (!obj_info.handled)
return -EOPNOTSUPP;
return 0;
}
static void switchdev_port_obj_add_deferred(struct net_device *dev,
const void *data)
{
const struct switchdev_obj *obj = data;
int err;
ASSERT_RTNL();
err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
dev, obj, NULL);
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
err, obj->id);
if (obj->complete)
obj->complete(dev, err, obj->complete_priv);
}
static int switchdev_port_obj_add_defer(struct net_device *dev,
const struct switchdev_obj *obj)
{
return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
switchdev_port_obj_add_deferred);
}
/**
* switchdev_port_obj_add - Add port object
*
* @dev: port device
* @obj: object to add
* @extack: netlink extended ack
*
* rtnl_lock must be held and must not be in atomic section,
* in case SWITCHDEV_F_DEFER flag is not set.
*/
int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack)
{
if (obj->flags & SWITCHDEV_F_DEFER)
return switchdev_port_obj_add_defer(dev, obj);
ASSERT_RTNL();
return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
dev, obj, extack);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
static int switchdev_port_obj_del_now(struct net_device *dev,
const struct switchdev_obj *obj)
{
return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
net: switchdev: remove the transaction structure from port object notifiers Since the introduction of the switchdev API, port objects were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port object notifier structures, and converts drivers to not look at this member. Where driver conversion is trivial (like in the case of the Marvell Prestera driver, NXP DPAA2 switch, TI CPSW, and Rocker drivers), it is done in this patch. Where driver conversion needs more attention (DSA, Mellanox Spectrum), the conversion is left for subsequent patches and here we only fake the prepare/commit phases at a lower level, just not in the switchdev notifier itself. Where the code has a natural structure that is best left alone as a preparation and a commit phase (as in the case of the Ocelot switch), that structure is left in place, just made to not depend upon the switchdev transactional model. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:48 +08:00
dev, obj, NULL);
}
static void switchdev_port_obj_del_deferred(struct net_device *dev,
const void *data)
{
const struct switchdev_obj *obj = data;
int err;
err = switchdev_port_obj_del_now(dev, obj);
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
err, obj->id);
if (obj->complete)
obj->complete(dev, err, obj->complete_priv);
}
static int switchdev_port_obj_del_defer(struct net_device *dev,
const struct switchdev_obj *obj)
{
return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
switchdev_port_obj_del_deferred);
}
/**
* switchdev_port_obj_del - Delete port object
*
* @dev: port device
* @obj: object to delete
*
* rtnl_lock must be held and must not be in atomic section,
* in case SWITCHDEV_F_DEFER flag is not set.
*/
int switchdev_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
if (obj->flags & SWITCHDEV_F_DEFER)
return switchdev_port_obj_del_defer(dev, obj);
ASSERT_RTNL();
return switchdev_port_obj_del_now(dev, obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
/**
* register_switchdev_notifier - Register notifier
* @nb: notifier_block
*
* Register switch device notifier.
*/
int register_switchdev_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_register(&switchdev_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);
/**
* unregister_switchdev_notifier - Unregister notifier
* @nb: notifier_block
*
* Unregister switch device notifier.
*/
int unregister_switchdev_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&switchdev_notif_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
/**
* call_switchdev_notifiers - Call notifiers
* @val: value passed unmodified to notifier function
* @dev: port device
* @info: notifier information data
* @extack: netlink extended ack
* Call all network notifier blocks.
*/
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack)
{
info->dev = dev;
info->extack = extack;
return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
int register_switchdev_blocking_notifier(struct notifier_block *nb)
{
struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
return blocking_notifier_chain_register(chain, nb);
}
EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
{
struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
return blocking_notifier_chain_unregister(chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack)
{
info->dev = dev;
info->extack = extack;
return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
int (*add_cb)(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
{
struct netlink_ext_ack *extack;
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
if (check_cb(dev)) {
err = add_cb(dev, port_obj_info->obj, extack);
net: switchdev: don't set port_obj_info->handled true when -EOPNOTSUPP It's not true that switchdev_port_obj_notify() only inspects the ->handled field of "struct switchdev_notifier_port_obj_info" if call_switchdev_blocking_notifiers() returns 0 - there's a WARN_ON() triggering for a non-zero return combined with ->handled not being true. But the real problem here is that -EOPNOTSUPP is not being properly handled. The wrapper functions switchdev_handle_port_obj_add() et al change a return value of -EOPNOTSUPP to 0, and the treatment of ->handled in switchdev_port_obj_notify() seems to be designed to change that back to -EOPNOTSUPP in case nobody actually acted on the notifier (i.e., everybody returned -EOPNOTSUPP). Currently, as soon as some device down the stack passes the check_cb() check, ->handled gets set to true, which means that switchdev_port_obj_notify() cannot actually ever return -EOPNOTSUPP. This, for example, means that the detection of hardware offload support in the MRP code is broken: switchdev_port_obj_add() used by br_mrp_switchdev_send_ring_test() always returns 0, so since the MRP code thinks the generation of MRP test frames has been offloaded, no such frames are actually put on the wire. Similarly, br_mrp_switchdev_set_ring_role() also always returns 0, causing mrp->ring_role_offloaded to be set to 1. To fix this, continue to set ->handled true if any callback returns success or any error distinct from -EOPNOTSUPP. But if all the callbacks return -EOPNOTSUPP, make sure that ->handled stays false, so the logic in switchdev_port_obj_notify() can propagate that information. Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API") Fixes: f30f0601eb93 ("switchdev: Add helpers to aid traversal through lower devices") Reviewed-by: Petr Machata <petrm@nvidia.com> Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk> Link: https://lore.kernel.org/r/20210125124116.102928-1-rasmus.villemoes@prevas.dk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-25 20:41:16 +08:00
if (err != -EOPNOTSUPP)
port_obj_info->handled = true;
return err;
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
}
/* Switch ports might be stacked under e.g. a LAG. Ignore the
* unsupported devices, another driver might be able to handle them. But
* propagate to the callers any hard errors.
*
* If the driver does its own bookkeeping of stacked ports, it's not
* necessary to go through this helper.
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
if (netif_is_bridge_master(lower_dev))
continue;
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
check_cb, add_cb);
if (err && err != -EOPNOTSUPP)
return err;
}
return err;
}
int switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
int (*add_cb)(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
{
int err;
err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
add_cb);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
static int __switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
int (*del_cb)(struct net_device *dev,
const struct switchdev_obj *obj))
{
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
if (check_cb(dev)) {
net: switchdev: don't set port_obj_info->handled true when -EOPNOTSUPP It's not true that switchdev_port_obj_notify() only inspects the ->handled field of "struct switchdev_notifier_port_obj_info" if call_switchdev_blocking_notifiers() returns 0 - there's a WARN_ON() triggering for a non-zero return combined with ->handled not being true. But the real problem here is that -EOPNOTSUPP is not being properly handled. The wrapper functions switchdev_handle_port_obj_add() et al change a return value of -EOPNOTSUPP to 0, and the treatment of ->handled in switchdev_port_obj_notify() seems to be designed to change that back to -EOPNOTSUPP in case nobody actually acted on the notifier (i.e., everybody returned -EOPNOTSUPP). Currently, as soon as some device down the stack passes the check_cb() check, ->handled gets set to true, which means that switchdev_port_obj_notify() cannot actually ever return -EOPNOTSUPP. This, for example, means that the detection of hardware offload support in the MRP code is broken: switchdev_port_obj_add() used by br_mrp_switchdev_send_ring_test() always returns 0, so since the MRP code thinks the generation of MRP test frames has been offloaded, no such frames are actually put on the wire. Similarly, br_mrp_switchdev_set_ring_role() also always returns 0, causing mrp->ring_role_offloaded to be set to 1. To fix this, continue to set ->handled true if any callback returns success or any error distinct from -EOPNOTSUPP. But if all the callbacks return -EOPNOTSUPP, make sure that ->handled stays false, so the logic in switchdev_port_obj_notify() can propagate that information. Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API") Fixes: f30f0601eb93 ("switchdev: Add helpers to aid traversal through lower devices") Reviewed-by: Petr Machata <petrm@nvidia.com> Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk> Link: https://lore.kernel.org/r/20210125124116.102928-1-rasmus.villemoes@prevas.dk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-25 20:41:16 +08:00
err = del_cb(dev, port_obj_info->obj);
if (err != -EOPNOTSUPP)
port_obj_info->handled = true;
return err;
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
}
/* Switch ports might be stacked under e.g. a LAG. Ignore the
* unsupported devices, another driver might be able to handle them. But
* propagate to the callers any hard errors.
*
* If the driver does its own bookkeeping of stacked ports, it's not
* necessary to go through this helper.
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
if (netif_is_bridge_master(lower_dev))
continue;
switchdev: Add helpers to aid traversal through lower devices After the transition from switchdev operations to notifier chain (which will take place in following patches), the onus is on the driver to find its own devices below possible layer of LAG or other uppers. The logic to do so is fairly repetitive: each driver is looking for its own devices among the lowers of the notified device. For those that it finds, it calls a handler. To indicate that the event was handled, struct switchdev_notifier_port_obj_info.handled is set. The differences lie only in what constitutes an "own" device and what handler to call. Therefore abstract this logic into two helpers, switchdev_handle_port_obj_add() and switchdev_handle_port_obj_del(). If a driver only supports physical ports under a bridge device, it will simply avoid this layer of indirection. One area where this helper diverges from the current switchdev behavior is the case of mixed lowers, some of which are switchdev ports and some of which are not. Previously, such scenario would fail with -EOPNOTSUPP. The helper could do that for lowers for which the passed-in predicate doesn't hold. That would however break the case that switchdev ports from several different drivers are stashed under one master, a scenario that switchdev currently happily supports. Therefore tolerate any and all unknown netdevices, whether they are backed by a switchdev driver or not. Signed-off-by: Petr Machata <petrm@mellanox.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-11-23 07:29:44 +08:00
err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
check_cb, del_cb);
if (err && err != -EOPNOTSUPP)
return err;
}
return err;
}
int switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
int (*del_cb)(struct net_device *dev,
const struct switchdev_obj *obj))
{
int err;
err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
del_cb);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
static int __switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
int (*set_cb)(struct net_device *dev,
net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
const struct switchdev_attr *attr))
{
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
if (check_cb(dev)) {
err = set_cb(dev, port_attr_info->attr);
net: switchdev: don't set port_obj_info->handled true when -EOPNOTSUPP It's not true that switchdev_port_obj_notify() only inspects the ->handled field of "struct switchdev_notifier_port_obj_info" if call_switchdev_blocking_notifiers() returns 0 - there's a WARN_ON() triggering for a non-zero return combined with ->handled not being true. But the real problem here is that -EOPNOTSUPP is not being properly handled. The wrapper functions switchdev_handle_port_obj_add() et al change a return value of -EOPNOTSUPP to 0, and the treatment of ->handled in switchdev_port_obj_notify() seems to be designed to change that back to -EOPNOTSUPP in case nobody actually acted on the notifier (i.e., everybody returned -EOPNOTSUPP). Currently, as soon as some device down the stack passes the check_cb() check, ->handled gets set to true, which means that switchdev_port_obj_notify() cannot actually ever return -EOPNOTSUPP. This, for example, means that the detection of hardware offload support in the MRP code is broken: switchdev_port_obj_add() used by br_mrp_switchdev_send_ring_test() always returns 0, so since the MRP code thinks the generation of MRP test frames has been offloaded, no such frames are actually put on the wire. Similarly, br_mrp_switchdev_set_ring_role() also always returns 0, causing mrp->ring_role_offloaded to be set to 1. To fix this, continue to set ->handled true if any callback returns success or any error distinct from -EOPNOTSUPP. But if all the callbacks return -EOPNOTSUPP, make sure that ->handled stays false, so the logic in switchdev_port_obj_notify() can propagate that information. Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API") Fixes: f30f0601eb93 ("switchdev: Add helpers to aid traversal through lower devices") Reviewed-by: Petr Machata <petrm@nvidia.com> Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk> Link: https://lore.kernel.org/r/20210125124116.102928-1-rasmus.villemoes@prevas.dk Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-25 20:41:16 +08:00
if (err != -EOPNOTSUPP)
port_attr_info->handled = true;
return err;
}
/* Switch ports might be stacked under e.g. a LAG. Ignore the
* unsupported devices, another driver might be able to handle them. But
* propagate to the callers any hard errors.
*
* If the driver does its own bookkeeping of stacked ports, it's not
* necessary to go through this helper.
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
if (netif_is_bridge_master(lower_dev))
continue;
err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
check_cb, set_cb);
if (err && err != -EOPNOTSUPP)
return err;
}
return err;
}
int switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
int (*set_cb)(struct net_device *dev,
net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Acked-by: Linus Walleij <linus.walleij@linaro.org> Acked-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB Reviewed-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
const struct switchdev_attr *attr))
{
int err;
err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb,
set_cb);
if (err == -EOPNOTSUPP)
err = 0;
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);