From 793f40147e82cdedc80971fa7f5596d6ed1e555e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Oct 2015 19:40:48 +0200 Subject: [PATCH 1/8] switchdev: introduce switchdev deferred ops infrastructure Introduce infrastructure which will be used internally to defer ops. Note that the deferred ops are queued up and either are processed by scheduled work or explicitly by user calling deferred_process function. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 5 +++ net/switchdev/switchdev.c | 80 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 1ce70830357d..31b9038e07b0 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -167,6 +167,7 @@ switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) #ifdef CONFIG_NET_SWITCHDEV +void switchdev_deferred_process(void); int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, @@ -208,6 +209,10 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, #else +static inline void switchdev_deferred_process(void) +{ +} + static inline int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index b8aaf820ef65..5e64b591aff7 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -92,6 +93,85 @@ static void switchdev_trans_items_warn_destroy(struct net_device *dev, switchdev_trans_items_destroy(trans); } +static LIST_HEAD(deferred); +static DEFINE_SPINLOCK(deferred_lock); + +typedef void switchdev_deferred_func_t(struct net_device *dev, + const void *data); + +struct switchdev_deferred_item { + struct list_head list; + struct net_device *dev; + switchdev_deferred_func_t *func; + unsigned long data[0]; +}; + +static struct switchdev_deferred_item *switchdev_deferred_dequeue(void) +{ + struct switchdev_deferred_item *dfitem; + + spin_lock_bh(&deferred_lock); + if (list_empty(&deferred)) { + dfitem = NULL; + goto unlock; + } + dfitem = list_first_entry(&deferred, + struct switchdev_deferred_item, list); + list_del(&dfitem->list); +unlock: + spin_unlock_bh(&deferred_lock); + return dfitem; +} + +/** + * switchdev_deferred_process - Process ops in deferred queue + * + * Called to flush the ops currently queued in deferred ops queue. + * rtnl_lock must be held. + */ +void switchdev_deferred_process(void) +{ + struct switchdev_deferred_item *dfitem; + + ASSERT_RTNL(); + + while ((dfitem = switchdev_deferred_dequeue())) { + dfitem->func(dfitem->dev, dfitem->data); + dev_put(dfitem->dev); + kfree(dfitem); + } +} +EXPORT_SYMBOL_GPL(switchdev_deferred_process); + +static void switchdev_deferred_process_work(struct work_struct *work) +{ + rtnl_lock(); + switchdev_deferred_process(); + rtnl_unlock(); +} + +static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work); + +static int switchdev_deferred_enqueue(struct net_device *dev, + const void *data, size_t data_len, + switchdev_deferred_func_t *func) +{ + struct switchdev_deferred_item *dfitem; + + dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC); + if (!dfitem) + return -ENOMEM; + dfitem->dev = dev; + dfitem->func = func; + memcpy(dfitem->data, data, data_len); + dev_hold(dev); + spin_lock_bh(&deferred_lock); + list_add_tail(&dfitem->list, &deferred); + spin_unlock_bh(&deferred_lock); + schedule_work(&deferred_process_work); + return 0; +} + /** * switchdev_port_attr_get - Get port attribute * From f7fadf3047d005d17376da65aa9e5734f45a77d4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Oct 2015 19:40:49 +0200 Subject: [PATCH 2/8] switchdev: make struct switchdev_attr parameter const for attr_set calls Signed-off-by: Jiri Pirko Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 2 +- include/net/switchdev.h | 6 +++--- net/dsa/slave.c | 2 +- net/switchdev/switchdev.c | 7 ++++--- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index eafa907965ec..f0e820d2b8ec 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4374,7 +4374,7 @@ static int rocker_port_bridge_ageing_time(struct rocker_port *rocker_port, } static int rocker_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr, + const struct switchdev_attr *attr, struct switchdev_trans *trans) { struct rocker_port *rocker_port = netdev_priv(dev); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 31b9038e07b0..d1c7f901ea61 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -132,7 +132,7 @@ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, struct switchdev_attr *attr); int (*switchdev_port_attr_set)(struct net_device *dev, - struct switchdev_attr *attr, + const struct switchdev_attr *attr, struct switchdev_trans *trans); int (*switchdev_port_obj_add)(struct net_device *dev, const struct switchdev_obj *obj, @@ -171,7 +171,7 @@ void switchdev_deferred_process(void); int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr); int switchdev_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr); + const struct switchdev_attr *attr); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj); int switchdev_port_obj_del(struct net_device *dev, @@ -220,7 +220,7 @@ static inline int switchdev_port_attr_get(struct net_device *dev, } static inline int switchdev_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr) + const struct switchdev_attr *attr) { return -EOPNOTSUPP; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 43d7342e7527..84cd8639e37b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -453,7 +453,7 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) } static int dsa_slave_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr, + const struct switchdev_attr *attr, struct switchdev_trans *trans) { struct dsa_slave_priv *p = netdev_priv(dev); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 5e64b591aff7..23b4e5b347dc 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -215,7 +215,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) EXPORT_SYMBOL_GPL(switchdev_port_attr_get); static int __switchdev_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr, + const struct switchdev_attr *attr, struct switchdev_trans *trans) { const struct switchdev_ops *ops = dev->switchdev_ops; @@ -274,7 +274,7 @@ static void switchdev_port_attr_set_work(struct work_struct *work) } static int switchdev_port_attr_set_defer(struct net_device *dev, - struct switchdev_attr *attr) + const struct switchdev_attr *attr) { struct switchdev_attr_set_work *asw; @@ -303,7 +303,8 @@ static int switchdev_port_attr_set_defer(struct net_device *dev, * system is not left in a partially updated state due to * failure from driver/device. */ -int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) +int switchdev_port_attr_set(struct net_device *dev, + const struct switchdev_attr *attr) { struct switchdev_trans trans; int err; From 0bc05d585d381c30de3fdf955730df31593d2101 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Oct 2015 19:40:50 +0200 Subject: [PATCH 3/8] switchdev: allow caller to explicitly request attr_set as deferred Caller should know if he can call attr_set directly (when holding RTNL) or if he has to defer the att_set processing for later. This also allows drivers to sleep inside attr_set and report operation status back to switchdev core. Switchdev core then warns if status is not ok, instead of silent errors happening in drivers. Benefit from newly introduced switchdev deferred ops infrastructure. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 1 + net/bridge/br_stp.c | 3 +- net/switchdev/switchdev.c | 108 +++++++++++++++----------------------- 3 files changed, 46 insertions(+), 66 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d1c7f901ea61..f7de6f8e9a4c 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -17,6 +17,7 @@ #define SWITCHDEV_F_NO_RECURSE BIT(0) #define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1) +#define SWITCHDEV_F_DEFER BIT(2) struct switchdev_trans_item { struct list_head list; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index db6d243defb2..80c34d70218c 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -41,13 +41,14 @@ void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, + .flags = SWITCHDEV_F_DEFER, .u.stp_state = state, }; int err; p->state = state; err = switchdev_port_attr_set(p->dev, &attr); - if (err && err != -EOPNOTSUPP) + if (err) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 23b4e5b347dc..007b8f40df06 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -250,75 +250,12 @@ done: return err; } -struct switchdev_attr_set_work { - struct work_struct work; - struct net_device *dev; - struct switchdev_attr attr; -}; - -static void switchdev_port_attr_set_work(struct work_struct *work) -{ - struct switchdev_attr_set_work *asw = - container_of(work, struct switchdev_attr_set_work, work); - int err; - - rtnl_lock(); - err = switchdev_port_attr_set(asw->dev, &asw->attr); - if (err && err != -EOPNOTSUPP) - netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n", - err, asw->attr.id); - rtnl_unlock(); - - dev_put(asw->dev); - kfree(work); -} - -static int switchdev_port_attr_set_defer(struct net_device *dev, - const struct switchdev_attr *attr) -{ - struct switchdev_attr_set_work *asw; - - asw = kmalloc(sizeof(*asw), GFP_ATOMIC); - if (!asw) - return -ENOMEM; - - INIT_WORK(&asw->work, switchdev_port_attr_set_work); - - dev_hold(dev); - asw->dev = dev; - memcpy(&asw->attr, attr, sizeof(asw->attr)); - - schedule_work(&asw->work); - - return 0; -} - -/** - * switchdev_port_attr_set - Set port attribute - * - * @dev: port device - * @attr: attribute to set - * - * Use a 2-phase prepare-commit transaction model to ensure - * system is not left in a partially updated state due to - * failure from driver/device. - */ -int switchdev_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr) +static int switchdev_port_attr_set_now(struct net_device *dev, + const struct switchdev_attr *attr) { struct switchdev_trans trans; int err; - if (!rtnl_is_locked()) { - /* Running prepare-commit transaction across stacked - * devices requires nothing moves, so if rtnl_lock is - * not held, schedule a worker thread to hold rtnl_lock - * while setting attr. - */ - - return switchdev_port_attr_set_defer(dev, attr); - } - switchdev_trans_init(&trans); /* Phase I: prepare for attr set. Driver/device should fail @@ -355,6 +292,47 @@ int switchdev_port_attr_set(struct net_device *dev, return err; } + +static void switchdev_port_attr_set_deferred(struct net_device *dev, + const void *data) +{ + const struct switchdev_attr *attr = data; + int err; + + err = switchdev_port_attr_set_now(dev, attr); + if (err && err != -EOPNOTSUPP) + netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", + err, attr->id); +} + +static int switchdev_port_attr_set_defer(struct net_device *dev, + const struct switchdev_attr *attr) +{ + return switchdev_deferred_enqueue(dev, attr, sizeof(*attr), + switchdev_port_attr_set_deferred); +} + +/** + * switchdev_port_attr_set - Set port attribute + * + * @dev: port device + * @attr: attribute to set + * + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. + * + * rtnl_lock must be held and must not be in atomic section, + * in case SWITCHDEV_F_DEFER flag is not set. + */ +int switchdev_port_attr_set(struct net_device *dev, + const struct switchdev_attr *attr) +{ + if (attr->flags & SWITCHDEV_F_DEFER) + return switchdev_port_attr_set_defer(dev, attr); + ASSERT_RTNL(); + return switchdev_port_attr_set_now(dev, attr); +} EXPORT_SYMBOL_GPL(switchdev_port_attr_set); static int __switchdev_port_obj_add(struct net_device *dev, From 850d0cbc9171f63f0418afffb0d89a84db927851 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Oct 2015 19:40:51 +0200 Subject: [PATCH 4/8] switchdev: remove pointers from switchdev objects When object is used in deferred work, we cannot use pointers in switchdev object structures because the memory they point at may be already used by someone else. So rather do local copy of the value. Signed-off-by: Jiri Pirko Acked-by: Scott Feldman Reviewed-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 6 +++--- include/net/switchdev.h | 7 +++---- net/bridge/br_fdb.c | 2 +- net/dsa/slave.c | 2 +- net/switchdev/switchdev.c | 11 +++++++---- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index f0e820d2b8ec..2cd7435b2316 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4469,7 +4469,7 @@ static int rocker_port_obj_add(struct net_device *dev, fib4 = SWITCHDEV_OBJ_IPV4_FIB(obj); err = rocker_port_fib_ipv4(rocker_port, trans, htonl(fib4->dst), fib4->dst_len, - fib4->fi, fib4->tb_id, 0); + &fib4->fi, fib4->tb_id, 0); break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = rocker_port_fdb_add(rocker_port, trans, @@ -4541,7 +4541,7 @@ static int rocker_port_obj_del(struct net_device *dev, fib4 = SWITCHDEV_OBJ_IPV4_FIB(obj); err = rocker_port_fib_ipv4(rocker_port, NULL, htonl(fib4->dst), fib4->dst_len, - fib4->fi, fib4->tb_id, + &fib4->fi, fib4->tb_id, ROCKER_OP_FLAG_REMOVE); break; case SWITCHDEV_OBJ_ID_PORT_FDB: @@ -4571,7 +4571,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { if (found->key.rocker_port != rocker_port) continue; - fdb->addr = found->key.addr; + ether_addr_copy(fdb->addr, found->key.addr); fdb->ndm_state = NUD_REACHABLE; fdb->vid = rocker_port_vlan_to_vid(rocker_port, found->key.vlan_id); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index f7de6f8e9a4c..f8672d7f3ff2 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -14,6 +14,7 @@ #include #include #include +#include #define SWITCHDEV_F_NO_RECURSE BIT(0) #define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1) @@ -59,8 +60,6 @@ struct switchdev_attr { } u; }; -struct fib_info; - enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, @@ -88,7 +87,7 @@ struct switchdev_obj_ipv4_fib { struct switchdev_obj obj; u32 dst; int dst_len; - struct fib_info *fi; + struct fib_info fi; u8 tos; u8 type; u32 nlflags; @@ -101,7 +100,7 @@ struct switchdev_obj_ipv4_fib { /* SWITCHDEV_OBJ_ID_PORT_FDB */ struct switchdev_obj_port_fdb { struct switchdev_obj obj; - const unsigned char *addr; + unsigned char addr[ETH_ALEN]; u16 vid; u16 ndm_state; }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index f43ce05c66a6..f5e7da0fe93b 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -135,10 +135,10 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) { struct switchdev_obj_port_fdb fdb = { .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .addr = f->addr.addr, .vid = f->vlan_id, }; + ether_addr_copy(fdb.addr, f->addr.addr); switchdev_port_obj_del(f->dst->dev, &fdb.obj); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 84cd8639e37b..b0b8da0f5af8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -393,7 +393,7 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, if (ret < 0) break; - fdb->addr = addr; + ether_addr_copy(fdb->addr, addr); fdb->vid = vid; fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 007b8f40df06..5963d7ac1026 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -891,10 +892,10 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { struct switchdev_obj_port_fdb fdb = { .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .addr = addr, .vid = vid, }; + ether_addr_copy(fdb.addr, addr); return switchdev_port_obj_add(dev, &fdb.obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); @@ -916,10 +917,10 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], { struct switchdev_obj_port_fdb fdb = { .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .addr = addr, .vid = vid, }; + ether_addr_copy(fdb.addr, addr); return switchdev_port_obj_del(dev, &fdb.obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); @@ -1081,7 +1082,6 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, .dst = dst, .dst_len = dst_len, - .fi = fi, .tos = tos, .type = type, .nlflags = nlflags, @@ -1090,6 +1090,8 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, struct net_device *dev; int err = 0; + memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); + /* Don't offload route if using custom ip rules or if * IPv4 FIB offloading has been disabled completely. */ @@ -1133,7 +1135,6 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, .dst = dst, .dst_len = dst_len, - .fi = fi, .tos = tos, .type = type, .nlflags = 0, @@ -1142,6 +1143,8 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, struct net_device *dev; int err = 0; + memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); + if (!(fi->fib_flags & RTNH_F_OFFLOAD)) return 0; From 4d429c5ddc5128fccd3048059ae26bb39f0d8284 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Oct 2015 19:40:52 +0200 Subject: [PATCH 5/8] switchdev: introduce possibility to defer obj_add/del Similar to the attr usecase, the caller knows if he is holding RTNL and is in atomic section. So let the called to decide the correct call variant. This allows drivers to sleep inside their ops and wait for hw to get the operation status. Then the status is propagated into switchdev core. This avoids silent errors in drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 1 + net/switchdev/switchdev.c | 100 ++++++++++++++++++++++++++++++-------- 2 files changed, 81 insertions(+), 20 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index f8672d7f3ff2..bc865e244efe 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -69,6 +69,7 @@ enum switchdev_obj_id { struct switchdev_obj { enum switchdev_obj_id id; + u32 flags; }; /* SWITCHDEV_OBJ_ID_PORT_VLAN */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 5963d7ac1026..eac68c4e57ec 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -362,21 +362,8 @@ static int __switchdev_port_obj_add(struct net_device *dev, return err; } -/** - * switchdev_port_obj_add - Add port object - * - * @dev: port device - * @id: object ID - * @obj: object to add - * - * Use a 2-phase prepare-commit transaction model to ensure - * system is not left in a partially updated state due to - * failure from driver/device. - * - * rtnl_lock must be held. - */ -int switchdev_port_obj_add(struct net_device *dev, - const struct switchdev_obj *obj) +static int switchdev_port_obj_add_now(struct net_device *dev, + const struct switchdev_obj *obj) { struct switchdev_trans trans; int err; @@ -418,17 +405,52 @@ int switchdev_port_obj_add(struct net_device *dev, return err; } -EXPORT_SYMBOL_GPL(switchdev_port_obj_add); + +static void switchdev_port_obj_add_deferred(struct net_device *dev, + const void *data) +{ + const struct switchdev_obj *obj = data; + int err; + + err = switchdev_port_obj_add_now(dev, obj); + if (err && err != -EOPNOTSUPP) + netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", + err, obj->id); +} + +static int switchdev_port_obj_add_defer(struct net_device *dev, + const struct switchdev_obj *obj) +{ + return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + switchdev_port_obj_add_deferred); +} /** - * switchdev_port_obj_del - Delete port object + * switchdev_port_obj_add - Add port object * * @dev: port device * @id: object ID - * @obj: object to delete + * @obj: object to add + * + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. + * + * rtnl_lock must be held and must not be in atomic section, + * in case SWITCHDEV_F_DEFER flag is not set. */ -int switchdev_port_obj_del(struct net_device *dev, +int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj) +{ + if (obj->flags & SWITCHDEV_F_DEFER) + return switchdev_port_obj_add_defer(dev, obj); + ASSERT_RTNL(); + return switchdev_port_obj_add_now(dev, obj); +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_add); + +static int switchdev_port_obj_del_now(struct net_device *dev, + const struct switchdev_obj *obj) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; @@ -444,13 +466,51 @@ int switchdev_port_obj_del(struct net_device *dev, */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_obj_del(lower_dev, obj); + err = switchdev_port_obj_del_now(lower_dev, obj); if (err) break; } return err; } + +static void switchdev_port_obj_del_deferred(struct net_device *dev, + const void *data) +{ + const struct switchdev_obj *obj = data; + int err; + + err = switchdev_port_obj_del_now(dev, obj); + if (err && err != -EOPNOTSUPP) + netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", + err, obj->id); +} + +static int switchdev_port_obj_del_defer(struct net_device *dev, + const struct switchdev_obj *obj) +{ + return switchdev_deferred_enqueue(dev, obj, sizeof(*obj), + switchdev_port_obj_del_deferred); +} + +/** + * switchdev_port_obj_del - Delete port object + * + * @dev: port device + * @id: object ID + * @obj: object to delete + * + * rtnl_lock must be held and must not be in atomic section, + * in case SWITCHDEV_F_DEFER flag is not set. + */ +int switchdev_port_obj_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + if (obj->flags & SWITCHDEV_F_DEFER) + return switchdev_port_obj_del_defer(dev, obj); + ASSERT_RTNL(); + return switchdev_port_obj_del_now(dev, obj); +} EXPORT_SYMBOL_GPL(switchdev_port_obj_del); /** From 56607386e80cc7ce923592e115a3492485b47c72 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Oct 2015 19:40:53 +0200 Subject: [PATCH 6/8] bridge: defer switchdev fdb del call in fdb_del_external_learn Since spinlock is held here, defer the switchdev operation. Also, ensure that defered switchdev ops are processed before port master device is unlinked. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 5 ++++- net/bridge/br_if.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index f5e7da0fe93b..c88bd8e8937e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -134,7 +134,10 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) { struct switchdev_obj_port_fdb fdb = { - .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, + .obj = { + .id = SWITCHDEV_OBJ_ID_PORT_FDB, + .flags = SWITCHDEV_F_DEFER, + }, .vid = f->vlan_id, }; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 45e4757c6fd2..ec02f5869a78 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "br_private.h" @@ -250,6 +251,8 @@ static void del_nbp(struct net_bridge_port *p) nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 0, 1); + switchdev_deferred_process(); + nbp_update_port_count(br); netdev_upper_dev_unlink(dev, br->dev); From d33eeb645d59ffd14bbc6db977c3783af42dd700 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Oct 2015 19:40:54 +0200 Subject: [PATCH 7/8] rocker: remove nowait from switchdev callbacks. No need to avoid sleeping in switchdev callbacks now, as the switchdev core allows it. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 2cd7435b2316..32a80d2df7ff 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3672,7 +3672,7 @@ static int rocker_port_fdb_flush(struct rocker_port *rocker_port, rocker_port->stp_state == BR_STATE_FORWARDING) return 0; - flags |= ROCKER_OP_FLAG_REMOVE; + flags |= ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE; spin_lock_irqsave(&rocker->fdb_tbl_lock, lock_flags); @@ -4382,8 +4382,7 @@ static int rocker_port_attr_set(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - err = rocker_port_stp_update(rocker_port, trans, - ROCKER_OP_FLAG_NOWAIT, + err = rocker_port_stp_update(rocker_port, trans, 0, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: @@ -4517,7 +4516,7 @@ static int rocker_port_fdb_del(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb) { __be16 vlan_id = rocker_port_vid_to_vlan(rocker_port, fdb->vid, NULL); - int flags = ROCKER_OP_FLAG_NOWAIT | ROCKER_OP_FLAG_REMOVE; + int flags = ROCKER_OP_FLAG_REMOVE; if (!rocker_port_is_bridged(rocker_port)) return -EINVAL; From 771acac2ffa5957b91e881908cd4c9657978a209 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Oct 2015 19:40:55 +0200 Subject: [PATCH 8/8] switchdev: assert rtnl mutex when going over lower netdevs netdev_for_each_lower_dev has to be called with rtnl mutex held. So better enforce it in switchdev functions. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index eac68c4e57ec..73e3895175cf 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -520,6 +520,8 @@ EXPORT_SYMBOL_GPL(switchdev_port_obj_del); * @id: object ID * @obj: object to dump * @cb: function to call with a filled object + * + * rtnl_lock must be held. */ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, switchdev_obj_dump_cb_t *cb) @@ -529,6 +531,8 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, struct list_head *iter; int err = -EOPNOTSUPP; + ASSERT_RTNL(); + if (ops && ops->switchdev_port_obj_dump) return ops->switchdev_port_obj_dump(dev, obj, cb); @@ -1097,6 +1101,8 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) struct net_device *dev = NULL; int nhsel; + ASSERT_RTNL(); + /* For this route, all nexthop devs must be on the same switch. */ for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { @@ -1327,10 +1333,11 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, u32 mark = dev->ifindex; u32 reset_mark = 0; - if (group_dev && joining) { - mark = switchdev_port_fwd_mark_get(dev, group_dev); - } else if (group_dev && !joining) { - if (dev->offload_fwd_mark == mark) + if (group_dev) { + ASSERT_RTNL(); + if (joining) + mark = switchdev_port_fwd_mark_get(dev, group_dev); + else if (dev->offload_fwd_mark == mark) /* Ohoh, this port was the mark reference port, * but it's leaving the group, so reset the * mark for the remaining ports in the group.