2019-05-27 14:55:01 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2017-05-20 05:00:44 +08:00
|
|
|
/*
|
|
|
|
* Handling of a single switch port
|
|
|
|
*
|
|
|
|
* Copyright (c) 2017 Savoir-faire Linux Inc.
|
|
|
|
* Vivien Didelot <vivien.didelot@savoirfairelinux.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/if_bridge.h>
|
2017-05-20 05:00:45 +08:00
|
|
|
#include <linux/notifier.h>
|
2017-10-26 22:50:07 +08:00
|
|
|
#include <linux/of_mdio.h>
|
|
|
|
#include <linux/of_net.h>
|
2017-05-20 05:00:44 +08:00
|
|
|
|
|
|
|
#include "dsa_priv.h"
|
|
|
|
|
2021-01-29 09:00:04 +08:00
|
|
|
/**
|
|
|
|
* dsa_port_notify - Notify the switching fabric of changes to a port
|
|
|
|
* @dp: port on which change occurred
|
|
|
|
* @e: event, must be of type DSA_NOTIFIER_*
|
|
|
|
* @v: event-specific value.
|
|
|
|
*
|
|
|
|
* Notify all switches in the DSA tree that this port's switch belongs to,
|
|
|
|
* including this switch itself, of an event. Allows the other switches to
|
|
|
|
* reconfigure themselves for cross-chip operations. Can also be used to
|
|
|
|
* reconfigure ports without net_devices (CPU ports, DSA links) whenever
|
|
|
|
* a user port's state changes.
|
|
|
|
*/
|
2017-11-10 06:11:01 +08:00
|
|
|
static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
|
2017-05-20 05:00:45 +08:00
|
|
|
{
|
2021-01-29 09:00:04 +08:00
|
|
|
return dsa_tree_notify(dp->ds->dst, e, v);
|
2017-05-20 05:00:45 +08:00
|
|
|
}
|
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
int dsa_port_set_state(struct dsa_port *dp, u8 state)
|
2017-05-20 05:00:44 +08:00
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
int port = dp->index;
|
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
if (!ds->ops->port_stp_state_set)
|
|
|
|
return -EOPNOTSUPP;
|
2017-05-20 05:00:44 +08:00
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
ds->ops->port_stp_state_set(ds, port, state);
|
2017-05-20 05:00:44 +08:00
|
|
|
|
|
|
|
if (ds->ops->port_fast_age) {
|
|
|
|
/* Fast age FDB entries or flush appropriate forwarding database
|
|
|
|
* for the given port, if we are moving it from Learning or
|
|
|
|
* Forwarding state, to Disabled or Blocking or Listening state.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if ((dp->stp_state == BR_STATE_LEARNING ||
|
|
|
|
dp->stp_state == BR_STATE_FORWARDING) &&
|
|
|
|
(state == BR_STATE_DISABLED ||
|
|
|
|
state == BR_STATE_BLOCKING ||
|
|
|
|
state == BR_STATE_LISTENING))
|
|
|
|
ds->ops->port_fast_age(ds, port);
|
|
|
|
}
|
|
|
|
|
|
|
|
dp->stp_state = state;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-09-23 07:01:56 +08:00
|
|
|
static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
|
2017-05-20 05:00:44 +08:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
err = dsa_port_set_state(dp, state);
|
2017-05-20 05:00:44 +08:00
|
|
|
if (err)
|
|
|
|
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
|
|
|
|
}
|
2017-05-20 05:00:45 +08:00
|
|
|
|
2020-03-03 23:01:46 +08:00
|
|
|
int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
|
2017-09-23 07:01:56 +08:00
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
int port = dp->index;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (ds->ops->port_enable) {
|
|
|
|
err = ds->ops->port_enable(ds, port, phy);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-02-20 18:32:52 +08:00
|
|
|
if (!dp->bridge_dev)
|
|
|
|
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
|
2017-09-23 07:01:56 +08:00
|
|
|
|
2020-03-03 23:01:46 +08:00
|
|
|
if (dp->pl)
|
|
|
|
phylink_start(dp->pl);
|
|
|
|
|
2017-09-23 07:01:56 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-03 23:01:46 +08:00
|
|
|
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
rtnl_lock();
|
|
|
|
err = dsa_port_enable_rt(dp, phy);
|
|
|
|
rtnl_unlock();
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
void dsa_port_disable_rt(struct dsa_port *dp)
|
2017-09-23 07:01:56 +08:00
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
int port = dp->index;
|
|
|
|
|
2020-03-03 23:01:46 +08:00
|
|
|
if (dp->pl)
|
|
|
|
phylink_stop(dp->pl);
|
|
|
|
|
2019-02-20 18:32:52 +08:00
|
|
|
if (!dp->bridge_dev)
|
|
|
|
dsa_port_set_state_now(dp, BR_STATE_DISABLED);
|
2017-09-23 07:01:56 +08:00
|
|
|
|
|
|
|
if (ds->ops->port_disable)
|
2019-02-25 03:44:43 +08:00
|
|
|
ds->ops->port_disable(ds, port);
|
2017-09-23 07:01:56 +08:00
|
|
|
}
|
|
|
|
|
2020-03-03 23:01:46 +08:00
|
|
|
void dsa_port_disable(struct dsa_port *dp)
|
|
|
|
{
|
|
|
|
rtnl_lock();
|
|
|
|
dsa_port_disable_rt(dp);
|
|
|
|
rtnl_unlock();
|
|
|
|
}
|
|
|
|
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
static void dsa_port_change_brport_flags(struct dsa_port *dp,
|
|
|
|
bool bridge_offload)
|
|
|
|
{
|
net: switchdev: pass flags and mask to both {PRE_,}BRIDGE_FLAGS attributes
This switchdev attribute offers a counterproductive API for a driver
writer, because although br_switchdev_set_port_flag gets passed a
"flags" and a "mask", those are passed piecemeal to the driver, so while
the PRE_BRIDGE_FLAGS listener knows what changed because it has the
"mask", the BRIDGE_FLAGS listener doesn't, because it only has the final
value. But certain drivers can offload only certain combinations of
settings, like for example they cannot change unicast flooding
independently of multicast flooding - they must be both on or both off.
The way the information is passed to switchdev makes drivers not
expressive enough, and unable to reject this request ahead of time, in
the PRE_BRIDGE_FLAGS notifier, so they are forced to reject it during
the deferred BRIDGE_FLAGS attribute, where the rejection is currently
ignored.
This patch also changes drivers to make use of the "mask" field for edge
detection when possible.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:55 +08:00
|
|
|
struct switchdev_brport_flags flags;
|
|
|
|
int flag;
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
|
net: switchdev: pass flags and mask to both {PRE_,}BRIDGE_FLAGS attributes
This switchdev attribute offers a counterproductive API for a driver
writer, because although br_switchdev_set_port_flag gets passed a
"flags" and a "mask", those are passed piecemeal to the driver, so while
the PRE_BRIDGE_FLAGS listener knows what changed because it has the
"mask", the BRIDGE_FLAGS listener doesn't, because it only has the final
value. But certain drivers can offload only certain combinations of
settings, like for example they cannot change unicast flooding
independently of multicast flooding - they must be both on or both off.
The way the information is passed to switchdev makes drivers not
expressive enough, and unable to reject this request ahead of time, in
the PRE_BRIDGE_FLAGS notifier, so they are forced to reject it during
the deferred BRIDGE_FLAGS attribute, where the rejection is currently
ignored.
This patch also changes drivers to make use of the "mask" field for edge
detection when possible.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:55 +08:00
|
|
|
flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
if (bridge_offload)
|
net: switchdev: pass flags and mask to both {PRE_,}BRIDGE_FLAGS attributes
This switchdev attribute offers a counterproductive API for a driver
writer, because although br_switchdev_set_port_flag gets passed a
"flags" and a "mask", those are passed piecemeal to the driver, so while
the PRE_BRIDGE_FLAGS listener knows what changed because it has the
"mask", the BRIDGE_FLAGS listener doesn't, because it only has the final
value. But certain drivers can offload only certain combinations of
settings, like for example they cannot change unicast flooding
independently of multicast flooding - they must be both on or both off.
The way the information is passed to switchdev makes drivers not
expressive enough, and unable to reject this request ahead of time, in
the PRE_BRIDGE_FLAGS notifier, so they are forced to reject it during
the deferred BRIDGE_FLAGS attribute, where the rejection is currently
ignored.
This patch also changes drivers to make use of the "mask" field for edge
detection when possible.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:55 +08:00
|
|
|
flags.val = flags.mask;
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
else
|
net: switchdev: pass flags and mask to both {PRE_,}BRIDGE_FLAGS attributes
This switchdev attribute offers a counterproductive API for a driver
writer, because although br_switchdev_set_port_flag gets passed a
"flags" and a "mask", those are passed piecemeal to the driver, so while
the PRE_BRIDGE_FLAGS listener knows what changed because it has the
"mask", the BRIDGE_FLAGS listener doesn't, because it only has the final
value. But certain drivers can offload only certain combinations of
settings, like for example they cannot change unicast flooding
independently of multicast flooding - they must be both on or both off.
The way the information is passed to switchdev makes drivers not
expressive enough, and unable to reject this request ahead of time, in
the PRE_BRIDGE_FLAGS notifier, so they are forced to reject it during
the deferred BRIDGE_FLAGS attribute, where the rejection is currently
ignored.
This patch also changes drivers to make use of the "mask" field for edge
detection when possible.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:55 +08:00
|
|
|
flags.val = flags.mask & ~BR_LEARNING;
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
|
net: switchdev: pass flags and mask to both {PRE_,}BRIDGE_FLAGS attributes
This switchdev attribute offers a counterproductive API for a driver
writer, because although br_switchdev_set_port_flag gets passed a
"flags" and a "mask", those are passed piecemeal to the driver, so while
the PRE_BRIDGE_FLAGS listener knows what changed because it has the
"mask", the BRIDGE_FLAGS listener doesn't, because it only has the final
value. But certain drivers can offload only certain combinations of
settings, like for example they cannot change unicast flooding
independently of multicast flooding - they must be both on or both off.
The way the information is passed to switchdev makes drivers not
expressive enough, and unable to reject this request ahead of time, in
the PRE_BRIDGE_FLAGS notifier, so they are forced to reject it during
the deferred BRIDGE_FLAGS attribute, where the rejection is currently
ignored.
This patch also changes drivers to make use of the "mask" field for edge
detection when possible.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:55 +08:00
|
|
|
for_each_set_bit(flag, &flags.mask, 32) {
|
|
|
|
struct switchdev_brport_flags tmp;
|
|
|
|
|
|
|
|
tmp.val = flags.val & BIT(flag);
|
|
|
|
tmp.mask = BIT(flag);
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
dsa_port_bridge_flags(dp, tmp, NULL);
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-20 05:00:45 +08:00
|
|
|
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_bridge_info info = {
|
net: dsa: permit cross-chip bridging between all trees in the system
One way of utilizing DSA is by cascading switches which do not all have
compatible taggers. Consider the following real-life topology:
+---------------------------------------------------------------+
| LS1028A |
| +------------------------------+ |
| | DSA master for Felix | |
| |(internal ENETC port 2: eno2))| |
| +------------+------------------------------+-------------+ |
| | Felix embedded L2 switch | |
| | | |
| | +--------------+ +--------------+ +--------------+ | |
| | |DSA master for| |DSA master for| |DSA master for| | |
| | | SJA1105 1 | | SJA1105 2 | | SJA1105 3 | | |
| | |(Felix port 1)| |(Felix port 2)| |(Felix port 3)| | |
+--+-+--------------+---+--------------+---+--------------+--+--+
+-----------------------+ +-----------------------+ +-----------------------+
| SJA1105 switch 1 | | SJA1105 switch 2 | | SJA1105 switch 3 |
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
|sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3| |sw3p0|sw3p1|sw3p2|sw3p3|
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
The above can be described in the device tree as follows (obviously not
complete):
mscc_felix {
dsa,member = <0 0>;
ports {
port@4 {
ethernet = <&enetc_port2>;
};
};
};
sja1105_switch1 {
dsa,member = <1 1>;
ports {
port@4 {
ethernet = <&mscc_felix_port1>;
};
};
};
sja1105_switch2 {
dsa,member = <2 2>;
ports {
port@4 {
ethernet = <&mscc_felix_port2>;
};
};
};
sja1105_switch3 {
dsa,member = <3 3>;
ports {
port@4 {
ethernet = <&mscc_felix_port3>;
};
};
};
Basically we instantiate one DSA switch tree for every hardware switch
in the system, but we still give them globally unique switch IDs (will
come back to that later). Having 3 disjoint switch trees makes the
tagger drivers "just work", because net devices are registered for the
3 Felix DSA master ports, and they are also DSA slave ports to the ENETC
port. So packets received on the ENETC port are stripped of their
stacked DSA tags one by one.
Currently, hardware bridging between ports on the same sja1105 chip is
possible, but switching between sja1105 ports on different chips is
handled by the software bridge. This is fine, but we can do better.
In fact, the dsa_8021q tag used by sja1105 is compatible with cascading.
In other words, a sja1105 switch can correctly parse and route a packet
containing a dsa_8021q tag. So if we could enable hardware bridging on
the Felix DSA master ports, cross-chip bridging could be completely
offloaded.
Such as system would be used as follows:
ip link add dev br0 type bridge && ip link set dev br0 up
for port in sw0p0 sw0p1 sw0p2 sw0p3 \
sw1p0 sw1p1 sw1p2 sw1p3 \
sw2p0 sw2p1 sw2p2 sw2p3; do
ip link set dev $port master br0
done
The above makes switching between ports on the same row be performed in
hardware, and between ports on different rows in software. Now assume
the Felix switch ports are called swp0, swp1, swp2. By running the
following extra commands:
ip link add dev br1 type bridge && ip link set dev br1 up
for port in swp0 swp1 swp2; do
ip link set dev $port master br1
done
the CPU no longer sees packets which traverse sja1105 switch boundaries
and can be forwarded directly by Felix. The br1 bridge would not be used
for any sort of traffic termination.
For this to work, we need to give drivers an opportunity to listen for
bridging events on DSA trees other than their own, and pass that other
tree index as argument. I have made the assumption, for the moment, that
the other existing DSA notifiers don't need to be broadcast to other
trees. That assumption might turn out to be incorrect. But in the
meantime, introduce a dsa_broadcast function, similar in purpose to
dsa_port_notify, which is used only by the bridging notifiers.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-05-11 00:37:41 +08:00
|
|
|
.tree_index = dp->ds->dst->index,
|
2017-05-20 05:00:45 +08:00
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.br = br,
|
|
|
|
};
|
|
|
|
int err;
|
|
|
|
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
/* Notify the port driver to set its configurable flags in a way that
|
|
|
|
* matches the initial settings of a bridge port.
|
|
|
|
*/
|
|
|
|
dsa_port_change_brport_flags(dp, true);
|
net: dsa: enable flooding for bridge ports
Switches work by learning the MAC address for each attached station by
monitoring traffic from each station. When a station sends a packet,
the switch records which port the MAC address is connected to.
With IPv4 networking, before communication commences with a neighbour,
an ARP packet is broadcasted to all stations asking for the MAC address
corresponding with the IPv4. The desired station responds with an ARP
reply, and the ARP reply causes the switch to learn which port the
station is connected to.
With IPv6 networking, the situation is rather different. Rather than
broadcasting ARP packets, a "neighbour solicitation" is multicasted
rather than broadcasted. This multicast needs to reach the intended
station in order for the neighbour to be discovered.
Once a neighbour has been discovered, and entered into the sending
stations neighbour cache, communication can restart at a point later
without sending a new neighbour solicitation, even if the entry in
the neighbour cache is marked as stale. This can be after the MAC
address has expired from the forwarding cache of the DSA switch -
when that occurs, there is a long pause in communication.
Our DSA implementation for mv88e6xxx switches disables flooding of
multicast and unicast frames for bridged ports. As per the above
description, this is fine for IPv4 networking, since the broadcasted
ARP queries will be sent to and received by all stations on the same
network. However, this breaks IPv6 very badly - blocking neighbour
solicitations and later causing connections to stall.
The defaults that the Linux bridge code expect from bridges are for
unknown unicast and unknown multicast frames to be flooded to all ports
on the bridge, which is at odds to the defaults adopted by our DSA
implementation for mv88e6xxx switches.
This commit enables by default flooding of both unknown unicast and
unknown multicast frames whenever a port is added to a bridge, and
disables the flooding when a port leaves the bridge. This means that
mv88e6xxx DSA switches now behave as per the bridge(8) man page, and
IPv6 works flawlessly through such a switch.
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-21 07:35:06 +08:00
|
|
|
|
|
|
|
/* Here the interface is already bridged. Reflect the current
|
|
|
|
* configuration so that drivers can program their chips accordingly.
|
2017-05-20 05:00:45 +08:00
|
|
|
*/
|
|
|
|
dp->bridge_dev = br;
|
|
|
|
|
net: dsa: permit cross-chip bridging between all trees in the system
One way of utilizing DSA is by cascading switches which do not all have
compatible taggers. Consider the following real-life topology:
+---------------------------------------------------------------+
| LS1028A |
| +------------------------------+ |
| | DSA master for Felix | |
| |(internal ENETC port 2: eno2))| |
| +------------+------------------------------+-------------+ |
| | Felix embedded L2 switch | |
| | | |
| | +--------------+ +--------------+ +--------------+ | |
| | |DSA master for| |DSA master for| |DSA master for| | |
| | | SJA1105 1 | | SJA1105 2 | | SJA1105 3 | | |
| | |(Felix port 1)| |(Felix port 2)| |(Felix port 3)| | |
+--+-+--------------+---+--------------+---+--------------+--+--+
+-----------------------+ +-----------------------+ +-----------------------+
| SJA1105 switch 1 | | SJA1105 switch 2 | | SJA1105 switch 3 |
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
|sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3| |sw3p0|sw3p1|sw3p2|sw3p3|
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
The above can be described in the device tree as follows (obviously not
complete):
mscc_felix {
dsa,member = <0 0>;
ports {
port@4 {
ethernet = <&enetc_port2>;
};
};
};
sja1105_switch1 {
dsa,member = <1 1>;
ports {
port@4 {
ethernet = <&mscc_felix_port1>;
};
};
};
sja1105_switch2 {
dsa,member = <2 2>;
ports {
port@4 {
ethernet = <&mscc_felix_port2>;
};
};
};
sja1105_switch3 {
dsa,member = <3 3>;
ports {
port@4 {
ethernet = <&mscc_felix_port3>;
};
};
};
Basically we instantiate one DSA switch tree for every hardware switch
in the system, but we still give them globally unique switch IDs (will
come back to that later). Having 3 disjoint switch trees makes the
tagger drivers "just work", because net devices are registered for the
3 Felix DSA master ports, and they are also DSA slave ports to the ENETC
port. So packets received on the ENETC port are stripped of their
stacked DSA tags one by one.
Currently, hardware bridging between ports on the same sja1105 chip is
possible, but switching between sja1105 ports on different chips is
handled by the software bridge. This is fine, but we can do better.
In fact, the dsa_8021q tag used by sja1105 is compatible with cascading.
In other words, a sja1105 switch can correctly parse and route a packet
containing a dsa_8021q tag. So if we could enable hardware bridging on
the Felix DSA master ports, cross-chip bridging could be completely
offloaded.
Such as system would be used as follows:
ip link add dev br0 type bridge && ip link set dev br0 up
for port in sw0p0 sw0p1 sw0p2 sw0p3 \
sw1p0 sw1p1 sw1p2 sw1p3 \
sw2p0 sw2p1 sw2p2 sw2p3; do
ip link set dev $port master br0
done
The above makes switching between ports on the same row be performed in
hardware, and between ports on different rows in software. Now assume
the Felix switch ports are called swp0, swp1, swp2. By running the
following extra commands:
ip link add dev br1 type bridge && ip link set dev br1 up
for port in swp0 swp1 swp2; do
ip link set dev $port master br1
done
the CPU no longer sees packets which traverse sja1105 switch boundaries
and can be forwarded directly by Felix. The br1 bridge would not be used
for any sort of traffic termination.
For this to work, we need to give drivers an opportunity to listen for
bridging events on DSA trees other than their own, and pass that other
tree index as argument. I have made the assumption, for the moment, that
the other existing DSA notifiers don't need to be broadcast to other
trees. That assumption might turn out to be incorrect. But in the
meantime, introduce a dsa_broadcast function, similar in purpose to
dsa_port_notify, which is used only by the bridging notifiers.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-05-11 00:37:41 +08:00
|
|
|
err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
|
2017-05-20 05:00:45 +08:00
|
|
|
|
|
|
|
/* The bridging is rolled back on error */
|
net: dsa: enable flooding for bridge ports
Switches work by learning the MAC address for each attached station by
monitoring traffic from each station. When a station sends a packet,
the switch records which port the MAC address is connected to.
With IPv4 networking, before communication commences with a neighbour,
an ARP packet is broadcasted to all stations asking for the MAC address
corresponding with the IPv4. The desired station responds with an ARP
reply, and the ARP reply causes the switch to learn which port the
station is connected to.
With IPv6 networking, the situation is rather different. Rather than
broadcasting ARP packets, a "neighbour solicitation" is multicasted
rather than broadcasted. This multicast needs to reach the intended
station in order for the neighbour to be discovered.
Once a neighbour has been discovered, and entered into the sending
stations neighbour cache, communication can restart at a point later
without sending a new neighbour solicitation, even if the entry in
the neighbour cache is marked as stale. This can be after the MAC
address has expired from the forwarding cache of the DSA switch -
when that occurs, there is a long pause in communication.
Our DSA implementation for mv88e6xxx switches disables flooding of
multicast and unicast frames for bridged ports. As per the above
description, this is fine for IPv4 networking, since the broadcasted
ARP queries will be sent to and received by all stations on the same
network. However, this breaks IPv6 very badly - blocking neighbour
solicitations and later causing connections to stall.
The defaults that the Linux bridge code expect from bridges are for
unknown unicast and unknown multicast frames to be flooded to all ports
on the bridge, which is at odds to the defaults adopted by our DSA
implementation for mv88e6xxx switches.
This commit enables by default flooding of both unknown unicast and
unknown multicast frames whenever a port is added to a bridge, and
disables the flooding when a port leaves the bridge. This means that
mv88e6xxx DSA switches now behave as per the bridge(8) man page, and
IPv6 works flawlessly through such a switch.
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-21 07:35:06 +08:00
|
|
|
if (err) {
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
dsa_port_change_brport_flags(dp, false);
|
2017-05-20 05:00:45 +08:00
|
|
|
dp->bridge_dev = NULL;
|
net: dsa: enable flooding for bridge ports
Switches work by learning the MAC address for each attached station by
monitoring traffic from each station. When a station sends a packet,
the switch records which port the MAC address is connected to.
With IPv4 networking, before communication commences with a neighbour,
an ARP packet is broadcasted to all stations asking for the MAC address
corresponding with the IPv4. The desired station responds with an ARP
reply, and the ARP reply causes the switch to learn which port the
station is connected to.
With IPv6 networking, the situation is rather different. Rather than
broadcasting ARP packets, a "neighbour solicitation" is multicasted
rather than broadcasted. This multicast needs to reach the intended
station in order for the neighbour to be discovered.
Once a neighbour has been discovered, and entered into the sending
stations neighbour cache, communication can restart at a point later
without sending a new neighbour solicitation, even if the entry in
the neighbour cache is marked as stale. This can be after the MAC
address has expired from the forwarding cache of the DSA switch -
when that occurs, there is a long pause in communication.
Our DSA implementation for mv88e6xxx switches disables flooding of
multicast and unicast frames for bridged ports. As per the above
description, this is fine for IPv4 networking, since the broadcasted
ARP queries will be sent to and received by all stations on the same
network. However, this breaks IPv6 very badly - blocking neighbour
solicitations and later causing connections to stall.
The defaults that the Linux bridge code expect from bridges are for
unknown unicast and unknown multicast frames to be flooded to all ports
on the bridge, which is at odds to the defaults adopted by our DSA
implementation for mv88e6xxx switches.
This commit enables by default flooding of both unknown unicast and
unknown multicast frames whenever a port is added to a bridge, and
disables the flooding when a port leaves the bridge. This means that
mv88e6xxx DSA switches now behave as per the bridge(8) man page, and
IPv6 works flawlessly through such a switch.
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-21 07:35:06 +08:00
|
|
|
}
|
2017-05-20 05:00:45 +08:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_bridge_info info = {
|
net: dsa: permit cross-chip bridging between all trees in the system
One way of utilizing DSA is by cascading switches which do not all have
compatible taggers. Consider the following real-life topology:
+---------------------------------------------------------------+
| LS1028A |
| +------------------------------+ |
| | DSA master for Felix | |
| |(internal ENETC port 2: eno2))| |
| +------------+------------------------------+-------------+ |
| | Felix embedded L2 switch | |
| | | |
| | +--------------+ +--------------+ +--------------+ | |
| | |DSA master for| |DSA master for| |DSA master for| | |
| | | SJA1105 1 | | SJA1105 2 | | SJA1105 3 | | |
| | |(Felix port 1)| |(Felix port 2)| |(Felix port 3)| | |
+--+-+--------------+---+--------------+---+--------------+--+--+
+-----------------------+ +-----------------------+ +-----------------------+
| SJA1105 switch 1 | | SJA1105 switch 2 | | SJA1105 switch 3 |
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
|sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3| |sw3p0|sw3p1|sw3p2|sw3p3|
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
The above can be described in the device tree as follows (obviously not
complete):
mscc_felix {
dsa,member = <0 0>;
ports {
port@4 {
ethernet = <&enetc_port2>;
};
};
};
sja1105_switch1 {
dsa,member = <1 1>;
ports {
port@4 {
ethernet = <&mscc_felix_port1>;
};
};
};
sja1105_switch2 {
dsa,member = <2 2>;
ports {
port@4 {
ethernet = <&mscc_felix_port2>;
};
};
};
sja1105_switch3 {
dsa,member = <3 3>;
ports {
port@4 {
ethernet = <&mscc_felix_port3>;
};
};
};
Basically we instantiate one DSA switch tree for every hardware switch
in the system, but we still give them globally unique switch IDs (will
come back to that later). Having 3 disjoint switch trees makes the
tagger drivers "just work", because net devices are registered for the
3 Felix DSA master ports, and they are also DSA slave ports to the ENETC
port. So packets received on the ENETC port are stripped of their
stacked DSA tags one by one.
Currently, hardware bridging between ports on the same sja1105 chip is
possible, but switching between sja1105 ports on different chips is
handled by the software bridge. This is fine, but we can do better.
In fact, the dsa_8021q tag used by sja1105 is compatible with cascading.
In other words, a sja1105 switch can correctly parse and route a packet
containing a dsa_8021q tag. So if we could enable hardware bridging on
the Felix DSA master ports, cross-chip bridging could be completely
offloaded.
Such as system would be used as follows:
ip link add dev br0 type bridge && ip link set dev br0 up
for port in sw0p0 sw0p1 sw0p2 sw0p3 \
sw1p0 sw1p1 sw1p2 sw1p3 \
sw2p0 sw2p1 sw2p2 sw2p3; do
ip link set dev $port master br0
done
The above makes switching between ports on the same row be performed in
hardware, and between ports on different rows in software. Now assume
the Felix switch ports are called swp0, swp1, swp2. By running the
following extra commands:
ip link add dev br1 type bridge && ip link set dev br1 up
for port in swp0 swp1 swp2; do
ip link set dev $port master br1
done
the CPU no longer sees packets which traverse sja1105 switch boundaries
and can be forwarded directly by Felix. The br1 bridge would not be used
for any sort of traffic termination.
For this to work, we need to give drivers an opportunity to listen for
bridging events on DSA trees other than their own, and pass that other
tree index as argument. I have made the assumption, for the moment, that
the other existing DSA notifiers don't need to be broadcast to other
trees. That assumption might turn out to be incorrect. But in the
meantime, introduce a dsa_broadcast function, similar in purpose to
dsa_port_notify, which is used only by the bridging notifiers.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-05-11 00:37:41 +08:00
|
|
|
.tree_index = dp->ds->dst->index,
|
2017-05-20 05:00:45 +08:00
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.br = br,
|
|
|
|
};
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* Here the port is already unbridged. Reflect the current configuration
|
|
|
|
* so that drivers can program their chips accordingly.
|
|
|
|
*/
|
|
|
|
dp->bridge_dev = NULL;
|
|
|
|
|
net: dsa: permit cross-chip bridging between all trees in the system
One way of utilizing DSA is by cascading switches which do not all have
compatible taggers. Consider the following real-life topology:
+---------------------------------------------------------------+
| LS1028A |
| +------------------------------+ |
| | DSA master for Felix | |
| |(internal ENETC port 2: eno2))| |
| +------------+------------------------------+-------------+ |
| | Felix embedded L2 switch | |
| | | |
| | +--------------+ +--------------+ +--------------+ | |
| | |DSA master for| |DSA master for| |DSA master for| | |
| | | SJA1105 1 | | SJA1105 2 | | SJA1105 3 | | |
| | |(Felix port 1)| |(Felix port 2)| |(Felix port 3)| | |
+--+-+--------------+---+--------------+---+--------------+--+--+
+-----------------------+ +-----------------------+ +-----------------------+
| SJA1105 switch 1 | | SJA1105 switch 2 | | SJA1105 switch 3 |
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
|sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3| |sw3p0|sw3p1|sw3p2|sw3p3|
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
The above can be described in the device tree as follows (obviously not
complete):
mscc_felix {
dsa,member = <0 0>;
ports {
port@4 {
ethernet = <&enetc_port2>;
};
};
};
sja1105_switch1 {
dsa,member = <1 1>;
ports {
port@4 {
ethernet = <&mscc_felix_port1>;
};
};
};
sja1105_switch2 {
dsa,member = <2 2>;
ports {
port@4 {
ethernet = <&mscc_felix_port2>;
};
};
};
sja1105_switch3 {
dsa,member = <3 3>;
ports {
port@4 {
ethernet = <&mscc_felix_port3>;
};
};
};
Basically we instantiate one DSA switch tree for every hardware switch
in the system, but we still give them globally unique switch IDs (will
come back to that later). Having 3 disjoint switch trees makes the
tagger drivers "just work", because net devices are registered for the
3 Felix DSA master ports, and they are also DSA slave ports to the ENETC
port. So packets received on the ENETC port are stripped of their
stacked DSA tags one by one.
Currently, hardware bridging between ports on the same sja1105 chip is
possible, but switching between sja1105 ports on different chips is
handled by the software bridge. This is fine, but we can do better.
In fact, the dsa_8021q tag used by sja1105 is compatible with cascading.
In other words, a sja1105 switch can correctly parse and route a packet
containing a dsa_8021q tag. So if we could enable hardware bridging on
the Felix DSA master ports, cross-chip bridging could be completely
offloaded.
Such as system would be used as follows:
ip link add dev br0 type bridge && ip link set dev br0 up
for port in sw0p0 sw0p1 sw0p2 sw0p3 \
sw1p0 sw1p1 sw1p2 sw1p3 \
sw2p0 sw2p1 sw2p2 sw2p3; do
ip link set dev $port master br0
done
The above makes switching between ports on the same row be performed in
hardware, and between ports on different rows in software. Now assume
the Felix switch ports are called swp0, swp1, swp2. By running the
following extra commands:
ip link add dev br1 type bridge && ip link set dev br1 up
for port in swp0 swp1 swp2; do
ip link set dev $port master br1
done
the CPU no longer sees packets which traverse sja1105 switch boundaries
and can be forwarded directly by Felix. The br1 bridge would not be used
for any sort of traffic termination.
For this to work, we need to give drivers an opportunity to listen for
bridging events on DSA trees other than their own, and pass that other
tree index as argument. I have made the assumption, for the moment, that
the other existing DSA notifiers don't need to be broadcast to other
trees. That assumption might turn out to be incorrect. But in the
meantime, introduce a dsa_broadcast function, similar in purpose to
dsa_port_notify, which is used only by the bridging notifiers.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-05-11 00:37:41 +08:00
|
|
|
err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
|
2017-05-20 05:00:45 +08:00
|
|
|
if (err)
|
|
|
|
pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
|
|
|
|
|
net: dsa: configure better brport flags when ports leave the bridge
For a DSA switch port operating in standalone mode, address learning
doesn't make much sense since that is a bridge function. In fact,
address learning even breaks setups such as this one:
+---------------------------------------------+
| |
| +-------------------+ |
| | br0 | send receive |
| +--------+-+--------+ +--------+ +--------+ |
| | | | | | | | | |
| | swp0 | | swp1 | | swp2 | | swp3 | |
| | | | | | | | | |
+-+--------+-+--------+-+--------+-+--------+-+
| ^ | ^
| | | |
| +-----------+ |
| |
+--------------------------------+
because if the switch has a single FDB (can offload a single bridge)
then source address learning on swp3 can "steal" the source MAC address
of swp2 from br0's FDB, because learning frames coming from swp2 will be
done twice: first on the swp1 ingress port, second on the swp3 ingress
port. So the hardware FDB will become out of sync with the software
bridge, and when swp2 tries to send one more packet towards swp1, the
ASIC will attempt to short-circuit the forwarding path and send it
directly to swp3 (since that's the last port it learned that address on),
which it obviously can't, because swp3 operates in standalone mode.
So DSA drivers operating in standalone mode should still configure a
list of bridge port flags even when they are standalone. Currently DSA
attempts to call dsa_port_bridge_flags with 0, which disables egress
flooding of unknown unicast and multicast, something which doesn't make
much sense. For the switches that implement .port_egress_floods - b53
and mv88e6xxx, it probably doesn't matter too much either, since they
can possibly inject traffic from the CPU into a standalone port,
regardless of MAC DA, even if egress flooding is turned off for that
port, but certainly not all DSA switches can do that - sja1105, for
example, can't. So it makes sense to use a better common default there,
such as "flood everything".
It should also be noted that what DSA calls "dsa_port_bridge_flags()"
is a degenerate name for just calling .port_egress_floods(), since
nothing else is implemented - not learning, in particular. But disabling
address learning, something that this driver is also coding up for, will
be supported by individual drivers once .port_egress_floods is replaced
with a more generic .port_bridge_flags.
Previous attempts to code up this logic have been in the common bridge
layer, but as pointed out by Ido Schimmel, there are corner cases that
are missed when doing that:
https://patchwork.kernel.org/project/netdevbpf/patch/20210209151936.97382-5-olteanv@gmail.com/
So, at least for now, let's leave DSA in charge of setting port flags
before and after the bridge join and leave.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:54 +08:00
|
|
|
/* Configure the port for standalone mode (no address learning,
|
|
|
|
* flood everything).
|
|
|
|
* The bridge only emits SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS events
|
|
|
|
* when the user requests it through netlink or sysfs, but not
|
|
|
|
* automatically at port join or leave, so we need to handle resetting
|
|
|
|
* the brport flags ourselves. But we even prefer it that way, because
|
|
|
|
* otherwise, some setups might never get the notification they need,
|
|
|
|
* for example, when a port leaves a LAG that offloads the bridge,
|
|
|
|
* it becomes standalone, but as far as the bridge is concerned, no
|
|
|
|
* port ever left.
|
|
|
|
*/
|
|
|
|
dsa_port_change_brport_flags(dp, false);
|
net: dsa: enable flooding for bridge ports
Switches work by learning the MAC address for each attached station by
monitoring traffic from each station. When a station sends a packet,
the switch records which port the MAC address is connected to.
With IPv4 networking, before communication commences with a neighbour,
an ARP packet is broadcasted to all stations asking for the MAC address
corresponding with the IPv4. The desired station responds with an ARP
reply, and the ARP reply causes the switch to learn which port the
station is connected to.
With IPv6 networking, the situation is rather different. Rather than
broadcasting ARP packets, a "neighbour solicitation" is multicasted
rather than broadcasted. This multicast needs to reach the intended
station in order for the neighbour to be discovered.
Once a neighbour has been discovered, and entered into the sending
stations neighbour cache, communication can restart at a point later
without sending a new neighbour solicitation, even if the entry in
the neighbour cache is marked as stale. This can be after the MAC
address has expired from the forwarding cache of the DSA switch -
when that occurs, there is a long pause in communication.
Our DSA implementation for mv88e6xxx switches disables flooding of
multicast and unicast frames for bridged ports. As per the above
description, this is fine for IPv4 networking, since the broadcasted
ARP queries will be sent to and received by all stations on the same
network. However, this breaks IPv6 very badly - blocking neighbour
solicitations and later causing connections to stall.
The defaults that the Linux bridge code expect from bridges are for
unknown unicast and unknown multicast frames to be flooded to all ports
on the bridge, which is at odds to the defaults adopted by our DSA
implementation for mv88e6xxx switches.
This commit enables by default flooding of both unknown unicast and
unknown multicast frames whenever a port is added to a bridge, and
disables the flooding when a port leaves the bridge. This means that
mv88e6xxx DSA switches now behave as per the bridge(8) man page, and
IPv6 works flawlessly through such a switch.
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-21 07:35:06 +08:00
|
|
|
|
2017-05-20 05:00:45 +08:00
|
|
|
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
|
|
|
|
* so allow it to be in BR_STATE_FORWARDING to be kept functional
|
|
|
|
*/
|
|
|
|
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
|
|
|
|
}
|
2017-05-20 05:00:46 +08:00
|
|
|
|
2021-01-13 16:42:53 +08:00
|
|
|
int dsa_port_lag_change(struct dsa_port *dp,
|
|
|
|
struct netdev_lag_lower_state_info *linfo)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_lag_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
};
|
|
|
|
bool tx_enabled;
|
|
|
|
|
|
|
|
if (!dp->lag_dev)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* On statically configured aggregates (e.g. loadbalance
|
|
|
|
* without LACP) ports will always be tx_enabled, even if the
|
|
|
|
* link is down. Thus we require both link_up and tx_enabled
|
|
|
|
* in order to include it in the tx set.
|
|
|
|
*/
|
|
|
|
tx_enabled = linfo->link_up && linfo->tx_enabled;
|
|
|
|
|
|
|
|
if (tx_enabled == dp->lag_tx_enabled)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
dp->lag_tx_enabled = tx_enabled;
|
|
|
|
|
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info);
|
|
|
|
}
|
|
|
|
|
|
|
|
int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
|
|
|
|
struct netdev_lag_upper_info *uinfo)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_lag_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.lag = lag,
|
|
|
|
.info = uinfo,
|
|
|
|
};
|
|
|
|
int err;
|
|
|
|
|
|
|
|
dsa_lag_map(dp->ds->dst, lag);
|
|
|
|
dp->lag_dev = lag;
|
|
|
|
|
|
|
|
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info);
|
|
|
|
if (err) {
|
|
|
|
dp->lag_dev = NULL;
|
|
|
|
dsa_lag_unmap(dp->ds->dst, lag);
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_lag_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.lag = lag,
|
|
|
|
};
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!dp->lag_dev)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Port might have been part of a LAG that in turn was
|
|
|
|
* attached to a bridge.
|
|
|
|
*/
|
|
|
|
if (dp->bridge_dev)
|
|
|
|
dsa_port_bridge_leave(dp, dp->bridge_dev);
|
|
|
|
|
|
|
|
dp->lag_tx_enabled = false;
|
|
|
|
dp->lag_dev = NULL;
|
|
|
|
|
|
|
|
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
|
|
|
|
if (err)
|
|
|
|
pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n",
|
|
|
|
err);
|
|
|
|
|
|
|
|
dsa_lag_unmap(dp->ds->dst, lag);
|
|
|
|
}
|
|
|
|
|
2020-09-21 08:10:28 +08:00
|
|
|
/* Must be called under rcu_read_lock() */
|
2019-04-29 02:45:44 +08:00
|
|
|
static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
|
2021-02-14 04:43:19 +08:00
|
|
|
bool vlan_filtering,
|
|
|
|
struct netlink_ext_ack *extack)
|
2019-04-29 02:45:44 +08:00
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
2020-09-21 08:10:28 +08:00
|
|
|
int err, i;
|
|
|
|
|
|
|
|
/* VLAN awareness was off, so the question is "can we turn it on".
|
|
|
|
* We may have had 8021q uppers, those need to go. Make sure we don't
|
|
|
|
* enter an inconsistent state: deny changing the VLAN awareness state
|
|
|
|
* as long as we have 8021q uppers.
|
|
|
|
*/
|
|
|
|
if (vlan_filtering && dsa_is_user_port(ds, dp->index)) {
|
|
|
|
struct net_device *upper_dev, *slave = dp->slave;
|
|
|
|
struct net_device *br = dp->bridge_dev;
|
|
|
|
struct list_head *iter;
|
|
|
|
|
|
|
|
netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) {
|
|
|
|
struct bridge_vlan_info br_info;
|
|
|
|
u16 vid;
|
|
|
|
|
|
|
|
if (!is_vlan_dev(upper_dev))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
vid = vlan_dev_vlan_id(upper_dev);
|
|
|
|
|
|
|
|
/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
|
|
|
|
* device, respectively the VID is not found, returning
|
|
|
|
* 0 means success, which is a failure for us here.
|
|
|
|
*/
|
|
|
|
err = br_vlan_get_info(br, vid, &br_info);
|
|
|
|
if (err == 0) {
|
2021-02-14 04:43:19 +08:00
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"Must first remove VLAN uppers having VIDs also present in bridge");
|
2020-09-21 08:10:28 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-04-29 02:45:44 +08:00
|
|
|
|
|
|
|
if (!ds->vlan_filtering_is_global)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* For cases where enabling/disabling VLAN awareness is global to the
|
|
|
|
* switch, we need to handle the case where multiple bridges span
|
|
|
|
* different ports of the same switch device and one of them has a
|
|
|
|
* different setting than what is being requested.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < ds->num_ports; i++) {
|
|
|
|
struct net_device *other_bridge;
|
|
|
|
|
|
|
|
other_bridge = dsa_to_port(ds, i)->bridge_dev;
|
|
|
|
if (!other_bridge)
|
|
|
|
continue;
|
|
|
|
/* If it's the same bridge, it also has same
|
|
|
|
* vlan_filtering setting => no need to check
|
|
|
|
*/
|
|
|
|
if (other_bridge == dp->bridge_dev)
|
|
|
|
continue;
|
|
|
|
if (br_vlan_enabled(other_bridge) != vlan_filtering) {
|
2021-02-14 04:43:19 +08:00
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"VLAN filtering is a global setting");
|
2019-04-29 02:45:44 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-02-14 04:43:19 +08:00
|
|
|
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
|
|
|
|
struct netlink_ext_ack *extack)
|
2017-05-20 05:00:46 +08:00
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
bool apply;
|
2019-04-29 02:45:43 +08:00
|
|
|
int err;
|
2017-05-20 05:00:46 +08:00
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
if (!ds->ops->port_vlan_filtering)
|
|
|
|
return -EOPNOTSUPP;
|
2020-09-21 08:10:28 +08:00
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
/* We are called from dsa_slave_switchdev_blocking_event(),
|
|
|
|
* which is not under rcu_read_lock(), unlike
|
|
|
|
* dsa_slave_switchdev_event().
|
|
|
|
*/
|
|
|
|
rcu_read_lock();
|
2021-02-14 04:43:19 +08:00
|
|
|
apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering, extack);
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
rcu_read_unlock();
|
|
|
|
if (!apply)
|
|
|
|
return -EINVAL;
|
2019-04-29 02:45:44 +08:00
|
|
|
|
2019-04-29 02:45:51 +08:00
|
|
|
if (dsa_port_is_vlan_filtering(dp) == vlan_filtering)
|
|
|
|
return 0;
|
|
|
|
|
2021-02-14 04:43:19 +08:00
|
|
|
err = ds->ops->port_vlan_filtering(ds, dp->index, vlan_filtering,
|
|
|
|
extack);
|
2019-04-29 02:45:44 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
if (ds->vlan_filtering_is_global)
|
|
|
|
ds->vlan_filtering = vlan_filtering;
|
|
|
|
else
|
|
|
|
dp->vlan_filtering = vlan_filtering;
|
2020-10-03 06:06:46 +08:00
|
|
|
|
2017-05-20 05:00:46 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2017-05-20 05:00:47 +08:00
|
|
|
|
2020-05-13 01:20:25 +08:00
|
|
|
/* This enforces legacy behavior for switch drivers which assume they can't
|
|
|
|
* receive VLAN configuration when enslaved to a bridge with vlan_filtering=0
|
|
|
|
*/
|
|
|
|
bool dsa_port_skip_vlan_configuration(struct dsa_port *dp)
|
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
|
|
|
if (!dp->bridge_dev)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return (!ds->configure_vlan_while_not_filtering &&
|
|
|
|
!br_vlan_enabled(dp->bridge_dev));
|
|
|
|
}
|
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock)
|
2017-05-20 05:00:47 +08:00
|
|
|
{
|
|
|
|
unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
|
|
|
|
unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
struct dsa_notifier_ageing_time_info info;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
info.ageing_time = ageing_time;
|
2017-05-20 05:00:47 +08:00
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
err = dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
|
|
|
|
if (err)
|
|
|
|
return err;
|
2017-05-20 05:00:47 +08:00
|
|
|
|
|
|
|
dp->ageing_time = ageing_time;
|
|
|
|
|
2021-01-09 08:01:51 +08:00
|
|
|
return 0;
|
2017-05-20 05:00:47 +08:00
|
|
|
}
|
2017-05-20 05:00:48 +08:00
|
|
|
|
net: switchdev: pass flags and mask to both {PRE_,}BRIDGE_FLAGS attributes
This switchdev attribute offers a counterproductive API for a driver
writer, because although br_switchdev_set_port_flag gets passed a
"flags" and a "mask", those are passed piecemeal to the driver, so while
the PRE_BRIDGE_FLAGS listener knows what changed because it has the
"mask", the BRIDGE_FLAGS listener doesn't, because it only has the final
value. But certain drivers can offload only certain combinations of
settings, like for example they cannot change unicast flooding
independently of multicast flooding - they must be both on or both off.
The way the information is passed to switchdev makes drivers not
expressive enough, and unable to reject this request ahead of time, in
the PRE_BRIDGE_FLAGS notifier, so they are forced to reject it during
the deferred BRIDGE_FLAGS attribute, where the rejection is currently
ignored.
This patch also changes drivers to make use of the "mask" field for edge
detection when possible.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:55 +08:00
|
|
|
int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
struct switchdev_brport_flags flags,
|
|
|
|
struct netlink_ext_ack *extack)
|
2019-02-21 08:58:22 +08:00
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
if (!ds->ops->port_pre_bridge_flags)
|
2019-02-21 08:58:22 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack);
|
2019-02-21 08:58:22 +08:00
|
|
|
}
|
|
|
|
|
net: switchdev: pass flags and mask to both {PRE_,}BRIDGE_FLAGS attributes
This switchdev attribute offers a counterproductive API for a driver
writer, because although br_switchdev_set_port_flag gets passed a
"flags" and a "mask", those are passed piecemeal to the driver, so while
the PRE_BRIDGE_FLAGS listener knows what changed because it has the
"mask", the BRIDGE_FLAGS listener doesn't, because it only has the final
value. But certain drivers can offload only certain combinations of
settings, like for example they cannot change unicast flooding
independently of multicast flooding - they must be both on or both off.
The way the information is passed to switchdev makes drivers not
expressive enough, and unable to reject this request ahead of time, in
the PRE_BRIDGE_FLAGS notifier, so they are forced to reject it during
the deferred BRIDGE_FLAGS attribute, where the rejection is currently
ignored.
This patch also changes drivers to make use of the "mask" field for edge
detection when possible.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:55 +08:00
|
|
|
int dsa_port_bridge_flags(const struct dsa_port *dp,
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
struct switchdev_brport_flags flags,
|
|
|
|
struct netlink_ext_ack *extack)
|
2019-02-21 07:35:04 +08:00
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
if (!ds->ops->port_bridge_flags)
|
|
|
|
return -EINVAL;
|
2019-02-21 07:35:04 +08:00
|
|
|
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
|
2019-02-21 07:35:04 +08:00
|
|
|
}
|
|
|
|
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
|
|
|
|
struct netlink_ext_ack *extack)
|
2019-07-09 11:31:13 +08:00
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
if (!ds->ops->port_set_mrouter)
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
return -EOPNOTSUPP;
|
2019-07-09 11:31:13 +08:00
|
|
|
|
net: dsa: act as passthrough for bridge port flags
There are multiple ways in which a PORT_BRIDGE_FLAGS attribute can be
expressed by the bridge through switchdev, and not all of them can be
emulated by DSA mid-layer API at the same time.
One possible configuration is when the bridge offloads the port flags
using a mask that has a single bit set - therefore only one feature
should change. However, DSA currently groups together unicast and
multicast flooding in the .port_egress_floods method, which limits our
options when we try to add support for turning off broadcast flooding:
do we extend .port_egress_floods with a third parameter which b53 and
mv88e6xxx will ignore? But that means that the DSA layer, which
currently implements the PRE_BRIDGE_FLAGS attribute all by itself, will
see that .port_egress_floods is implemented, and will report that all 3
types of flooding are supported - not necessarily true.
Another configuration is when the user specifies more than one flag at
the same time, in the same netlink message. If we were to create one
individual function per offloadable bridge port flag, we would limit the
expressiveness of the switch driver of refusing certain combinations of
flag values. For example, a switch may not have an explicit knob for
flooding of unknown multicast, just for flooding in general. In that
case, the only correct thing to do is to allow changes to BR_FLOOD and
BR_MCAST_FLOOD in tandem, and never allow mismatched values. But having
a separate .port_set_unicast_flood and .port_set_multicast_flood would
not allow the driver to possibly reject that.
Also, DSA doesn't consider it necessary to inform the driver that a
SWITCHDEV_ATTR_ID_BRIDGE_MROUTER attribute was offloaded, because it
just calls .port_egress_floods for the CPU port. When we'll add support
for the plain SWITCHDEV_ATTR_ID_PORT_MROUTER, that will become a real
problem because the flood settings will need to be held statefully in
the DSA middle layer, otherwise changing the mrouter port attribute will
impact the flooding attribute. And that's _assuming_ that the underlying
hardware doesn't have anything else to do when a multicast router
attaches to a port than flood unknown traffic to it. If it does, there
will need to be a dedicated .port_set_mrouter anyway.
So we need to let the DSA drivers see the exact form that the bridge
passes this switchdev attribute in, otherwise we are standing in the
way. Therefore we also need to use this form of language when
communicating to the driver that it needs to configure its initial
(before bridge join) and final (after bridge leave) port flags.
The b53 and mv88e6xxx drivers are converted to the passthrough API and
their implementation of .port_egress_floods is split into two: a
function that configures unicast flooding and another for multicast.
The mv88e6xxx implementation is quite hairy, and it turns out that
the implementations of unknown unicast flooding are actually the same
for 6185 and for 6352:
behind the confusing names actually lie two individual bits:
NO_UNKNOWN_MC -> FLOOD_UC = 0x4 = BIT(2)
NO_UNKNOWN_UC -> FLOOD_MC = 0x8 = BIT(3)
so there was no reason to entangle them in the first place.
Whereas the 6185 writes to MV88E6185_PORT_CTL0_FORWARD_UNKNOWN of
PORT_CTL0, which has the exact same bit index. I have left the
implementations separate though, for the only reason that the names are
different enough to confuse me, since I am not able to double-check with
a user manual. The multicast flooding setting for 6185 is in a different
register than for 6352 though.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-12 23:15:56 +08:00
|
|
|
return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack);
|
2019-07-09 11:31:13 +08:00
|
|
|
}
|
|
|
|
|
net: dsa: configure the MTU for switch ports
It is useful be able to configure port policers on a switch to accept
frames of various sizes:
- Increase the MTU for better throughput from the default of 1500 if it
is known that there is no 10/100 Mbps device in the network.
- Decrease the MTU to limit the latency of high-priority frames under
congestion, or work around various network segments that add extra
headers to packets which can't be fragmented.
For DSA slave ports, this is mostly a pass-through callback, called
through the regular ndo ops and at probe time (to ensure consistency
across all supported switches).
The CPU port is called with an MTU equal to the largest configured MTU
of the slave ports. The assumption is that the user might want to
sustain a bidirectional conversation with a partner over any switch
port.
The DSA master is configured the same as the CPU port, plus the tagger
overhead. Since the MTU is by definition L2 payload (sans Ethernet
header), it is up to each individual driver to figure out if it needs to
do anything special for its frame tags on the CPU port (it shouldn't
except in special cases). So the MTU does not contain the tagger
overhead on the CPU port.
However the MTU of the DSA master, minus the tagger overhead, is used as
a proxy for the MTU of the CPU port, which does not have a net device.
This is to avoid uselessly calling the .change_mtu function on the CPU
port when nothing should change.
So it is safe to assume that the DSA master and the CPU port MTUs are
apart by exactly the tagger's overhead in bytes.
Some changes were made around dsa_master_set_mtu(), function which was
now removed, for 2 reasons:
- dev_set_mtu() already calls dev_validate_mtu(), so it's redundant to
do the same thing in DSA
- __dev_set_mtu() returns 0 if ops->ndo_change_mtu is an absent method
That is to say, there's no need for this function in DSA, we can safely
call dev_set_mtu() directly, take the rtnl lock when necessary, and just
propagate whatever errors get reported (since the user probably wants to
be informed).
Some inspiration (mainly in the MTU DSA notifier) was taken from a
vaguely similar patch from Murali and Florian, who are credited as
co-developers down below.
Co-developed-by: Murali Krishna Policharla <murali.policharla@broadcom.com>
Signed-off-by: Murali Krishna Policharla <murali.policharla@broadcom.com>
Co-developed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-03-28 03:55:42 +08:00
|
|
|
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
|
|
|
|
bool propagate_upstream)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_mtu_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.propagate_upstream = propagate_upstream,
|
|
|
|
.port = dp->index,
|
|
|
|
.mtu = new_mtu,
|
|
|
|
};
|
|
|
|
|
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_MTU, &info);
|
|
|
|
}
|
|
|
|
|
2017-08-06 21:15:41 +08:00
|
|
|
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
|
|
|
|
u16 vid)
|
2017-05-20 05:00:48 +08:00
|
|
|
{
|
2017-05-20 05:00:53 +08:00
|
|
|
struct dsa_notifier_fdb_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
2017-08-06 21:15:41 +08:00
|
|
|
.addr = addr,
|
|
|
|
.vid = vid,
|
2017-05-20 05:00:53 +08:00
|
|
|
};
|
2017-05-20 05:00:48 +08:00
|
|
|
|
2017-05-20 05:00:53 +08:00
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
|
2017-05-20 05:00:48 +08:00
|
|
|
}
|
|
|
|
|
2017-08-06 21:15:41 +08:00
|
|
|
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
|
|
|
|
u16 vid)
|
2017-05-20 05:00:48 +08:00
|
|
|
{
|
2017-05-20 05:00:53 +08:00
|
|
|
struct dsa_notifier_fdb_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
2017-08-06 21:15:41 +08:00
|
|
|
.addr = addr,
|
|
|
|
.vid = vid,
|
|
|
|
|
2017-05-20 05:00:53 +08:00
|
|
|
};
|
2017-05-20 05:00:48 +08:00
|
|
|
|
2017-05-20 05:00:53 +08:00
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
|
2017-05-20 05:00:48 +08:00
|
|
|
}
|
|
|
|
|
2017-09-21 07:32:14 +08:00
|
|
|
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
|
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
int port = dp->index;
|
|
|
|
|
|
|
|
if (!ds->ops->port_fdb_dump)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
return ds->ops->port_fdb_dump(ds, port, cb, data);
|
|
|
|
}
|
|
|
|
|
2017-11-10 06:11:01 +08:00
|
|
|
int dsa_port_mdb_add(const struct dsa_port *dp,
|
net: switchdev: remove the transaction structure from port object notifiers
Since the introduction of the switchdev API, port objects were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
object notifier structures, and converts drivers to not look at this
member.
Where driver conversion is trivial (like in the case of the Marvell
Prestera driver, NXP DPAA2 switch, TI CPSW, and Rocker drivers), it is
done in this patch.
Where driver conversion needs more attention (DSA, Mellanox Spectrum),
the conversion is left for subsequent patches and here we only fake the
prepare/commit phases at a lower level, just not in the switchdev
notifier itself.
Where the code has a natural structure that is best left alone as a
preparation and a commit phase (as in the case of the Ocelot switch),
that structure is left in place, just made to not depend upon the
switchdev transactional model.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:48 +08:00
|
|
|
const struct switchdev_obj_port_mdb *mdb)
|
2017-05-20 05:00:49 +08:00
|
|
|
{
|
2017-05-20 05:00:54 +08:00
|
|
|
struct dsa_notifier_mdb_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.mdb = mdb,
|
|
|
|
};
|
2017-05-20 05:00:49 +08:00
|
|
|
|
2017-05-20 05:00:54 +08:00
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
|
2017-05-20 05:00:49 +08:00
|
|
|
}
|
|
|
|
|
2017-11-10 06:11:01 +08:00
|
|
|
int dsa_port_mdb_del(const struct dsa_port *dp,
|
2017-05-20 05:00:49 +08:00
|
|
|
const struct switchdev_obj_port_mdb *mdb)
|
|
|
|
{
|
2017-05-20 05:00:54 +08:00
|
|
|
struct dsa_notifier_mdb_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.mdb = mdb,
|
|
|
|
};
|
2017-05-20 05:00:49 +08:00
|
|
|
|
2017-05-20 05:00:54 +08:00
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
|
2017-05-20 05:00:49 +08:00
|
|
|
}
|
|
|
|
|
2017-05-20 05:00:50 +08:00
|
|
|
int dsa_port_vlan_add(struct dsa_port *dp,
|
2021-02-14 04:43:18 +08:00
|
|
|
const struct switchdev_obj_port_vlan *vlan,
|
|
|
|
struct netlink_ext_ack *extack)
|
2017-05-20 05:00:50 +08:00
|
|
|
{
|
2017-05-20 05:00:55 +08:00
|
|
|
struct dsa_notifier_vlan_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.vlan = vlan,
|
2021-02-14 04:43:18 +08:00
|
|
|
.extack = extack,
|
2017-05-20 05:00:55 +08:00
|
|
|
};
|
2017-05-20 05:00:50 +08:00
|
|
|
|
2019-08-26 01:25:18 +08:00
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
|
2017-05-20 05:00:50 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int dsa_port_vlan_del(struct dsa_port *dp,
|
|
|
|
const struct switchdev_obj_port_vlan *vlan)
|
|
|
|
{
|
2017-05-20 05:00:55 +08:00
|
|
|
struct dsa_notifier_vlan_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.vlan = vlan,
|
|
|
|
};
|
2017-05-20 05:00:50 +08:00
|
|
|
|
2019-08-26 01:25:18 +08:00
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
|
2017-05-20 05:00:50 +08:00
|
|
|
}
|
2017-10-26 22:50:07 +08:00
|
|
|
|
2021-02-17 05:42:04 +08:00
|
|
|
int dsa_port_mrp_add(const struct dsa_port *dp,
|
|
|
|
const struct switchdev_obj_mrp *mrp)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_mrp_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.mrp = mrp,
|
|
|
|
};
|
|
|
|
|
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_MRP_ADD, &info);
|
|
|
|
}
|
|
|
|
|
|
|
|
int dsa_port_mrp_del(const struct dsa_port *dp,
|
|
|
|
const struct switchdev_obj_mrp *mrp)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_mrp_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.mrp = mrp,
|
|
|
|
};
|
|
|
|
|
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_MRP_DEL, &info);
|
|
|
|
}
|
|
|
|
|
|
|
|
int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
|
|
|
|
const struct switchdev_obj_ring_role_mrp *mrp)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_mrp_ring_role_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.mrp = mrp,
|
|
|
|
};
|
|
|
|
|
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_MRP_ADD_RING_ROLE, &info);
|
|
|
|
}
|
|
|
|
|
|
|
|
int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
|
|
|
|
const struct switchdev_obj_ring_role_mrp *mrp)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_mrp_ring_role_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.mrp = mrp,
|
|
|
|
};
|
|
|
|
|
|
|
|
return dsa_port_notify(dp, DSA_NOTIFIER_MRP_DEL_RING_ROLE, &info);
|
|
|
|
}
|
|
|
|
|
net: dsa: allow changing the tag protocol via the "tagging" device attribute
Currently DSA exposes the following sysfs:
$ cat /sys/class/net/eno2/dsa/tagging
ocelot
which is a read-only device attribute, introduced in the kernel as
commit 98cdb4807123 ("net: dsa: Expose tagging protocol to user-space"),
and used by libpcap since its commit 993db3800d7d ("Add support for DSA
link-layer types").
It would be nice if we could extend this device attribute by making it
writable:
$ echo ocelot-8021q > /sys/class/net/eno2/dsa/tagging
This is useful with DSA switches that can make use of more than one
tagging protocol. It may be useful in dsa_loop in the future too, to
perform offline testing of various taggers, or for changing between dsa
and edsa on Marvell switches, if that is desirable.
In terms of implementation, drivers can support this feature by
implementing .change_tag_protocol, which should always leave the switch
in a consistent state: either with the new protocol if things went well,
or with the old one if something failed. Teardown of the old protocol,
if necessary, must be handled by the driver.
Some things remain as before:
- The .get_tag_protocol is currently only called at probe time, to load
the initial tagging protocol driver. Nonetheless, new drivers should
report the tagging protocol in current use now.
- The driver should manage by itself the initial setup of tagging
protocol, no later than the .setup() method, as well as destroying
resources used by the last tagger in use, no earlier than the
.teardown() method.
For multi-switch DSA trees, error handling is a bit more complicated,
since e.g. the 5th out of 7 switches may fail to change the tag
protocol. When that happens, a revert to the original tag protocol is
attempted, but that may fail too, leaving the tree in an inconsistent
state despite each individual switch implementing .change_tag_protocol
transactionally. Since the intersection between drivers that implement
.change_tag_protocol and drivers that support D in DSA is currently the
empty set, the possibility for this error to happen is ignored for now.
Testing:
$ insmod mscc_felix.ko
[ 79.549784] mscc_felix 0000:00:00.5: Adding to iommu group 14
[ 79.565712] mscc_felix 0000:00:00.5: Failed to register DSA switch: -517
$ insmod tag_ocelot.ko
$ rmmod mscc_felix.ko
$ insmod mscc_felix.ko
[ 97.261724] libphy: VSC9959 internal MDIO bus: probed
[ 97.267363] mscc_felix 0000:00:00.5: Found PCS at internal MDIO address 0
[ 97.274998] mscc_felix 0000:00:00.5: Found PCS at internal MDIO address 1
[ 97.282561] mscc_felix 0000:00:00.5: Found PCS at internal MDIO address 2
[ 97.289700] mscc_felix 0000:00:00.5: Found PCS at internal MDIO address 3
[ 97.599163] mscc_felix 0000:00:00.5 swp0 (uninitialized): PHY [0000:00:00.3:10] driver [Microsemi GE VSC8514 SyncE] (irq=POLL)
[ 97.862034] mscc_felix 0000:00:00.5 swp1 (uninitialized): PHY [0000:00:00.3:11] driver [Microsemi GE VSC8514 SyncE] (irq=POLL)
[ 97.950731] mscc_felix 0000:00:00.5 swp0: configuring for inband/qsgmii link mode
[ 97.964278] 8021q: adding VLAN 0 to HW filter on device swp0
[ 98.146161] mscc_felix 0000:00:00.5 swp2 (uninitialized): PHY [0000:00:00.3:12] driver [Microsemi GE VSC8514 SyncE] (irq=POLL)
[ 98.238649] mscc_felix 0000:00:00.5 swp1: configuring for inband/qsgmii link mode
[ 98.251845] 8021q: adding VLAN 0 to HW filter on device swp1
[ 98.433916] mscc_felix 0000:00:00.5 swp3 (uninitialized): PHY [0000:00:00.3:13] driver [Microsemi GE VSC8514 SyncE] (irq=POLL)
[ 98.485542] mscc_felix 0000:00:00.5: configuring for fixed/internal link mode
[ 98.503584] mscc_felix 0000:00:00.5: Link is Up - 2.5Gbps/Full - flow control rx/tx
[ 98.527948] device eno2 entered promiscuous mode
[ 98.544755] DSA: tree 0 setup
$ ping 10.0.0.1
PING 10.0.0.1 (10.0.0.1): 56 data bytes
64 bytes from 10.0.0.1: seq=0 ttl=64 time=2.337 ms
64 bytes from 10.0.0.1: seq=1 ttl=64 time=0.754 ms
^C
- 10.0.0.1 ping statistics -
2 packets transmitted, 2 packets received, 0% packet loss
round-trip min/avg/max = 0.754/1.545/2.337 ms
$ cat /sys/class/net/eno2/dsa/tagging
ocelot
$ cat ./test_ocelot_8021q.sh
#!/bin/bash
ip link set swp0 down
ip link set swp1 down
ip link set swp2 down
ip link set swp3 down
ip link set swp5 down
ip link set eno2 down
echo ocelot-8021q > /sys/class/net/eno2/dsa/tagging
ip link set eno2 up
ip link set swp0 up
ip link set swp1 up
ip link set swp2 up
ip link set swp3 up
ip link set swp5 up
$ ./test_ocelot_8021q.sh
./test_ocelot_8021q.sh: line 9: echo: write error: Protocol not available
$ rmmod tag_ocelot.ko
rmmod: can't unload module 'tag_ocelot': Resource temporarily unavailable
$ insmod tag_ocelot_8021q.ko
$ ./test_ocelot_8021q.sh
$ cat /sys/class/net/eno2/dsa/tagging
ocelot-8021q
$ rmmod tag_ocelot.ko
$ rmmod tag_ocelot_8021q.ko
rmmod: can't unload module 'tag_ocelot_8021q': Resource temporarily unavailable
$ ping 10.0.0.1
PING 10.0.0.1 (10.0.0.1): 56 data bytes
64 bytes from 10.0.0.1: seq=0 ttl=64 time=0.953 ms
64 bytes from 10.0.0.1: seq=1 ttl=64 time=0.787 ms
64 bytes from 10.0.0.1: seq=2 ttl=64 time=0.771 ms
$ rmmod mscc_felix.ko
[ 645.544426] mscc_felix 0000:00:00.5: Link is Down
[ 645.838608] DSA: tree 0 torn down
$ rmmod tag_ocelot_8021q.ko
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 09:00:06 +08:00
|
|
|
void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
|
|
|
|
const struct dsa_device_ops *tag_ops)
|
|
|
|
{
|
|
|
|
cpu_dp->filter = tag_ops->filter;
|
|
|
|
cpu_dp->rcv = tag_ops->rcv;
|
|
|
|
cpu_dp->tag_ops = tag_ops;
|
|
|
|
}
|
|
|
|
|
2018-04-26 03:12:51 +08:00
|
|
|
static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
|
2018-01-23 23:03:46 +08:00
|
|
|
{
|
|
|
|
struct device_node *phy_dn;
|
|
|
|
struct phy_device *phydev;
|
|
|
|
|
2018-04-26 03:12:51 +08:00
|
|
|
phy_dn = of_parse_phandle(dp->dn, "phy-handle", 0);
|
2018-01-23 23:03:46 +08:00
|
|
|
if (!phy_dn)
|
2018-04-26 03:12:51 +08:00
|
|
|
return NULL;
|
2018-01-23 23:03:46 +08:00
|
|
|
|
|
|
|
phydev = of_phy_find_device(phy_dn);
|
|
|
|
if (!phydev) {
|
2018-04-26 03:12:51 +08:00
|
|
|
of_node_put(phy_dn);
|
|
|
|
return ERR_PTR(-EPROBE_DEFER);
|
2018-01-23 23:03:46 +08:00
|
|
|
}
|
|
|
|
|
2019-02-25 15:22:19 +08:00
|
|
|
of_node_put(phy_dn);
|
2018-04-26 03:12:51 +08:00
|
|
|
return phydev;
|
|
|
|
}
|
|
|
|
|
2019-12-17 02:32:47 +08:00
|
|
|
static void dsa_port_phylink_validate(struct phylink_config *config,
|
|
|
|
unsigned long *supported,
|
|
|
|
struct phylink_link_state *state)
|
2019-05-29 01:38:15 +08:00
|
|
|
{
|
|
|
|
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
|
|
|
if (!ds->ops->phylink_validate)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ds->ops->phylink_validate(ds, dp->index, supported, state);
|
|
|
|
}
|
|
|
|
|
2019-12-17 02:32:47 +08:00
|
|
|
static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
|
|
|
|
struct phylink_link_state *state)
|
2019-05-29 01:38:15 +08:00
|
|
|
{
|
|
|
|
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
2020-03-14 18:15:28 +08:00
|
|
|
int err;
|
2019-05-29 01:38:15 +08:00
|
|
|
|
2019-11-21 08:36:22 +08:00
|
|
|
/* Only called for inband modes */
|
|
|
|
if (!ds->ops->phylink_mac_link_state) {
|
|
|
|
state->link = 0;
|
|
|
|
return;
|
|
|
|
}
|
2019-05-29 01:38:15 +08:00
|
|
|
|
2020-03-14 18:15:28 +08:00
|
|
|
err = ds->ops->phylink_mac_link_state(ds, dp->index, state);
|
|
|
|
if (err < 0) {
|
|
|
|
dev_err(ds->dev, "p%d: phylink_mac_link_state() failed: %d\n",
|
|
|
|
dp->index, err);
|
2019-11-21 08:36:22 +08:00
|
|
|
state->link = 0;
|
2020-03-14 18:15:28 +08:00
|
|
|
}
|
2019-05-29 01:38:15 +08:00
|
|
|
}
|
|
|
|
|
2019-12-17 02:32:47 +08:00
|
|
|
static void dsa_port_phylink_mac_config(struct phylink_config *config,
|
|
|
|
unsigned int mode,
|
|
|
|
const struct phylink_link_state *state)
|
2019-05-29 01:38:15 +08:00
|
|
|
{
|
|
|
|
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
|
|
|
if (!ds->ops->phylink_mac_config)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ds->ops->phylink_mac_config(ds, dp->index, mode, state);
|
|
|
|
}
|
|
|
|
|
2019-12-17 02:32:47 +08:00
|
|
|
static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
|
2019-05-29 01:38:15 +08:00
|
|
|
{
|
|
|
|
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
|
|
|
if (!ds->ops->phylink_mac_an_restart)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ds->ops->phylink_mac_an_restart(ds, dp->index);
|
|
|
|
}
|
|
|
|
|
2019-12-17 02:32:47 +08:00
|
|
|
static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
|
|
|
|
unsigned int mode,
|
|
|
|
phy_interface_t interface)
|
2019-05-29 01:38:15 +08:00
|
|
|
{
|
|
|
|
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
|
2019-05-29 01:38:16 +08:00
|
|
|
struct phy_device *phydev = NULL;
|
2019-05-29 01:38:15 +08:00
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
2019-05-29 01:38:16 +08:00
|
|
|
if (dsa_is_user_port(ds, dp->index))
|
|
|
|
phydev = dp->slave->phydev;
|
|
|
|
|
2019-05-29 01:38:15 +08:00
|
|
|
if (!ds->ops->phylink_mac_link_down) {
|
2019-05-29 01:38:16 +08:00
|
|
|
if (ds->ops->adjust_link && phydev)
|
|
|
|
ds->ops->adjust_link(ds, dp->index, phydev);
|
2019-05-29 01:38:15 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
|
|
|
|
}
|
|
|
|
|
2019-12-17 02:32:47 +08:00
|
|
|
static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
|
2020-02-26 18:23:41 +08:00
|
|
|
struct phy_device *phydev,
|
2019-12-17 02:32:47 +08:00
|
|
|
unsigned int mode,
|
|
|
|
phy_interface_t interface,
|
2020-02-26 18:23:41 +08:00
|
|
|
int speed, int duplex,
|
|
|
|
bool tx_pause, bool rx_pause)
|
2019-05-29 01:38:15 +08:00
|
|
|
{
|
|
|
|
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
|
|
|
if (!ds->ops->phylink_mac_link_up) {
|
2019-05-29 01:38:16 +08:00
|
|
|
if (ds->ops->adjust_link && phydev)
|
|
|
|
ds->ops->adjust_link(ds, dp->index, phydev);
|
2019-05-29 01:38:15 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-02-26 18:23:46 +08:00
|
|
|
ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev,
|
|
|
|
speed, duplex, tx_pause, rx_pause);
|
2019-05-29 01:38:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
|
|
|
|
.validate = dsa_port_phylink_validate,
|
2019-11-21 08:36:22 +08:00
|
|
|
.mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
|
2019-05-29 01:38:15 +08:00
|
|
|
.mac_config = dsa_port_phylink_mac_config,
|
|
|
|
.mac_an_restart = dsa_port_phylink_mac_an_restart,
|
|
|
|
.mac_link_down = dsa_port_phylink_mac_link_down,
|
|
|
|
.mac_link_up = dsa_port_phylink_mac_link_up,
|
|
|
|
};
|
|
|
|
|
2018-04-26 03:12:51 +08:00
|
|
|
static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
|
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
struct phy_device *phydev;
|
|
|
|
int port = dp->index;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
phydev = dsa_port_get_phy_device(dp);
|
|
|
|
if (!phydev)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (IS_ERR(phydev))
|
|
|
|
return PTR_ERR(phydev);
|
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
if (enable) {
|
|
|
|
err = genphy_resume(phydev);
|
|
|
|
if (err < 0)
|
|
|
|
goto err_put_dev;
|
|
|
|
|
|
|
|
err = genphy_read_status(phydev);
|
|
|
|
if (err < 0)
|
|
|
|
goto err_put_dev;
|
|
|
|
} else {
|
|
|
|
err = genphy_suspend(phydev);
|
|
|
|
if (err < 0)
|
|
|
|
goto err_put_dev;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ds->ops->adjust_link)
|
|
|
|
ds->ops->adjust_link(ds, port, phydev);
|
|
|
|
|
|
|
|
dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
|
|
|
|
|
|
|
|
err_put_dev:
|
|
|
|
put_device(&phydev->mdio.dev);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
|
2017-10-26 22:50:07 +08:00
|
|
|
{
|
|
|
|
struct device_node *dn = dp->dn;
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
struct phy_device *phydev;
|
|
|
|
int port = dp->index;
|
net: of_get_phy_mode: Change API to solve int/unit warnings
Before this change of_get_phy_mode() returned an enum,
phy_interface_t. On error, -ENODEV etc, is returned. If the result of
the function is stored in a variable of type phy_interface_t, and the
compiler has decided to represent this as an unsigned int, comparision
with -ENODEV etc, is a signed vs unsigned comparision.
Fix this problem by changing the API. Make the function return an
error, or 0 on success, and pass a pointer, of type phy_interface_t,
where the phy mode should be stored.
v2:
Return with *interface set to PHY_INTERFACE_MODE_NA on error.
Add error checks to all users of of_get_phy_mode()
Fixup a few reverse christmas tree errors
Fixup a few slightly malformed reverse christmas trees
v3:
Fix 0-day reported errors.
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-04 09:40:33 +08:00
|
|
|
phy_interface_t mode;
|
2017-10-26 22:50:07 +08:00
|
|
|
int err;
|
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
err = of_phy_register_fixed_link(dn);
|
|
|
|
if (err) {
|
|
|
|
dev_err(ds->dev,
|
|
|
|
"failed to register the fixed PHY of port %d\n",
|
|
|
|
port);
|
|
|
|
return err;
|
|
|
|
}
|
2017-10-26 22:50:07 +08:00
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
phydev = of_phy_find_device(dn);
|
2017-10-26 22:50:07 +08:00
|
|
|
|
net: of_get_phy_mode: Change API to solve int/unit warnings
Before this change of_get_phy_mode() returned an enum,
phy_interface_t. On error, -ENODEV etc, is returned. If the result of
the function is stored in a variable of type phy_interface_t, and the
compiler has decided to represent this as an unsigned int, comparision
with -ENODEV etc, is a signed vs unsigned comparision.
Fix this problem by changing the API. Make the function return an
error, or 0 on success, and pass a pointer, of type phy_interface_t,
where the phy mode should be stored.
v2:
Return with *interface set to PHY_INTERFACE_MODE_NA on error.
Add error checks to all users of of_get_phy_mode()
Fixup a few reverse christmas tree errors
Fixup a few slightly malformed reverse christmas trees
v3:
Fix 0-day reported errors.
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-04 09:40:33 +08:00
|
|
|
err = of_get_phy_mode(dn, &mode);
|
|
|
|
if (err)
|
2018-01-23 23:03:46 +08:00
|
|
|
mode = PHY_INTERFACE_MODE_NA;
|
|
|
|
phydev->interface = mode;
|
2017-10-26 22:50:07 +08:00
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
genphy_read_status(phydev);
|
2017-10-26 22:50:07 +08:00
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
if (ds->ops->adjust_link)
|
|
|
|
ds->ops->adjust_link(ds, port, phydev);
|
2017-10-26 22:50:07 +08:00
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
put_device(&phydev->mdio.dev);
|
2017-10-26 22:50:07 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-05-29 01:38:16 +08:00
|
|
|
static int dsa_port_phylink_register(struct dsa_port *dp)
|
|
|
|
{
|
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
struct device_node *port_dn = dp->dn;
|
net: of_get_phy_mode: Change API to solve int/unit warnings
Before this change of_get_phy_mode() returned an enum,
phy_interface_t. On error, -ENODEV etc, is returned. If the result of
the function is stored in a variable of type phy_interface_t, and the
compiler has decided to represent this as an unsigned int, comparision
with -ENODEV etc, is a signed vs unsigned comparision.
Fix this problem by changing the API. Make the function return an
error, or 0 on success, and pass a pointer, of type phy_interface_t,
where the phy mode should be stored.
v2:
Return with *interface set to PHY_INTERFACE_MODE_NA on error.
Add error checks to all users of of_get_phy_mode()
Fixup a few reverse christmas tree errors
Fixup a few slightly malformed reverse christmas trees
v3:
Fix 0-day reported errors.
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-04 09:40:33 +08:00
|
|
|
phy_interface_t mode;
|
|
|
|
int err;
|
2019-05-29 01:38:16 +08:00
|
|
|
|
net: of_get_phy_mode: Change API to solve int/unit warnings
Before this change of_get_phy_mode() returned an enum,
phy_interface_t. On error, -ENODEV etc, is returned. If the result of
the function is stored in a variable of type phy_interface_t, and the
compiler has decided to represent this as an unsigned int, comparision
with -ENODEV etc, is a signed vs unsigned comparision.
Fix this problem by changing the API. Make the function return an
error, or 0 on success, and pass a pointer, of type phy_interface_t,
where the phy mode should be stored.
v2:
Return with *interface set to PHY_INTERFACE_MODE_NA on error.
Add error checks to all users of of_get_phy_mode()
Fixup a few reverse christmas tree errors
Fixup a few slightly malformed reverse christmas trees
v3:
Fix 0-day reported errors.
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-04 09:40:33 +08:00
|
|
|
err = of_get_phy_mode(port_dn, &mode);
|
|
|
|
if (err)
|
2019-05-29 01:38:16 +08:00
|
|
|
mode = PHY_INTERFACE_MODE_NA;
|
|
|
|
|
|
|
|
dp->pl_config.dev = ds->dev;
|
|
|
|
dp->pl_config.type = PHYLINK_DEV;
|
2020-01-06 09:34:12 +08:00
|
|
|
dp->pl_config.pcs_poll = ds->pcs_poll;
|
2019-05-29 01:38:16 +08:00
|
|
|
|
|
|
|
dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn),
|
|
|
|
mode, &dsa_port_phylink_mac_ops);
|
|
|
|
if (IS_ERR(dp->pl)) {
|
|
|
|
pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
|
|
|
|
return PTR_ERR(dp->pl);
|
|
|
|
}
|
|
|
|
|
|
|
|
err = phylink_of_phy_connect(dp->pl, port_dn, 0);
|
2019-06-11 03:31:49 +08:00
|
|
|
if (err && err != -ENODEV) {
|
2019-05-29 01:38:16 +08:00
|
|
|
pr_err("could not attach to PHY: %d\n", err);
|
|
|
|
goto err_phy_connect;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_phy_connect:
|
|
|
|
phylink_destroy(dp->pl);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
int dsa_port_link_register_of(struct dsa_port *dp)
|
2017-10-26 22:50:07 +08:00
|
|
|
{
|
2019-05-29 01:38:16 +08:00
|
|
|
struct dsa_switch *ds = dp->ds;
|
2020-03-11 23:24:24 +08:00
|
|
|
struct device_node *phy_np;
|
2020-04-14 08:34:39 +08:00
|
|
|
int port = dp->index;
|
2019-05-29 01:38:16 +08:00
|
|
|
|
2020-03-11 23:24:24 +08:00
|
|
|
if (!ds->ops->adjust_link) {
|
|
|
|
phy_np = of_parse_phandle(dp->dn, "phy-handle", 0);
|
2020-04-14 08:34:39 +08:00
|
|
|
if (of_phy_is_fixed_link(dp->dn) || phy_np) {
|
|
|
|
if (ds->ops->phylink_mac_link_down)
|
|
|
|
ds->ops->phylink_mac_link_down(ds, port,
|
|
|
|
MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
|
2020-03-11 23:24:24 +08:00
|
|
|
return dsa_port_phylink_register(dp);
|
2020-04-14 08:34:39 +08:00
|
|
|
}
|
2020-03-11 23:24:24 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2019-05-29 01:38:16 +08:00
|
|
|
|
|
|
|
dev_warn(ds->dev,
|
|
|
|
"Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
|
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
if (of_phy_is_fixed_link(dp->dn))
|
|
|
|
return dsa_port_fixed_link_register_of(dp);
|
|
|
|
else
|
|
|
|
return dsa_port_setup_phy_of(dp, true);
|
|
|
|
}
|
2017-10-26 22:50:07 +08:00
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
void dsa_port_link_unregister_of(struct dsa_port *dp)
|
|
|
|
{
|
2019-05-29 01:38:16 +08:00
|
|
|
struct dsa_switch *ds = dp->ds;
|
|
|
|
|
2020-03-11 23:24:24 +08:00
|
|
|
if (!ds->ops->adjust_link && dp->pl) {
|
2019-05-29 01:38:16 +08:00
|
|
|
rtnl_lock();
|
|
|
|
phylink_disconnect_phy(dp->pl);
|
|
|
|
rtnl_unlock();
|
|
|
|
phylink_destroy(dp->pl);
|
2020-03-11 23:24:24 +08:00
|
|
|
dp->pl = NULL;
|
2019-05-29 01:38:16 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-01-23 23:03:46 +08:00
|
|
|
if (of_phy_is_fixed_link(dp->dn))
|
|
|
|
of_phy_deregister_fixed_link(dp->dn);
|
|
|
|
else
|
|
|
|
dsa_port_setup_phy_of(dp, false);
|
2017-10-26 22:50:07 +08:00
|
|
|
}
|
2018-04-26 03:12:52 +08:00
|
|
|
|
|
|
|
int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data)
|
|
|
|
{
|
|
|
|
struct phy_device *phydev;
|
|
|
|
int ret = -EOPNOTSUPP;
|
|
|
|
|
|
|
|
if (of_phy_is_fixed_link(dp->dn))
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
phydev = dsa_port_get_phy_device(dp);
|
|
|
|
if (IS_ERR_OR_NULL(phydev))
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ret = phy_ethtool_get_strings(phydev, data);
|
|
|
|
put_device(&phydev->mdio.dev);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(dsa_port_get_phy_strings);
|
|
|
|
|
|
|
|
int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data)
|
|
|
|
{
|
|
|
|
struct phy_device *phydev;
|
|
|
|
int ret = -EOPNOTSUPP;
|
|
|
|
|
|
|
|
if (of_phy_is_fixed_link(dp->dn))
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
phydev = dsa_port_get_phy_device(dp);
|
|
|
|
if (IS_ERR_OR_NULL(phydev))
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ret = phy_ethtool_get_stats(phydev, NULL, data);
|
|
|
|
put_device(&phydev->mdio.dev);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(dsa_port_get_ethtool_phy_stats);
|
|
|
|
|
|
|
|
int dsa_port_get_phy_sset_count(struct dsa_port *dp)
|
|
|
|
{
|
|
|
|
struct phy_device *phydev;
|
|
|
|
int ret = -EOPNOTSUPP;
|
|
|
|
|
|
|
|
if (of_phy_is_fixed_link(dp->dn))
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
phydev = dsa_port_get_phy_device(dp);
|
|
|
|
if (IS_ERR_OR_NULL(phydev))
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ret = phy_ethtool_get_sset_count(phydev);
|
|
|
|
put_device(&phydev->mdio.dev);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
|
2021-02-10 09:02:12 +08:00
|
|
|
|
|
|
|
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_hsr_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.hsr = hsr,
|
|
|
|
};
|
|
|
|
int err;
|
|
|
|
|
|
|
|
dp->hsr_dev = hsr;
|
|
|
|
|
|
|
|
err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_JOIN, &info);
|
|
|
|
if (err)
|
|
|
|
dp->hsr_dev = NULL;
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
|
|
|
|
{
|
|
|
|
struct dsa_notifier_hsr_info info = {
|
|
|
|
.sw_index = dp->ds->index,
|
|
|
|
.port = dp->index,
|
|
|
|
.hsr = hsr,
|
|
|
|
};
|
|
|
|
int err;
|
|
|
|
|
|
|
|
dp->hsr_dev = NULL;
|
|
|
|
|
|
|
|
err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info);
|
|
|
|
if (err)
|
|
|
|
pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n");
|
|
|
|
}
|