OpenCloudOS-Kernel/drivers/net/ethernet/mscc/ocelot.c

1635 lines
44 KiB
C
Raw Normal View History

// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/*
* Microsemi Ocelot Switch driver
*
* Copyright (c) 2017 Microsemi Corporation
*/
#include <linux/if_bridge.h>
#include <soc/mscc/ocelot_vcap.h>
#include "ocelot.h"
#include "ocelot_vcap.h"
#define TABLE_UPDATE_SLEEP_US 10
#define TABLE_UPDATE_TIMEOUT_US 100000
struct ocelot_mact_entry {
u8 mac[ETH_ALEN];
u16 vid;
enum macaccess_entry_type type;
};
static inline u32 ocelot_mact_read_macaccess(struct ocelot *ocelot)
{
return ocelot_read(ocelot, ANA_TABLES_MACACCESS);
}
static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
{
u32 val;
return readx_poll_timeout(ocelot_mact_read_macaccess,
ocelot, val,
(val & ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M) ==
MACACCESS_CMD_IDLE,
TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
}
static void ocelot_mact_select(struct ocelot *ocelot,
const unsigned char mac[ETH_ALEN],
unsigned int vid)
{
u32 macl = 0, mach = 0;
/* Set the MAC address to handle and the vlan associated in a format
* understood by the hardware.
*/
mach |= vid << 16;
mach |= mac[0] << 8;
mach |= mac[1] << 0;
macl |= mac[2] << 24;
macl |= mac[3] << 16;
macl |= mac[4] << 8;
macl |= mac[5] << 0;
ocelot_write(ocelot, macl, ANA_TABLES_MACLDATA);
ocelot_write(ocelot, mach, ANA_TABLES_MACHDATA);
}
int ocelot_mact_learn(struct ocelot *ocelot, int port,
const unsigned char mac[ETH_ALEN],
unsigned int vid, enum macaccess_entry_type type)
{
ocelot_mact_select(ocelot, mac, vid);
/* Issue a write command */
ocelot_write(ocelot, ANA_TABLES_MACACCESS_VALID |
ANA_TABLES_MACACCESS_DEST_IDX(port) |
ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN),
ANA_TABLES_MACACCESS);
return ocelot_mact_wait_for_completion(ocelot);
}
EXPORT_SYMBOL(ocelot_mact_learn);
int ocelot_mact_forget(struct ocelot *ocelot,
const unsigned char mac[ETH_ALEN], unsigned int vid)
{
ocelot_mact_select(ocelot, mac, vid);
/* Issue a forget command */
ocelot_write(ocelot,
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_FORGET),
ANA_TABLES_MACACCESS);
return ocelot_mact_wait_for_completion(ocelot);
}
EXPORT_SYMBOL(ocelot_mact_forget);
static void ocelot_mact_init(struct ocelot *ocelot)
{
/* Configure the learning mode entries attributes:
* - Do not copy the frame to the CPU extraction queues.
* - Use the vlan and mac_cpoy for dmac lookup.
*/
ocelot_rmw(ocelot, 0,
ANA_AGENCTRL_LEARN_CPU_COPY | ANA_AGENCTRL_IGNORE_DMAC_FLAGS
| ANA_AGENCTRL_LEARN_FWD_KILL
| ANA_AGENCTRL_LEARN_IGNORE_VLAN,
ANA_AGENCTRL);
/* Clear the MAC table */
ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS);
}
static void ocelot_vcap_enable(struct ocelot *ocelot, int port)
{
ocelot_write_gix(ocelot, ANA_PORT_VCAP_S2_CFG_S2_ENA |
ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(0xa),
ANA_PORT_VCAP_S2_CFG, port);
ocelot_write_gix(ocelot, ANA_PORT_VCAP_CFG_S1_ENA,
ANA_PORT_VCAP_CFG, port);
ocelot_rmw_gix(ocelot, REW_PORT_CFG_ES0_EN,
REW_PORT_CFG_ES0_EN,
REW_PORT_CFG, port);
}
static inline u32 ocelot_vlant_read_vlanaccess(struct ocelot *ocelot)
{
return ocelot_read(ocelot, ANA_TABLES_VLANACCESS);
}
static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot)
{
u32 val;
return readx_poll_timeout(ocelot_vlant_read_vlanaccess,
ocelot,
val,
(val & ANA_TABLES_VLANACCESS_VLAN_TBL_CMD_M) ==
ANA_TABLES_VLANACCESS_CMD_IDLE,
TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
}
static int ocelot_vlant_set_mask(struct ocelot *ocelot, u16 vid, u32 mask)
{
/* Select the VID to configure */
ocelot_write(ocelot, ANA_TABLES_VLANTIDX_V_INDEX(vid),
ANA_TABLES_VLANTIDX);
/* Set the vlan port members mask and issue a write command */
ocelot_write(ocelot, ANA_TABLES_VLANACCESS_VLAN_PORT_MASK(mask) |
ANA_TABLES_VLANACCESS_CMD_WRITE,
ANA_TABLES_VLANACCESS);
return ocelot_vlant_wait_for_completion(ocelot);
}
static void ocelot_port_set_native_vlan(struct ocelot *ocelot, int port,
struct ocelot_vlan native_vlan)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
u32 val = 0;
ocelot_port->native_vlan = native_vlan;
ocelot_rmw_gix(ocelot, REW_PORT_VLAN_CFG_PORT_VID(native_vlan.vid),
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
REW_PORT_VLAN_CFG_PORT_VID_M,
REW_PORT_VLAN_CFG, port);
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
if (ocelot_port->vlan_aware) {
if (native_vlan.valid)
/* Tag all frames except when VID == DEFAULT_VLAN */
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
val = REW_TAG_CFG_TAG_CFG(1);
else
/* Tag all frames */
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
val = REW_TAG_CFG_TAG_CFG(3);
} else {
/* Port tagging disabled. */
val = REW_TAG_CFG_TAG_CFG(0);
}
ocelot_rmw_gix(ocelot, val,
REW_TAG_CFG_TAG_CFG_M,
REW_TAG_CFG, port);
}
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0 Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and inherit the pvid from the bridge when one is present. When the bridge has vlan_filtering=0, the software semantics are that packets should be received regardless of whether there's a pvid configured on the ingress port or not. However, ocelot does not observe those semantics today. Moreover, changing the PVID is also a problem with vlan_filtering=0. We are privately remapping the VID of FDB, MDB entries to the port's PVID when those are VLAN-unaware (i.e. when the VID of these entries comes to us as 0). But we have no logic of adjusting that remapping when the user changes the pvid and vlan_filtering is 0. So stale entries would be left behind, and untagged traffic will stop matching on them. And even if we were to solve that, there's an even bigger problem. If swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0 bridge, they should be able to forward traffic between one another. However, with ocelot they wouldn't do that. The simplest way of fixing this is to never configure the pvid based on what the bridge is asking for, when vlan_filtering is 0. Only if there was a VLAN that the bridge couldn't mangle, that we could use as pvid.... So, turns out, there's 0 just for that. And for a reason: IEEE 802.1Q-2018, page 247, Table 9-2-Reserved VID values says: The null VID. Indicates that the tag header contains only priority information; no VID is present in the frame. This VID value shall not be configured as a PVID or a member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of a VID Set, or configured in any FDB entry, or used in any Management operation. So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but what we're doing here is just driver-level bookkeeping, all for the better. The fact that we're using a pvid of 0 is not observable behavior from the outside world: the network stack does not see the classified VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more consistent with the standalone mode now. And now that we use the pvid of 0 in this mode, there's another advantage: we don't need to perform any VID remapping for FDB and MDB entries either, we can just use the VID of 0 that the bridge is passing to us. The only gotcha is that every time we change the vlan_filtering setting, we need to reapply the pvid (either to 0, or to the value from the bridge). A small side-effect visible in the patch is that ocelot_port_set_pvid needs to be moved above ocelot_port_vlan_filtering, so that it can be called from there without forward-declarations. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
/* Default vlan to clasify for untagged frames (may be zero) */
static void ocelot_port_set_pvid(struct ocelot *ocelot, int port,
struct ocelot_vlan pvid_vlan)
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0 Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and inherit the pvid from the bridge when one is present. When the bridge has vlan_filtering=0, the software semantics are that packets should be received regardless of whether there's a pvid configured on the ingress port or not. However, ocelot does not observe those semantics today. Moreover, changing the PVID is also a problem with vlan_filtering=0. We are privately remapping the VID of FDB, MDB entries to the port's PVID when those are VLAN-unaware (i.e. when the VID of these entries comes to us as 0). But we have no logic of adjusting that remapping when the user changes the pvid and vlan_filtering is 0. So stale entries would be left behind, and untagged traffic will stop matching on them. And even if we were to solve that, there's an even bigger problem. If swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0 bridge, they should be able to forward traffic between one another. However, with ocelot they wouldn't do that. The simplest way of fixing this is to never configure the pvid based on what the bridge is asking for, when vlan_filtering is 0. Only if there was a VLAN that the bridge couldn't mangle, that we could use as pvid.... So, turns out, there's 0 just for that. And for a reason: IEEE 802.1Q-2018, page 247, Table 9-2-Reserved VID values says: The null VID. Indicates that the tag header contains only priority information; no VID is present in the frame. This VID value shall not be configured as a PVID or a member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of a VID Set, or configured in any FDB entry, or used in any Management operation. So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but what we're doing here is just driver-level bookkeeping, all for the better. The fact that we're using a pvid of 0 is not observable behavior from the outside world: the network stack does not see the classified VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more consistent with the standalone mode now. And now that we use the pvid of 0 in this mode, there's another advantage: we don't need to perform any VID remapping for FDB and MDB entries either, we can just use the VID of 0 that the bridge is passing to us. The only gotcha is that every time we change the vlan_filtering setting, we need to reapply the pvid (either to 0, or to the value from the bridge). A small side-effect visible in the patch is that ocelot_port_set_pvid needs to be moved above ocelot_port_vlan_filtering, so that it can be called from there without forward-declarations. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
net: mscc: ocelot: move the logic to drop 802.1p traffic to the pvid deletion Currently, the ocelot_port_set_native_vlan() function starts dropping untagged and prio-tagged traffic when the native VLAN is removed? What is the native VLAN? It is the only egress-untagged VLAN that ocelot supports on a port. If the port is a trunk with 100 VLANs, one of those VLANs can be transmitted as egress-untagged, and that's the native VLAN. Is it wrong to drop untagged and prio-tagged traffic if there's no native VLAN? Yes and no. In this case, which is more typical, it's ok to apply that drop configuration: $ bridge vlan add dev swp0 vid 1 pvid untagged <- this is the native VLAN $ bridge vlan add dev swp0 vid 100 $ bridge vlan add dev swp0 vid 101 $ bridge vlan del dev swp0 vid 1 <- delete the native VLAN But only because the pvid and the native VLAN have the same ID. In this case, it isn't: $ bridge vlan add dev swp0 vid 1 pvid $ bridge vlan add dev swp0 vid 100 untagged <- this is the native VLAN $ bridge vlan del dev swp0 vid 101 $ bridge vlan del dev swp0 vid 100 <- delete the native VLAN It's wrong, because the switch will drop untagged and prio-tagged traffic now, despite having a valid pvid of 1. The confusion seems to stem from the fact that the native VLAN is an egress setting, while the PVID is an ingress setting. It would be correct to drop untagged and prio-tagged traffic only if there was no pvid on the port. So let's do just that. Background: https://lore.kernel.org/netdev/CA+h21hrRMrLH-RjBGhEJSTZd6_QPRSd3RkVRQF-wNKkrgKcRSA@mail.gmail.com/#t Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:14 +08:00
u32 val = 0;
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0 Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and inherit the pvid from the bridge when one is present. When the bridge has vlan_filtering=0, the software semantics are that packets should be received regardless of whether there's a pvid configured on the ingress port or not. However, ocelot does not observe those semantics today. Moreover, changing the PVID is also a problem with vlan_filtering=0. We are privately remapping the VID of FDB, MDB entries to the port's PVID when those are VLAN-unaware (i.e. when the VID of these entries comes to us as 0). But we have no logic of adjusting that remapping when the user changes the pvid and vlan_filtering is 0. So stale entries would be left behind, and untagged traffic will stop matching on them. And even if we were to solve that, there's an even bigger problem. If swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0 bridge, they should be able to forward traffic between one another. However, with ocelot they wouldn't do that. The simplest way of fixing this is to never configure the pvid based on what the bridge is asking for, when vlan_filtering is 0. Only if there was a VLAN that the bridge couldn't mangle, that we could use as pvid.... So, turns out, there's 0 just for that. And for a reason: IEEE 802.1Q-2018, page 247, Table 9-2-Reserved VID values says: The null VID. Indicates that the tag header contains only priority information; no VID is present in the frame. This VID value shall not be configured as a PVID or a member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of a VID Set, or configured in any FDB entry, or used in any Management operation. So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but what we're doing here is just driver-level bookkeeping, all for the better. The fact that we're using a pvid of 0 is not observable behavior from the outside world: the network stack does not see the classified VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more consistent with the standalone mode now. And now that we use the pvid of 0 in this mode, there's another advantage: we don't need to perform any VID remapping for FDB and MDB entries either, we can just use the VID of 0 that the bridge is passing to us. The only gotcha is that every time we change the vlan_filtering setting, we need to reapply the pvid (either to 0, or to the value from the bridge). A small side-effect visible in the patch is that ocelot_port_set_pvid needs to be moved above ocelot_port_vlan_filtering, so that it can be called from there without forward-declarations. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
ocelot_port->pvid_vlan = pvid_vlan;
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0 Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and inherit the pvid from the bridge when one is present. When the bridge has vlan_filtering=0, the software semantics are that packets should be received regardless of whether there's a pvid configured on the ingress port or not. However, ocelot does not observe those semantics today. Moreover, changing the PVID is also a problem with vlan_filtering=0. We are privately remapping the VID of FDB, MDB entries to the port's PVID when those are VLAN-unaware (i.e. when the VID of these entries comes to us as 0). But we have no logic of adjusting that remapping when the user changes the pvid and vlan_filtering is 0. So stale entries would be left behind, and untagged traffic will stop matching on them. And even if we were to solve that, there's an even bigger problem. If swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0 bridge, they should be able to forward traffic between one another. However, with ocelot they wouldn't do that. The simplest way of fixing this is to never configure the pvid based on what the bridge is asking for, when vlan_filtering is 0. Only if there was a VLAN that the bridge couldn't mangle, that we could use as pvid.... So, turns out, there's 0 just for that. And for a reason: IEEE 802.1Q-2018, page 247, Table 9-2-Reserved VID values says: The null VID. Indicates that the tag header contains only priority information; no VID is present in the frame. This VID value shall not be configured as a PVID or a member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of a VID Set, or configured in any FDB entry, or used in any Management operation. So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but what we're doing here is just driver-level bookkeeping, all for the better. The fact that we're using a pvid of 0 is not observable behavior from the outside world: the network stack does not see the classified VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more consistent with the standalone mode now. And now that we use the pvid of 0 in this mode, there's another advantage: we don't need to perform any VID remapping for FDB and MDB entries either, we can just use the VID of 0 that the bridge is passing to us. The only gotcha is that every time we change the vlan_filtering setting, we need to reapply the pvid (either to 0, or to the value from the bridge). A small side-effect visible in the patch is that ocelot_port_set_pvid needs to be moved above ocelot_port_vlan_filtering, so that it can be called from there without forward-declarations. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
if (!ocelot_port->vlan_aware)
pvid_vlan.vid = 0;
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0 Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and inherit the pvid from the bridge when one is present. When the bridge has vlan_filtering=0, the software semantics are that packets should be received regardless of whether there's a pvid configured on the ingress port or not. However, ocelot does not observe those semantics today. Moreover, changing the PVID is also a problem with vlan_filtering=0. We are privately remapping the VID of FDB, MDB entries to the port's PVID when those are VLAN-unaware (i.e. when the VID of these entries comes to us as 0). But we have no logic of adjusting that remapping when the user changes the pvid and vlan_filtering is 0. So stale entries would be left behind, and untagged traffic will stop matching on them. And even if we were to solve that, there's an even bigger problem. If swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0 bridge, they should be able to forward traffic between one another. However, with ocelot they wouldn't do that. The simplest way of fixing this is to never configure the pvid based on what the bridge is asking for, when vlan_filtering is 0. Only if there was a VLAN that the bridge couldn't mangle, that we could use as pvid.... So, turns out, there's 0 just for that. And for a reason: IEEE 802.1Q-2018, page 247, Table 9-2-Reserved VID values says: The null VID. Indicates that the tag header contains only priority information; no VID is present in the frame. This VID value shall not be configured as a PVID or a member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of a VID Set, or configured in any FDB entry, or used in any Management operation. So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but what we're doing here is just driver-level bookkeeping, all for the better. The fact that we're using a pvid of 0 is not observable behavior from the outside world: the network stack does not see the classified VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more consistent with the standalone mode now. And now that we use the pvid of 0 in this mode, there's another advantage: we don't need to perform any VID remapping for FDB and MDB entries either, we can just use the VID of 0 that the bridge is passing to us. The only gotcha is that every time we change the vlan_filtering setting, we need to reapply the pvid (either to 0, or to the value from the bridge). A small side-effect visible in the patch is that ocelot_port_set_pvid needs to be moved above ocelot_port_vlan_filtering, so that it can be called from there without forward-declarations. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
ocelot_rmw_gix(ocelot,
ANA_PORT_VLAN_CFG_VLAN_VID(pvid_vlan.vid),
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0 Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and inherit the pvid from the bridge when one is present. When the bridge has vlan_filtering=0, the software semantics are that packets should be received regardless of whether there's a pvid configured on the ingress port or not. However, ocelot does not observe those semantics today. Moreover, changing the PVID is also a problem with vlan_filtering=0. We are privately remapping the VID of FDB, MDB entries to the port's PVID when those are VLAN-unaware (i.e. when the VID of these entries comes to us as 0). But we have no logic of adjusting that remapping when the user changes the pvid and vlan_filtering is 0. So stale entries would be left behind, and untagged traffic will stop matching on them. And even if we were to solve that, there's an even bigger problem. If swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0 bridge, they should be able to forward traffic between one another. However, with ocelot they wouldn't do that. The simplest way of fixing this is to never configure the pvid based on what the bridge is asking for, when vlan_filtering is 0. Only if there was a VLAN that the bridge couldn't mangle, that we could use as pvid.... So, turns out, there's 0 just for that. And for a reason: IEEE 802.1Q-2018, page 247, Table 9-2-Reserved VID values says: The null VID. Indicates that the tag header contains only priority information; no VID is present in the frame. This VID value shall not be configured as a PVID or a member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of a VID Set, or configured in any FDB entry, or used in any Management operation. So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but what we're doing here is just driver-level bookkeeping, all for the better. The fact that we're using a pvid of 0 is not observable behavior from the outside world: the network stack does not see the classified VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more consistent with the standalone mode now. And now that we use the pvid of 0 in this mode, there's another advantage: we don't need to perform any VID remapping for FDB and MDB entries either, we can just use the VID of 0 that the bridge is passing to us. The only gotcha is that every time we change the vlan_filtering setting, we need to reapply the pvid (either to 0, or to the value from the bridge). A small side-effect visible in the patch is that ocelot_port_set_pvid needs to be moved above ocelot_port_vlan_filtering, so that it can be called from there without forward-declarations. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
ANA_PORT_VLAN_CFG_VLAN_VID_M,
ANA_PORT_VLAN_CFG, port);
net: mscc: ocelot: move the logic to drop 802.1p traffic to the pvid deletion Currently, the ocelot_port_set_native_vlan() function starts dropping untagged and prio-tagged traffic when the native VLAN is removed? What is the native VLAN? It is the only egress-untagged VLAN that ocelot supports on a port. If the port is a trunk with 100 VLANs, one of those VLANs can be transmitted as egress-untagged, and that's the native VLAN. Is it wrong to drop untagged and prio-tagged traffic if there's no native VLAN? Yes and no. In this case, which is more typical, it's ok to apply that drop configuration: $ bridge vlan add dev swp0 vid 1 pvid untagged <- this is the native VLAN $ bridge vlan add dev swp0 vid 100 $ bridge vlan add dev swp0 vid 101 $ bridge vlan del dev swp0 vid 1 <- delete the native VLAN But only because the pvid and the native VLAN have the same ID. In this case, it isn't: $ bridge vlan add dev swp0 vid 1 pvid $ bridge vlan add dev swp0 vid 100 untagged <- this is the native VLAN $ bridge vlan del dev swp0 vid 101 $ bridge vlan del dev swp0 vid 100 <- delete the native VLAN It's wrong, because the switch will drop untagged and prio-tagged traffic now, despite having a valid pvid of 1. The confusion seems to stem from the fact that the native VLAN is an egress setting, while the PVID is an ingress setting. It would be correct to drop untagged and prio-tagged traffic only if there was no pvid on the port. So let's do just that. Background: https://lore.kernel.org/netdev/CA+h21hrRMrLH-RjBGhEJSTZd6_QPRSd3RkVRQF-wNKkrgKcRSA@mail.gmail.com/#t Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:14 +08:00
/* If there's no pvid, we should drop not only untagged traffic (which
* happens automatically), but also 802.1p traffic which gets
* classified to VLAN 0, but that is always in our RX filter, so it
* would get accepted were it not for this setting.
*/
if (!pvid_vlan.valid && ocelot_port->vlan_aware)
val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA |
ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA;
ocelot_rmw_gix(ocelot, val,
ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA |
ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA,
ANA_PORT_DROP_CFG, port);
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0 Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and inherit the pvid from the bridge when one is present. When the bridge has vlan_filtering=0, the software semantics are that packets should be received regardless of whether there's a pvid configured on the ingress port or not. However, ocelot does not observe those semantics today. Moreover, changing the PVID is also a problem with vlan_filtering=0. We are privately remapping the VID of FDB, MDB entries to the port's PVID when those are VLAN-unaware (i.e. when the VID of these entries comes to us as 0). But we have no logic of adjusting that remapping when the user changes the pvid and vlan_filtering is 0. So stale entries would be left behind, and untagged traffic will stop matching on them. And even if we were to solve that, there's an even bigger problem. If swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0 bridge, they should be able to forward traffic between one another. However, with ocelot they wouldn't do that. The simplest way of fixing this is to never configure the pvid based on what the bridge is asking for, when vlan_filtering is 0. Only if there was a VLAN that the bridge couldn't mangle, that we could use as pvid.... So, turns out, there's 0 just for that. And for a reason: IEEE 802.1Q-2018, page 247, Table 9-2-Reserved VID values says: The null VID. Indicates that the tag header contains only priority information; no VID is present in the frame. This VID value shall not be configured as a PVID or a member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ of a VID Set, or configured in any FDB entry, or used in any Management operation. So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but what we're doing here is just driver-level bookkeeping, all for the better. The fact that we're using a pvid of 0 is not observable behavior from the outside world: the network stack does not see the classified VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more consistent with the standalone mode now. And now that we use the pvid of 0 in this mode, there's another advantage: we don't need to perform any VID remapping for FDB and MDB entries either, we can just use the VID of 0 that the bridge is passing to us. The only gotcha is that every time we change the vlan_filtering setting, we need to reapply the pvid (either to 0, or to the value from the bridge). A small side-effect visible in the patch is that ocelot_port_set_pvid needs to be moved above ocelot_port_vlan_filtering, so that it can be called from there without forward-declarations. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
}
net: dsa: propagate switchdev vlan_filtering prepare phase to drivers A driver may refuse to enable VLAN filtering for any reason beyond what the DSA framework cares about, such as: - having tc-flower rules that rely on the switch being VLAN-aware - the particular switch does not support VLAN, even if the driver does (the DSA framework just checks for the presence of the .port_vlan_add and .port_vlan_del pointers) - simply not supporting this configuration to be toggled at runtime Currently, when a driver rejects a configuration it cannot support, it does this from the commit phase, which triggers various warnings in switchdev. So propagate the prepare phase to drivers, to give them the ability to refuse invalid configurations cleanly and avoid the warnings. Since we need to modify all function prototypes and check for the prepare phase from within the drivers, take that opportunity and move the existing driver restrictions within the prepare phase where that is possible and easy. Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com> Cc: Hauke Mehrtens <hauke@hauke-m.de> Cc: Woojung Huh <woojung.huh@microchip.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Sean Wang <sean.wang@mediatek.com> Cc: Landen Chao <Landen.Chao@mediatek.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Vivien Didelot <vivien.didelot@gmail.com> Cc: Jonathan McDowell <noodles@earth.li> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Claudiu Manoil <claudiu.manoil@nxp.com> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-03 06:06:46 +08:00
int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
bool vlan_aware, struct switchdev_trans *trans)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
u32 val;
net: mscc: ocelot: offload VLAN mangle action to VCAP IS1 The VCAP_IS1_ACT_VID_REPLACE_ENA action, from the VCAP IS1 ingress TCAM, changes the classified VLAN. We are only exposing this ability for switch ports that are under VLAN aware bridges. This is because in standalone ports mode and under a bridge with vlan_filtering=0, the ocelot driver configures the switch to operate as VLAN-unaware, so the classified VLAN is not derived from the 802.1Q header from the packet, but instead is always equal to the port-based VLAN ID of the ingress port. We _can_ still change the classified VLAN for packets when operating in this mode, but the end result will most likely be a drop, since both the ingress and the egress port need to be members of the modified VLAN. And even if we install the new classified VLAN into the VLAN table of the switch, the result would still not be as expected: we wouldn't see, on the output port, the modified VLAN tag, but the original one, even though the classified VLAN was indeed modified. This is because of how the hardware works: on egress, what is pushed to the frame is a "port tag", which gives us the following options: - Tag all frames with port tag (derived from the classified VLAN) - Tag all frames with port tag, except if the classified VLAN is 0 or equal to the native VLAN of the egress port - No port tag Needless to say, in VLAN-unaware mode we are disabling the port tag. Otherwise, the existing VLAN tag would be ignored, and a second VLAN tag (the port tag), holding the classified VLAN, would be pushed (instead of replacing the existing 802.1Q tag). This is definitely not what the user wanted when installing a "vlan modify" action. So it is simply not worth bothering with VLAN modify rules under other configurations except when the ports are fully VLAN-aware. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-08 19:56:58 +08:00
if (switchdev_trans_ph_prepare(trans)) {
struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
struct ocelot_vcap_filter *filter;
list_for_each_entry(filter, &block->rules, list) {
if (filter->ingress_port_mask & BIT(port) &&
filter->action.vid_replace_ena) {
dev_err(ocelot->dev,
"Cannot change VLAN state with vlan modify rules active\n");
return -EBUSY;
}
}
net: dsa: propagate switchdev vlan_filtering prepare phase to drivers A driver may refuse to enable VLAN filtering for any reason beyond what the DSA framework cares about, such as: - having tc-flower rules that rely on the switch being VLAN-aware - the particular switch does not support VLAN, even if the driver does (the DSA framework just checks for the presence of the .port_vlan_add and .port_vlan_del pointers) - simply not supporting this configuration to be toggled at runtime Currently, when a driver rejects a configuration it cannot support, it does this from the commit phase, which triggers various warnings in switchdev. So propagate the prepare phase to drivers, to give them the ability to refuse invalid configurations cleanly and avoid the warnings. Since we need to modify all function prototypes and check for the prepare phase from within the drivers, take that opportunity and move the existing driver restrictions within the prepare phase where that is possible and easy. Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com> Cc: Hauke Mehrtens <hauke@hauke-m.de> Cc: Woojung Huh <woojung.huh@microchip.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Sean Wang <sean.wang@mediatek.com> Cc: Landen Chao <Landen.Chao@mediatek.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Vivien Didelot <vivien.didelot@gmail.com> Cc: Jonathan McDowell <noodles@earth.li> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Claudiu Manoil <claudiu.manoil@nxp.com> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-03 06:06:46 +08:00
return 0;
net: mscc: ocelot: offload VLAN mangle action to VCAP IS1 The VCAP_IS1_ACT_VID_REPLACE_ENA action, from the VCAP IS1 ingress TCAM, changes the classified VLAN. We are only exposing this ability for switch ports that are under VLAN aware bridges. This is because in standalone ports mode and under a bridge with vlan_filtering=0, the ocelot driver configures the switch to operate as VLAN-unaware, so the classified VLAN is not derived from the 802.1Q header from the packet, but instead is always equal to the port-based VLAN ID of the ingress port. We _can_ still change the classified VLAN for packets when operating in this mode, but the end result will most likely be a drop, since both the ingress and the egress port need to be members of the modified VLAN. And even if we install the new classified VLAN into the VLAN table of the switch, the result would still not be as expected: we wouldn't see, on the output port, the modified VLAN tag, but the original one, even though the classified VLAN was indeed modified. This is because of how the hardware works: on egress, what is pushed to the frame is a "port tag", which gives us the following options: - Tag all frames with port tag (derived from the classified VLAN) - Tag all frames with port tag, except if the classified VLAN is 0 or equal to the native VLAN of the egress port - No port tag Needless to say, in VLAN-unaware mode we are disabling the port tag. Otherwise, the existing VLAN tag would be ignored, and a second VLAN tag (the port tag), holding the classified VLAN, would be pushed (instead of replacing the existing 802.1Q tag). This is definitely not what the user wanted when installing a "vlan modify" action. So it is simply not worth bothering with VLAN modify rules under other configurations except when the ports are fully VLAN-aware. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-08 19:56:58 +08:00
}
net: dsa: propagate switchdev vlan_filtering prepare phase to drivers A driver may refuse to enable VLAN filtering for any reason beyond what the DSA framework cares about, such as: - having tc-flower rules that rely on the switch being VLAN-aware - the particular switch does not support VLAN, even if the driver does (the DSA framework just checks for the presence of the .port_vlan_add and .port_vlan_del pointers) - simply not supporting this configuration to be toggled at runtime Currently, when a driver rejects a configuration it cannot support, it does this from the commit phase, which triggers various warnings in switchdev. So propagate the prepare phase to drivers, to give them the ability to refuse invalid configurations cleanly and avoid the warnings. Since we need to modify all function prototypes and check for the prepare phase from within the drivers, take that opportunity and move the existing driver restrictions within the prepare phase where that is possible and easy. Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com> Cc: Hauke Mehrtens <hauke@hauke-m.de> Cc: Woojung Huh <woojung.huh@microchip.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Sean Wang <sean.wang@mediatek.com> Cc: Landen Chao <Landen.Chao@mediatek.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Vivien Didelot <vivien.didelot@gmail.com> Cc: Jonathan McDowell <noodles@earth.li> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Claudiu Manoil <claudiu.manoil@nxp.com> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-03 06:06:46 +08:00
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
ocelot_port->vlan_aware = vlan_aware;
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
if (vlan_aware)
val = ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1);
else
val = 0;
ocelot_rmw_gix(ocelot, val,
ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M,
ANA_PORT_VLAN_CFG, port);
ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan);
ocelot_port_set_native_vlan(ocelot, port, ocelot_port->native_vlan);
net: dsa: propagate switchdev vlan_filtering prepare phase to drivers A driver may refuse to enable VLAN filtering for any reason beyond what the DSA framework cares about, such as: - having tc-flower rules that rely on the switch being VLAN-aware - the particular switch does not support VLAN, even if the driver does (the DSA framework just checks for the presence of the .port_vlan_add and .port_vlan_del pointers) - simply not supporting this configuration to be toggled at runtime Currently, when a driver rejects a configuration it cannot support, it does this from the commit phase, which triggers various warnings in switchdev. So propagate the prepare phase to drivers, to give them the ability to refuse invalid configurations cleanly and avoid the warnings. Since we need to modify all function prototypes and check for the prepare phase from within the drivers, take that opportunity and move the existing driver restrictions within the prepare phase where that is possible and easy. Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com> Cc: Hauke Mehrtens <hauke@hauke-m.de> Cc: Woojung Huh <woojung.huh@microchip.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Sean Wang <sean.wang@mediatek.com> Cc: Landen Chao <Landen.Chao@mediatek.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Vivien Didelot <vivien.didelot@gmail.com> Cc: Jonathan McDowell <noodles@earth.li> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Claudiu Manoil <claudiu.manoil@nxp.com> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-03 06:06:46 +08:00
return 0;
}
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
EXPORT_SYMBOL(ocelot_port_vlan_filtering);
int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid,
bool untagged)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
/* Deny changing the native VLAN, but always permit deleting it */
if (untagged && ocelot_port->native_vlan.vid != vid &&
ocelot_port->native_vlan.valid) {
dev_err(ocelot->dev,
"Port already has a native VLAN: %d\n",
ocelot_port->native_vlan.vid);
return -EBUSY;
}
return 0;
}
EXPORT_SYMBOL(ocelot_vlan_prepare);
int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
bool untagged)
{
int ret;
/* Make the port a member of the VLAN */
ocelot->vlan_mask[vid] |= BIT(port);
ret = ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
if (ret)
return ret;
/* Default ingress vlan classification */
if (pvid) {
struct ocelot_vlan pvid_vlan;
pvid_vlan.vid = vid;
pvid_vlan.valid = true;
ocelot_port_set_pvid(ocelot, port, pvid_vlan);
}
/* Untagged egress vlan clasification */
if (untagged) {
struct ocelot_vlan native_vlan;
native_vlan.vid = vid;
native_vlan.valid = true;
ocelot_port_set_native_vlan(ocelot, port, native_vlan);
net: mscc: ocelot: refuse to overwrite the port's native vlan The switch driver keeps a "vid" variable per port, which signifies _the_ VLAN ID that is stripped on that port's egress (aka the native VLAN on a trunk port). That is the way the hardware is designed (mostly). The port->vid is programmed into REW:PORT:PORT_VLAN_CFG:PORT_VID and the rewriter is told to send all traffic as tagged except the one having port->vid. There exists a possibility of finer-grained egress untagging decisions: using the VCAP IS1 engine, one rule can be added to match every VLAN-tagged frame whose VLAN should be untagged, and set POP_CNT=1 as action. However, the IS1 can hold at most 512 entries, and the VLANs are in the order of 6 * 4096. So the code is fine for now. But this sequence of commands: $ bridge vlan add dev swp0 vid 1 pvid untagged $ bridge vlan add dev swp0 vid 2 untagged makes untagged and pvid-tagged traffic be sent out of swp0 as tagged with VID 1, despite user's request. Prevent that from happening. The user should temporarily remove the existing untagged VLAN (1 in this case), add it back as tagged, and then add the new untagged VLAN (2 in this case). Cc: Antoine Tenart <antoine.tenart@bootlin.com> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Fixes: 7142529f1688 ("net: mscc: ocelot: add VLAN filtering") Signed-off-by: Vladimir Oltean <olteanv@gmail.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-27 02:04:27 +08:00
}
return 0;
}
EXPORT_SYMBOL(ocelot_vlan_add);
int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
int ret;
/* Stop the port from being a member of the vlan */
ocelot->vlan_mask[vid] &= ~BIT(port);
ret = ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
if (ret)
return ret;
net: mscc: ocelot: move the logic to drop 802.1p traffic to the pvid deletion Currently, the ocelot_port_set_native_vlan() function starts dropping untagged and prio-tagged traffic when the native VLAN is removed? What is the native VLAN? It is the only egress-untagged VLAN that ocelot supports on a port. If the port is a trunk with 100 VLANs, one of those VLANs can be transmitted as egress-untagged, and that's the native VLAN. Is it wrong to drop untagged and prio-tagged traffic if there's no native VLAN? Yes and no. In this case, which is more typical, it's ok to apply that drop configuration: $ bridge vlan add dev swp0 vid 1 pvid untagged <- this is the native VLAN $ bridge vlan add dev swp0 vid 100 $ bridge vlan add dev swp0 vid 101 $ bridge vlan del dev swp0 vid 1 <- delete the native VLAN But only because the pvid and the native VLAN have the same ID. In this case, it isn't: $ bridge vlan add dev swp0 vid 1 pvid $ bridge vlan add dev swp0 vid 100 untagged <- this is the native VLAN $ bridge vlan del dev swp0 vid 101 $ bridge vlan del dev swp0 vid 100 <- delete the native VLAN It's wrong, because the switch will drop untagged and prio-tagged traffic now, despite having a valid pvid of 1. The confusion seems to stem from the fact that the native VLAN is an egress setting, while the PVID is an ingress setting. It would be correct to drop untagged and prio-tagged traffic only if there was no pvid on the port. So let's do just that. Background: https://lore.kernel.org/netdev/CA+h21hrRMrLH-RjBGhEJSTZd6_QPRSd3RkVRQF-wNKkrgKcRSA@mail.gmail.com/#t Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:14 +08:00
/* Ingress */
if (ocelot_port->pvid_vlan.vid == vid) {
struct ocelot_vlan pvid_vlan = {0};
ocelot_port_set_pvid(ocelot, port, pvid_vlan);
}
/* Egress */
if (ocelot_port->native_vlan.vid == vid) {
struct ocelot_vlan native_vlan = {0};
ocelot_port_set_native_vlan(ocelot, port, native_vlan);
}
return 0;
}
EXPORT_SYMBOL(ocelot_vlan_del);
static void ocelot_vlan_init(struct ocelot *ocelot)
{
u16 port, vid;
/* Clear VLAN table, by default all ports are members of all VLANs */
ocelot_write(ocelot, ANA_TABLES_VLANACCESS_CMD_INIT,
ANA_TABLES_VLANACCESS);
ocelot_vlant_wait_for_completion(ocelot);
/* Configure the port VLAN memberships */
for (vid = 1; vid < VLAN_N_VID; vid++) {
ocelot->vlan_mask[vid] = 0;
ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
}
/* Because VLAN filtering is enabled, we need VID 0 to get untagged
* traffic. It is added automatically if 8021q module is loaded, but
* we can't rely on it since module may be not loaded.
*/
ocelot->vlan_mask[0] = GENMASK(ocelot->num_phys_ports - 1, 0);
ocelot_vlant_set_mask(ocelot, 0, ocelot->vlan_mask[0]);
/* Set vlan ingress filter mask to all ports but the CPU port by
* default.
*/
ocelot_write(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
ANA_VLANMASK);
for (port = 0; port < ocelot->num_phys_ports; port++) {
ocelot_write_gix(ocelot, 0, REW_PORT_VLAN_CFG, port);
ocelot_write_gix(ocelot, 0, REW_TAG_CFG, port);
}
}
void ocelot_adjust_link(struct ocelot *ocelot, int port,
struct phy_device *phydev)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
int speed, mode = 0;
switch (phydev->speed) {
case SPEED_10:
speed = OCELOT_SPEED_10;
break;
case SPEED_100:
speed = OCELOT_SPEED_100;
break;
case SPEED_1000:
speed = OCELOT_SPEED_1000;
mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
break;
case SPEED_2500:
speed = OCELOT_SPEED_2500;
mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
break;
default:
dev_err(ocelot->dev, "Unsupported PHY speed on port %d: %d\n",
port, phydev->speed);
return;
}
phy_print_status(phydev);
if (!phydev->link)
return;
/* Only full duplex supported for now */
ocelot_port_writel(ocelot_port, DEV_MAC_MODE_CFG_FDX_ENA |
mode, DEV_MAC_MODE_CFG);
/* Disable HDX fast control */
ocelot_port_writel(ocelot_port, DEV_PORT_MISC_HDX_FAST_DIS,
DEV_PORT_MISC);
/* SGMII only for now */
ocelot_port_writel(ocelot_port, PCS1G_MODE_CFG_SGMII_MODE_ENA,
PCS1G_MODE_CFG);
ocelot_port_writel(ocelot_port, PCS1G_SD_CFG_SD_SEL, PCS1G_SD_CFG);
/* Enable PCS */
ocelot_port_writel(ocelot_port, PCS1G_CFG_PCS_ENA, PCS1G_CFG);
/* No aneg on SGMII */
ocelot_port_writel(ocelot_port, 0, PCS1G_ANEG_CFG);
/* No loopback */
ocelot_port_writel(ocelot_port, 0, PCS1G_LB_CFG);
/* Enable MAC module */
ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA |
DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
/* Take MAC, Port, Phy (intern) and PCS (SGMII/Serdes) clock out of
* reset */
ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(speed),
DEV_CLOCK_CFG);
/* No PFC */
ocelot_write_gix(ocelot, ANA_PFC_PFC_CFG_FC_LINK_SPEED(speed),
ANA_PFC_PFC_CFG, port);
/* Core: Enable port for frame transfer */
ocelot_fields_write(ocelot, port,
QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
/* Flow control */
ocelot_write_rix(ocelot, SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
SYS_MAC_FC_CFG_RX_FC_ENA | SYS_MAC_FC_CFG_TX_FC_ENA |
SYS_MAC_FC_CFG_ZERO_PAUSE_ENA |
SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
SYS_MAC_FC_CFG_FC_LINK_SPEED(speed),
SYS_MAC_FC_CFG, port);
ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
}
EXPORT_SYMBOL(ocelot_adjust_link);
void ocelot_port_enable(struct ocelot *ocelot, int port,
struct phy_device *phy)
{
/* Enable receiving frames on the port, and activate auto-learning of
* MAC addresses.
*/
ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
ANA_PORT_PORT_CFG_RECV_ENA |
ANA_PORT_PORT_CFG_PORTID_VAL(port),
ANA_PORT_PORT_CFG, port);
}
EXPORT_SYMBOL(ocelot_port_enable);
void ocelot_port_disable(struct ocelot *ocelot, int port)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
}
EXPORT_SYMBOL(ocelot_port_disable);
void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
struct sk_buff *clone)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
spin_lock(&ocelot_port->ts_id_lock);
2020-09-18 09:07:24 +08:00
skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS;
/* Store timestamp ID in cb[0] of sk_buff */
clone->cb[0] = ocelot_port->ts_id;
ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4;
skb_queue_tail(&ocelot_port->tx_skbs, clone);
2020-09-18 09:07:24 +08:00
spin_unlock(&ocelot_port->ts_id_lock);
}
EXPORT_SYMBOL(ocelot_port_add_txtstamp_skb);
static void ocelot_get_hwtimestamp(struct ocelot *ocelot,
struct timespec64 *ts)
{
unsigned long flags;
u32 val;
spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
/* Read current PTP time to get seconds */
val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
ts->tv_sec = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
/* Read packet HW timestamp from FIFO */
val = ocelot_read(ocelot, SYS_PTP_TXSTAMP);
ts->tv_nsec = SYS_PTP_TXSTAMP_PTP_TXSTAMP(val);
/* Sec has incremented since the ts was registered */
if ((ts->tv_sec & 0x1) != !!(val & SYS_PTP_TXSTAMP_PTP_TXSTAMP_SEC))
ts->tv_sec--;
spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
}
void ocelot_get_txtstamp(struct ocelot *ocelot)
{
int budget = OCELOT_PTP_QUEUE_SZ;
while (budget--) {
struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
struct skb_shared_hwtstamps shhwtstamps;
struct ocelot_port *port;
struct timespec64 ts;
unsigned long flags;
u32 val, id, txport;
val = ocelot_read(ocelot, SYS_PTP_STATUS);
/* Check if a timestamp can be retrieved */
if (!(val & SYS_PTP_STATUS_PTP_MESS_VLD))
break;
WARN_ON(val & SYS_PTP_STATUS_PTP_OVFL);
/* Retrieve the ts ID and Tx port */
id = SYS_PTP_STATUS_PTP_MESS_ID_X(val);
txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val);
/* Retrieve its associated skb */
port = ocelot->ports[txport];
spin_lock_irqsave(&port->tx_skbs.lock, flags);
skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {
if (skb->cb[0] != id)
continue;
__skb_unlink(skb, &port->tx_skbs);
skb_match = skb;
break;
}
spin_unlock_irqrestore(&port->tx_skbs.lock, flags);
/* Get the h/w timestamp */
ocelot_get_hwtimestamp(ocelot, &ts);
if (unlikely(!skb_match))
continue;
/* Set the timestamp into the skb */
memset(&shhwtstamps, 0, sizeof(shhwtstamps));
shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
skb_complete_tx_timestamp(skb_match, &shhwtstamps);
/* Next ts */
ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT);
}
}
EXPORT_SYMBOL(ocelot_get_txtstamp);
int ocelot_fdb_add(struct ocelot *ocelot, int port,
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge To rehash a previous explanation given in commit 1c44ce560b4d ("net: mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is up"), the switch driver operates the in a mode where a single VLAN can be transmitted as untagged on a particular egress port. That is the "native VLAN on trunk port" use case. The configuration for this native VLAN is driven in 2 ways: - Set the egress port rewriter to strip the VLAN tag for the native VID (as it is egress-untagged, after all). - Configure the ingress port to drop untagged and priority-tagged traffic, if there is no native VLAN. The intention of this setting is that a trunk port with no native VLAN should not accept untagged traffic. Since both of the above configurations for the native VLAN should only be done if VLAN awareness is requested, they are actually done from the ocelot_port_vlan_filtering function, after the basic procedure of toggling the VLAN awareness flag of the port. But there's a problem with that simplistic approach: we are trying to juggle with 2 independent variables from a single function: - Native VLAN of the port - its value is held in port->vid. - VLAN awareness state of the port - currently there are some issues here, more on that later*. The actual problem can be seen when enslaving the switch ports to a VLAN filtering bridge: 0. The driver configures a pvid of zero for each port, when in standalone mode. While the bridge configures a default_pvid of 1 for each port that gets added as a slave to it. 1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true. The VLAN-filtering-dependent portion of the native VLAN configuration is done, considering that the native VLAN is 0. 2. The bridge calls ocelot_vlan_add with vid=1, pvid=true, untagged=true. The native VLAN changes to 1 (change which gets propagated to hardware). 3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the VLAN-filtering-dependent portion of the native VLAN configuration, for the new native VLAN of 1. One can notice that after toggling "ip link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0 type bridge vlan_filtering 1", the new native VLAN finally makes it through and untagged traffic finally starts flowing again. But obviously that shouldn't be needed. So it is clear that 2 independent variables need to both re-trigger the native VLAN configuration. So we introduce the second variable as ocelot_port->vlan_aware. *Actually both the DSA Felix driver and the Ocelot driver already had each its own variable: - Ocelot: ocelot_port_private->vlan_aware - Felix: dsa_port->vlan_filtering but the common Ocelot library needs to work with a single, common, variable, so there is some refactoring done to move the vlan_aware property from the private structure into the common ocelot_port structure. Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
const unsigned char *addr, u16 vid)
{
int pgid = port;
if (port == ocelot->npi)
pgid = PGID_CPU;
return ocelot_mact_learn(ocelot, pgid, addr, vid, ENTRYTYPE_LOCKED);
}
EXPORT_SYMBOL(ocelot_fdb_add);
int ocelot_fdb_del(struct ocelot *ocelot, int port,
const unsigned char *addr, u16 vid)
{
return ocelot_mact_forget(ocelot, addr, vid);
}
EXPORT_SYMBOL(ocelot_fdb_del);
int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid,
bool is_static, void *data)
{
struct ocelot_dump_ctx *dump = data;
u32 portid = NETLINK_CB(dump->cb->skb).portid;
u32 seq = dump->cb->nlh->nlmsg_seq;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
if (dump->idx < dump->cb->args[2])
goto skip;
nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
sizeof(*ndm), NLM_F_MULTI);
if (!nlh)
return -EMSGSIZE;
ndm = nlmsg_data(nlh);
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
ndm->ndm_flags = NTF_SELF;
ndm->ndm_type = 0;
ndm->ndm_ifindex = dump->dev->ifindex;
ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr))
goto nla_put_failure;
if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid))
goto nla_put_failure;
nlmsg_end(dump->skb, nlh);
skip:
dump->idx++;
return 0;
nla_put_failure:
nlmsg_cancel(dump->skb, nlh);
return -EMSGSIZE;
}
EXPORT_SYMBOL(ocelot_port_fdb_do_dump);
static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
struct ocelot_mact_entry *entry)
{
u32 val, dst, macl, mach;
char mac[ETH_ALEN];
/* Set row and column to read from */
ocelot_field_write(ocelot, ANA_TABLES_MACTINDX_M_INDEX, row);
ocelot_field_write(ocelot, ANA_TABLES_MACTINDX_BUCKET, col);
/* Issue a read command */
ocelot_write(ocelot,
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_READ),
ANA_TABLES_MACACCESS);
if (ocelot_mact_wait_for_completion(ocelot))
return -ETIMEDOUT;
/* Read the entry flags */
val = ocelot_read(ocelot, ANA_TABLES_MACACCESS);
if (!(val & ANA_TABLES_MACACCESS_VALID))
return -EINVAL;
/* If the entry read has another port configured as its destination,
* do not report it.
*/
dst = (val & ANA_TABLES_MACACCESS_DEST_IDX_M) >> 3;
if (dst != port)
return -EINVAL;
/* Get the entry's MAC address and VLAN id */
macl = ocelot_read(ocelot, ANA_TABLES_MACLDATA);
mach = ocelot_read(ocelot, ANA_TABLES_MACHDATA);
mac[0] = (mach >> 8) & 0xff;
mac[1] = (mach >> 0) & 0xff;
mac[2] = (macl >> 24) & 0xff;
mac[3] = (macl >> 16) & 0xff;
mac[4] = (macl >> 8) & 0xff;
mac[5] = (macl >> 0) & 0xff;
entry->vid = (mach >> 16) & 0xfff;
ether_addr_copy(entry->mac, mac);
return 0;
}
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
int i, j;
/* Loop through all the mac tables entries. */
for (i = 0; i < ocelot->num_mact_rows; i++) {
for (j = 0; j < 4; j++) {
struct ocelot_mact_entry entry;
bool is_static;
int ret;
ret = ocelot_mact_read(ocelot, port, i, j, &entry);
/* If the entry is invalid (wrong port, invalid...),
* skip it.
*/
if (ret == -EINVAL)
continue;
else if (ret)
return ret;
is_static = (entry.type == ENTRYTYPE_LOCKED);
ret = cb(entry.mac, entry.vid, is_static, data);
if (ret)
return ret;
}
}
return 0;
}
EXPORT_SYMBOL(ocelot_fdb_dump);
int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr)
{
return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config,
sizeof(ocelot->hwtstamp_config)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(ocelot_hwstamp_get);
int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
struct hwtstamp_config cfg;
if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
return -EFAULT;
/* reserved for future extensions */
if (cfg.flags)
return -EINVAL;
/* Tx type sanity check */
switch (cfg.tx_type) {
case HWTSTAMP_TX_ON:
ocelot_port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
break;
case HWTSTAMP_TX_ONESTEP_SYNC:
/* IFH_REW_OP_ONE_STEP_PTP updates the correctional field, we
* need to update the origin time.
*/
ocelot_port->ptp_cmd = IFH_REW_OP_ORIGIN_PTP;
break;
case HWTSTAMP_TX_OFF:
ocelot_port->ptp_cmd = 0;
break;
default:
return -ERANGE;
}
mutex_lock(&ocelot->ptp_lock);
switch (cfg.rx_filter) {
case HWTSTAMP_FILTER_NONE:
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
case HWTSTAMP_FILTER_NTP_ALL:
case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
break;
default:
mutex_unlock(&ocelot->ptp_lock);
return -ERANGE;
}
/* Commit back the result & save it */
memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg));
mutex_unlock(&ocelot->ptp_lock);
return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(ocelot_hwstamp_set);
void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
{
int i;
if (sset != ETH_SS_STATS)
return;
for (i = 0; i < ocelot->num_stats; i++)
memcpy(data + i * ETH_GSTRING_LEN, ocelot->stats_layout[i].name,
ETH_GSTRING_LEN);
}
EXPORT_SYMBOL(ocelot_get_strings);
static void ocelot_update_stats(struct ocelot *ocelot)
{
int i, j;
mutex_lock(&ocelot->stats_lock);
for (i = 0; i < ocelot->num_phys_ports; i++) {
/* Configure the port to read the stats from */
ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
for (j = 0; j < ocelot->num_stats; j++) {
u32 val;
unsigned int idx = i * ocelot->num_stats + j;
val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
ocelot->stats_layout[j].offset);
if (val < (ocelot->stats[idx] & U32_MAX))
ocelot->stats[idx] += (u64)1 << 32;
ocelot->stats[idx] = (ocelot->stats[idx] &
~(u64)U32_MAX) + val;
}
}
mutex_unlock(&ocelot->stats_lock);
}
static void ocelot_check_stats_work(struct work_struct *work)
{
struct delayed_work *del_work = to_delayed_work(work);
struct ocelot *ocelot = container_of(del_work, struct ocelot,
stats_work);
ocelot_update_stats(ocelot);
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
}
void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
{
int i;
/* check and update now */
ocelot_update_stats(ocelot);
/* Copy all counters */
for (i = 0; i < ocelot->num_stats; i++)
*data++ = ocelot->stats[port * ocelot->num_stats + i];
}
EXPORT_SYMBOL(ocelot_get_ethtool_stats);
int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
{
if (sset != ETH_SS_STATS)
return -EOPNOTSUPP;
return ocelot->num_stats;
}
EXPORT_SYMBOL(ocelot_get_sset_count);
int ocelot_get_ts_info(struct ocelot *ocelot, int port,
struct ethtool_ts_info *info)
{
info->phc_index = ocelot->ptp_clock ?
ptp_clock_index(ocelot->ptp_clock) : -1;
if (info->phc_index == -1) {
info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_RX_SOFTWARE |
SOF_TIMESTAMPING_SOFTWARE;
return 0;
}
info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_RX_SOFTWARE |
SOF_TIMESTAMPING_SOFTWARE |
SOF_TIMESTAMPING_TX_HARDWARE |
SOF_TIMESTAMPING_RX_HARDWARE |
SOF_TIMESTAMPING_RAW_HARDWARE;
info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
BIT(HWTSTAMP_TX_ONESTEP_SYNC);
info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
return 0;
}
EXPORT_SYMBOL(ocelot_get_ts_info);
void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)
{
u32 port_cfg;
int p, i;
if (!(BIT(port) & ocelot->bridge_mask))
return;
port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port);
switch (state) {
case BR_STATE_FORWARDING:
ocelot->bridge_fwd_mask |= BIT(port);
fallthrough;
case BR_STATE_LEARNING:
port_cfg |= ANA_PORT_PORT_CFG_LEARN_ENA;
break;
default:
port_cfg &= ~ANA_PORT_PORT_CFG_LEARN_ENA;
ocelot->bridge_fwd_mask &= ~BIT(port);
break;
}
ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port);
/* Apply FWD mask. The loop is needed to add/remove the current port as
* a source for the other ports.
*/
for (p = 0; p < ocelot->num_phys_ports; p++) {
net: mscc: ocelot: eliminate confusion between CPU and NPI port Ocelot has the concept of a CPU port. The CPU port is represented in the forwarding and the queueing system, but it is not a physical device. The CPU port can either be accessed via register-based injection/extraction (which is the case of Ocelot), via Frame-DMA (similar to the first one), or "connected" to a physical Ethernet port (called NPI in the datasheet) which is the case of the Felix DSA switch. In Ocelot the CPU port is at index 11. In Felix the CPU port is at index 6. The CPU bit is treated special in the forwarding, as it is never cleared from the forwarding port mask (once added to it). Other than that, it is treated the same as a normal front port. Both Felix and Ocelot should use the CPU port in the same way. This means that Felix should not use the NPI port directly when forwarding to the CPU, but instead use the CPU port. This patch is fixing this such that Felix will use port 6 as its CPU port, and just use the NPI port to carry the traffic. Therefore, eliminate the "ocelot->cpu" variable which was holding the index of the NPI port for Felix, and the index of the CPU port module for Ocelot, so the variable was actually configuring different things for different drivers and causing at least part of the confusion. Also remove the "ocelot->num_cpu_ports" variable, which is the result of another confusion. The 2 CPU ports mentioned in the datasheet are because there are two frame extraction channels (register based or DMA based). This is of no relevance to the driver at the moment, and invisible to the analyzer module. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
if (ocelot->bridge_fwd_mask & BIT(p)) {
unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(p);
for (i = 0; i < ocelot->num_phys_ports; i++) {
unsigned long bond_mask = ocelot->lags[i];
if (!bond_mask)
continue;
if (bond_mask & BIT(p)) {
mask &= ~bond_mask;
break;
}
}
ocelot_write_rix(ocelot, mask,
ANA_PGID_PGID, PGID_SRC + p);
} else {
net: mscc: ocelot: eliminate confusion between CPU and NPI port Ocelot has the concept of a CPU port. The CPU port is represented in the forwarding and the queueing system, but it is not a physical device. The CPU port can either be accessed via register-based injection/extraction (which is the case of Ocelot), via Frame-DMA (similar to the first one), or "connected" to a physical Ethernet port (called NPI in the datasheet) which is the case of the Felix DSA switch. In Ocelot the CPU port is at index 11. In Felix the CPU port is at index 6. The CPU bit is treated special in the forwarding, as it is never cleared from the forwarding port mask (once added to it). Other than that, it is treated the same as a normal front port. Both Felix and Ocelot should use the CPU port in the same way. This means that Felix should not use the NPI port directly when forwarding to the CPU, but instead use the CPU port. This patch is fixing this such that Felix will use port 6 as its CPU port, and just use the NPI port to carry the traffic. Therefore, eliminate the "ocelot->cpu" variable which was holding the index of the NPI port for Felix, and the index of the CPU port module for Ocelot, so the variable was actually configuring different things for different drivers and causing at least part of the confusion. Also remove the "ocelot->num_cpu_ports" variable, which is the result of another confusion. The 2 CPU ports mentioned in the datasheet are because there are two frame extraction channels (register based or DMA based). This is of no relevance to the driver at the moment, and invisible to the analyzer module. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
ocelot_write_rix(ocelot, 0,
ANA_PGID_PGID, PGID_SRC + p);
}
}
}
EXPORT_SYMBOL(ocelot_bridge_stp_state_set);
void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs)
{
unsigned int age_period = ANA_AUTOAGE_AGE_PERIOD(msecs / 2000);
/* Setting AGE_PERIOD to zero effectively disables automatic aging,
* which is clearly not what our intention is. So avoid that.
*/
if (!age_period)
age_period = 1;
ocelot_rmw(ocelot, age_period, ANA_AUTOAGE_AGE_PERIOD_M, ANA_AUTOAGE);
}
EXPORT_SYMBOL(ocelot_set_ageing_time);
static struct ocelot_multicast *ocelot_multicast_get(struct ocelot *ocelot,
const unsigned char *addr,
u16 vid)
{
struct ocelot_multicast *mc;
list_for_each_entry(mc, &ocelot->multicast, list) {
if (ether_addr_equal(mc->addr, addr) && mc->vid == vid)
return mc;
}
return NULL;
}
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
static enum macaccess_entry_type ocelot_classify_mdb(const unsigned char *addr)
{
if (addr[0] == 0x01 && addr[1] == 0x00 && addr[2] == 0x5e)
return ENTRYTYPE_MACv4;
if (addr[0] == 0x33 && addr[1] == 0x33)
return ENTRYTYPE_MACv6;
return ENTRYTYPE_LOCKED;
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
}
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
static struct ocelot_pgid *ocelot_pgid_alloc(struct ocelot *ocelot, int index,
unsigned long ports)
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
{
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
struct ocelot_pgid *pgid;
pgid = kzalloc(sizeof(*pgid), GFP_KERNEL);
if (!pgid)
return ERR_PTR(-ENOMEM);
pgid->ports = ports;
pgid->index = index;
refcount_set(&pgid->refcount, 1);
list_add_tail(&pgid->list, &ocelot->pgids);
return pgid;
}
static void ocelot_pgid_free(struct ocelot *ocelot, struct ocelot_pgid *pgid)
{
if (!refcount_dec_and_test(&pgid->refcount))
return;
list_del(&pgid->list);
kfree(pgid);
}
static struct ocelot_pgid *ocelot_mdb_get_pgid(struct ocelot *ocelot,
const struct ocelot_multicast *mc)
{
struct ocelot_pgid *pgid;
int index;
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
/* According to VSC7514 datasheet 3.9.1.5 IPv4 Multicast Entries and
* 3.9.1.6 IPv6 Multicast Entries, "Instead of a lookup in the
* destination mask table (PGID), the destination set is programmed as
* part of the entry MAC address.", and the DEST_IDX is set to 0.
*/
if (mc->entry_type == ENTRYTYPE_MACv4 ||
mc->entry_type == ENTRYTYPE_MACv6)
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
return ocelot_pgid_alloc(ocelot, 0, mc->ports);
list_for_each_entry(pgid, &ocelot->pgids, list) {
/* When searching for a nonreserved multicast PGID, ignore the
* dummy PGID of zero that we have for MACv4/MACv6 entries
*/
if (pgid->index && pgid->ports == mc->ports) {
refcount_inc(&pgid->refcount);
return pgid;
}
}
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
/* Search for a free index in the nonreserved multicast PGID area */
for_each_nonreserved_multicast_dest_pgid(ocelot, index) {
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
bool used = false;
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
list_for_each_entry(pgid, &ocelot->pgids, list) {
if (pgid->index == index) {
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
used = true;
break;
}
}
if (!used)
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
return ocelot_pgid_alloc(ocelot, index, mc->ports);
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
}
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
return ERR_PTR(-ENOSPC);
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
}
static void ocelot_encode_ports_to_mdb(unsigned char *addr,
struct ocelot_multicast *mc)
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
{
ether_addr_copy(addr, mc->addr);
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
if (mc->entry_type == ENTRYTYPE_MACv4) {
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
addr[0] = 0;
addr[1] = mc->ports >> 8;
addr[2] = mc->ports & 0xff;
} else if (mc->entry_type == ENTRYTYPE_MACv6) {
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries The current procedure for installing a multicast address is hardcoded for IPv4. But, in the ocelot hardware, there are 3 different procedures for IPv4, IPv6 and for regular L2 multicast. For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4 (01-00-5e-xx-xx-xx), except that the destination port mask is stuffed into first 2 bytes of the MAC address except into first 3 bytes. For plain Ethernet multicast, there's no port-in-address stuffing going on, instead the DEST_IDX (pointer to PGID) is used there, just as for unicast. So we have to use one of the nonreserved multicast PGIDs that the hardware has allocated for this purpose. This patch classifies the type of multicast address based on its first bytes, then redirects to one of the 3 different hardware procedures. Note that this gives us a really better way of redirecting PTP frames sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add a trapping rule for PTP EtherType but got a lot of pushback: https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/ But right now, that isn't needed at all. The application stack (ptp4l) does this for the PTP multicast addresses it's interested in (which are configurable, and include 01-1b-19-00-00-00): memset(&mreq, 0, sizeof(mreq)); mreq.mr_ifindex = index; mreq.mr_type = PACKET_MR_MULTICAST; mreq.mr_alen = MAC_LEN; memcpy(mreq.mr_address, addr1, MAC_LEN); err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); Into the kernel, this translates into a dev_mc_add on the switch network interfaces, and our drivers know that it means they should translate it into a host MDB address (make the CPU port be the destination). Previously, this was broken because all mdb addresses were treated as IPv4 (which 01-1b-19-00-00-00 obviously is not). Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
addr[0] = mc->ports >> 8;
addr[1] = mc->ports & 0xff;
}
}
int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
const struct switchdev_obj_port_mdb *mdb)
{
unsigned char addr[ETH_ALEN];
struct ocelot_multicast *mc;
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
struct ocelot_pgid *pgid;
u16 vid = mdb->vid;
if (port == ocelot->npi)
port = ocelot->num_phys_ports;
mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
if (!mc) {
/* New entry */
mc = devm_kzalloc(ocelot->dev, sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
mc->entry_type = ocelot_classify_mdb(mdb->addr);
ether_addr_copy(mc->addr, mdb->addr);
mc->vid = vid;
list_add_tail(&mc->list, &ocelot->multicast);
} else {
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
/* Existing entry. Clean up the current port mask from
* hardware now, because we'll be modifying it.
*/
ocelot_pgid_free(ocelot, mc->pgid);
ocelot_encode_ports_to_mdb(addr, mc);
ocelot_mact_forget(ocelot, addr, vid);
}
mc->ports |= BIT(port);
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
pgid = ocelot_mdb_get_pgid(ocelot, mc);
if (IS_ERR(pgid)) {
dev_err(ocelot->dev,
"Cannot allocate PGID for mdb %pM vid %d\n",
mc->addr, mc->vid);
devm_kfree(ocelot->dev, mc);
return PTR_ERR(pgid);
}
mc->pgid = pgid;
ocelot_encode_ports_to_mdb(addr, mc);
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
if (mc->entry_type != ENTRYTYPE_MACv4 &&
mc->entry_type != ENTRYTYPE_MACv6)
ocelot_write_rix(ocelot, pgid->ports, ANA_PGID_PGID,
pgid->index);
return ocelot_mact_learn(ocelot, pgid->index, addr, vid,
mc->entry_type);
}
EXPORT_SYMBOL(ocelot_port_mdb_add);
int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
const struct switchdev_obj_port_mdb *mdb)
{
unsigned char addr[ETH_ALEN];
struct ocelot_multicast *mc;
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
struct ocelot_pgid *pgid;
u16 vid = mdb->vid;
if (port == ocelot->npi)
port = ocelot->num_phys_ports;
mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
if (!mc)
return -ENOENT;
ocelot_encode_ports_to_mdb(addr, mc);
ocelot_mact_forget(ocelot, addr, vid);
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
ocelot_pgid_free(ocelot, mc->pgid);
mc->ports &= ~BIT(port);
if (!mc->ports) {
list_del(&mc->list);
devm_kfree(ocelot->dev, mc);
return 0;
}
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
/* We have a PGID with fewer ports now */
pgid = ocelot_mdb_get_pgid(ocelot, mc);
if (IS_ERR(pgid))
return PTR_ERR(pgid);
mc->pgid = pgid;
ocelot_encode_ports_to_mdb(addr, mc);
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
if (mc->entry_type != ENTRYTYPE_MACv4 &&
mc->entry_type != ENTRYTYPE_MACv6)
ocelot_write_rix(ocelot, pgid->ports, ANA_PGID_PGID,
pgid->index);
return ocelot_mact_learn(ocelot, pgid->index, addr, vid,
mc->entry_type);
}
EXPORT_SYMBOL(ocelot_port_mdb_del);
int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
struct net_device *bridge)
{
if (!ocelot->bridge_mask) {
ocelot->hw_bridge_dev = bridge;
} else {
if (ocelot->hw_bridge_dev != bridge)
/* This is adding the port to a second bridge, this is
* unsupported */
return -ENODEV;
}
ocelot->bridge_mask |= BIT(port);
return 0;
}
EXPORT_SYMBOL(ocelot_port_bridge_join);
int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
struct net_device *bridge)
{
struct ocelot_vlan pvid = {0}, native_vlan = {0};
net: dsa: propagate switchdev vlan_filtering prepare phase to drivers A driver may refuse to enable VLAN filtering for any reason beyond what the DSA framework cares about, such as: - having tc-flower rules that rely on the switch being VLAN-aware - the particular switch does not support VLAN, even if the driver does (the DSA framework just checks for the presence of the .port_vlan_add and .port_vlan_del pointers) - simply not supporting this configuration to be toggled at runtime Currently, when a driver rejects a configuration it cannot support, it does this from the commit phase, which triggers various warnings in switchdev. So propagate the prepare phase to drivers, to give them the ability to refuse invalid configurations cleanly and avoid the warnings. Since we need to modify all function prototypes and check for the prepare phase from within the drivers, take that opportunity and move the existing driver restrictions within the prepare phase where that is possible and easy. Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com> Cc: Hauke Mehrtens <hauke@hauke-m.de> Cc: Woojung Huh <woojung.huh@microchip.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Sean Wang <sean.wang@mediatek.com> Cc: Landen Chao <Landen.Chao@mediatek.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Vivien Didelot <vivien.didelot@gmail.com> Cc: Jonathan McDowell <noodles@earth.li> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Claudiu Manoil <claudiu.manoil@nxp.com> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-03 06:06:46 +08:00
struct switchdev_trans trans;
int ret;
ocelot->bridge_mask &= ~BIT(port);
if (!ocelot->bridge_mask)
ocelot->hw_bridge_dev = NULL;
net: dsa: propagate switchdev vlan_filtering prepare phase to drivers A driver may refuse to enable VLAN filtering for any reason beyond what the DSA framework cares about, such as: - having tc-flower rules that rely on the switch being VLAN-aware - the particular switch does not support VLAN, even if the driver does (the DSA framework just checks for the presence of the .port_vlan_add and .port_vlan_del pointers) - simply not supporting this configuration to be toggled at runtime Currently, when a driver rejects a configuration it cannot support, it does this from the commit phase, which triggers various warnings in switchdev. So propagate the prepare phase to drivers, to give them the ability to refuse invalid configurations cleanly and avoid the warnings. Since we need to modify all function prototypes and check for the prepare phase from within the drivers, take that opportunity and move the existing driver restrictions within the prepare phase where that is possible and easy. Cc: Florian Fainelli <f.fainelli@gmail.com> Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com> Cc: Hauke Mehrtens <hauke@hauke-m.de> Cc: Woojung Huh <woojung.huh@microchip.com> Cc: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> Cc: Sean Wang <sean.wang@mediatek.com> Cc: Landen Chao <Landen.Chao@mediatek.com> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Vivien Didelot <vivien.didelot@gmail.com> Cc: Jonathan McDowell <noodles@earth.li> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Alexandre Belloni <alexandre.belloni@bootlin.com> Cc: Claudiu Manoil <claudiu.manoil@nxp.com> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-03 06:06:46 +08:00
trans.ph_prepare = true;
ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans);
if (ret)
return ret;
trans.ph_prepare = false;
ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans);
if (ret)
return ret;
ocelot_port_set_pvid(ocelot, port, pvid);
ocelot_port_set_native_vlan(ocelot, port, native_vlan);
return 0;
}
EXPORT_SYMBOL(ocelot_port_bridge_leave);
static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
{
int i, port, lag;
/* Reset destination and aggregation PGIDS */
for_each_unicast_dest_pgid(ocelot, port)
ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, port);
for_each_aggr_pgid(ocelot, i)
ocelot_write_rix(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
ANA_PGID_PGID, i);
/* Now, set PGIDs for each LAG */
for (lag = 0; lag < ocelot->num_phys_ports; lag++) {
unsigned long bond_mask;
int aggr_count = 0;
u8 aggr_idx[16];
bond_mask = ocelot->lags[lag];
if (!bond_mask)
continue;
for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
// Destination mask
ocelot_write_rix(ocelot, bond_mask,
ANA_PGID_PGID, port);
aggr_idx[aggr_count] = port;
aggr_count++;
}
for_each_aggr_pgid(ocelot, i) {
u32 ac;
ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);
ac &= ~bond_mask;
ac |= BIT(aggr_idx[i % aggr_count]);
ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);
}
}
}
static void ocelot_setup_lag(struct ocelot *ocelot, int lag)
{
unsigned long bond_mask = ocelot->lags[lag];
unsigned int p;
for_each_set_bit(p, &bond_mask, ocelot->num_phys_ports) {
u32 port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p);
port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
/* Use lag port as logical port for port i */
ocelot_write_gix(ocelot, port_cfg |
ANA_PORT_PORT_CFG_PORTID_VAL(lag),
ANA_PORT_PORT_CFG, p);
}
}
int ocelot_port_lag_join(struct ocelot *ocelot, int port,
struct net_device *bond)
{
struct net_device *ndev;
u32 bond_mask = 0;
int lag, lp;
rcu_read_lock();
for_each_netdev_in_bond_rcu(bond, ndev) {
struct ocelot_port_private *priv = netdev_priv(ndev);
bond_mask |= BIT(priv->chip_port);
}
rcu_read_unlock();
lp = __ffs(bond_mask);
/* If the new port is the lowest one, use it as the logical port from
* now on
*/
if (port == lp) {
lag = port;
ocelot->lags[port] = bond_mask;
bond_mask &= ~BIT(port);
if (bond_mask) {
lp = __ffs(bond_mask);
ocelot->lags[lp] = 0;
}
} else {
lag = lp;
ocelot->lags[lp] |= BIT(port);
}
ocelot_setup_lag(ocelot, lag);
ocelot_set_aggr_pgids(ocelot);
return 0;
}
EXPORT_SYMBOL(ocelot_port_lag_join);
void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
struct net_device *bond)
{
u32 port_cfg;
int i;
/* Remove port from any lag */
for (i = 0; i < ocelot->num_phys_ports; i++)
ocelot->lags[i] &= ~BIT(port);
/* if it was the logical port of the lag, move the lag config to the
* next port
*/
if (ocelot->lags[port]) {
int n = __ffs(ocelot->lags[port]);
ocelot->lags[n] = ocelot->lags[port];
ocelot->lags[port] = 0;
ocelot_setup_lag(ocelot, n);
}
port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port);
port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M;
ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(port),
ANA_PORT_PORT_CFG, port);
ocelot_set_aggr_pgids(ocelot);
}
EXPORT_SYMBOL(ocelot_port_lag_leave);
/* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
* The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
* In the special case that it's the NPI port that we're configuring, the
* length of the tag and optional prefix needs to be accounted for privately,
* in order to be able to sustain communication at the requested @sdu.
*/
void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN;
int pause_start, pause_stop;
int atop, atop_tot;
if (port == ocelot->npi) {
maxlen += OCELOT_TAG_LEN;
if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT)
maxlen += OCELOT_SHORT_PREFIX_LEN;
else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG)
maxlen += OCELOT_LONG_PREFIX_LEN;
}
ocelot_port_writel(ocelot_port, maxlen, DEV_MAC_MAXLEN_CFG);
/* Set Pause watermark hysteresis */
pause_start = 6 * maxlen / OCELOT_BUFFER_CELL_SZ;
pause_stop = 4 * maxlen / OCELOT_BUFFER_CELL_SZ;
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_START,
pause_start);
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_STOP,
pause_stop);
/* Tail dropping watermarks */
atop_tot = (ocelot->shared_queue_sz - 9 * maxlen) /
OCELOT_BUFFER_CELL_SZ;
atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ;
ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port);
ocelot_write(ocelot, ocelot->ops->wm_enc(atop_tot), SYS_ATOP_TOT_CFG);
}
EXPORT_SYMBOL(ocelot_port_set_maxlen);
int ocelot_get_max_mtu(struct ocelot *ocelot, int port)
{
int max_mtu = 65535 - ETH_HLEN - ETH_FCS_LEN;
if (port == ocelot->npi) {
max_mtu -= OCELOT_TAG_LEN;
if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT)
max_mtu -= OCELOT_SHORT_PREFIX_LEN;
else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG)
max_mtu -= OCELOT_LONG_PREFIX_LEN;
}
return max_mtu;
}
EXPORT_SYMBOL(ocelot_get_max_mtu);
void ocelot_init_port(struct ocelot *ocelot, int port)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
skb_queue_head_init(&ocelot_port->tx_skbs);
2020-09-18 09:07:24 +08:00
spin_lock_init(&ocelot_port->ts_id_lock);
/* Basic L2 initialization */
/* Set MAC IFG Gaps
* FDX: TX_IFG = 5, RX_IFG1 = RX_IFG2 = 0
* !FDX: TX_IFG = 5, RX_IFG1 = RX_IFG2 = 5
*/
ocelot_port_writel(ocelot_port, DEV_MAC_IFG_CFG_TX_IFG(5),
DEV_MAC_IFG_CFG);
/* Load seed (0) and set MAC HDX late collision */
ocelot_port_writel(ocelot_port, DEV_MAC_HDX_CFG_LATE_COL_POS(67) |
DEV_MAC_HDX_CFG_SEED_LOAD,
DEV_MAC_HDX_CFG);
mdelay(1);
ocelot_port_writel(ocelot_port, DEV_MAC_HDX_CFG_LATE_COL_POS(67),
DEV_MAC_HDX_CFG);
/* Set Max Length and maximum tags allowed */
ocelot_port_set_maxlen(ocelot, port, ETH_DATA_LEN);
ocelot_port_writel(ocelot_port, DEV_MAC_TAGS_CFG_TAG_ID(ETH_P_8021AD) |
DEV_MAC_TAGS_CFG_VLAN_AWR_ENA |
DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA |
DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA,
DEV_MAC_TAGS_CFG);
/* Set SMAC of Pause frame (00:00:00:00:00:00) */
ocelot_port_writel(ocelot_port, 0, DEV_MAC_FC_MAC_HIGH_CFG);
ocelot_port_writel(ocelot_port, 0, DEV_MAC_FC_MAC_LOW_CFG);
/* Enable transmission of pause frames */
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 1);
/* Drop frames with multicast source address */
ocelot_rmw_gix(ocelot, ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA,
ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA,
ANA_PORT_DROP_CFG, port);
/* Set default VLAN and tag type to 8021Q. */
ocelot_rmw_gix(ocelot, REW_PORT_VLAN_CFG_PORT_TPID(ETH_P_8021Q),
REW_PORT_VLAN_CFG_PORT_TPID_M,
REW_PORT_VLAN_CFG, port);
/* Enable vcap lookups */
ocelot_vcap_enable(ocelot, port);
}
EXPORT_SYMBOL(ocelot_init_port);
/* Configure and enable the CPU port module, which is a set of queues
* accessible through register MMIO, frame DMA or Ethernet (in case
* NPI mode is used).
net: mscc: ocelot: eliminate confusion between CPU and NPI port Ocelot has the concept of a CPU port. The CPU port is represented in the forwarding and the queueing system, but it is not a physical device. The CPU port can either be accessed via register-based injection/extraction (which is the case of Ocelot), via Frame-DMA (similar to the first one), or "connected" to a physical Ethernet port (called NPI in the datasheet) which is the case of the Felix DSA switch. In Ocelot the CPU port is at index 11. In Felix the CPU port is at index 6. The CPU bit is treated special in the forwarding, as it is never cleared from the forwarding port mask (once added to it). Other than that, it is treated the same as a normal front port. Both Felix and Ocelot should use the CPU port in the same way. This means that Felix should not use the NPI port directly when forwarding to the CPU, but instead use the CPU port. This patch is fixing this such that Felix will use port 6 as its CPU port, and just use the NPI port to carry the traffic. Therefore, eliminate the "ocelot->cpu" variable which was holding the index of the NPI port for Felix, and the index of the CPU port module for Ocelot, so the variable was actually configuring different things for different drivers and causing at least part of the confusion. Also remove the "ocelot->num_cpu_ports" variable, which is the result of another confusion. The 2 CPU ports mentioned in the datasheet are because there are two frame extraction channels (register based or DMA based). This is of no relevance to the driver at the moment, and invisible to the analyzer module. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
*/
static void ocelot_cpu_port_init(struct ocelot *ocelot)
{
net: mscc: ocelot: eliminate confusion between CPU and NPI port Ocelot has the concept of a CPU port. The CPU port is represented in the forwarding and the queueing system, but it is not a physical device. The CPU port can either be accessed via register-based injection/extraction (which is the case of Ocelot), via Frame-DMA (similar to the first one), or "connected" to a physical Ethernet port (called NPI in the datasheet) which is the case of the Felix DSA switch. In Ocelot the CPU port is at index 11. In Felix the CPU port is at index 6. The CPU bit is treated special in the forwarding, as it is never cleared from the forwarding port mask (once added to it). Other than that, it is treated the same as a normal front port. Both Felix and Ocelot should use the CPU port in the same way. This means that Felix should not use the NPI port directly when forwarding to the CPU, but instead use the CPU port. This patch is fixing this such that Felix will use port 6 as its CPU port, and just use the NPI port to carry the traffic. Therefore, eliminate the "ocelot->cpu" variable which was holding the index of the NPI port for Felix, and the index of the CPU port module for Ocelot, so the variable was actually configuring different things for different drivers and causing at least part of the confusion. Also remove the "ocelot->num_cpu_ports" variable, which is the result of another confusion. The 2 CPU ports mentioned in the datasheet are because there are two frame extraction channels (register based or DMA based). This is of no relevance to the driver at the moment, and invisible to the analyzer module. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
int cpu = ocelot->num_phys_ports;
/* The unicast destination PGID for the CPU port module is unused */
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, cpu);
net: mscc: ocelot: eliminate confusion between CPU and NPI port Ocelot has the concept of a CPU port. The CPU port is represented in the forwarding and the queueing system, but it is not a physical device. The CPU port can either be accessed via register-based injection/extraction (which is the case of Ocelot), via Frame-DMA (similar to the first one), or "connected" to a physical Ethernet port (called NPI in the datasheet) which is the case of the Felix DSA switch. In Ocelot the CPU port is at index 11. In Felix the CPU port is at index 6. The CPU bit is treated special in the forwarding, as it is never cleared from the forwarding port mask (once added to it). Other than that, it is treated the same as a normal front port. Both Felix and Ocelot should use the CPU port in the same way. This means that Felix should not use the NPI port directly when forwarding to the CPU, but instead use the CPU port. This patch is fixing this such that Felix will use port 6 as its CPU port, and just use the NPI port to carry the traffic. Therefore, eliminate the "ocelot->cpu" variable which was holding the index of the NPI port for Felix, and the index of the CPU port module for Ocelot, so the variable was actually configuring different things for different drivers and causing at least part of the confusion. Also remove the "ocelot->num_cpu_ports" variable, which is the result of another confusion. The 2 CPU ports mentioned in the datasheet are because there are two frame extraction channels (register based or DMA based). This is of no relevance to the driver at the moment, and invisible to the analyzer module. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
/* Instead set up a multicast destination PGID for traffic copied to
* the CPU. Whitelisted MAC addresses like the port netdevice MAC
* addresses will be copied to the CPU via this PGID.
*/
ocelot_write_rix(ocelot, BIT(cpu), ANA_PGID_PGID, PGID_CPU);
ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_RECV_ENA |
ANA_PORT_PORT_CFG_PORTID_VAL(cpu),
ANA_PORT_PORT_CFG, cpu);
net: mscc: ocelot: eliminate confusion between CPU and NPI port Ocelot has the concept of a CPU port. The CPU port is represented in the forwarding and the queueing system, but it is not a physical device. The CPU port can either be accessed via register-based injection/extraction (which is the case of Ocelot), via Frame-DMA (similar to the first one), or "connected" to a physical Ethernet port (called NPI in the datasheet) which is the case of the Felix DSA switch. In Ocelot the CPU port is at index 11. In Felix the CPU port is at index 6. The CPU bit is treated special in the forwarding, as it is never cleared from the forwarding port mask (once added to it). Other than that, it is treated the same as a normal front port. Both Felix and Ocelot should use the CPU port in the same way. This means that Felix should not use the NPI port directly when forwarding to the CPU, but instead use the CPU port. This patch is fixing this such that Felix will use port 6 as its CPU port, and just use the NPI port to carry the traffic. Therefore, eliminate the "ocelot->cpu" variable which was holding the index of the NPI port for Felix, and the index of the CPU port module for Ocelot, so the variable was actually configuring different things for different drivers and causing at least part of the confusion. Also remove the "ocelot->num_cpu_ports" variable, which is the result of another confusion. The 2 CPU ports mentioned in the datasheet are because there are two frame extraction channels (register based or DMA based). This is of no relevance to the driver at the moment, and invisible to the analyzer module. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
/* Enable CPU port module */
ocelot_fields_write(ocelot, cpu, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
net: mscc: ocelot: eliminate confusion between CPU and NPI port Ocelot has the concept of a CPU port. The CPU port is represented in the forwarding and the queueing system, but it is not a physical device. The CPU port can either be accessed via register-based injection/extraction (which is the case of Ocelot), via Frame-DMA (similar to the first one), or "connected" to a physical Ethernet port (called NPI in the datasheet) which is the case of the Felix DSA switch. In Ocelot the CPU port is at index 11. In Felix the CPU port is at index 6. The CPU bit is treated special in the forwarding, as it is never cleared from the forwarding port mask (once added to it). Other than that, it is treated the same as a normal front port. Both Felix and Ocelot should use the CPU port in the same way. This means that Felix should not use the NPI port directly when forwarding to the CPU, but instead use the CPU port. This patch is fixing this such that Felix will use port 6 as its CPU port, and just use the NPI port to carry the traffic. Therefore, eliminate the "ocelot->cpu" variable which was holding the index of the NPI port for Felix, and the index of the CPU port module for Ocelot, so the variable was actually configuring different things for different drivers and causing at least part of the confusion. Also remove the "ocelot->num_cpu_ports" variable, which is the result of another confusion. The 2 CPU ports mentioned in the datasheet are because there are two frame extraction channels (register based or DMA based). This is of no relevance to the driver at the moment, and invisible to the analyzer module. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
/* CPU port Injection/Extraction configuration */
ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_XTR_HDR,
ocelot->xtr_prefix);
ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_INJ_HDR,
ocelot->inj_prefix);
/* Configure the CPU port to be VLAN aware */
ocelot_write_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(0) |
ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1),
ANA_PORT_VLAN_CFG, cpu);
}
int ocelot_init(struct ocelot *ocelot)
{
char queue_name[32];
int i, ret;
u32 port;
if (ocelot->ops->reset) {
ret = ocelot->ops->reset(ocelot);
if (ret) {
dev_err(ocelot->dev, "Switch reset failed\n");
return ret;
}
}
ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
sizeof(u32), GFP_KERNEL);
if (!ocelot->lags)
return -ENOMEM;
ocelot->stats = devm_kcalloc(ocelot->dev,
ocelot->num_phys_ports * ocelot->num_stats,
sizeof(u64), GFP_KERNEL);
if (!ocelot->stats)
return -ENOMEM;
mutex_init(&ocelot->stats_lock);
mutex_init(&ocelot->ptp_lock);
spin_lock_init(&ocelot->ptp_clock_lock);
snprintf(queue_name, sizeof(queue_name), "%s-stats",
dev_name(ocelot->dev));
ocelot->stats_queue = create_singlethread_workqueue(queue_name);
if (!ocelot->stats_queue)
return -ENOMEM;
INIT_LIST_HEAD(&ocelot->multicast);
net: mscc: ocelot: support L2 multicast entries There is one main difference in mscc_ocelot between IP multicast and L2 multicast. With IP multicast, destination ports are encoded into the upper bytes of the multicast MAC address. Example: to deliver the address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to program the address of 00:03:08:11:22:33 into hardware. Whereas for L2 multicast, the MAC table entry points to a Port Group ID (PGID), and that PGID contains the port mask that the packet will be forwarded to. As to why it is this way, no clue. My guess is that not all port combinations can be supported simultaneously with the limited number of PGIDs, and this was somehow an issue for IP multicast but not for L2 multicast. Anyway. Prior to this change, the raw L2 multicast code was bogus, due to the fact that there wasn't really any way to test it using the bridge code. There were 2 issues: - A multicast PGID was allocated for each MDB entry, but it wasn't in fact programmed to hardware. It was dummy. - In fact we don't want to reserve a multicast PGID for every single MDB entry. That would be odd because we can only have ~60 PGIDs, but thousands of MDB entries. So instead, we want to reserve a multicast PGID for every single port combination for multicast traffic. And since we can have 2 (or more) MDB entries delivered to the same port group (and therefore PGID), we need to reference-count the PGIDs. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
INIT_LIST_HEAD(&ocelot->pgids);
ocelot_mact_init(ocelot);
ocelot_vlan_init(ocelot);
ocelot_vcap_init(ocelot);
ocelot_cpu_port_init(ocelot);
for (port = 0; port < ocelot->num_phys_ports; port++) {
/* Clear all counters (5 groups) */
ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port) |
SYS_STAT_CFG_STAT_CLEAR_SHOT(0x7f),
SYS_STAT_CFG);
}
/* Only use S-Tag */
ocelot_write(ocelot, ETH_P_8021AD, SYS_VLAN_ETYPE_CFG);
/* Aggregation mode */
ocelot_write(ocelot, ANA_AGGR_CFG_AC_SMAC_ENA |
ANA_AGGR_CFG_AC_DMAC_ENA |
ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA |
ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, ANA_AGGR_CFG);
/* Set MAC age time to default value. The entry is aged after
* 2*AGE_PERIOD
*/
ocelot_write(ocelot,
ANA_AUTOAGE_AGE_PERIOD(BR_DEFAULT_AGEING_TIME / 2 / HZ),
ANA_AUTOAGE);
/* Disable learning for frames discarded by VLAN ingress filtering */
regmap_field_write(ocelot->regfields[ANA_ADVLEARN_VLAN_CHK], 1);
/* Setup frame ageing - fixed value "2 sec" - in 6.5 us units */
ocelot_write(ocelot, SYS_FRM_AGING_AGE_TX_ENA |
SYS_FRM_AGING_MAX_AGE(307692), SYS_FRM_AGING);
/* Setup flooding PGIDs */
net: mscc: ocelot: fix dropping of unknown IPv4 multicast on Seville The current assumption is that the felix DSA driver has flooding knobs per traffic class, while ocelot switchdev has a single flooding knob. This was correct for felix VSC9959 and ocelot VSC7514, but with the introduction of seville VSC9953, we see a switch driven by felix.c which has a single flooding knob. So it is clear that we must do what should have been done from the beginning, which is not to overwrite the configuration done by ocelot.c in felix, but instead to teach the common ocelot library about the differences in our switches, and set up the flooding PGIDs centrally. The effect that the bogus iteration through FELIX_NUM_TC has upon seville is quite dramatic. ANA_FLOODING is located at 0x00b548, and ANA_FLOODING_IPMC is located at 0x00b54c. So the bogus iteration will actually overwrite ANA_FLOODING_IPMC when attempting to write ANA_FLOODING[1]. There is no ANA_FLOODING[1] in sevile, just ANA_FLOODING. And when ANA_FLOODING_IPMC is overwritten with a bogus value, the effect is that ANA_FLOODING_IPMC gets the value of 0x0003CF7D: MC6_DATA = 61, MC6_CTRL = 61, MC4_DATA = 60, MC4_CTRL = 0. Because MC4_CTRL is zero, this means that IPv4 multicast control packets are not flooded, but dropped. An invalid configuration, and this is how the issue was actually spotted. Reported-by: Eldar Gasanov <eldargasanov2@gmail.com> Reported-by: Maxim Kochetkov <fido_max@inbox.ru> Tested-by: Eldar Gasanov <eldargasanov2@gmail.com> Fixes: 84705fc16552 ("net: dsa: felix: introduce support for Seville VSC9953 switch") Fixes: 3c7b51bd39b2 ("net: dsa: felix: allow flooding for all traffic classes") Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com> Link: https://lore.kernel.org/r/20201204175416.1445937-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-05 01:54:16 +08:00
for (i = 0; i < ocelot->num_flooding_pgids; i++)
ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) |
ANA_FLOODING_FLD_BROADCAST(PGID_MC) |
ANA_FLOODING_FLD_UNICAST(PGID_UC),
ANA_FLOODING, i);
ocelot_write(ocelot, ANA_FLOODING_IPMC_FLD_MC6_DATA(PGID_MCIPV6) |
ANA_FLOODING_IPMC_FLD_MC6_CTRL(PGID_MC) |
ANA_FLOODING_IPMC_FLD_MC4_DATA(PGID_MCIPV4) |
ANA_FLOODING_IPMC_FLD_MC4_CTRL(PGID_MC),
ANA_FLOODING_IPMC);
for (port = 0; port < ocelot->num_phys_ports; port++) {
/* Transmit the frame to the local port. */
ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, port);
/* Do not forward BPDU frames to the front ports. */
ocelot_write_gix(ocelot,
ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0xffff),
ANA_PORT_CPU_FWD_BPDU_CFG,
port);
/* Ensure bridging is disabled */
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_SRC + port);
}
/* Allow broadcast MAC frames. */
for_each_nonreserved_multicast_dest_pgid(ocelot, i) {
u32 val = ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports - 1, 0));
ocelot_write_rix(ocelot, val, ANA_PGID_PGID, i);
}
ocelot_write_rix(ocelot,
ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)),
ANA_PGID_PGID, PGID_MC);
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV4);
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV6);
/* Allow manual injection via DEVCPU_QS registers, and byte swap these
* registers endianness.
*/
ocelot_write_rix(ocelot, QS_INJ_GRP_CFG_BYTE_SWAP |
QS_INJ_GRP_CFG_MODE(1), QS_INJ_GRP_CFG, 0);
ocelot_write_rix(ocelot, QS_XTR_GRP_CFG_BYTE_SWAP |
QS_XTR_GRP_CFG_MODE(1), QS_XTR_GRP_CFG, 0);
ocelot_write(ocelot, ANA_CPUQ_CFG_CPUQ_MIRROR(2) |
ANA_CPUQ_CFG_CPUQ_LRN(2) |
ANA_CPUQ_CFG_CPUQ_MAC_COPY(2) |
ANA_CPUQ_CFG_CPUQ_SRC_COPY(2) |
ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE(2) |
ANA_CPUQ_CFG_CPUQ_ALLBRIDGE(6) |
ANA_CPUQ_CFG_CPUQ_IPMC_CTRL(6) |
ANA_CPUQ_CFG_CPUQ_IGMP(6) |
ANA_CPUQ_CFG_CPUQ_MLD(6), ANA_CPUQ_CFG);
for (i = 0; i < 16; i++)
ocelot_write_rix(ocelot, ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL(6) |
ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
ANA_CPUQ_8021_CFG, i);
INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
return 0;
}
EXPORT_SYMBOL(ocelot_init);
void ocelot_deinit(struct ocelot *ocelot)
{
cancel_delayed_work(&ocelot->stats_work);
destroy_workqueue(ocelot->stats_queue);
mutex_destroy(&ocelot->stats_lock);
}
EXPORT_SYMBOL(ocelot_deinit);
void ocelot_deinit_port(struct ocelot *ocelot, int port)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
skb_queue_purge(&ocelot_port->tx_skbs);
}
EXPORT_SYMBOL(ocelot_deinit_port);
MODULE_LICENSE("Dual MIT/GPL");