2018-05-15 04:04:57 +08:00
|
|
|
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
|
|
|
|
/*
|
|
|
|
* Microsemi Ocelot Switch driver
|
|
|
|
*
|
|
|
|
* Copyright (c) 2017 Microsemi Corporation
|
|
|
|
*/
|
2021-02-14 06:37:56 +08:00
|
|
|
#include <linux/dsa/ocelot.h>
|
2018-05-15 04:04:57 +08:00
|
|
|
#include <linux/if_bridge.h>
|
2021-04-27 12:22:03 +08:00
|
|
|
#include <linux/ptp_classify.h>
|
2020-09-30 06:27:26 +08:00
|
|
|
#include <soc/mscc/ocelot_vcap.h>
|
2018-05-15 04:04:57 +08:00
|
|
|
#include "ocelot.h"
|
2020-06-20 23:43:45 +08:00
|
|
|
#include "ocelot_vcap.h"
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2018-12-20 21:16:31 +08:00
|
|
|
#define TABLE_UPDATE_SLEEP_US 10
|
|
|
|
#define TABLE_UPDATE_TIMEOUT_US 100000
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
struct ocelot_mact_entry {
|
|
|
|
u8 mac[ETH_ALEN];
|
|
|
|
u16 vid;
|
|
|
|
enum macaccess_entry_type type;
|
|
|
|
};
|
|
|
|
|
2018-12-20 21:16:31 +08:00
|
|
|
static inline u32 ocelot_mact_read_macaccess(struct ocelot *ocelot)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
2018-12-20 21:16:31 +08:00
|
|
|
return ocelot_read(ocelot, ANA_TABLES_MACACCESS);
|
|
|
|
}
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2018-12-20 21:16:31 +08:00
|
|
|
static inline int ocelot_mact_wait_for_completion(struct ocelot *ocelot)
|
|
|
|
{
|
|
|
|
u32 val;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2018-12-20 21:16:31 +08:00
|
|
|
return readx_poll_timeout(ocelot_mact_read_macaccess,
|
|
|
|
ocelot, val,
|
|
|
|
(val & ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M) ==
|
|
|
|
MACACCESS_CMD_IDLE,
|
|
|
|
TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void ocelot_mact_select(struct ocelot *ocelot,
|
|
|
|
const unsigned char mac[ETH_ALEN],
|
|
|
|
unsigned int vid)
|
|
|
|
{
|
|
|
|
u32 macl = 0, mach = 0;
|
|
|
|
|
|
|
|
/* Set the MAC address to handle and the vlan associated in a format
|
|
|
|
* understood by the hardware.
|
|
|
|
*/
|
|
|
|
mach |= vid << 16;
|
|
|
|
mach |= mac[0] << 8;
|
|
|
|
mach |= mac[1] << 0;
|
|
|
|
macl |= mac[2] << 24;
|
|
|
|
macl |= mac[3] << 16;
|
|
|
|
macl |= mac[4] << 8;
|
|
|
|
macl |= mac[5] << 0;
|
|
|
|
|
|
|
|
ocelot_write(ocelot, macl, ANA_TABLES_MACLDATA);
|
|
|
|
ocelot_write(ocelot, mach, ANA_TABLES_MACHDATA);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2020-06-20 23:43:44 +08:00
|
|
|
int ocelot_mact_learn(struct ocelot *ocelot, int port,
|
|
|
|
const unsigned char mac[ETH_ALEN],
|
|
|
|
unsigned int vid, enum macaccess_entry_type type)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
2021-01-19 22:06:38 +08:00
|
|
|
u32 cmd = ANA_TABLES_MACACCESS_VALID |
|
|
|
|
ANA_TABLES_MACACCESS_DEST_IDX(port) |
|
|
|
|
ANA_TABLES_MACACCESS_ENTRYTYPE(type) |
|
|
|
|
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN);
|
|
|
|
unsigned int mc_ports;
|
|
|
|
|
|
|
|
/* Set MAC_CPU_COPY if the CPU port is used by a multicast entry */
|
|
|
|
if (type == ENTRYTYPE_MACv4)
|
|
|
|
mc_ports = (mac[1] << 8) | mac[2];
|
|
|
|
else if (type == ENTRYTYPE_MACv6)
|
|
|
|
mc_ports = (mac[0] << 8) | mac[1];
|
|
|
|
else
|
|
|
|
mc_ports = 0;
|
|
|
|
|
|
|
|
if (mc_ports & BIT(ocelot->num_phys_ports))
|
|
|
|
cmd |= ANA_TABLES_MACACCESS_MAC_CPU_COPY;
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
ocelot_mact_select(ocelot, mac, vid);
|
|
|
|
|
|
|
|
/* Issue a write command */
|
2021-01-19 22:06:38 +08:00
|
|
|
ocelot_write(ocelot, cmd, ANA_TABLES_MACACCESS);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-10-25 19:59:25 +08:00
|
|
|
return ocelot_mact_wait_for_completion(ocelot);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2020-06-20 23:43:44 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_mact_learn);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2020-06-20 23:43:44 +08:00
|
|
|
int ocelot_mact_forget(struct ocelot *ocelot,
|
|
|
|
const unsigned char mac[ETH_ALEN], unsigned int vid)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
|
|
|
ocelot_mact_select(ocelot, mac, vid);
|
|
|
|
|
|
|
|
/* Issue a forget command */
|
|
|
|
ocelot_write(ocelot,
|
|
|
|
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_FORGET),
|
|
|
|
ANA_TABLES_MACACCESS);
|
|
|
|
|
2021-10-25 19:59:25 +08:00
|
|
|
return ocelot_mact_wait_for_completion(ocelot);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2020-06-20 23:43:44 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_mact_forget);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
|
|
|
static void ocelot_mact_init(struct ocelot *ocelot)
|
|
|
|
{
|
|
|
|
/* Configure the learning mode entries attributes:
|
|
|
|
* - Do not copy the frame to the CPU extraction queues.
|
|
|
|
* - Use the vlan and mac_cpoy for dmac lookup.
|
|
|
|
*/
|
|
|
|
ocelot_rmw(ocelot, 0,
|
|
|
|
ANA_AGENCTRL_LEARN_CPU_COPY | ANA_AGENCTRL_IGNORE_DMAC_FLAGS
|
|
|
|
| ANA_AGENCTRL_LEARN_FWD_KILL
|
|
|
|
| ANA_AGENCTRL_LEARN_IGNORE_VLAN,
|
|
|
|
ANA_AGENCTRL);
|
|
|
|
|
2021-10-25 19:59:25 +08:00
|
|
|
/* Clear the MAC table */
|
2018-05-15 04:04:57 +08:00
|
|
|
ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS);
|
|
|
|
}
|
|
|
|
|
2019-11-09 21:02:52 +08:00
|
|
|
static void ocelot_vcap_enable(struct ocelot *ocelot, int port)
|
2019-05-31 15:16:56 +08:00
|
|
|
{
|
|
|
|
ocelot_write_gix(ocelot, ANA_PORT_VCAP_S2_CFG_S2_ENA |
|
|
|
|
ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(0xa),
|
2019-11-09 21:02:52 +08:00
|
|
|
ANA_PORT_VCAP_S2_CFG, port);
|
net: mscc: ocelot: offload ingress skbedit and vlan actions to VCAP IS1
VCAP IS1 is a VCAP module which can filter on the most common L2/L3/L4
Ethernet keys, and modify the results of the basic QoS classification
and VLAN classification based on those flow keys.
There are 3 VCAP IS1 lookups, mapped over chains 10000, 11000 and 12000.
Currently the driver is hardcoded to use IS1_ACTION_TYPE_NORMAL half
keys.
Note that the VLAN_MANGLE has been omitted for now. In hardware, the
VCAP_IS1_ACT_VID_REPLACE_ENA field replaces the classified VLAN
(metadata associated with the frame) and not the VLAN from the header
itself. There are currently some issues which need to be addressed when
operating in standalone, or in bridge with vlan_filtering=0 modes,
because in those cases the switch ports have VLAN awareness disabled,
and changing the classified VLAN to anything other than the pvid causes
the packets to be dropped. Another issue is that on egress, we expect
port tagging to push the classified VLAN, but port tagging is disabled
in the modes mentioned above, so although the classified VLAN is
replaced, it is not visible in the packet transmitted by the switch.
Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-10-02 20:02:23 +08:00
|
|
|
|
|
|
|
ocelot_write_gix(ocelot, ANA_PORT_VCAP_CFG_S1_ENA,
|
|
|
|
ANA_PORT_VCAP_CFG, port);
|
2020-10-02 20:02:24 +08:00
|
|
|
|
|
|
|
ocelot_rmw_gix(ocelot, REW_PORT_CFG_ES0_EN,
|
|
|
|
REW_PORT_CFG_ES0_EN,
|
|
|
|
REW_PORT_CFG, port);
|
2019-05-31 15:16:56 +08:00
|
|
|
}
|
|
|
|
|
2018-12-20 21:16:31 +08:00
|
|
|
static inline u32 ocelot_vlant_read_vlanaccess(struct ocelot *ocelot)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
2018-12-20 21:16:31 +08:00
|
|
|
return ocelot_read(ocelot, ANA_TABLES_VLANACCESS);
|
|
|
|
}
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2018-12-20 21:16:31 +08:00
|
|
|
static inline int ocelot_vlant_wait_for_completion(struct ocelot *ocelot)
|
|
|
|
{
|
|
|
|
u32 val;
|
|
|
|
|
|
|
|
return readx_poll_timeout(ocelot_vlant_read_vlanaccess,
|
|
|
|
ocelot,
|
|
|
|
val,
|
|
|
|
(val & ANA_TABLES_VLANACCESS_VLAN_TBL_CMD_M) ==
|
|
|
|
ANA_TABLES_VLANACCESS_CMD_IDLE,
|
|
|
|
TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
|
|
|
|
2018-06-26 20:28:49 +08:00
|
|
|
static int ocelot_vlant_set_mask(struct ocelot *ocelot, u16 vid, u32 mask)
|
|
|
|
{
|
|
|
|
/* Select the VID to configure */
|
|
|
|
ocelot_write(ocelot, ANA_TABLES_VLANTIDX_V_INDEX(vid),
|
|
|
|
ANA_TABLES_VLANTIDX);
|
|
|
|
/* Set the vlan port members mask and issue a write command */
|
|
|
|
ocelot_write(ocelot, ANA_TABLES_VLANACCESS_VLAN_PORT_MASK(mask) |
|
|
|
|
ANA_TABLES_VLANACCESS_CMD_WRITE,
|
|
|
|
ANA_TABLES_VLANACCESS);
|
|
|
|
|
|
|
|
return ocelot_vlant_wait_for_completion(ocelot);
|
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
static int ocelot_port_num_untagged_vlans(struct ocelot *ocelot, int port)
|
2018-06-26 20:28:49 +08:00
|
|
|
{
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
struct ocelot_bridge_vlan *vlan;
|
|
|
|
int num_untagged = 0;
|
|
|
|
|
|
|
|
list_for_each_entry(vlan, &ocelot->vlans, list) {
|
|
|
|
if (!(vlan->portmask & BIT(port)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (vlan->untagged & BIT(port))
|
|
|
|
num_untagged++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return num_untagged;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ocelot_port_num_tagged_vlans(struct ocelot *ocelot, int port)
|
|
|
|
{
|
|
|
|
struct ocelot_bridge_vlan *vlan;
|
|
|
|
int num_tagged = 0;
|
|
|
|
|
|
|
|
list_for_each_entry(vlan, &ocelot->vlans, list) {
|
|
|
|
if (!(vlan->portmask & BIT(port)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!(vlan->untagged & BIT(port)))
|
|
|
|
num_tagged++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return num_tagged;
|
|
|
|
}
|
2018-06-26 20:28:49 +08:00
|
|
|
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
/* We use native VLAN when we have to mix egress-tagged VLANs with exactly
|
|
|
|
* _one_ egress-untagged VLAN (_the_ native VLAN)
|
|
|
|
*/
|
|
|
|
static bool ocelot_port_uses_native_vlan(struct ocelot *ocelot, int port)
|
|
|
|
{
|
|
|
|
return ocelot_port_num_tagged_vlans(ocelot, port) &&
|
|
|
|
ocelot_port_num_untagged_vlans(ocelot, port) == 1;
|
|
|
|
}
|
2020-10-31 18:29:13 +08:00
|
|
|
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
static struct ocelot_bridge_vlan *
|
|
|
|
ocelot_port_find_native_vlan(struct ocelot *ocelot, int port)
|
|
|
|
{
|
|
|
|
struct ocelot_bridge_vlan *vlan;
|
|
|
|
|
|
|
|
list_for_each_entry(vlan, &ocelot->vlans, list)
|
|
|
|
if (vlan->portmask & BIT(port) && vlan->untagged & BIT(port))
|
|
|
|
return vlan;
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Keep in sync REW_TAG_CFG_TAG_CFG and, if applicable,
|
|
|
|
* REW_PORT_VLAN_CFG_PORT_VID, with the bridge VLAN table and VLAN awareness
|
|
|
|
* state of the port.
|
|
|
|
*/
|
|
|
|
static void ocelot_port_manage_port_tag(struct ocelot *ocelot, int port)
|
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
enum ocelot_port_tag_config tag_cfg;
|
|
|
|
bool uses_native_vlan = false;
|
2018-06-26 20:28:49 +08:00
|
|
|
|
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge
To rehash a previous explanation given in commit 1c44ce560b4d ("net:
mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is
up"), the switch driver operates the in a mode where a single VLAN can
be transmitted as untagged on a particular egress port. That is the
"native VLAN on trunk port" use case.
The configuration for this native VLAN is driven in 2 ways:
- Set the egress port rewriter to strip the VLAN tag for the native
VID (as it is egress-untagged, after all).
- Configure the ingress port to drop untagged and priority-tagged
traffic, if there is no native VLAN. The intention of this setting is
that a trunk port with no native VLAN should not accept untagged
traffic.
Since both of the above configurations for the native VLAN should only
be done if VLAN awareness is requested, they are actually done from the
ocelot_port_vlan_filtering function, after the basic procedure of
toggling the VLAN awareness flag of the port.
But there's a problem with that simplistic approach: we are trying to
juggle with 2 independent variables from a single function:
- Native VLAN of the port - its value is held in port->vid.
- VLAN awareness state of the port - currently there are some issues
here, more on that later*.
The actual problem can be seen when enslaving the switch ports to a VLAN
filtering bridge:
0. The driver configures a pvid of zero for each port, when in
standalone mode. While the bridge configures a default_pvid of 1 for
each port that gets added as a slave to it.
1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true.
The VLAN-filtering-dependent portion of the native VLAN
configuration is done, considering that the native VLAN is 0.
2. The bridge calls ocelot_vlan_add with vid=1, pvid=true,
untagged=true. The native VLAN changes to 1 (change which gets
propagated to hardware).
3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the
VLAN-filtering-dependent portion of the native VLAN configuration,
for the new native VLAN of 1. One can notice that after toggling "ip
link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0
type bridge vlan_filtering 1", the new native VLAN finally makes it
through and untagged traffic finally starts flowing again. But
obviously that shouldn't be needed.
So it is clear that 2 independent variables need to both re-trigger the
native VLAN configuration. So we introduce the second variable as
ocelot_port->vlan_aware.
*Actually both the DSA Felix driver and the Ocelot driver already had
each its own variable:
- Ocelot: ocelot_port_private->vlan_aware
- Felix: dsa_port->vlan_filtering
but the common Ocelot library needs to work with a single, common,
variable, so there is some refactoring done to move the vlan_aware
property from the private structure into the common ocelot_port
structure.
Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
|
|
|
if (ocelot_port->vlan_aware) {
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
uses_native_vlan = ocelot_port_uses_native_vlan(ocelot, port);
|
|
|
|
|
|
|
|
if (uses_native_vlan)
|
2021-10-21 01:58:48 +08:00
|
|
|
tag_cfg = OCELOT_PORT_TAG_NATIVE;
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
else if (ocelot_port_num_untagged_vlans(ocelot, port))
|
|
|
|
tag_cfg = OCELOT_PORT_TAG_DISABLED;
|
2018-06-26 20:28:49 +08:00
|
|
|
else
|
2021-10-21 01:58:48 +08:00
|
|
|
tag_cfg = OCELOT_PORT_TAG_TRUNK;
|
2019-11-09 21:02:47 +08:00
|
|
|
} else {
|
2021-10-21 01:58:48 +08:00
|
|
|
tag_cfg = OCELOT_PORT_TAG_DISABLED;
|
2018-06-26 20:28:49 +08:00
|
|
|
}
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
|
2021-10-21 01:58:48 +08:00
|
|
|
ocelot_rmw_gix(ocelot, REW_TAG_CFG_TAG_CFG(tag_cfg),
|
2018-06-26 20:28:49 +08:00
|
|
|
REW_TAG_CFG_TAG_CFG_M,
|
2019-11-09 21:02:47 +08:00
|
|
|
REW_TAG_CFG, port);
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
|
|
|
|
if (uses_native_vlan) {
|
|
|
|
struct ocelot_bridge_vlan *native_vlan;
|
|
|
|
|
|
|
|
/* Not having a native VLAN is impossible, because
|
|
|
|
* ocelot_port_num_untagged_vlans has returned 1.
|
|
|
|
* So there is no use in checking for NULL here.
|
|
|
|
*/
|
|
|
|
native_vlan = ocelot_port_find_native_vlan(ocelot, port);
|
|
|
|
|
|
|
|
ocelot_rmw_gix(ocelot,
|
|
|
|
REW_PORT_VLAN_CFG_PORT_VID(native_vlan->vid),
|
|
|
|
REW_PORT_VLAN_CFG_PORT_VID_M,
|
|
|
|
REW_PORT_VLAN_CFG, port);
|
|
|
|
}
|
2019-11-09 21:02:47 +08:00
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0
Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and
inherit the pvid from the bridge when one is present.
When the bridge has vlan_filtering=0, the software semantics are that
packets should be received regardless of whether there's a pvid
configured on the ingress port or not. However, ocelot does not observe
those semantics today.
Moreover, changing the PVID is also a problem with vlan_filtering=0.
We are privately remapping the VID of FDB, MDB entries to the port's
PVID when those are VLAN-unaware (i.e. when the VID of these entries
comes to us as 0). But we have no logic of adjusting that remapping when
the user changes the pvid and vlan_filtering is 0. So stale entries
would be left behind, and untagged traffic will stop matching on them.
And even if we were to solve that, there's an even bigger problem. If
swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0
bridge, they should be able to forward traffic between one another.
However, with ocelot they wouldn't do that.
The simplest way of fixing this is to never configure the pvid based on
what the bridge is asking for, when vlan_filtering is 0. Only if there
was a VLAN that the bridge couldn't mangle, that we could use as pvid....
So, turns out, there's 0 just for that. And for a reason: IEEE
802.1Q-2018, page 247, Table 9-2-Reserved VID values says:
The null VID. Indicates that the tag header contains only
priority information; no VID is present in the frame.
This VID value shall not be configured as a PVID or a member
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
of a VID Set, or configured in any FDB entry, or used in any
Management operation.
So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but
what we're doing here is just driver-level bookkeeping, all for the
better. The fact that we're using a pvid of 0 is not observable behavior
from the outside world: the network stack does not see the classified
VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more
consistent with the standalone mode now.
And now that we use the pvid of 0 in this mode, there's another advantage:
we don't need to perform any VID remapping for FDB and MDB entries either,
we can just use the VID of 0 that the bridge is passing to us.
The only gotcha is that every time we change the vlan_filtering setting,
we need to reapply the pvid (either to 0, or to the value from the bridge).
A small side-effect visible in the patch is that ocelot_port_set_pvid
needs to be moved above ocelot_port_vlan_filtering, so that it can be
called from there without forward-declarations.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
|
|
|
/* Default vlan to clasify for untagged frames (may be zero) */
|
2020-10-31 18:29:12 +08:00
|
|
|
static void ocelot_port_set_pvid(struct ocelot *ocelot, int port,
|
2021-10-21 01:58:52 +08:00
|
|
|
const struct ocelot_bridge_vlan *pvid_vlan)
|
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0
Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and
inherit the pvid from the bridge when one is present.
When the bridge has vlan_filtering=0, the software semantics are that
packets should be received regardless of whether there's a pvid
configured on the ingress port or not. However, ocelot does not observe
those semantics today.
Moreover, changing the PVID is also a problem with vlan_filtering=0.
We are privately remapping the VID of FDB, MDB entries to the port's
PVID when those are VLAN-unaware (i.e. when the VID of these entries
comes to us as 0). But we have no logic of adjusting that remapping when
the user changes the pvid and vlan_filtering is 0. So stale entries
would be left behind, and untagged traffic will stop matching on them.
And even if we were to solve that, there's an even bigger problem. If
swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0
bridge, they should be able to forward traffic between one another.
However, with ocelot they wouldn't do that.
The simplest way of fixing this is to never configure the pvid based on
what the bridge is asking for, when vlan_filtering is 0. Only if there
was a VLAN that the bridge couldn't mangle, that we could use as pvid....
So, turns out, there's 0 just for that. And for a reason: IEEE
802.1Q-2018, page 247, Table 9-2-Reserved VID values says:
The null VID. Indicates that the tag header contains only
priority information; no VID is present in the frame.
This VID value shall not be configured as a PVID or a member
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
of a VID Set, or configured in any FDB entry, or used in any
Management operation.
So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but
what we're doing here is just driver-level bookkeeping, all for the
better. The fact that we're using a pvid of 0 is not observable behavior
from the outside world: the network stack does not see the classified
VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more
consistent with the standalone mode now.
And now that we use the pvid of 0 in this mode, there's another advantage:
we don't need to perform any VID remapping for FDB and MDB entries either,
we can just use the VID of 0 that the bridge is passing to us.
The only gotcha is that every time we change the vlan_filtering setting,
we need to reapply the pvid (either to 0, or to the value from the bridge).
A small side-effect visible in the patch is that ocelot_port_set_pvid
needs to be moved above ocelot_port_vlan_filtering, so that it can be
called from there without forward-declarations.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
2021-10-21 01:58:52 +08:00
|
|
|
u16 pvid = OCELOT_VLAN_UNAWARE_PVID;
|
2020-10-31 18:29:14 +08:00
|
|
|
u32 val = 0;
|
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0
Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and
inherit the pvid from the bridge when one is present.
When the bridge has vlan_filtering=0, the software semantics are that
packets should be received regardless of whether there's a pvid
configured on the ingress port or not. However, ocelot does not observe
those semantics today.
Moreover, changing the PVID is also a problem with vlan_filtering=0.
We are privately remapping the VID of FDB, MDB entries to the port's
PVID when those are VLAN-unaware (i.e. when the VID of these entries
comes to us as 0). But we have no logic of adjusting that remapping when
the user changes the pvid and vlan_filtering is 0. So stale entries
would be left behind, and untagged traffic will stop matching on them.
And even if we were to solve that, there's an even bigger problem. If
swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0
bridge, they should be able to forward traffic between one another.
However, with ocelot they wouldn't do that.
The simplest way of fixing this is to never configure the pvid based on
what the bridge is asking for, when vlan_filtering is 0. Only if there
was a VLAN that the bridge couldn't mangle, that we could use as pvid....
So, turns out, there's 0 just for that. And for a reason: IEEE
802.1Q-2018, page 247, Table 9-2-Reserved VID values says:
The null VID. Indicates that the tag header contains only
priority information; no VID is present in the frame.
This VID value shall not be configured as a PVID or a member
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
of a VID Set, or configured in any FDB entry, or used in any
Management operation.
So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but
what we're doing here is just driver-level bookkeeping, all for the
better. The fact that we're using a pvid of 0 is not observable behavior
from the outside world: the network stack does not see the classified
VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more
consistent with the standalone mode now.
And now that we use the pvid of 0 in this mode, there's another advantage:
we don't need to perform any VID remapping for FDB and MDB entries either,
we can just use the VID of 0 that the bridge is passing to us.
The only gotcha is that every time we change the vlan_filtering setting,
we need to reapply the pvid (either to 0, or to the value from the bridge).
A small side-effect visible in the patch is that ocelot_port_set_pvid
needs to be moved above ocelot_port_vlan_filtering, so that it can be
called from there without forward-declarations.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
|
|
|
|
2020-10-31 18:29:12 +08:00
|
|
|
ocelot_port->pvid_vlan = pvid_vlan;
|
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0
Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and
inherit the pvid from the bridge when one is present.
When the bridge has vlan_filtering=0, the software semantics are that
packets should be received regardless of whether there's a pvid
configured on the ingress port or not. However, ocelot does not observe
those semantics today.
Moreover, changing the PVID is also a problem with vlan_filtering=0.
We are privately remapping the VID of FDB, MDB entries to the port's
PVID when those are VLAN-unaware (i.e. when the VID of these entries
comes to us as 0). But we have no logic of adjusting that remapping when
the user changes the pvid and vlan_filtering is 0. So stale entries
would be left behind, and untagged traffic will stop matching on them.
And even if we were to solve that, there's an even bigger problem. If
swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0
bridge, they should be able to forward traffic between one another.
However, with ocelot they wouldn't do that.
The simplest way of fixing this is to never configure the pvid based on
what the bridge is asking for, when vlan_filtering is 0. Only if there
was a VLAN that the bridge couldn't mangle, that we could use as pvid....
So, turns out, there's 0 just for that. And for a reason: IEEE
802.1Q-2018, page 247, Table 9-2-Reserved VID values says:
The null VID. Indicates that the tag header contains only
priority information; no VID is present in the frame.
This VID value shall not be configured as a PVID or a member
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
of a VID Set, or configured in any FDB entry, or used in any
Management operation.
So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but
what we're doing here is just driver-level bookkeeping, all for the
better. The fact that we're using a pvid of 0 is not observable behavior
from the outside world: the network stack does not see the classified
VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more
consistent with the standalone mode now.
And now that we use the pvid of 0 in this mode, there's another advantage:
we don't need to perform any VID remapping for FDB and MDB entries either,
we can just use the VID of 0 that the bridge is passing to us.
The only gotcha is that every time we change the vlan_filtering setting,
we need to reapply the pvid (either to 0, or to the value from the bridge).
A small side-effect visible in the patch is that ocelot_port_set_pvid
needs to be moved above ocelot_port_vlan_filtering, so that it can be
called from there without forward-declarations.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
|
|
|
|
2021-10-21 01:58:52 +08:00
|
|
|
if (ocelot_port->vlan_aware && pvid_vlan)
|
|
|
|
pvid = pvid_vlan->vid;
|
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0
Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and
inherit the pvid from the bridge when one is present.
When the bridge has vlan_filtering=0, the software semantics are that
packets should be received regardless of whether there's a pvid
configured on the ingress port or not. However, ocelot does not observe
those semantics today.
Moreover, changing the PVID is also a problem with vlan_filtering=0.
We are privately remapping the VID of FDB, MDB entries to the port's
PVID when those are VLAN-unaware (i.e. when the VID of these entries
comes to us as 0). But we have no logic of adjusting that remapping when
the user changes the pvid and vlan_filtering is 0. So stale entries
would be left behind, and untagged traffic will stop matching on them.
And even if we were to solve that, there's an even bigger problem. If
swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0
bridge, they should be able to forward traffic between one another.
However, with ocelot they wouldn't do that.
The simplest way of fixing this is to never configure the pvid based on
what the bridge is asking for, when vlan_filtering is 0. Only if there
was a VLAN that the bridge couldn't mangle, that we could use as pvid....
So, turns out, there's 0 just for that. And for a reason: IEEE
802.1Q-2018, page 247, Table 9-2-Reserved VID values says:
The null VID. Indicates that the tag header contains only
priority information; no VID is present in the frame.
This VID value shall not be configured as a PVID or a member
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
of a VID Set, or configured in any FDB entry, or used in any
Management operation.
So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but
what we're doing here is just driver-level bookkeeping, all for the
better. The fact that we're using a pvid of 0 is not observable behavior
from the outside world: the network stack does not see the classified
VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more
consistent with the standalone mode now.
And now that we use the pvid of 0 in this mode, there's another advantage:
we don't need to perform any VID remapping for FDB and MDB entries either,
we can just use the VID of 0 that the bridge is passing to us.
The only gotcha is that every time we change the vlan_filtering setting,
we need to reapply the pvid (either to 0, or to the value from the bridge).
A small side-effect visible in the patch is that ocelot_port_set_pvid
needs to be moved above ocelot_port_vlan_filtering, so that it can be
called from there without forward-declarations.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
|
|
|
|
|
|
|
ocelot_rmw_gix(ocelot,
|
2021-10-21 01:58:52 +08:00
|
|
|
ANA_PORT_VLAN_CFG_VLAN_VID(pvid),
|
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0
Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and
inherit the pvid from the bridge when one is present.
When the bridge has vlan_filtering=0, the software semantics are that
packets should be received regardless of whether there's a pvid
configured on the ingress port or not. However, ocelot does not observe
those semantics today.
Moreover, changing the PVID is also a problem with vlan_filtering=0.
We are privately remapping the VID of FDB, MDB entries to the port's
PVID when those are VLAN-unaware (i.e. when the VID of these entries
comes to us as 0). But we have no logic of adjusting that remapping when
the user changes the pvid and vlan_filtering is 0. So stale entries
would be left behind, and untagged traffic will stop matching on them.
And even if we were to solve that, there's an even bigger problem. If
swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0
bridge, they should be able to forward traffic between one another.
However, with ocelot they wouldn't do that.
The simplest way of fixing this is to never configure the pvid based on
what the bridge is asking for, when vlan_filtering is 0. Only if there
was a VLAN that the bridge couldn't mangle, that we could use as pvid....
So, turns out, there's 0 just for that. And for a reason: IEEE
802.1Q-2018, page 247, Table 9-2-Reserved VID values says:
The null VID. Indicates that the tag header contains only
priority information; no VID is present in the frame.
This VID value shall not be configured as a PVID or a member
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
of a VID Set, or configured in any FDB entry, or used in any
Management operation.
So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but
what we're doing here is just driver-level bookkeeping, all for the
better. The fact that we're using a pvid of 0 is not observable behavior
from the outside world: the network stack does not see the classified
VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more
consistent with the standalone mode now.
And now that we use the pvid of 0 in this mode, there's another advantage:
we don't need to perform any VID remapping for FDB and MDB entries either,
we can just use the VID of 0 that the bridge is passing to us.
The only gotcha is that every time we change the vlan_filtering setting,
we need to reapply the pvid (either to 0, or to the value from the bridge).
A small side-effect visible in the patch is that ocelot_port_set_pvid
needs to be moved above ocelot_port_vlan_filtering, so that it can be
called from there without forward-declarations.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
|
|
|
ANA_PORT_VLAN_CFG_VLAN_VID_M,
|
|
|
|
ANA_PORT_VLAN_CFG, port);
|
2020-10-31 18:29:14 +08:00
|
|
|
|
|
|
|
/* If there's no pvid, we should drop not only untagged traffic (which
|
|
|
|
* happens automatically), but also 802.1p traffic which gets
|
|
|
|
* classified to VLAN 0, but that is always in our RX filter, so it
|
|
|
|
* would get accepted were it not for this setting.
|
|
|
|
*/
|
2021-10-21 01:58:52 +08:00
|
|
|
if (!pvid_vlan && ocelot_port->vlan_aware)
|
2020-10-31 18:29:14 +08:00
|
|
|
val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA |
|
|
|
|
ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA;
|
|
|
|
|
|
|
|
ocelot_rmw_gix(ocelot, val,
|
|
|
|
ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA |
|
|
|
|
ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA,
|
|
|
|
ANA_PORT_DROP_CFG, port);
|
net: mscc: ocelot: use the pvid of zero when bridged with vlan_filtering=0
Currently, mscc_ocelot ports configure pvid=0 in standalone mode, and
inherit the pvid from the bridge when one is present.
When the bridge has vlan_filtering=0, the software semantics are that
packets should be received regardless of whether there's a pvid
configured on the ingress port or not. However, ocelot does not observe
those semantics today.
Moreover, changing the PVID is also a problem with vlan_filtering=0.
We are privately remapping the VID of FDB, MDB entries to the port's
PVID when those are VLAN-unaware (i.e. when the VID of these entries
comes to us as 0). But we have no logic of adjusting that remapping when
the user changes the pvid and vlan_filtering is 0. So stale entries
would be left behind, and untagged traffic will stop matching on them.
And even if we were to solve that, there's an even bigger problem. If
swp0 has pvid 1, and swp1 has pvid 2, and both are under a vlan_filtering=0
bridge, they should be able to forward traffic between one another.
However, with ocelot they wouldn't do that.
The simplest way of fixing this is to never configure the pvid based on
what the bridge is asking for, when vlan_filtering is 0. Only if there
was a VLAN that the bridge couldn't mangle, that we could use as pvid....
So, turns out, there's 0 just for that. And for a reason: IEEE
802.1Q-2018, page 247, Table 9-2-Reserved VID values says:
The null VID. Indicates that the tag header contains only
priority information; no VID is present in the frame.
This VID value shall not be configured as a PVID or a member
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
of a VID Set, or configured in any FDB entry, or used in any
Management operation.
So, aren't we doing exactly what 802.1Q says not to? Well, in a way, but
what we're doing here is just driver-level bookkeeping, all for the
better. The fact that we're using a pvid of 0 is not observable behavior
from the outside world: the network stack does not see the classified
VLAN that the switch uses, in vlan_filtering=0 mode. And we're also more
consistent with the standalone mode now.
And now that we use the pvid of 0 in this mode, there's another advantage:
we don't need to perform any VID remapping for FDB and MDB entries either,
we can just use the VID of 0 that the bridge is passing to us.
The only gotcha is that every time we change the vlan_filtering setting,
we need to reapply the pvid (either to 0, or to the value from the bridge).
A small side-effect visible in the patch is that ocelot_port_set_pvid
needs to be moved above ocelot_port_vlan_filtering, so that it can be
called from there without forward-declarations.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-31 18:29:10 +08:00
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot,
|
|
|
|
u16 vid)
|
2021-08-20 01:40:08 +08:00
|
|
|
{
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
struct ocelot_bridge_vlan *vlan;
|
2021-08-20 01:40:08 +08:00
|
|
|
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
list_for_each_entry(vlan, &ocelot->vlans, list)
|
|
|
|
if (vlan->vid == vid)
|
|
|
|
return vlan;
|
2021-08-20 01:40:08 +08:00
|
|
|
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
return NULL;
|
2021-08-20 01:40:08 +08:00
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
static int ocelot_vlan_member_add(struct ocelot *ocelot, int port, u16 vid,
|
|
|
|
bool untagged)
|
2021-08-20 01:40:08 +08:00
|
|
|
{
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
struct ocelot_bridge_vlan *vlan = ocelot_bridge_vlan_find(ocelot, vid);
|
|
|
|
unsigned long portmask;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (vlan) {
|
|
|
|
portmask = vlan->portmask | BIT(port);
|
|
|
|
|
|
|
|
err = ocelot_vlant_set_mask(ocelot, vid, portmask);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
vlan->portmask = portmask;
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
/* Bridge VLANs can be overwritten with a different
|
|
|
|
* egress-tagging setting, so make sure to override an untagged
|
|
|
|
* with a tagged VID if that's going on.
|
|
|
|
*/
|
|
|
|
if (untagged)
|
|
|
|
vlan->untagged |= BIT(port);
|
|
|
|
else
|
|
|
|
vlan->untagged &= ~BIT(port);
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
|
|
|
|
if (!vlan)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
portmask = BIT(port);
|
|
|
|
|
|
|
|
err = ocelot_vlant_set_mask(ocelot, vid, portmask);
|
|
|
|
if (err) {
|
|
|
|
kfree(vlan);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
vlan->vid = vid;
|
|
|
|
vlan->portmask = portmask;
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
if (untagged)
|
|
|
|
vlan->untagged = BIT(port);
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
INIT_LIST_HEAD(&vlan->list);
|
|
|
|
list_add_tail(&vlan->list, &ocelot->vlans);
|
|
|
|
|
|
|
|
return 0;
|
2021-08-20 01:40:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int ocelot_vlan_member_del(struct ocelot *ocelot, int port, u16 vid)
|
|
|
|
{
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
struct ocelot_bridge_vlan *vlan = ocelot_bridge_vlan_find(ocelot, vid);
|
|
|
|
unsigned long portmask;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!vlan)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
portmask = vlan->portmask & ~BIT(port);
|
|
|
|
|
|
|
|
err = ocelot_vlant_set_mask(ocelot, vid, portmask);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
vlan->portmask = portmask;
|
|
|
|
if (vlan->portmask)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
list_del(&vlan->list);
|
|
|
|
kfree(vlan);
|
|
|
|
|
|
|
|
return 0;
|
2021-08-20 01:40:08 +08:00
|
|
|
}
|
|
|
|
|
2020-10-03 06:06:46 +08:00
|
|
|
int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
|
2021-08-20 01:40:07 +08:00
|
|
|
bool vlan_aware, struct netlink_ext_ack *extack)
|
2019-11-09 21:02:47 +08:00
|
|
|
{
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
|
2019-11-09 21:02:47 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
struct ocelot_vcap_filter *filter;
|
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge
To rehash a previous explanation given in commit 1c44ce560b4d ("net:
mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is
up"), the switch driver operates the in a mode where a single VLAN can
be transmitted as untagged on a particular egress port. That is the
"native VLAN on trunk port" use case.
The configuration for this native VLAN is driven in 2 ways:
- Set the egress port rewriter to strip the VLAN tag for the native
VID (as it is egress-untagged, after all).
- Configure the ingress port to drop untagged and priority-tagged
traffic, if there is no native VLAN. The intention of this setting is
that a trunk port with no native VLAN should not accept untagged
traffic.
Since both of the above configurations for the native VLAN should only
be done if VLAN awareness is requested, they are actually done from the
ocelot_port_vlan_filtering function, after the basic procedure of
toggling the VLAN awareness flag of the port.
But there's a problem with that simplistic approach: we are trying to
juggle with 2 independent variables from a single function:
- Native VLAN of the port - its value is held in port->vid.
- VLAN awareness state of the port - currently there are some issues
here, more on that later*.
The actual problem can be seen when enslaving the switch ports to a VLAN
filtering bridge:
0. The driver configures a pvid of zero for each port, when in
standalone mode. While the bridge configures a default_pvid of 1 for
each port that gets added as a slave to it.
1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true.
The VLAN-filtering-dependent portion of the native VLAN
configuration is done, considering that the native VLAN is 0.
2. The bridge calls ocelot_vlan_add with vid=1, pvid=true,
untagged=true. The native VLAN changes to 1 (change which gets
propagated to hardware).
3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the
VLAN-filtering-dependent portion of the native VLAN configuration,
for the new native VLAN of 1. One can notice that after toggling "ip
link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0
type bridge vlan_filtering 1", the new native VLAN finally makes it
through and untagged traffic finally starts flowing again. But
obviously that shouldn't be needed.
So it is clear that 2 independent variables need to both re-trigger the
native VLAN configuration. So we introduce the second variable as
ocelot_port->vlan_aware.
*Actually both the DSA Felix driver and the Ocelot driver already had
each its own variable:
- Ocelot: ocelot_port_private->vlan_aware
- Felix: dsa_port->vlan_filtering
but the common Ocelot library needs to work with a single, common,
variable, so there is some refactoring done to move the vlan_aware
property from the private structure into the common ocelot_port
structure.
Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
|
|
|
u32 val;
|
2019-11-09 21:02:47 +08:00
|
|
|
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
list_for_each_entry(filter, &block->rules, list) {
|
|
|
|
if (filter->ingress_port_mask & BIT(port) &&
|
|
|
|
filter->action.vid_replace_ena) {
|
2021-08-20 01:40:07 +08:00
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"Cannot change VLAN state with vlan modify rules active");
|
net: switchdev: remove the transaction structure from port attributes
Since the introduction of the switchdev API, port attributes were
transmitted to drivers for offloading using a two-step transactional
model, with a prepare phase that was supposed to catch all errors, and a
commit phase that was supposed to never fail.
Some classes of failures can never be avoided, like hardware access, or
memory allocation. In the latter case, merely attempting to move the
memory allocation to the preparation phase makes it impossible to avoid
memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused
transaction item queue") which has removed the unused mechanism of
passing on the allocated memory between one phase and another.
It is time we admit that separating the preparation from the commit
phase is something that is best left for the driver to decide, and not
something that should be baked into the API, especially since there are
no switchdev callers that depend on this.
This patch removes the struct switchdev_trans member from switchdev port
attribute notifier structures, and converts drivers to not look at this
member.
In part, this patch contains a revert of my previous commit 2e554a7a5d8a
("net: dsa: propagate switchdev vlan_filtering prepare phase to
drivers").
For the most part, the conversion was trivial except for:
- Rocker's world implementation based on Broadcom OF-DPA had an odd
implementation of ofdpa_port_attr_bridge_flags_set. The conversion was
done mechanically, by pasting the implementation twice, then only
keeping the code that would get executed during prepare phase on top,
then only keeping the code that gets executed during the commit phase
on bottom, then simplifying the resulting code until this was obtained.
- DSA's offloading of STP state, bridge flags, VLAN filtering and
multicast router could be converted right away. But the ageing time
could not, so a shim was introduced and this was left for a further
commit.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de> # hellcreek
Reviewed-by: Linus Walleij <linus.walleij@linaro.org> # RTL8366RB
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-09 08:01:50 +08:00
|
|
|
return -EBUSY;
|
net: mscc: ocelot: offload VLAN mangle action to VCAP IS1
The VCAP_IS1_ACT_VID_REPLACE_ENA action, from the VCAP IS1 ingress TCAM,
changes the classified VLAN.
We are only exposing this ability for switch ports that are under VLAN
aware bridges. This is because in standalone ports mode and under a
bridge with vlan_filtering=0, the ocelot driver configures the switch to
operate as VLAN-unaware, so the classified VLAN is not derived from the
802.1Q header from the packet, but instead is always equal to the
port-based VLAN ID of the ingress port. We _can_ still change the
classified VLAN for packets when operating in this mode, but the end
result will most likely be a drop, since both the ingress and the egress
port need to be members of the modified VLAN. And even if we install the
new classified VLAN into the VLAN table of the switch, the result would
still not be as expected: we wouldn't see, on the output port, the
modified VLAN tag, but the original one, even though the classified VLAN
was indeed modified. This is because of how the hardware works: on
egress, what is pushed to the frame is a "port tag", which gives us the
following options:
- Tag all frames with port tag (derived from the classified VLAN)
- Tag all frames with port tag, except if the classified VLAN is 0 or
equal to the native VLAN of the egress port
- No port tag
Needless to say, in VLAN-unaware mode we are disabling the port tag.
Otherwise, the existing VLAN tag would be ignored, and a second VLAN
tag (the port tag), holding the classified VLAN, would be pushed
(instead of replacing the existing 802.1Q tag). This is definitely not
what the user wanted when installing a "vlan modify" action.
So it is simply not worth bothering with VLAN modify rules under other
configurations except when the ports are fully VLAN-aware.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-08 19:56:58 +08:00
|
|
|
}
|
|
|
|
}
|
2020-10-03 06:06:46 +08:00
|
|
|
|
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge
To rehash a previous explanation given in commit 1c44ce560b4d ("net:
mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is
up"), the switch driver operates the in a mode where a single VLAN can
be transmitted as untagged on a particular egress port. That is the
"native VLAN on trunk port" use case.
The configuration for this native VLAN is driven in 2 ways:
- Set the egress port rewriter to strip the VLAN tag for the native
VID (as it is egress-untagged, after all).
- Configure the ingress port to drop untagged and priority-tagged
traffic, if there is no native VLAN. The intention of this setting is
that a trunk port with no native VLAN should not accept untagged
traffic.
Since both of the above configurations for the native VLAN should only
be done if VLAN awareness is requested, they are actually done from the
ocelot_port_vlan_filtering function, after the basic procedure of
toggling the VLAN awareness flag of the port.
But there's a problem with that simplistic approach: we are trying to
juggle with 2 independent variables from a single function:
- Native VLAN of the port - its value is held in port->vid.
- VLAN awareness state of the port - currently there are some issues
here, more on that later*.
The actual problem can be seen when enslaving the switch ports to a VLAN
filtering bridge:
0. The driver configures a pvid of zero for each port, when in
standalone mode. While the bridge configures a default_pvid of 1 for
each port that gets added as a slave to it.
1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true.
The VLAN-filtering-dependent portion of the native VLAN
configuration is done, considering that the native VLAN is 0.
2. The bridge calls ocelot_vlan_add with vid=1, pvid=true,
untagged=true. The native VLAN changes to 1 (change which gets
propagated to hardware).
3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the
VLAN-filtering-dependent portion of the native VLAN configuration,
for the new native VLAN of 1. One can notice that after toggling "ip
link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0
type bridge vlan_filtering 1", the new native VLAN finally makes it
through and untagged traffic finally starts flowing again. But
obviously that shouldn't be needed.
So it is clear that 2 independent variables need to both re-trigger the
native VLAN configuration. So we introduce the second variable as
ocelot_port->vlan_aware.
*Actually both the DSA Felix driver and the Ocelot driver already had
each its own variable:
- Ocelot: ocelot_port_private->vlan_aware
- Felix: dsa_port->vlan_filtering
but the common Ocelot library needs to work with a single, common,
variable, so there is some refactoring done to move the vlan_aware
property from the private structure into the common ocelot_port
structure.
Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
|
|
|
ocelot_port->vlan_aware = vlan_aware;
|
2019-11-09 21:02:47 +08:00
|
|
|
|
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge
To rehash a previous explanation given in commit 1c44ce560b4d ("net:
mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is
up"), the switch driver operates the in a mode where a single VLAN can
be transmitted as untagged on a particular egress port. That is the
"native VLAN on trunk port" use case.
The configuration for this native VLAN is driven in 2 ways:
- Set the egress port rewriter to strip the VLAN tag for the native
VID (as it is egress-untagged, after all).
- Configure the ingress port to drop untagged and priority-tagged
traffic, if there is no native VLAN. The intention of this setting is
that a trunk port with no native VLAN should not accept untagged
traffic.
Since both of the above configurations for the native VLAN should only
be done if VLAN awareness is requested, they are actually done from the
ocelot_port_vlan_filtering function, after the basic procedure of
toggling the VLAN awareness flag of the port.
But there's a problem with that simplistic approach: we are trying to
juggle with 2 independent variables from a single function:
- Native VLAN of the port - its value is held in port->vid.
- VLAN awareness state of the port - currently there are some issues
here, more on that later*.
The actual problem can be seen when enslaving the switch ports to a VLAN
filtering bridge:
0. The driver configures a pvid of zero for each port, when in
standalone mode. While the bridge configures a default_pvid of 1 for
each port that gets added as a slave to it.
1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true.
The VLAN-filtering-dependent portion of the native VLAN
configuration is done, considering that the native VLAN is 0.
2. The bridge calls ocelot_vlan_add with vid=1, pvid=true,
untagged=true. The native VLAN changes to 1 (change which gets
propagated to hardware).
3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the
VLAN-filtering-dependent portion of the native VLAN configuration,
for the new native VLAN of 1. One can notice that after toggling "ip
link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0
type bridge vlan_filtering 1", the new native VLAN finally makes it
through and untagged traffic finally starts flowing again. But
obviously that shouldn't be needed.
So it is clear that 2 independent variables need to both re-trigger the
native VLAN configuration. So we introduce the second variable as
ocelot_port->vlan_aware.
*Actually both the DSA Felix driver and the Ocelot driver already had
each its own variable:
- Ocelot: ocelot_port_private->vlan_aware
- Felix: dsa_port->vlan_filtering
but the common Ocelot library needs to work with a single, common,
variable, so there is some refactoring done to move the vlan_aware
property from the private structure into the common ocelot_port
structure.
Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
|
|
|
if (vlan_aware)
|
|
|
|
val = ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
|
|
|
|
ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1);
|
|
|
|
else
|
|
|
|
val = 0;
|
|
|
|
ocelot_rmw_gix(ocelot, val,
|
|
|
|
ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
|
|
|
|
ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M,
|
|
|
|
ANA_PORT_VLAN_CFG, port);
|
2019-11-09 21:02:47 +08:00
|
|
|
|
2020-10-31 18:29:12 +08:00
|
|
|
ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan);
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
ocelot_port_manage_port_tag(ocelot, port);
|
2020-10-03 06:06:46 +08:00
|
|
|
|
|
|
|
return 0;
|
2019-11-09 21:02:47 +08:00
|
|
|
}
|
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge
To rehash a previous explanation given in commit 1c44ce560b4d ("net:
mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is
up"), the switch driver operates the in a mode where a single VLAN can
be transmitted as untagged on a particular egress port. That is the
"native VLAN on trunk port" use case.
The configuration for this native VLAN is driven in 2 ways:
- Set the egress port rewriter to strip the VLAN tag for the native
VID (as it is egress-untagged, after all).
- Configure the ingress port to drop untagged and priority-tagged
traffic, if there is no native VLAN. The intention of this setting is
that a trunk port with no native VLAN should not accept untagged
traffic.
Since both of the above configurations for the native VLAN should only
be done if VLAN awareness is requested, they are actually done from the
ocelot_port_vlan_filtering function, after the basic procedure of
toggling the VLAN awareness flag of the port.
But there's a problem with that simplistic approach: we are trying to
juggle with 2 independent variables from a single function:
- Native VLAN of the port - its value is held in port->vid.
- VLAN awareness state of the port - currently there are some issues
here, more on that later*.
The actual problem can be seen when enslaving the switch ports to a VLAN
filtering bridge:
0. The driver configures a pvid of zero for each port, when in
standalone mode. While the bridge configures a default_pvid of 1 for
each port that gets added as a slave to it.
1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true.
The VLAN-filtering-dependent portion of the native VLAN
configuration is done, considering that the native VLAN is 0.
2. The bridge calls ocelot_vlan_add with vid=1, pvid=true,
untagged=true. The native VLAN changes to 1 (change which gets
propagated to hardware).
3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the
VLAN-filtering-dependent portion of the native VLAN configuration,
for the new native VLAN of 1. One can notice that after toggling "ip
link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0
type bridge vlan_filtering 1", the new native VLAN finally makes it
through and untagged traffic finally starts flowing again. But
obviously that shouldn't be needed.
So it is clear that 2 independent variables need to both re-trigger the
native VLAN configuration. So we introduce the second variable as
ocelot_port->vlan_aware.
*Actually both the DSA Felix driver and the Ocelot driver already had
each its own variable:
- Ocelot: ocelot_port_private->vlan_aware
- Felix: dsa_port->vlan_filtering
but the common Ocelot library needs to work with a single, common,
variable, so there is some refactoring done to move the vlan_aware
property from the private structure into the common ocelot_port
structure.
Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_vlan_filtering);
|
2019-11-09 21:02:47 +08:00
|
|
|
|
2020-10-31 18:29:15 +08:00
|
|
|
int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid,
|
2021-08-20 01:40:06 +08:00
|
|
|
bool untagged, struct netlink_ext_ack *extack)
|
2020-10-31 18:29:15 +08:00
|
|
|
{
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
if (untagged) {
|
|
|
|
/* We are adding an egress-tagged VLAN */
|
|
|
|
if (ocelot_port_uses_native_vlan(ocelot, port)) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"Port with egress-tagged VLANs cannot have more than one egress-untagged (native) VLAN");
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* We are adding an egress-tagged VLAN */
|
|
|
|
if (ocelot_port_num_untagged_vlans(ocelot, port) > 1) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"Port with more than one egress-untagged VLAN cannot have egress-tagged VLANs");
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
2020-10-31 18:29:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_vlan_prepare);
|
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
|
|
|
|
bool untagged)
|
2018-06-26 20:28:49 +08:00
|
|
|
{
|
2021-08-20 01:40:08 +08:00
|
|
|
int err;
|
2018-06-26 20:28:49 +08:00
|
|
|
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
err = ocelot_vlan_member_add(ocelot, port, vid, untagged);
|
2021-08-20 01:40:08 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
2018-06-26 20:28:49 +08:00
|
|
|
|
|
|
|
/* Default ingress vlan classification */
|
2021-10-21 01:58:52 +08:00
|
|
|
if (pvid)
|
|
|
|
ocelot_port_set_pvid(ocelot, port,
|
|
|
|
ocelot_bridge_vlan_find(ocelot, vid));
|
2018-06-26 20:28:49 +08:00
|
|
|
|
|
|
|
/* Untagged egress vlan clasification */
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
ocelot_port_manage_port_tag(ocelot, port);
|
2018-06-26 20:28:49 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_vlan_add);
|
2018-06-26 20:28:49 +08:00
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid)
|
2019-11-09 21:02:48 +08:00
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
2021-08-20 01:40:08 +08:00
|
|
|
int err;
|
2018-06-26 20:28:49 +08:00
|
|
|
|
2021-08-20 01:40:08 +08:00
|
|
|
err = ocelot_vlan_member_del(ocelot, port, vid);
|
|
|
|
if (err)
|
|
|
|
return err;
|
2018-06-26 20:28:49 +08:00
|
|
|
|
2020-10-31 18:29:14 +08:00
|
|
|
/* Ingress */
|
2021-10-21 01:58:52 +08:00
|
|
|
if (ocelot_port->pvid_vlan && ocelot_port->pvid_vlan->vid == vid)
|
|
|
|
ocelot_port_set_pvid(ocelot, port, NULL);
|
2020-10-31 18:29:14 +08:00
|
|
|
|
2018-06-26 20:28:49 +08:00
|
|
|
/* Egress */
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
ocelot_port_manage_port_tag(ocelot, port);
|
2018-06-26 20:28:49 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_vlan_del);
|
2018-06-26 20:28:49 +08:00
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
static void ocelot_vlan_init(struct ocelot *ocelot)
|
|
|
|
{
|
2021-08-20 01:40:08 +08:00
|
|
|
unsigned long all_ports = GENMASK(ocelot->num_phys_ports - 1, 0);
|
2018-06-26 20:28:49 +08:00
|
|
|
u16 port, vid;
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
/* Clear VLAN table, by default all ports are members of all VLANs */
|
|
|
|
ocelot_write(ocelot, ANA_TABLES_VLANACCESS_CMD_INIT,
|
|
|
|
ANA_TABLES_VLANACCESS);
|
|
|
|
ocelot_vlant_wait_for_completion(ocelot);
|
2018-06-26 20:28:49 +08:00
|
|
|
|
|
|
|
/* Configure the port VLAN memberships */
|
2021-08-20 01:40:08 +08:00
|
|
|
for (vid = 1; vid < VLAN_N_VID; vid++)
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
ocelot_vlant_set_mask(ocelot, vid, 0);
|
2018-06-26 20:28:49 +08:00
|
|
|
|
|
|
|
/* Because VLAN filtering is enabled, we need VID 0 to get untagged
|
|
|
|
* traffic. It is added automatically if 8021q module is loaded, but
|
|
|
|
* we can't rely on it since module may be not loaded.
|
|
|
|
*/
|
net: mscc: ocelot: add the local station MAC addresses in VID 0
The ocelot switchdev driver does not include the CPU port in the list of
flooding destinations for unknown traffic, instead that traffic is
supposed to match FDB entries to reach the CPU.
The addresses it installs are:
(a) the station MAC address, in ocelot_probe_port() and later during
runtime in ocelot_port_set_mac_address(). These are the VLAN-unaware
addresses. The VLAN-aware addresses are in ocelot_vlan_vid_add().
(b) multicast addresses added with dev_mc_add() (not bridge host MDB
entries) in ocelot_mc_sync()
(c) multicast destination MAC addresses for MRP in ocelot_mrp_save_mac(),
to make sure those are dropped (not forwarded) by the bridging
service, just trapped to the CPU
So we can see that the logic is slightly buggy ever since the initial
commit a556c76adc05 ("net: mscc: Add initial Ocelot switch support").
This is because, when ocelot_probe_port() runs, the port pvid is 0.
Then we join a VLAN-aware bridge, the pvid becomes 1, we call
ocelot_port_set_mac_address(), this learns the new MAC address in VID 1
(also fails to forget the old one, since it thinks it's in VID 1, but
that's not so important). Then when we leave the VLAN-aware bridge,
outside world is unable to ping our new MAC address because it isn't
learned in VID 0, the VLAN-unaware pvid.
[ note: this is strictly based on static analysis, I don't have hardware
to test. But there are also many more corner cases ]
The basic idea is that we should have a separation of concerns, and the
FDB entries used for standalone operation should be managed by the
driver, and the FDB entries used by the bridging service should be
managed by the bridge. So the standalone and VLAN-unaware bridge FDB
entries should not follow the bridge PVID, because that will only be
active when the bridge is VLAN-aware. So since the port pvid is
coincidentally zero during probe time, just make those entries
statically go to VID 0.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:51 +08:00
|
|
|
ocelot_vlant_set_mask(ocelot, OCELOT_VLAN_UNAWARE_PVID, all_ports);
|
2018-06-26 20:28:49 +08:00
|
|
|
|
|
|
|
/* Set vlan ingress filter mask to all ports but the CPU port by
|
|
|
|
* default.
|
|
|
|
*/
|
2021-08-20 01:40:08 +08:00
|
|
|
ocelot_write(ocelot, all_ports, ANA_VLANMASK);
|
2018-06-26 20:28:49 +08:00
|
|
|
|
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
|
|
|
ocelot_write_gix(ocelot, 0, REW_PORT_VLAN_CFG, port);
|
|
|
|
ocelot_write_gix(ocelot, 0, REW_TAG_CFG, port);
|
|
|
|
}
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
|
|
|
|
net: dsa: felix: implement port flushing on .phylink_mac_link_down
There are several issues which may be seen when the link goes down while
forwarding traffic, all of which can be attributed to the fact that the
port flushing procedure from the reference manual was not closely
followed.
With flow control enabled on both the ingress port and the egress port,
it may happen when a link goes down that Ethernet packets are in flight.
In flow control mode, frames are held back and not dropped. When there
is enough traffic in flight (example: iperf3 TCP), then the ingress port
might enter congestion and never exit that state. This is a problem,
because it is the egress port's link that went down, and that has caused
the inability of the ingress port to send packets to any other port.
This is solved by flushing the egress port's queues when it goes down.
There is also a problem when performing stream splitting for
IEEE 802.1CB traffic (not yet upstream, but a sort of multicast,
basically). There, if one port from the destination ports mask goes
down, splitting the stream towards the other destinations will no longer
be performed. This can be traced down to this line:
ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
which should have been instead, as per the reference manual:
ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
DEV_MAC_ENA_CFG);
Basically only DEV_MAC_ENA_CFG_RX_ENA should be disabled, but not
DEV_MAC_ENA_CFG_TX_ENA - I don't have further insight into why that is
the case, but apparently multicasting to several ports will cause issues
if at least one of them doesn't have DEV_MAC_ENA_CFG_TX_ENA set.
I am not sure what the state of the Ocelot VSC7514 driver is, but
probably not as bad as Felix/Seville, since VSC7514 uses phylib and has
the following in ocelot_adjust_link:
if (!phydev->link)
return;
therefore the port is not really put down when the link is lost, unlike
the DSA drivers which use .phylink_mac_link_down for that.
Nonetheless, I put ocelot_port_flush() in the common ocelot.c because it
needs to access some registers from drivers/net/ethernet/mscc/ocelot_rew.h
which are not exported in include/soc/mscc/ and a bugfix patch should
probably not move headers around.
Fixes: bdeced75b13f ("net: dsa: felix: Add PCS operations for PHYLINK")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-09 01:36:27 +08:00
|
|
|
static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
|
|
|
|
{
|
|
|
|
return ocelot_read_rix(ocelot, QSYS_SW_STATUS, port);
|
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
static int ocelot_port_flush(struct ocelot *ocelot, int port)
|
net: dsa: felix: implement port flushing on .phylink_mac_link_down
There are several issues which may be seen when the link goes down while
forwarding traffic, all of which can be attributed to the fact that the
port flushing procedure from the reference manual was not closely
followed.
With flow control enabled on both the ingress port and the egress port,
it may happen when a link goes down that Ethernet packets are in flight.
In flow control mode, frames are held back and not dropped. When there
is enough traffic in flight (example: iperf3 TCP), then the ingress port
might enter congestion and never exit that state. This is a problem,
because it is the egress port's link that went down, and that has caused
the inability of the ingress port to send packets to any other port.
This is solved by flushing the egress port's queues when it goes down.
There is also a problem when performing stream splitting for
IEEE 802.1CB traffic (not yet upstream, but a sort of multicast,
basically). There, if one port from the destination ports mask goes
down, splitting the stream towards the other destinations will no longer
be performed. This can be traced down to this line:
ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
which should have been instead, as per the reference manual:
ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
DEV_MAC_ENA_CFG);
Basically only DEV_MAC_ENA_CFG_RX_ENA should be disabled, but not
DEV_MAC_ENA_CFG_TX_ENA - I don't have further insight into why that is
the case, but apparently multicasting to several ports will cause issues
if at least one of them doesn't have DEV_MAC_ENA_CFG_TX_ENA set.
I am not sure what the state of the Ocelot VSC7514 driver is, but
probably not as bad as Felix/Seville, since VSC7514 uses phylib and has
the following in ocelot_adjust_link:
if (!phydev->link)
return;
therefore the port is not really put down when the link is lost, unlike
the DSA drivers which use .phylink_mac_link_down for that.
Nonetheless, I put ocelot_port_flush() in the common ocelot.c because it
needs to access some registers from drivers/net/ethernet/mscc/ocelot_rew.h
which are not exported in include/soc/mscc/ and a bugfix patch should
probably not move headers around.
Fixes: bdeced75b13f ("net: dsa: felix: Add PCS operations for PHYLINK")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-09 01:36:27 +08:00
|
|
|
{
|
2021-06-08 19:15:35 +08:00
|
|
|
unsigned int pause_ena;
|
net: dsa: felix: implement port flushing on .phylink_mac_link_down
There are several issues which may be seen when the link goes down while
forwarding traffic, all of which can be attributed to the fact that the
port flushing procedure from the reference manual was not closely
followed.
With flow control enabled on both the ingress port and the egress port,
it may happen when a link goes down that Ethernet packets are in flight.
In flow control mode, frames are held back and not dropped. When there
is enough traffic in flight (example: iperf3 TCP), then the ingress port
might enter congestion and never exit that state. This is a problem,
because it is the egress port's link that went down, and that has caused
the inability of the ingress port to send packets to any other port.
This is solved by flushing the egress port's queues when it goes down.
There is also a problem when performing stream splitting for
IEEE 802.1CB traffic (not yet upstream, but a sort of multicast,
basically). There, if one port from the destination ports mask goes
down, splitting the stream towards the other destinations will no longer
be performed. This can be traced down to this line:
ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
which should have been instead, as per the reference manual:
ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
DEV_MAC_ENA_CFG);
Basically only DEV_MAC_ENA_CFG_RX_ENA should be disabled, but not
DEV_MAC_ENA_CFG_TX_ENA - I don't have further insight into why that is
the case, but apparently multicasting to several ports will cause issues
if at least one of them doesn't have DEV_MAC_ENA_CFG_TX_ENA set.
I am not sure what the state of the Ocelot VSC7514 driver is, but
probably not as bad as Felix/Seville, since VSC7514 uses phylib and has
the following in ocelot_adjust_link:
if (!phydev->link)
return;
therefore the port is not really put down when the link is lost, unlike
the DSA drivers which use .phylink_mac_link_down for that.
Nonetheless, I put ocelot_port_flush() in the common ocelot.c because it
needs to access some registers from drivers/net/ethernet/mscc/ocelot_rew.h
which are not exported in include/soc/mscc/ and a bugfix patch should
probably not move headers around.
Fixes: bdeced75b13f ("net: dsa: felix: Add PCS operations for PHYLINK")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-09 01:36:27 +08:00
|
|
|
int err, val;
|
|
|
|
|
|
|
|
/* Disable dequeuing from the egress queues */
|
|
|
|
ocelot_rmw_rix(ocelot, QSYS_PORT_MODE_DEQUEUE_DIS,
|
|
|
|
QSYS_PORT_MODE_DEQUEUE_DIS,
|
|
|
|
QSYS_PORT_MODE, port);
|
|
|
|
|
|
|
|
/* Disable flow control */
|
2021-06-08 19:15:35 +08:00
|
|
|
ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena);
|
net: dsa: felix: implement port flushing on .phylink_mac_link_down
There are several issues which may be seen when the link goes down while
forwarding traffic, all of which can be attributed to the fact that the
port flushing procedure from the reference manual was not closely
followed.
With flow control enabled on both the ingress port and the egress port,
it may happen when a link goes down that Ethernet packets are in flight.
In flow control mode, frames are held back and not dropped. When there
is enough traffic in flight (example: iperf3 TCP), then the ingress port
might enter congestion and never exit that state. This is a problem,
because it is the egress port's link that went down, and that has caused
the inability of the ingress port to send packets to any other port.
This is solved by flushing the egress port's queues when it goes down.
There is also a problem when performing stream splitting for
IEEE 802.1CB traffic (not yet upstream, but a sort of multicast,
basically). There, if one port from the destination ports mask goes
down, splitting the stream towards the other destinations will no longer
be performed. This can be traced down to this line:
ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
which should have been instead, as per the reference manual:
ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
DEV_MAC_ENA_CFG);
Basically only DEV_MAC_ENA_CFG_RX_ENA should be disabled, but not
DEV_MAC_ENA_CFG_TX_ENA - I don't have further insight into why that is
the case, but apparently multicasting to several ports will cause issues
if at least one of them doesn't have DEV_MAC_ENA_CFG_TX_ENA set.
I am not sure what the state of the Ocelot VSC7514 driver is, but
probably not as bad as Felix/Seville, since VSC7514 uses phylib and has
the following in ocelot_adjust_link:
if (!phydev->link)
return;
therefore the port is not really put down when the link is lost, unlike
the DSA drivers which use .phylink_mac_link_down for that.
Nonetheless, I put ocelot_port_flush() in the common ocelot.c because it
needs to access some registers from drivers/net/ethernet/mscc/ocelot_rew.h
which are not exported in include/soc/mscc/ and a bugfix patch should
probably not move headers around.
Fixes: bdeced75b13f ("net: dsa: felix: Add PCS operations for PHYLINK")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-09 01:36:27 +08:00
|
|
|
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
|
|
|
|
|
|
|
|
/* Disable priority flow control */
|
|
|
|
ocelot_fields_write(ocelot, port,
|
|
|
|
QSYS_SWITCH_PORT_MODE_TX_PFC_ENA, 0);
|
|
|
|
|
|
|
|
/* Wait at least the time it takes to receive a frame of maximum length
|
|
|
|
* at the port.
|
|
|
|
* Worst-case delays for 10 kilobyte jumbo frames are:
|
|
|
|
* 8 ms on a 10M port
|
|
|
|
* 800 μs on a 100M port
|
|
|
|
* 80 μs on a 1G port
|
|
|
|
* 32 μs on a 2.5G port
|
|
|
|
*/
|
|
|
|
usleep_range(8000, 10000);
|
|
|
|
|
|
|
|
/* Disable half duplex backpressure. */
|
|
|
|
ocelot_rmw_rix(ocelot, 0, SYS_FRONT_PORT_MODE_HDX_MODE,
|
|
|
|
SYS_FRONT_PORT_MODE, port);
|
|
|
|
|
|
|
|
/* Flush the queues associated with the port. */
|
|
|
|
ocelot_rmw_gix(ocelot, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG_FLUSH_ENA,
|
|
|
|
REW_PORT_CFG, port);
|
|
|
|
|
|
|
|
/* Enable dequeuing from the egress queues. */
|
|
|
|
ocelot_rmw_rix(ocelot, 0, QSYS_PORT_MODE_DEQUEUE_DIS, QSYS_PORT_MODE,
|
|
|
|
port);
|
|
|
|
|
|
|
|
/* Wait until flushing is complete. */
|
|
|
|
err = read_poll_timeout(ocelot_read_eq_avail, val, !val,
|
|
|
|
100, 2000000, false, ocelot, port);
|
|
|
|
|
|
|
|
/* Clear flushing again. */
|
|
|
|
ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
|
|
|
|
|
2021-06-08 19:15:35 +08:00
|
|
|
/* Re-enable flow control */
|
|
|
|
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena);
|
|
|
|
|
net: dsa: felix: implement port flushing on .phylink_mac_link_down
There are several issues which may be seen when the link goes down while
forwarding traffic, all of which can be attributed to the fact that the
port flushing procedure from the reference manual was not closely
followed.
With flow control enabled on both the ingress port and the egress port,
it may happen when a link goes down that Ethernet packets are in flight.
In flow control mode, frames are held back and not dropped. When there
is enough traffic in flight (example: iperf3 TCP), then the ingress port
might enter congestion and never exit that state. This is a problem,
because it is the egress port's link that went down, and that has caused
the inability of the ingress port to send packets to any other port.
This is solved by flushing the egress port's queues when it goes down.
There is also a problem when performing stream splitting for
IEEE 802.1CB traffic (not yet upstream, but a sort of multicast,
basically). There, if one port from the destination ports mask goes
down, splitting the stream towards the other destinations will no longer
be performed. This can be traced down to this line:
ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
which should have been instead, as per the reference manual:
ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
DEV_MAC_ENA_CFG);
Basically only DEV_MAC_ENA_CFG_RX_ENA should be disabled, but not
DEV_MAC_ENA_CFG_TX_ENA - I don't have further insight into why that is
the case, but apparently multicasting to several ports will cause issues
if at least one of them doesn't have DEV_MAC_ENA_CFG_TX_ENA set.
I am not sure what the state of the Ocelot VSC7514 driver is, but
probably not as bad as Felix/Seville, since VSC7514 uses phylib and has
the following in ocelot_adjust_link:
if (!phydev->link)
return;
therefore the port is not really put down when the link is lost, unlike
the DSA drivers which use .phylink_mac_link_down for that.
Nonetheless, I put ocelot_port_flush() in the common ocelot.c because it
needs to access some registers from drivers/net/ethernet/mscc/ocelot_rew.h
which are not exported in include/soc/mscc/ and a bugfix patch should
probably not move headers around.
Fixes: bdeced75b13f ("net: dsa: felix: Add PCS operations for PHYLINK")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-09 01:36:27 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port,
|
|
|
|
unsigned int link_an_mode,
|
|
|
|
phy_interface_t interface,
|
|
|
|
unsigned long quirks)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
2019-11-09 21:02:59 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
int err;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
|
|
|
|
DEV_MAC_ENA_CFG);
|
|
|
|
|
|
|
|
ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
|
|
|
|
|
|
|
|
err = ocelot_port_flush(ocelot, port);
|
|
|
|
if (err)
|
|
|
|
dev_err(ocelot->dev, "failed to flush port %d: %d\n",
|
|
|
|
port, err);
|
|
|
|
|
|
|
|
/* Put the port in reset. */
|
|
|
|
if (interface != PHY_INTERFACE_MODE_QSGMII ||
|
|
|
|
!(quirks & OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP))
|
|
|
|
ocelot_port_rmwl(ocelot_port,
|
|
|
|
DEV_CLOCK_CFG_MAC_TX_RST |
|
2021-10-11 10:27:41 +08:00
|
|
|
DEV_CLOCK_CFG_MAC_RX_RST,
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
DEV_CLOCK_CFG_MAC_TX_RST |
|
2021-10-11 10:27:41 +08:00
|
|
|
DEV_CLOCK_CFG_MAC_RX_RST,
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
DEV_CLOCK_CFG);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_down);
|
|
|
|
|
|
|
|
void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
|
|
|
|
struct phy_device *phydev,
|
|
|
|
unsigned int link_an_mode,
|
|
|
|
phy_interface_t interface,
|
|
|
|
int speed, int duplex,
|
|
|
|
bool tx_pause, bool rx_pause,
|
|
|
|
unsigned long quirks)
|
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
int mac_speed, mode = 0;
|
|
|
|
u32 mac_fc_cfg;
|
|
|
|
|
|
|
|
/* The MAC might be integrated in systems where the MAC speed is fixed
|
|
|
|
* and it's the PCS who is performing the rate adaptation, so we have
|
|
|
|
* to write "1000Mbps" into the LINK_SPEED field of DEV_CLOCK_CFG
|
|
|
|
* (which is also its default value).
|
|
|
|
*/
|
|
|
|
if ((quirks & OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION) ||
|
|
|
|
speed == SPEED_1000) {
|
|
|
|
mac_speed = OCELOT_SPEED_1000;
|
|
|
|
mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
|
|
|
|
} else if (speed == SPEED_2500) {
|
|
|
|
mac_speed = OCELOT_SPEED_2500;
|
|
|
|
mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
|
|
|
|
} else if (speed == SPEED_100) {
|
|
|
|
mac_speed = OCELOT_SPEED_100;
|
|
|
|
} else {
|
|
|
|
mac_speed = OCELOT_SPEED_10;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (duplex == DUPLEX_FULL)
|
|
|
|
mode |= DEV_MAC_MODE_CFG_FDX_ENA;
|
|
|
|
|
|
|
|
ocelot_port_writel(ocelot_port, mode, DEV_MAC_MODE_CFG);
|
|
|
|
|
|
|
|
/* Take port out of reset by clearing the MAC_TX_RST, MAC_RX_RST and
|
|
|
|
* PORT_RST bits in DEV_CLOCK_CFG.
|
|
|
|
*/
|
|
|
|
ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(mac_speed),
|
|
|
|
DEV_CLOCK_CFG);
|
|
|
|
|
|
|
|
switch (speed) {
|
2018-05-15 04:04:57 +08:00
|
|
|
case SPEED_10:
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_10);
|
2018-05-15 04:04:57 +08:00
|
|
|
break;
|
|
|
|
case SPEED_100:
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_100);
|
2018-05-15 04:04:57 +08:00
|
|
|
break;
|
|
|
|
case SPEED_1000:
|
|
|
|
case SPEED_2500:
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_1000);
|
2018-05-15 04:04:57 +08:00
|
|
|
break;
|
|
|
|
default:
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
dev_err(ocelot->dev, "Unsupported speed on port %d: %d\n",
|
|
|
|
port, speed);
|
2018-05-15 04:04:57 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
/* Handle RX pause in all cases, with 2500base-X this is used for rate
|
|
|
|
* adaptation.
|
|
|
|
*/
|
|
|
|
mac_fc_cfg |= SYS_MAC_FC_CFG_RX_FC_ENA;
|
2020-02-29 22:31:11 +08:00
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
if (tx_pause)
|
|
|
|
mac_fc_cfg |= SYS_MAC_FC_CFG_TX_FC_ENA |
|
|
|
|
SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
|
|
|
|
SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
|
|
|
|
SYS_MAC_FC_CFG_ZERO_PAUSE_ENA;
|
2020-02-29 22:31:11 +08:00
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
/* Flow control. Link speed is only used here to evaluate the time
|
|
|
|
* specification in incoming pause frames.
|
|
|
|
*/
|
|
|
|
ocelot_write_rix(ocelot, mac_fc_cfg, SYS_MAC_FC_CFG, port);
|
2020-02-29 22:31:11 +08:00
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
|
2020-02-29 22:31:11 +08:00
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
/* Undo the effects of ocelot_phylink_mac_link_down:
|
|
|
|
* enable MAC module
|
|
|
|
*/
|
2019-11-09 21:02:53 +08:00
|
|
|
ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA |
|
2018-05-15 04:04:57 +08:00
|
|
|
DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
|
|
|
|
|
|
|
|
/* Core: Enable port for frame transfer */
|
net: mscc: ocelot: convert QSYS_SWITCH_PORT_MODE and SYS_PORT_MODE to regfields
Currently Felix and Ocelot share the same bit layout in these per-port
registers, but Seville does not. So we need reg_fields for that.
Actually since these are per-port registers, we need to also specify the
number of ports, and register size per port, and use the regmap API for
multiple ports.
There's a more subtle point to be made about the other 2 register
fields:
- QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG
- QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE
which we are not writing any longer, for 2 reasons:
- Using the previous API (ocelot_write_rix), we were only writing 1 for
Felix and Ocelot, which was their hardware-default value, and which
there wasn't any intention in changing.
- In the case of SCH_NEXT_CFG, in fact Seville does not have this
register field at all, and therefore, if we want to have common code
we would be required to not write to it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-14 00:57:03 +08:00
|
|
|
ocelot_fields_write(ocelot, port,
|
|
|
|
QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
|
2019-11-09 21:02:57 +08:00
|
|
|
}
|
net: mscc: ocelot: convert to phylink
The felix DSA driver, which is a wrapper over the same hardware class as
ocelot, is integrated with phylink, but ocelot is using the plain PHY
library. It makes sense to bring together the two implementations, which
is what this patch achieves.
This is a large patch and hard to break up, but it does the following:
The existing ocelot_adjust_link writes some registers, and
felix_phylink_mac_link_up writes some registers, some of them are
common, but both functions write to some registers to which the other
doesn't.
The main reasons for this are:
- Felix switches so far have used an NXP PCS so they had no need to
write the PCS1G registers that ocelot_adjust_link writes
- Felix switches have the MAC fixed at 1G, so some of the MAC speed
changes actually break the link and must be avoided.
The naming conventions for the functions introduced in this patch are:
- vsc7514_phylink_{mac_config,validate} are specific to the Ocelot
instantiations and placed in ocelot_net.c which is built only for the
ocelot switchdev driver.
- ocelot_phylink_mac_link_{up,down} are shared between the ocelot
switchdev driver and the felix DSA driver (they are put in the common
lib).
One by one, the registers written by ocelot_adjust_link are:
DEV_MAC_MODE_CFG - felix_phylink_mac_link_up had no need to write this
register since its out-of-reset value was fine and
did not need changing. The write is moved to the
common ocelot_phylink_mac_link_up and on felix it is
guarded by a quirk bit that makes the written value
identical with the out-of-reset one
DEV_PORT_MISC - runtime invariant, was moved to vsc7514_phylink_mac_config
PCS1G_MODE_CFG - same as above
PCS1G_SD_CFG - same as above
PCS1G_CFG - same as above
PCS1G_ANEG_CFG - same as above
PCS1G_LB_CFG - same as above
DEV_MAC_ENA_CFG - both ocelot_adjust_link and ocelot_port_disable
touched this. felix_phylink_mac_link_{up,down} also
do. We go with what felix does and put it in
ocelot_phylink_mac_link_up.
DEV_CLOCK_CFG - ocelot_adjust_link and felix_phylink_mac_link_up both
write this, but to different values. Move to the common
ocelot_phylink_mac_link_up and make sure via the quirk
that the old values are preserved for both.
ANA_PFC_PFC_CFG - ocelot_adjust_link wrote this, felix_phylink_mac_link_up
did not. Runtime invariant, speed does not matter since
PFC is disabled via the RX_PFC_ENA bits which are cleared.
Move to vsc7514_phylink_mac_config.
QSYS_SWITCH_PORT_MODE_PORT_ENA - both ocelot_adjust_link and
felix_phylink_mac_link_{up,down} wrote
this. Ocelot also wrote this register
from ocelot_port_disable. Keep what
felix did, move in ocelot_phylink_mac_link_{up,down}
and delete ocelot_port_disable.
ANA_POL_FLOWC - same as above
SYS_MAC_FC_CFG - same as above, except slight behavior change. Whereas
ocelot always enabled RX and TX flow control, felix
listened to phylink (for the most part, at least - see
the 2500base-X comment).
The registers which only felix_phylink_mac_link_up wrote are:
SYS_PAUSE_CFG_PAUSE_ENA - this is why I am not sure that flow control
worked on ocelot. Not it should, since the
code is shared with felix where it does.
ANA_PORT_PORT_CFG - this is a Frame Analyzer block register, phylink
should be the one touching them, deleted.
Other changes:
- The old phylib registration code was in mscc_ocelot_init_ports. It is
hard to work with 2 levels of indentation already in, and with hard to
follow teardown logic. The new phylink registration code was moved
inside ocelot_probe_port(), right between alloc_etherdev() and
register_netdev(). It could not be done before (=> outside of)
ocelot_probe_port() because ocelot_probe_port() allocates the struct
ocelot_port which we then use to assign ocelot_port->phy_mode to. It
is more preferable to me to have all PHY handling logic inside the
same function.
- On the same topic: struct ocelot_port_private :: serdes is only used
in ocelot_port_open to set the SERDES protocol to Ethernet. This is
logically a runtime invariant and can be done just once, when the port
registers with phylink. We therefore don't even need to keep the
serdes reference inside struct ocelot_port_private, or to use the devm
variant of of_phy_get().
- Phylink needs a valid phy-mode for phylink_create() to succeed, and
the existing device tree bindings in arch/mips/boot/dts/mscc/ocelot_pcb120.dts
don't define one for the internal PHY ports. So we patch
PHY_INTERFACE_MODE_NA into PHY_INTERFACE_MODE_INTERNAL.
- There was a strategically placed:
switch (priv->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
which made the code skip the serdes initialization for the internal
PHY ports. Frankly that is not all that obvious, so now we explicitly
initialize the serdes under an "if" condition and not rely on code
jumps, so everything is clearer.
- There was a write of OCELOT_SPEED_1000 to DEV_CLOCK_CFG for QSGMII
ports. Since that is in fact the default value for the register field
DEV_CLOCK_CFG_LINK_SPEED, I can only guess the intention was to clear
the adjacent fields, MAC_TX_RST and MAC_RX_RST, aka take the port out
of reset, which does match the comment. I don't even want to know why
this code is placed there, but if there is indeed an issue that all
ports that share a QSGMII lane must all be up, then this logic is
already buggy, since mscc_ocelot_init_ports iterates using
for_each_available_child_of_node, so nobody prevents the user from
putting a 'status = "disabled";' for some QSGMII ports which would
break the driver's assumption.
In any case, in the eventuality that I'm right, we would have yet
another issue if ocelot_phylink_mac_link_down would reset those ports
and that would be forbidden, so since the ocelot_adjust_link logic did
not do that (maybe for a reason), add another quirk to preserve the
old logic.
The ocelot driver teardown goes through all ports in one fell swoop.
When initialization of one port fails, the ocelot->ports[port] pointer
for that is reset to NULL, and teardown is done only for non-NULL ports,
so there is no reason to do partial teardowns, let the central
mscc_ocelot_release_ports() do its job.
Tested bind, unbind, rebind, link up, link down, speed change on mock-up
hardware (modified the driver to probe on Felix VSC9959). Also
regression tested the felix DSA driver. Could not test the Ocelot
specific bits (PCS1G, SERDES, device tree bindings).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:48 +08:00
|
|
|
EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_up);
|
2019-11-09 21:02:57 +08:00
|
|
|
|
2021-10-12 19:40:36 +08:00
|
|
|
static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
|
|
|
|
struct sk_buff *clone)
|
2019-11-20 16:23:16 +08:00
|
|
|
{
|
2020-09-23 19:24:20 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
2021-10-12 19:40:36 +08:00
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&ocelot->ts_id_lock, flags);
|
2019-11-20 16:23:16 +08:00
|
|
|
|
2021-10-12 19:40:36 +08:00
|
|
|
if (ocelot_port->ptp_skbs_in_flight == OCELOT_MAX_PTP_ID ||
|
|
|
|
ocelot->ptp_skbs_in_flight == OCELOT_PTP_FIFO_SIZE) {
|
|
|
|
spin_unlock_irqrestore(&ocelot->ts_id_lock, flags);
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
2020-09-18 09:07:24 +08:00
|
|
|
|
2020-09-23 19:24:20 +08:00
|
|
|
skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS;
|
2021-04-27 12:22:00 +08:00
|
|
|
/* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */
|
|
|
|
OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id;
|
2020-09-18 09:07:24 +08:00
|
|
|
|
net: mscc: ocelot: make use of all 63 PTP timestamp identifiers
At present, there is a problem when user space bombards a port with PTP
event frames which have TX timestamping requests (or when a tc-taprio
offload is installed on a port, which delays the TX timestamps by a
significant amount of time). The driver will happily roll over the 2-bit
timestamp ID and this will cause incorrect matches between an skb and
the TX timestamp collected from the FIFO.
The Ocelot switches have a 6-bit PTP timestamp identifier, and the value
63 is reserved, so that leaves identifiers 0-62 to be used.
The timestamp identifiers are selected by the REW_OP packet field, and
are actually shared between CPU-injected frames and frames which match a
VCAP IS2 rule that modifies the REW_OP. The hardware supports
partitioning between the two uses of the REW_OP field through the
PTP_ID_LOW and PTP_ID_HIGH registers, and by default reserves the PTP
IDs 0-3 for CPU-injected traffic and the rest for VCAP IS2.
The driver does not use VCAP IS2 to set REW_OP for 2-step timestamping,
and it also writes 0xffffffff to both PTP_ID_HIGH and PTP_ID_LOW in
ocelot_init_timestamp() which makes all timestamp identifiers available
to CPU injection.
Therefore, we can make use of all 63 timestamp identifiers, which should
allow more timestampable packets to be in flight on each port. This is
only part of the solution, more issues will be addressed in future changes.
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-12 19:40:35 +08:00
|
|
|
ocelot_port->ts_id++;
|
|
|
|
if (ocelot_port->ts_id == OCELOT_MAX_PTP_ID)
|
|
|
|
ocelot_port->ts_id = 0;
|
2021-04-27 12:22:02 +08:00
|
|
|
|
2021-10-12 19:40:36 +08:00
|
|
|
ocelot_port->ptp_skbs_in_flight++;
|
|
|
|
ocelot->ptp_skbs_in_flight++;
|
2021-04-27 12:22:03 +08:00
|
|
|
|
2020-09-23 19:24:20 +08:00
|
|
|
skb_queue_tail(&ocelot_port->tx_skbs, clone);
|
2021-04-27 12:22:03 +08:00
|
|
|
|
2021-10-12 19:40:36 +08:00
|
|
|
spin_unlock_irqrestore(&ocelot->ts_id_lock, flags);
|
|
|
|
|
|
|
|
return 0;
|
2021-04-27 12:22:03 +08:00
|
|
|
}
|
|
|
|
|
2021-10-12 19:40:38 +08:00
|
|
|
static bool ocelot_ptp_is_onestep_sync(struct sk_buff *skb,
|
|
|
|
unsigned int ptp_class)
|
2021-04-27 12:22:03 +08:00
|
|
|
{
|
|
|
|
struct ptp_header *hdr;
|
|
|
|
u8 msgtype, twostep;
|
|
|
|
|
|
|
|
hdr = ptp_parse_header(skb, ptp_class);
|
|
|
|
if (!hdr)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
msgtype = ptp_get_msgtype(hdr, ptp_class);
|
|
|
|
twostep = hdr->flag_field[0] & 0x2;
|
|
|
|
|
|
|
|
if (msgtype == PTP_MSGTYPE_SYNC && twostep == 0)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-04-27 12:22:02 +08:00
|
|
|
int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port,
|
|
|
|
struct sk_buff *skb,
|
|
|
|
struct sk_buff **clone)
|
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
u8 ptp_cmd = ocelot_port->ptp_cmd;
|
2021-10-12 19:40:38 +08:00
|
|
|
unsigned int ptp_class;
|
2021-10-12 19:40:36 +08:00
|
|
|
int err;
|
2021-04-27 12:22:02 +08:00
|
|
|
|
2021-10-12 19:40:38 +08:00
|
|
|
/* Don't do anything if PTP timestamping not enabled */
|
|
|
|
if (!ptp_cmd)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ptp_class = ptp_classify_raw(skb);
|
|
|
|
if (ptp_class == PTP_CLASS_NONE)
|
|
|
|
return -EINVAL;
|
2021-04-27 12:22:02 +08:00
|
|
|
|
2021-04-27 12:22:03 +08:00
|
|
|
/* Store ptp_cmd in OCELOT_SKB_CB(skb)->ptp_cmd */
|
|
|
|
if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) {
|
2021-10-12 19:40:38 +08:00
|
|
|
if (ocelot_ptp_is_onestep_sync(skb, ptp_class)) {
|
2021-04-27 12:22:03 +08:00
|
|
|
OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Fall back to two-step timestamping */
|
|
|
|
ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
|
|
|
|
}
|
|
|
|
|
2021-04-27 12:22:02 +08:00
|
|
|
if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
|
|
|
|
*clone = skb_clone_sk(skb);
|
|
|
|
if (!(*clone))
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2021-10-12 19:40:36 +08:00
|
|
|
err = ocelot_port_add_txtstamp_skb(ocelot, port, *clone);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2021-04-27 12:22:03 +08:00
|
|
|
OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd;
|
net: mscc: ocelot: cross-check the sequence id from the timestamp FIFO with the skb PTP header
The sad reality is that when a PTP frame with a TX timestamping request
is transmitted, it isn't guaranteed that it will make it all the way to
the wire (due to congestion inside the switch), and that a timestamp
will be taken by the hardware and placed in the timestamp FIFO where an
IRQ will be raised for it.
The implication is that if enough PTP frames are silently dropped by the
hardware such that the timestamp ID has rolled over, it is possible to
match a timestamp to an old skb.
Furthermore, nobody will match on the real skb corresponding to this
timestamp, since we stupidly matched on a previous one that was stale in
the queue, and stopped there.
So PTP timestamping will be broken and there will be no way to recover.
It looks like the hardware parses the sequenceID from the PTP header,
and also provides that metadata for each timestamp. The driver currently
ignores this, but it shouldn't.
As an extra resiliency measure, do the following:
- check whether the PTP sequenceID also matches between the skb and the
timestamp, treat the skb as stale otherwise and free it
- if we see a stale skb, don't stop there and try to match an skb one
more time, chances are there's one more skb in the queue with the same
timestamp ID, otherwise we wouldn't have ever found the stale one (it
is by timestamp ID that we matched it).
While this does not prevent PTP packet drops, it at least prevents
the catastrophic consequences of incorrect timestamp matching.
Since we already call ptp_classify_raw in the TX path, save the result
in the skb->cb of the clone, and just use that result in the interrupt
code path.
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-12 19:40:39 +08:00
|
|
|
OCELOT_SKB_CB(*clone)->ptp_class = ptp_class;
|
2021-04-27 12:22:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_port_txtstamp_request);
|
2019-11-20 16:23:16 +08:00
|
|
|
|
2019-11-20 16:23:15 +08:00
|
|
|
static void ocelot_get_hwtimestamp(struct ocelot *ocelot,
|
|
|
|
struct timespec64 *ts)
|
2019-08-12 22:45:37 +08:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
u32 val;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
|
|
|
|
|
|
|
|
/* Read current PTP time to get seconds */
|
|
|
|
val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
|
|
|
|
|
|
|
|
val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
|
|
|
|
val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
|
|
|
|
ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
|
|
|
|
ts->tv_sec = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
|
|
|
|
|
|
|
|
/* Read packet HW timestamp from FIFO */
|
|
|
|
val = ocelot_read(ocelot, SYS_PTP_TXSTAMP);
|
|
|
|
ts->tv_nsec = SYS_PTP_TXSTAMP_PTP_TXSTAMP(val);
|
|
|
|
|
|
|
|
/* Sec has incremented since the ts was registered */
|
|
|
|
if ((ts->tv_sec & 0x1) != !!(val & SYS_PTP_TXSTAMP_PTP_TXSTAMP_SEC))
|
|
|
|
ts->tv_sec--;
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
|
|
|
|
}
|
2019-11-20 16:23:15 +08:00
|
|
|
|
net: mscc: ocelot: cross-check the sequence id from the timestamp FIFO with the skb PTP header
The sad reality is that when a PTP frame with a TX timestamping request
is transmitted, it isn't guaranteed that it will make it all the way to
the wire (due to congestion inside the switch), and that a timestamp
will be taken by the hardware and placed in the timestamp FIFO where an
IRQ will be raised for it.
The implication is that if enough PTP frames are silently dropped by the
hardware such that the timestamp ID has rolled over, it is possible to
match a timestamp to an old skb.
Furthermore, nobody will match on the real skb corresponding to this
timestamp, since we stupidly matched on a previous one that was stale in
the queue, and stopped there.
So PTP timestamping will be broken and there will be no way to recover.
It looks like the hardware parses the sequenceID from the PTP header,
and also provides that metadata for each timestamp. The driver currently
ignores this, but it shouldn't.
As an extra resiliency measure, do the following:
- check whether the PTP sequenceID also matches between the skb and the
timestamp, treat the skb as stale otherwise and free it
- if we see a stale skb, don't stop there and try to match an skb one
more time, chances are there's one more skb in the queue with the same
timestamp ID, otherwise we wouldn't have ever found the stale one (it
is by timestamp ID that we matched it).
While this does not prevent PTP packet drops, it at least prevents
the catastrophic consequences of incorrect timestamp matching.
Since we already call ptp_classify_raw in the TX path, save the result
in the skb->cb of the clone, and just use that result in the interrupt
code path.
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-12 19:40:39 +08:00
|
|
|
static bool ocelot_validate_ptp_skb(struct sk_buff *clone, u16 seqid)
|
|
|
|
{
|
|
|
|
struct ptp_header *hdr;
|
|
|
|
|
|
|
|
hdr = ptp_parse_header(clone, OCELOT_SKB_CB(clone)->ptp_class);
|
|
|
|
if (WARN_ON(!hdr))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return seqid == ntohs(hdr->sequence_id);
|
|
|
|
}
|
|
|
|
|
2019-11-20 16:23:15 +08:00
|
|
|
void ocelot_get_txtstamp(struct ocelot *ocelot)
|
|
|
|
{
|
|
|
|
int budget = OCELOT_PTP_QUEUE_SZ;
|
|
|
|
|
|
|
|
while (budget--) {
|
2019-11-27 15:27:57 +08:00
|
|
|
struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
|
2019-11-20 16:23:15 +08:00
|
|
|
struct skb_shared_hwtstamps shhwtstamps;
|
net: mscc: ocelot: cross-check the sequence id from the timestamp FIFO with the skb PTP header
The sad reality is that when a PTP frame with a TX timestamping request
is transmitted, it isn't guaranteed that it will make it all the way to
the wire (due to congestion inside the switch), and that a timestamp
will be taken by the hardware and placed in the timestamp FIFO where an
IRQ will be raised for it.
The implication is that if enough PTP frames are silently dropped by the
hardware such that the timestamp ID has rolled over, it is possible to
match a timestamp to an old skb.
Furthermore, nobody will match on the real skb corresponding to this
timestamp, since we stupidly matched on a previous one that was stale in
the queue, and stopped there.
So PTP timestamping will be broken and there will be no way to recover.
It looks like the hardware parses the sequenceID from the PTP header,
and also provides that metadata for each timestamp. The driver currently
ignores this, but it shouldn't.
As an extra resiliency measure, do the following:
- check whether the PTP sequenceID also matches between the skb and the
timestamp, treat the skb as stale otherwise and free it
- if we see a stale skb, don't stop there and try to match an skb one
more time, chances are there's one more skb in the queue with the same
timestamp ID, otherwise we wouldn't have ever found the stale one (it
is by timestamp ID that we matched it).
While this does not prevent PTP packet drops, it at least prevents
the catastrophic consequences of incorrect timestamp matching.
Since we already call ptp_classify_raw in the TX path, save the result
in the skb->cb of the clone, and just use that result in the interrupt
code path.
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-12 19:40:39 +08:00
|
|
|
u32 val, id, seqid, txport;
|
2019-11-20 16:23:15 +08:00
|
|
|
struct ocelot_port *port;
|
|
|
|
struct timespec64 ts;
|
2019-11-27 15:27:57 +08:00
|
|
|
unsigned long flags;
|
2019-11-20 16:23:15 +08:00
|
|
|
|
|
|
|
val = ocelot_read(ocelot, SYS_PTP_STATUS);
|
|
|
|
|
|
|
|
/* Check if a timestamp can be retrieved */
|
|
|
|
if (!(val & SYS_PTP_STATUS_PTP_MESS_VLD))
|
|
|
|
break;
|
|
|
|
|
|
|
|
WARN_ON(val & SYS_PTP_STATUS_PTP_OVFL);
|
|
|
|
|
|
|
|
/* Retrieve the ts ID and Tx port */
|
|
|
|
id = SYS_PTP_STATUS_PTP_MESS_ID_X(val);
|
|
|
|
txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val);
|
net: mscc: ocelot: cross-check the sequence id from the timestamp FIFO with the skb PTP header
The sad reality is that when a PTP frame with a TX timestamping request
is transmitted, it isn't guaranteed that it will make it all the way to
the wire (due to congestion inside the switch), and that a timestamp
will be taken by the hardware and placed in the timestamp FIFO where an
IRQ will be raised for it.
The implication is that if enough PTP frames are silently dropped by the
hardware such that the timestamp ID has rolled over, it is possible to
match a timestamp to an old skb.
Furthermore, nobody will match on the real skb corresponding to this
timestamp, since we stupidly matched on a previous one that was stale in
the queue, and stopped there.
So PTP timestamping will be broken and there will be no way to recover.
It looks like the hardware parses the sequenceID from the PTP header,
and also provides that metadata for each timestamp. The driver currently
ignores this, but it shouldn't.
As an extra resiliency measure, do the following:
- check whether the PTP sequenceID also matches between the skb and the
timestamp, treat the skb as stale otherwise and free it
- if we see a stale skb, don't stop there and try to match an skb one
more time, chances are there's one more skb in the queue with the same
timestamp ID, otherwise we wouldn't have ever found the stale one (it
is by timestamp ID that we matched it).
While this does not prevent PTP packet drops, it at least prevents
the catastrophic consequences of incorrect timestamp matching.
Since we already call ptp_classify_raw in the TX path, save the result
in the skb->cb of the clone, and just use that result in the interrupt
code path.
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-12 19:40:39 +08:00
|
|
|
seqid = SYS_PTP_STATUS_PTP_MESS_SEQ_ID(val);
|
2019-11-20 16:23:15 +08:00
|
|
|
|
|
|
|
port = ocelot->ports[txport];
|
|
|
|
|
2021-10-12 19:40:36 +08:00
|
|
|
spin_lock(&ocelot->ts_id_lock);
|
|
|
|
port->ptp_skbs_in_flight--;
|
|
|
|
ocelot->ptp_skbs_in_flight--;
|
|
|
|
spin_unlock(&ocelot->ts_id_lock);
|
|
|
|
|
|
|
|
/* Retrieve its associated skb */
|
net: mscc: ocelot: cross-check the sequence id from the timestamp FIFO with the skb PTP header
The sad reality is that when a PTP frame with a TX timestamping request
is transmitted, it isn't guaranteed that it will make it all the way to
the wire (due to congestion inside the switch), and that a timestamp
will be taken by the hardware and placed in the timestamp FIFO where an
IRQ will be raised for it.
The implication is that if enough PTP frames are silently dropped by the
hardware such that the timestamp ID has rolled over, it is possible to
match a timestamp to an old skb.
Furthermore, nobody will match on the real skb corresponding to this
timestamp, since we stupidly matched on a previous one that was stale in
the queue, and stopped there.
So PTP timestamping will be broken and there will be no way to recover.
It looks like the hardware parses the sequenceID from the PTP header,
and also provides that metadata for each timestamp. The driver currently
ignores this, but it shouldn't.
As an extra resiliency measure, do the following:
- check whether the PTP sequenceID also matches between the skb and the
timestamp, treat the skb as stale otherwise and free it
- if we see a stale skb, don't stop there and try to match an skb one
more time, chances are there's one more skb in the queue with the same
timestamp ID, otherwise we wouldn't have ever found the stale one (it
is by timestamp ID that we matched it).
While this does not prevent PTP packet drops, it at least prevents
the catastrophic consequences of incorrect timestamp matching.
Since we already call ptp_classify_raw in the TX path, save the result
in the skb->cb of the clone, and just use that result in the interrupt
code path.
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-12 19:40:39 +08:00
|
|
|
try_again:
|
2019-11-27 15:27:57 +08:00
|
|
|
spin_lock_irqsave(&port->tx_skbs.lock, flags);
|
2019-11-20 16:23:15 +08:00
|
|
|
|
2019-11-27 15:27:57 +08:00
|
|
|
skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {
|
2021-04-27 12:22:00 +08:00
|
|
|
if (OCELOT_SKB_CB(skb)->ts_id != id)
|
2019-11-27 15:27:57 +08:00
|
|
|
continue;
|
|
|
|
__skb_unlink(skb, &port->tx_skbs);
|
|
|
|
skb_match = skb;
|
2019-11-27 15:27:56 +08:00
|
|
|
break;
|
2019-11-20 16:23:15 +08:00
|
|
|
}
|
|
|
|
|
2019-11-27 15:27:57 +08:00
|
|
|
spin_unlock_irqrestore(&port->tx_skbs.lock, flags);
|
|
|
|
|
2021-10-12 19:40:37 +08:00
|
|
|
if (WARN_ON(!skb_match))
|
|
|
|
continue;
|
|
|
|
|
net: mscc: ocelot: cross-check the sequence id from the timestamp FIFO with the skb PTP header
The sad reality is that when a PTP frame with a TX timestamping request
is transmitted, it isn't guaranteed that it will make it all the way to
the wire (due to congestion inside the switch), and that a timestamp
will be taken by the hardware and placed in the timestamp FIFO where an
IRQ will be raised for it.
The implication is that if enough PTP frames are silently dropped by the
hardware such that the timestamp ID has rolled over, it is possible to
match a timestamp to an old skb.
Furthermore, nobody will match on the real skb corresponding to this
timestamp, since we stupidly matched on a previous one that was stale in
the queue, and stopped there.
So PTP timestamping will be broken and there will be no way to recover.
It looks like the hardware parses the sequenceID from the PTP header,
and also provides that metadata for each timestamp. The driver currently
ignores this, but it shouldn't.
As an extra resiliency measure, do the following:
- check whether the PTP sequenceID also matches between the skb and the
timestamp, treat the skb as stale otherwise and free it
- if we see a stale skb, don't stop there and try to match an skb one
more time, chances are there's one more skb in the queue with the same
timestamp ID, otherwise we wouldn't have ever found the stale one (it
is by timestamp ID that we matched it).
While this does not prevent PTP packet drops, it at least prevents
the catastrophic consequences of incorrect timestamp matching.
Since we already call ptp_classify_raw in the TX path, save the result
in the skb->cb of the clone, and just use that result in the interrupt
code path.
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-12 19:40:39 +08:00
|
|
|
if (!ocelot_validate_ptp_skb(skb_match, seqid)) {
|
|
|
|
dev_err_ratelimited(ocelot->dev,
|
|
|
|
"port %d received stale TX timestamp for seqid %d, discarding\n",
|
|
|
|
txport, seqid);
|
|
|
|
dev_kfree_skb_any(skb);
|
|
|
|
goto try_again;
|
|
|
|
}
|
|
|
|
|
2020-07-27 18:26:14 +08:00
|
|
|
/* Get the h/w timestamp */
|
|
|
|
ocelot_get_hwtimestamp(ocelot, &ts);
|
2019-11-20 16:23:15 +08:00
|
|
|
|
|
|
|
/* Set the timestamp into the skb */
|
|
|
|
memset(&shhwtstamps, 0, sizeof(shhwtstamps));
|
|
|
|
shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
|
2020-09-23 19:24:20 +08:00
|
|
|
skb_complete_tx_timestamp(skb_match, &shhwtstamps);
|
2020-07-27 18:26:14 +08:00
|
|
|
|
|
|
|
/* Next ts */
|
|
|
|
ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT);
|
2019-11-20 16:23:15 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_get_txtstamp);
|
2019-08-12 22:45:37 +08:00
|
|
|
|
2021-02-14 06:37:59 +08:00
|
|
|
static int ocelot_rx_frame_word(struct ocelot *ocelot, u8 grp, bool ifh,
|
|
|
|
u32 *rval)
|
|
|
|
{
|
|
|
|
u32 bytes_valid, val;
|
|
|
|
|
|
|
|
val = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
|
|
|
|
if (val == XTR_NOT_READY) {
|
|
|
|
if (ifh)
|
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
do {
|
|
|
|
val = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
|
|
|
|
} while (val == XTR_NOT_READY);
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (val) {
|
|
|
|
case XTR_ABORT:
|
|
|
|
return -EIO;
|
|
|
|
case XTR_EOF_0:
|
|
|
|
case XTR_EOF_1:
|
|
|
|
case XTR_EOF_2:
|
|
|
|
case XTR_EOF_3:
|
|
|
|
case XTR_PRUNED:
|
|
|
|
bytes_valid = XTR_VALID_BYTES(val);
|
|
|
|
val = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
|
|
|
|
if (val == XTR_ESCAPE)
|
|
|
|
*rval = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
|
|
|
|
else
|
|
|
|
*rval = val;
|
|
|
|
|
|
|
|
return bytes_valid;
|
|
|
|
case XTR_ESCAPE:
|
|
|
|
*rval = ocelot_read_rix(ocelot, QS_XTR_RD, grp);
|
|
|
|
|
|
|
|
return 4;
|
|
|
|
default:
|
|
|
|
*rval = val;
|
|
|
|
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ocelot_xtr_poll_xfh(struct ocelot *ocelot, int grp, u32 *xfh)
|
|
|
|
{
|
|
|
|
int i, err = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < OCELOT_TAG_LEN / 4; i++) {
|
|
|
|
err = ocelot_rx_frame_word(ocelot, grp, true, &xfh[i]);
|
|
|
|
if (err != 4)
|
|
|
|
return (err < 0) ? err : -EIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb)
|
|
|
|
{
|
|
|
|
struct skb_shared_hwtstamps *shhwtstamps;
|
2021-03-17 04:10:19 +08:00
|
|
|
u64 tod_in_ns, full_ts_in_ns;
|
2021-02-14 06:37:59 +08:00
|
|
|
u64 timestamp, src_port, len;
|
|
|
|
u32 xfh[OCELOT_TAG_LEN / 4];
|
|
|
|
struct net_device *dev;
|
|
|
|
struct timespec64 ts;
|
|
|
|
struct sk_buff *skb;
|
|
|
|
int sz, buf_len;
|
|
|
|
u32 val, *buf;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = ocelot_xtr_poll_xfh(ocelot, grp, xfh);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
ocelot_xfh_get_src_port(xfh, &src_port);
|
|
|
|
ocelot_xfh_get_len(xfh, &len);
|
|
|
|
ocelot_xfh_get_rew_val(xfh, ×tamp);
|
|
|
|
|
|
|
|
if (WARN_ON(src_port >= ocelot->num_phys_ports))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
dev = ocelot->ops->port_to_netdev(ocelot, src_port);
|
|
|
|
if (!dev)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
skb = netdev_alloc_skb(dev, len);
|
|
|
|
if (unlikely(!skb)) {
|
|
|
|
netdev_err(dev, "Unable to allocate sk_buff\n");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_len = len - ETH_FCS_LEN;
|
|
|
|
buf = (u32 *)skb_put(skb, buf_len);
|
|
|
|
|
|
|
|
len = 0;
|
|
|
|
do {
|
|
|
|
sz = ocelot_rx_frame_word(ocelot, grp, false, &val);
|
|
|
|
if (sz < 0) {
|
|
|
|
err = sz;
|
|
|
|
goto out_free_skb;
|
|
|
|
}
|
|
|
|
*buf++ = val;
|
|
|
|
len += sz;
|
|
|
|
} while (len < buf_len);
|
|
|
|
|
|
|
|
/* Read the FCS */
|
|
|
|
sz = ocelot_rx_frame_word(ocelot, grp, false, &val);
|
|
|
|
if (sz < 0) {
|
|
|
|
err = sz;
|
|
|
|
goto out_free_skb;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Update the statistics if part of the FCS was read before */
|
|
|
|
len -= ETH_FCS_LEN - sz;
|
|
|
|
|
|
|
|
if (unlikely(dev->features & NETIF_F_RXFCS)) {
|
|
|
|
buf = (u32 *)skb_put(skb, ETH_FCS_LEN);
|
|
|
|
*buf = val;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ocelot->ptp) {
|
|
|
|
ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
|
|
|
|
|
|
|
|
tod_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec);
|
|
|
|
if ((tod_in_ns & 0xffffffff) < timestamp)
|
|
|
|
full_ts_in_ns = (((tod_in_ns >> 32) - 1) << 32) |
|
|
|
|
timestamp;
|
|
|
|
else
|
|
|
|
full_ts_in_ns = (tod_in_ns & GENMASK_ULL(63, 32)) |
|
|
|
|
timestamp;
|
|
|
|
|
|
|
|
shhwtstamps = skb_hwtstamps(skb);
|
|
|
|
memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
|
|
|
|
shhwtstamps->hwtstamp = full_ts_in_ns;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Everything we see on an interface that is in the HW bridge
|
|
|
|
* has already been forwarded.
|
|
|
|
*/
|
2021-03-19 07:36:36 +08:00
|
|
|
if (ocelot->ports[src_port]->bridge)
|
2021-02-14 06:37:59 +08:00
|
|
|
skb->offload_fwd_mark = 1;
|
|
|
|
|
|
|
|
skb->protocol = eth_type_trans(skb, dev);
|
2021-02-17 05:42:03 +08:00
|
|
|
|
2021-02-14 06:37:59 +08:00
|
|
|
*nskb = skb;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_free_skb:
|
|
|
|
kfree_skb(skb);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_xtr_poll_frame);
|
|
|
|
|
2021-02-14 06:37:54 +08:00
|
|
|
bool ocelot_can_inject(struct ocelot *ocelot, int grp)
|
|
|
|
{
|
|
|
|
u32 val = ocelot_read(ocelot, QS_INJ_STATUS);
|
|
|
|
|
|
|
|
if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp))))
|
|
|
|
return false;
|
|
|
|
if (val & QS_INJ_STATUS_WMARK_REACHED(BIT(grp)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_can_inject);
|
|
|
|
|
|
|
|
void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
|
|
|
|
u32 rew_op, struct sk_buff *skb)
|
|
|
|
{
|
2021-02-14 06:37:56 +08:00
|
|
|
u32 ifh[OCELOT_TAG_LEN / 4] = {0};
|
2021-02-14 06:37:54 +08:00
|
|
|
unsigned int i, count, last;
|
|
|
|
|
|
|
|
ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) |
|
|
|
|
QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp);
|
|
|
|
|
2021-02-14 06:37:56 +08:00
|
|
|
ocelot_ifh_set_bypass(ifh, 1);
|
2021-02-15 21:31:43 +08:00
|
|
|
ocelot_ifh_set_dest(ifh, BIT_ULL(port));
|
2021-02-14 06:37:56 +08:00
|
|
|
ocelot_ifh_set_tag_type(ifh, IFH_TAG_TYPE_C);
|
2021-10-01 23:15:27 +08:00
|
|
|
ocelot_ifh_set_vlan_tci(ifh, skb_vlan_tag_get(skb));
|
2021-02-14 06:37:56 +08:00
|
|
|
ocelot_ifh_set_rew_op(ifh, rew_op);
|
2021-02-14 06:37:54 +08:00
|
|
|
|
|
|
|
for (i = 0; i < OCELOT_TAG_LEN / 4; i++)
|
2021-02-14 06:37:56 +08:00
|
|
|
ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp);
|
2021-02-14 06:37:54 +08:00
|
|
|
|
|
|
|
count = DIV_ROUND_UP(skb->len, 4);
|
|
|
|
last = skb->len % 4;
|
|
|
|
for (i = 0; i < count; i++)
|
|
|
|
ocelot_write_rix(ocelot, ((u32 *)skb->data)[i], QS_INJ_WR, grp);
|
|
|
|
|
|
|
|
/* Add padding */
|
|
|
|
while (i < (OCELOT_BUFFER_CELL_SZ / 4)) {
|
|
|
|
ocelot_write_rix(ocelot, 0, QS_INJ_WR, grp);
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Indicate EOF and valid bytes in last word */
|
|
|
|
ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) |
|
|
|
|
QS_INJ_CTRL_VLD_BYTES(skb->len < OCELOT_BUFFER_CELL_SZ ? 0 : last) |
|
|
|
|
QS_INJ_CTRL_EOF,
|
|
|
|
QS_INJ_CTRL, grp);
|
|
|
|
|
|
|
|
/* Add dummy CRC */
|
|
|
|
ocelot_write_rix(ocelot, 0, QS_INJ_WR, grp);
|
|
|
|
skb_tx_timestamp(skb);
|
|
|
|
|
|
|
|
skb->dev->stats.tx_packets++;
|
|
|
|
skb->dev->stats.tx_bytes += skb->len;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_port_inject_frame);
|
|
|
|
|
net: dsa: tag_ocelot_8021q: add support for PTP timestamping
For TX timestamping, we use the felix_txtstamp method which is common
with the regular (non-8021q) ocelot tagger. This method says that skb
deferral is needed, prepares a timestamp request ID, and puts a clone of
the skb in a queue waiting for the timestamp IRQ.
felix_txtstamp is called by dsa_skb_tx_timestamp() just before the
tagger's xmit method. In the tagger xmit, we divert the packets
classified by dsa_skb_tx_timestamp() as PTP towards the MMIO-based
injection registers, and we declare them as dead towards dsa_slave_xmit.
If not PTP, we proceed with normal tag_8021q stuff.
Then the timestamp IRQ fires, the clone queued up from felix_txtstamp is
matched to the TX timestamp retrieved from the switch's FIFO based on
the timestamp request ID, and the clone is delivered to the stack.
On RX, thanks to the VCAP IS2 rule that redirects the frames with an
EtherType for 1588 towards two destinations:
- the CPU port module (for MMIO based extraction) and
- if the "no XTR IRQ" workaround is in place, the dsa_8021q CPU port
the relevant data path processing starts in the ptp_classify_raw BPF
classifier installed by DSA in the RX data path (post tagger, which is
completely unaware that it saw a PTP packet).
This time we can't reuse the same implementation of .port_rxtstamp that
also works with the default ocelot tagger. That is because felix_rxtstamp
is given an skb with a freshly stripped DSA header, and it says "I don't
need deferral for its RX timestamp, it's right in it, let me show you";
and it just points to the header right behind skb->data, from where it
unpacks the timestamp and annotates the skb with it.
The same thing cannot happen with tag_ocelot_8021q, because for one
thing, the skb did not have an extraction frame header in the first
place, but a VLAN tag with no timestamp information. So the code paths
in felix_rxtstamp for the regular and 8021q tagger are completely
independent. With tag_8021q, the timestamp must come from the packet's
duplicate delivered to the CPU port module, but there is potentially
complex logic to be handled [ and prone to reordering ] if we were to
just start reading packets from the CPU port module, and try to match
them to the one we received over Ethernet and which needs an RX
timestamp. So we do something simple: we tell DSA "give me some time to
think" (we request skb deferral by returning false from .port_rxtstamp)
and we just drop the frame we got over Ethernet with no attempt to match
it to anything - we just treat it as a notification that there's data to
be processed from the CPU port module's queues. Then we proceed to read
the packets from those, one by one, which we deliver up the stack,
timestamped, using netif_rx - the same function that any driver would
use anyway if it needed RX timestamp deferral. So the assumption is that
we'll come across the PTP packet that triggered the CPU extraction
notification eventually, but we don't know when exactly. Thanks to the
VCAP IS2 trap/redirect rule and the exclusion of the CPU port module
from the flooding replicators, only PTP frames should be present in the
CPU port module's RX queues anyway.
There is just one conflict between the VCAP IS2 trapping rule and the
semantics of the BPF classifier. Namely, ptp_classify_raw() deems
general messages as non-timestampable, but still, those are trapped to
the CPU port module since they have an EtherType of ETH_P_1588. So, if
the "no XTR IRQ" workaround is in place, we need to run another BPF
classifier on the frames extracted over MMIO, to avoid duplicates being
sent to the stack (once over Ethernet, once over MMIO). It doesn't look
like it's possible to install VCAP IS2 rules based on keys extracted
from the 1588 frame headers.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-02-14 06:38:01 +08:00
|
|
|
void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp)
|
|
|
|
{
|
|
|
|
while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp))
|
|
|
|
ocelot_read_rix(ocelot, QS_XTR_RD, grp);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_drain_cpu_queue);
|
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
int ocelot_fdb_add(struct ocelot *ocelot, int port,
|
net: mscc: ocelot: fix untagged packet drops when enslaving to vlan aware bridge
To rehash a previous explanation given in commit 1c44ce560b4d ("net:
mscc: ocelot: fix vlan_filtering when enslaving to bridge before link is
up"), the switch driver operates the in a mode where a single VLAN can
be transmitted as untagged on a particular egress port. That is the
"native VLAN on trunk port" use case.
The configuration for this native VLAN is driven in 2 ways:
- Set the egress port rewriter to strip the VLAN tag for the native
VID (as it is egress-untagged, after all).
- Configure the ingress port to drop untagged and priority-tagged
traffic, if there is no native VLAN. The intention of this setting is
that a trunk port with no native VLAN should not accept untagged
traffic.
Since both of the above configurations for the native VLAN should only
be done if VLAN awareness is requested, they are actually done from the
ocelot_port_vlan_filtering function, after the basic procedure of
toggling the VLAN awareness flag of the port.
But there's a problem with that simplistic approach: we are trying to
juggle with 2 independent variables from a single function:
- Native VLAN of the port - its value is held in port->vid.
- VLAN awareness state of the port - currently there are some issues
here, more on that later*.
The actual problem can be seen when enslaving the switch ports to a VLAN
filtering bridge:
0. The driver configures a pvid of zero for each port, when in
standalone mode. While the bridge configures a default_pvid of 1 for
each port that gets added as a slave to it.
1. The bridge calls ocelot_port_vlan_filtering with vlan_aware=true.
The VLAN-filtering-dependent portion of the native VLAN
configuration is done, considering that the native VLAN is 0.
2. The bridge calls ocelot_vlan_add with vid=1, pvid=true,
untagged=true. The native VLAN changes to 1 (change which gets
propagated to hardware).
3. ??? - nobody calls ocelot_port_vlan_filtering again, to reapply the
VLAN-filtering-dependent portion of the native VLAN configuration,
for the new native VLAN of 1. One can notice that after toggling "ip
link set dev br0 type bridge vlan_filtering 0 && ip link set dev br0
type bridge vlan_filtering 1", the new native VLAN finally makes it
through and untagged traffic finally starts flowing again. But
obviously that shouldn't be needed.
So it is clear that 2 independent variables need to both re-trigger the
native VLAN configuration. So we introduce the second variable as
ocelot_port->vlan_aware.
*Actually both the DSA Felix driver and the Ocelot driver already had
each its own variable:
- Ocelot: ocelot_port_private->vlan_aware
- Felix: dsa_port->vlan_filtering
but the common Ocelot library needs to work with a single, common,
variable, so there is some refactoring done to move the vlan_aware
property from the private structure into the common ocelot_port
structure.
Fixes: 97bb69e1e36e ("net: mscc: ocelot: break apart ocelot_vlan_port_apply")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-04-15 03:36:15 +08:00
|
|
|
const unsigned char *addr, u16 vid)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
2020-06-21 19:46:00 +08:00
|
|
|
int pgid = port;
|
|
|
|
|
|
|
|
if (port == ocelot->npi)
|
|
|
|
pgid = PGID_CPU;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2020-06-21 19:46:00 +08:00
|
|
|
return ocelot_mact_learn(ocelot, pgid, addr, vid, ENTRYTYPE_LOCKED);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_fdb_add);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
int ocelot_fdb_del(struct ocelot *ocelot, int port,
|
|
|
|
const unsigned char *addr, u16 vid)
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
{
|
2018-05-15 04:04:57 +08:00
|
|
|
return ocelot_mact_forget(ocelot, addr, vid);
|
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_fdb_del);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2020-06-20 23:43:44 +08:00
|
|
|
int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid,
|
|
|
|
bool is_static, void *data)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
struct ocelot_dump_ctx *dump = data;
|
2018-05-15 04:04:57 +08:00
|
|
|
u32 portid = NETLINK_CB(dump->cb->skb).portid;
|
|
|
|
u32 seq = dump->cb->nlh->nlmsg_seq;
|
|
|
|
struct nlmsghdr *nlh;
|
|
|
|
struct ndmsg *ndm;
|
|
|
|
|
|
|
|
if (dump->idx < dump->cb->args[2])
|
|
|
|
goto skip;
|
|
|
|
|
|
|
|
nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
|
|
|
|
sizeof(*ndm), NLM_F_MULTI);
|
|
|
|
if (!nlh)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
ndm = nlmsg_data(nlh);
|
|
|
|
ndm->ndm_family = AF_BRIDGE;
|
|
|
|
ndm->ndm_pad1 = 0;
|
|
|
|
ndm->ndm_pad2 = 0;
|
|
|
|
ndm->ndm_flags = NTF_SELF;
|
|
|
|
ndm->ndm_type = 0;
|
|
|
|
ndm->ndm_ifindex = dump->dev->ifindex;
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr))
|
2018-05-15 04:04:57 +08:00
|
|
|
goto nla_put_failure;
|
|
|
|
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid))
|
2018-05-15 04:04:57 +08:00
|
|
|
goto nla_put_failure;
|
|
|
|
|
|
|
|
nlmsg_end(dump->skb, nlh);
|
|
|
|
|
|
|
|
skip:
|
|
|
|
dump->idx++;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
nla_put_failure:
|
|
|
|
nlmsg_cancel(dump->skb, nlh);
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
2020-06-20 23:43:44 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_fdb_do_dump);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
static int ocelot_mact_read(struct ocelot *ocelot, int port, int row, int col,
|
|
|
|
struct ocelot_mact_entry *entry)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
|
|
|
u32 val, dst, macl, mach;
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
char mac[ETH_ALEN];
|
2018-05-15 04:04:57 +08:00
|
|
|
|
|
|
|
/* Set row and column to read from */
|
|
|
|
ocelot_field_write(ocelot, ANA_TABLES_MACTINDX_M_INDEX, row);
|
|
|
|
ocelot_field_write(ocelot, ANA_TABLES_MACTINDX_BUCKET, col);
|
|
|
|
|
|
|
|
/* Issue a read command */
|
|
|
|
ocelot_write(ocelot,
|
|
|
|
ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_READ),
|
|
|
|
ANA_TABLES_MACACCESS);
|
|
|
|
|
|
|
|
if (ocelot_mact_wait_for_completion(ocelot))
|
|
|
|
return -ETIMEDOUT;
|
|
|
|
|
|
|
|
/* Read the entry flags */
|
|
|
|
val = ocelot_read(ocelot, ANA_TABLES_MACACCESS);
|
|
|
|
if (!(val & ANA_TABLES_MACACCESS_VALID))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* If the entry read has another port configured as its destination,
|
|
|
|
* do not report it.
|
|
|
|
*/
|
|
|
|
dst = (val & ANA_TABLES_MACACCESS_DEST_IDX_M) >> 3;
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
if (dst != port)
|
2018-05-15 04:04:57 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Get the entry's MAC address and VLAN id */
|
|
|
|
macl = ocelot_read(ocelot, ANA_TABLES_MACLDATA);
|
|
|
|
mach = ocelot_read(ocelot, ANA_TABLES_MACHDATA);
|
|
|
|
|
|
|
|
mac[0] = (mach >> 8) & 0xff;
|
|
|
|
mac[1] = (mach >> 0) & 0xff;
|
|
|
|
mac[2] = (macl >> 24) & 0xff;
|
|
|
|
mac[3] = (macl >> 16) & 0xff;
|
|
|
|
mac[4] = (macl >> 8) & 0xff;
|
|
|
|
mac[5] = (macl >> 0) & 0xff;
|
|
|
|
|
|
|
|
entry->vid = (mach >> 16) & 0xfff;
|
|
|
|
ether_addr_copy(entry->mac, mac);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
int ocelot_fdb_dump(struct ocelot *ocelot, int port,
|
|
|
|
dsa_fdb_dump_cb_t *cb, void *data)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
int i, j;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2020-05-04 06:20:26 +08:00
|
|
|
/* Loop through all the mac tables entries. */
|
|
|
|
for (i = 0; i < ocelot->num_mact_rows; i++) {
|
2018-05-15 04:04:57 +08:00
|
|
|
for (j = 0; j < 4; j++) {
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
struct ocelot_mact_entry entry;
|
|
|
|
bool is_static;
|
2021-10-25 19:59:25 +08:00
|
|
|
int ret;
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
|
2021-10-25 19:59:25 +08:00
|
|
|
ret = ocelot_mact_read(ocelot, port, i, j, &entry);
|
2018-05-15 04:04:57 +08:00
|
|
|
/* If the entry is invalid (wrong port, invalid...),
|
|
|
|
* skip it.
|
|
|
|
*/
|
2021-10-25 19:59:25 +08:00
|
|
|
if (ret == -EINVAL)
|
2018-05-15 04:04:57 +08:00
|
|
|
continue;
|
2021-10-25 19:59:25 +08:00
|
|
|
else if (ret)
|
|
|
|
return ret;
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
|
|
|
|
is_static = (entry.type == ENTRYTYPE_LOCKED);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-10-25 19:59:25 +08:00
|
|
|
ret = cb(entry.mac, entry.vid, is_static, data);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-25 19:59:25 +08:00
|
|
|
return 0;
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_fdb_dump);
|
net: mscc: ocelot: break out fdb operations into abstract implementations
To be able to implement a DSA front-end over ocelot_fdb_add,
ocelot_fdb_del, ocelot_fdb_dump, these need to have a simple function
prototype that is independent of struct net_device, netlink skb, etc.
So rename the ndo ops of the ocelot driver into
ocelot_port_fdb_{add,del,dump}, and have them all call the abstract
implementations. At the same time, refactor ocelot_port_fdb_do_dump into
a function whose prototype is compatible with dsa_fdb_dump_cb_t, so that
the do_dump implementations can live together and be called by the
ocelot_fdb_dump through a function pointer.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-11-09 21:02:49 +08:00
|
|
|
|
2019-11-20 16:23:14 +08:00
|
|
|
int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr)
|
2019-08-12 22:45:37 +08:00
|
|
|
{
|
|
|
|
return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config,
|
|
|
|
sizeof(ocelot->hwtstamp_config)) ? -EFAULT : 0;
|
|
|
|
}
|
2019-11-20 16:23:14 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_hwstamp_get);
|
2019-08-12 22:45:37 +08:00
|
|
|
|
2019-11-20 16:23:14 +08:00
|
|
|
int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr)
|
2019-08-12 22:45:37 +08:00
|
|
|
{
|
2019-11-09 21:02:50 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
2019-08-12 22:45:37 +08:00
|
|
|
struct hwtstamp_config cfg;
|
|
|
|
|
|
|
|
if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
/* reserved for future extensions */
|
|
|
|
if (cfg.flags)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
/* Tx type sanity check */
|
|
|
|
switch (cfg.tx_type) {
|
|
|
|
case HWTSTAMP_TX_ON:
|
2019-11-09 21:02:50 +08:00
|
|
|
ocelot_port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
|
2019-08-12 22:45:37 +08:00
|
|
|
break;
|
|
|
|
case HWTSTAMP_TX_ONESTEP_SYNC:
|
|
|
|
/* IFH_REW_OP_ONE_STEP_PTP updates the correctional field, we
|
|
|
|
* need to update the origin time.
|
|
|
|
*/
|
2019-11-09 21:02:50 +08:00
|
|
|
ocelot_port->ptp_cmd = IFH_REW_OP_ORIGIN_PTP;
|
2019-08-12 22:45:37 +08:00
|
|
|
break;
|
|
|
|
case HWTSTAMP_TX_OFF:
|
2019-11-09 21:02:50 +08:00
|
|
|
ocelot_port->ptp_cmd = 0;
|
2019-08-12 22:45:37 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return -ERANGE;
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_lock(&ocelot->ptp_lock);
|
|
|
|
|
|
|
|
switch (cfg.rx_filter) {
|
|
|
|
case HWTSTAMP_FILTER_NONE:
|
|
|
|
break;
|
|
|
|
case HWTSTAMP_FILTER_ALL:
|
|
|
|
case HWTSTAMP_FILTER_SOME:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
|
|
|
|
case HWTSTAMP_FILTER_NTP_ALL:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_EVENT:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_SYNC:
|
|
|
|
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
|
|
|
|
cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
mutex_unlock(&ocelot->ptp_lock);
|
|
|
|
return -ERANGE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Commit back the result & save it */
|
|
|
|
memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg));
|
|
|
|
mutex_unlock(&ocelot->ptp_lock);
|
|
|
|
|
|
|
|
return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
|
|
|
|
}
|
2019-11-20 16:23:14 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_hwstamp_set);
|
2019-08-12 22:45:37 +08:00
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (sset != ETH_SS_STATS)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < ocelot->num_stats; i++)
|
|
|
|
memcpy(data + i * ETH_GSTRING_LEN, ocelot->stats_layout[i].name,
|
|
|
|
ETH_GSTRING_LEN);
|
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_get_strings);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2019-04-16 22:51:59 +08:00
|
|
|
static void ocelot_update_stats(struct ocelot *ocelot)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
mutex_lock(&ocelot->stats_lock);
|
|
|
|
|
|
|
|
for (i = 0; i < ocelot->num_phys_ports; i++) {
|
|
|
|
/* Configure the port to read the stats from */
|
|
|
|
ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG);
|
|
|
|
|
|
|
|
for (j = 0; j < ocelot->num_stats; j++) {
|
|
|
|
u32 val;
|
|
|
|
unsigned int idx = i * ocelot->num_stats + j;
|
|
|
|
|
|
|
|
val = ocelot_read_rix(ocelot, SYS_COUNT_RX_OCTETS,
|
|
|
|
ocelot->stats_layout[j].offset);
|
|
|
|
|
|
|
|
if (val < (ocelot->stats[idx] & U32_MAX))
|
|
|
|
ocelot->stats[idx] += (u64)1 << 32;
|
|
|
|
|
|
|
|
ocelot->stats[idx] = (ocelot->stats[idx] &
|
|
|
|
~(u64)U32_MAX) + val;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-16 22:51:59 +08:00
|
|
|
mutex_unlock(&ocelot->stats_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ocelot_check_stats_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct delayed_work *del_work = to_delayed_work(work);
|
|
|
|
struct ocelot *ocelot = container_of(del_work, struct ocelot,
|
|
|
|
stats_work);
|
|
|
|
|
|
|
|
ocelot_update_stats(ocelot);
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
|
|
|
|
OCELOT_STATS_CHECK_DELAY);
|
|
|
|
}
|
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* check and update now */
|
2019-04-16 22:51:59 +08:00
|
|
|
ocelot_update_stats(ocelot);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
|
|
|
/* Copy all counters */
|
|
|
|
for (i = 0; i < ocelot->num_stats; i++)
|
2019-11-09 21:02:53 +08:00
|
|
|
*data++ = ocelot->stats[port * ocelot->num_stats + i];
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_get_ethtool_stats);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset)
|
2019-11-09 21:02:54 +08:00
|
|
|
{
|
2018-05-15 04:04:57 +08:00
|
|
|
if (sset != ETH_SS_STATS)
|
|
|
|
return -EOPNOTSUPP;
|
2019-11-09 21:02:54 +08:00
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
return ocelot->num_stats;
|
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_get_sset_count);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
int ocelot_get_ts_info(struct ocelot *ocelot, int port,
|
|
|
|
struct ethtool_ts_info *info)
|
2019-11-09 21:02:54 +08:00
|
|
|
{
|
2019-08-12 22:45:37 +08:00
|
|
|
info->phc_index = ocelot->ptp_clock ?
|
|
|
|
ptp_clock_index(ocelot->ptp_clock) : -1;
|
2020-04-20 10:46:46 +08:00
|
|
|
if (info->phc_index == -1) {
|
|
|
|
info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
|
|
|
|
SOF_TIMESTAMPING_RX_SOFTWARE |
|
|
|
|
SOF_TIMESTAMPING_SOFTWARE;
|
|
|
|
return 0;
|
|
|
|
}
|
2019-08-12 22:45:37 +08:00
|
|
|
info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
|
|
|
|
SOF_TIMESTAMPING_RX_SOFTWARE |
|
|
|
|
SOF_TIMESTAMPING_SOFTWARE |
|
|
|
|
SOF_TIMESTAMPING_TX_HARDWARE |
|
|
|
|
SOF_TIMESTAMPING_RX_HARDWARE |
|
|
|
|
SOF_TIMESTAMPING_RAW_HARDWARE;
|
|
|
|
info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
|
|
|
|
BIT(HWTSTAMP_TX_ONESTEP_SYNC);
|
|
|
|
info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_get_ts_info);
|
2019-08-12 22:45:37 +08:00
|
|
|
|
2021-02-06 06:02:19 +08:00
|
|
|
static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
|
|
|
|
bool only_active_ports)
|
2021-02-06 06:02:14 +08:00
|
|
|
{
|
|
|
|
u32 mask = 0;
|
|
|
|
int port;
|
|
|
|
|
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
|
|
|
|
if (!ocelot_port)
|
|
|
|
continue;
|
|
|
|
|
2021-02-06 06:02:19 +08:00
|
|
|
if (ocelot_port->bond == bond) {
|
|
|
|
if (only_active_ports && !ocelot_port->lag_tx_active)
|
|
|
|
continue;
|
|
|
|
|
2021-02-06 06:02:14 +08:00
|
|
|
mask |= BIT(port);
|
2021-02-06 06:02:19 +08:00
|
|
|
}
|
2021-02-06 06:02:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
2021-09-23 10:03:38 +08:00
|
|
|
static u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, int src_port,
|
2021-03-19 07:36:36 +08:00
|
|
|
struct net_device *bridge)
|
|
|
|
{
|
2021-09-23 10:03:38 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[src_port];
|
2021-03-19 07:36:36 +08:00
|
|
|
u32 mask = 0;
|
|
|
|
int port;
|
|
|
|
|
2021-09-23 10:03:38 +08:00
|
|
|
if (!ocelot_port || ocelot_port->bridge != bridge ||
|
|
|
|
ocelot_port->stp_state != BR_STATE_FORWARDING)
|
|
|
|
return 0;
|
|
|
|
|
2021-03-19 07:36:36 +08:00
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
2021-09-23 10:03:38 +08:00
|
|
|
ocelot_port = ocelot->ports[port];
|
2021-03-19 07:36:36 +08:00
|
|
|
|
|
|
|
if (!ocelot_port)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ocelot_port->stp_state == BR_STATE_FORWARDING &&
|
|
|
|
ocelot_port->bridge == bridge)
|
|
|
|
mask |= BIT(port);
|
|
|
|
}
|
|
|
|
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
net: dsa: felix: perform switch setup for tag_8021q
Unlike sja1105, the only other user of the software-defined tag_8021q.c
tagger format, the implementation we choose for the Felix DSA switch
driver preserves full functionality under a vlan_filtering bridge
(i.e. IP termination works through the DSA user ports under all
circumstances).
The tag_8021q protocol just wants:
- Identifying the ingress switch port based on the RX VLAN ID, as seen
by the CPU. We achieve this by using the TCAM engines (which are also
used for tc-flower offload) to push the RX VLAN as a second, outer
tag, on egress towards the CPU port.
- Steering traffic injected into the switch from the network stack
towards the correct front port based on the TX VLAN, and consuming
(popping) that header on the switch's egress.
A tc-flower pseudocode of the static configuration done by the driver
would look like this:
$ tc qdisc add dev <cpu-port> clsact
$ for eth in swp0 swp1 swp2 swp3; do \
tc filter add dev <cpu-port> egress flower indev ${eth} \
action vlan push id <rxvlan> protocol 802.1ad; \
tc filter add dev <cpu-port> ingress protocol 802.1Q flower
vlan_id <txvlan> action vlan pop \
action mirred egress redirect dev ${eth}; \
done
but of course since DSA does not register network interfaces for the CPU
port, this configuration would be impossible for the user to do. Also,
due to the same reason, it is impossible for the user to inadvertently
delete these rules using tc. These rules do not collide in any way with
tc-flower, they just consume some TCAM space, which is something we can
live with.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 09:00:09 +08:00
|
|
|
static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot)
|
2021-01-29 09:00:02 +08:00
|
|
|
{
|
net: dsa: felix: perform switch setup for tag_8021q
Unlike sja1105, the only other user of the software-defined tag_8021q.c
tagger format, the implementation we choose for the Felix DSA switch
driver preserves full functionality under a vlan_filtering bridge
(i.e. IP termination works through the DSA user ports under all
circumstances).
The tag_8021q protocol just wants:
- Identifying the ingress switch port based on the RX VLAN ID, as seen
by the CPU. We achieve this by using the TCAM engines (which are also
used for tc-flower offload) to push the RX VLAN as a second, outer
tag, on egress towards the CPU port.
- Steering traffic injected into the switch from the network stack
towards the correct front port based on the TX VLAN, and consuming
(popping) that header on the switch's egress.
A tc-flower pseudocode of the static configuration done by the driver
would look like this:
$ tc qdisc add dev <cpu-port> clsact
$ for eth in swp0 swp1 swp2 swp3; do \
tc filter add dev <cpu-port> egress flower indev ${eth} \
action vlan push id <rxvlan> protocol 802.1ad; \
tc filter add dev <cpu-port> ingress protocol 802.1Q flower
vlan_id <txvlan> action vlan pop \
action mirred egress redirect dev ${eth}; \
done
but of course since DSA does not register network interfaces for the CPU
port, this configuration would be impossible for the user to do. Also,
due to the same reason, it is impossible for the user to inadvertently
delete these rules using tc. These rules do not collide in any way with
tc-flower, they just consume some TCAM space, which is something we can
live with.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 09:00:09 +08:00
|
|
|
u32 mask = 0;
|
2021-01-29 09:00:02 +08:00
|
|
|
int port;
|
|
|
|
|
net: dsa: felix: perform switch setup for tag_8021q
Unlike sja1105, the only other user of the software-defined tag_8021q.c
tagger format, the implementation we choose for the Felix DSA switch
driver preserves full functionality under a vlan_filtering bridge
(i.e. IP termination works through the DSA user ports under all
circumstances).
The tag_8021q protocol just wants:
- Identifying the ingress switch port based on the RX VLAN ID, as seen
by the CPU. We achieve this by using the TCAM engines (which are also
used for tc-flower offload) to push the RX VLAN as a second, outer
tag, on egress towards the CPU port.
- Steering traffic injected into the switch from the network stack
towards the correct front port based on the TX VLAN, and consuming
(popping) that header on the switch's egress.
A tc-flower pseudocode of the static configuration done by the driver
would look like this:
$ tc qdisc add dev <cpu-port> clsact
$ for eth in swp0 swp1 swp2 swp3; do \
tc filter add dev <cpu-port> egress flower indev ${eth} \
action vlan push id <rxvlan> protocol 802.1ad; \
tc filter add dev <cpu-port> ingress protocol 802.1Q flower
vlan_id <txvlan> action vlan pop \
action mirred egress redirect dev ${eth}; \
done
but of course since DSA does not register network interfaces for the CPU
port, this configuration would be impossible for the user to do. Also,
due to the same reason, it is impossible for the user to inadvertently
delete these rules using tc. These rules do not collide in any way with
tc-flower, they just consume some TCAM space, which is something we can
live with.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 09:00:09 +08:00
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
|
|
|
|
if (!ocelot_port)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ocelot_port->is_dsa_8021q_cpu)
|
|
|
|
mask |= BIT(port);
|
|
|
|
}
|
|
|
|
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
|
|
|
|
{
|
|
|
|
unsigned long cpu_fwd_mask;
|
|
|
|
int port;
|
|
|
|
|
|
|
|
/* If a DSA tag_8021q CPU exists, it needs to be included in the
|
|
|
|
* regular forwarding path of the front ports regardless of whether
|
|
|
|
* those are bridged or standalone.
|
|
|
|
* If DSA tag_8021q is not used, this returns 0, which is fine because
|
|
|
|
* the hardware-based CPU port module can be a destination for packets
|
|
|
|
* even if it isn't part of PGID_SRC.
|
|
|
|
*/
|
|
|
|
cpu_fwd_mask = ocelot_get_dsa_8021q_cpu_mask(ocelot);
|
|
|
|
|
2021-01-29 09:00:02 +08:00
|
|
|
/* Apply FWD mask. The loop is needed to add/remove the current port as
|
|
|
|
* a source for the other ports.
|
|
|
|
*/
|
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
net: dsa: felix: perform switch setup for tag_8021q
Unlike sja1105, the only other user of the software-defined tag_8021q.c
tagger format, the implementation we choose for the Felix DSA switch
driver preserves full functionality under a vlan_filtering bridge
(i.e. IP termination works through the DSA user ports under all
circumstances).
The tag_8021q protocol just wants:
- Identifying the ingress switch port based on the RX VLAN ID, as seen
by the CPU. We achieve this by using the TCAM engines (which are also
used for tc-flower offload) to push the RX VLAN as a second, outer
tag, on egress towards the CPU port.
- Steering traffic injected into the switch from the network stack
towards the correct front port based on the TX VLAN, and consuming
(popping) that header on the switch's egress.
A tc-flower pseudocode of the static configuration done by the driver
would look like this:
$ tc qdisc add dev <cpu-port> clsact
$ for eth in swp0 swp1 swp2 swp3; do \
tc filter add dev <cpu-port> egress flower indev ${eth} \
action vlan push id <rxvlan> protocol 802.1ad; \
tc filter add dev <cpu-port> ingress protocol 802.1Q flower
vlan_id <txvlan> action vlan pop \
action mirred egress redirect dev ${eth}; \
done
but of course since DSA does not register network interfaces for the CPU
port, this configuration would be impossible for the user to do. Also,
due to the same reason, it is impossible for the user to inadvertently
delete these rules using tc. These rules do not collide in any way with
tc-flower, they just consume some TCAM space, which is something we can
live with.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 09:00:09 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
unsigned long mask;
|
|
|
|
|
|
|
|
if (!ocelot_port) {
|
|
|
|
/* Unused ports can't send anywhere */
|
|
|
|
mask = 0;
|
|
|
|
} else if (ocelot_port->is_dsa_8021q_cpu) {
|
|
|
|
/* The DSA tag_8021q CPU ports need to be able to
|
|
|
|
* forward packets to all other ports except for
|
|
|
|
* themselves
|
|
|
|
*/
|
|
|
|
mask = GENMASK(ocelot->num_phys_ports - 1, 0);
|
|
|
|
mask &= ~cpu_fwd_mask;
|
2021-03-19 07:36:36 +08:00
|
|
|
} else if (ocelot_port->bridge) {
|
|
|
|
struct net_device *bridge = ocelot_port->bridge;
|
2021-02-06 06:02:17 +08:00
|
|
|
struct net_device *bond = ocelot_port->bond;
|
2021-01-29 09:00:02 +08:00
|
|
|
|
2021-09-23 10:03:38 +08:00
|
|
|
mask = ocelot_get_bridge_fwd_mask(ocelot, port, bridge);
|
2021-08-18 00:04:25 +08:00
|
|
|
mask |= cpu_fwd_mask;
|
2021-03-19 07:36:36 +08:00
|
|
|
mask &= ~BIT(port);
|
2021-02-06 06:02:19 +08:00
|
|
|
if (bond) {
|
|
|
|
mask &= ~ocelot_get_bond_mask(ocelot, bond,
|
|
|
|
false);
|
|
|
|
}
|
2021-01-29 09:00:02 +08:00
|
|
|
} else {
|
net: dsa: felix: perform switch setup for tag_8021q
Unlike sja1105, the only other user of the software-defined tag_8021q.c
tagger format, the implementation we choose for the Felix DSA switch
driver preserves full functionality under a vlan_filtering bridge
(i.e. IP termination works through the DSA user ports under all
circumstances).
The tag_8021q protocol just wants:
- Identifying the ingress switch port based on the RX VLAN ID, as seen
by the CPU. We achieve this by using the TCAM engines (which are also
used for tc-flower offload) to push the RX VLAN as a second, outer
tag, on egress towards the CPU port.
- Steering traffic injected into the switch from the network stack
towards the correct front port based on the TX VLAN, and consuming
(popping) that header on the switch's egress.
A tc-flower pseudocode of the static configuration done by the driver
would look like this:
$ tc qdisc add dev <cpu-port> clsact
$ for eth in swp0 swp1 swp2 swp3; do \
tc filter add dev <cpu-port> egress flower indev ${eth} \
action vlan push id <rxvlan> protocol 802.1ad; \
tc filter add dev <cpu-port> ingress protocol 802.1Q flower
vlan_id <txvlan> action vlan pop \
action mirred egress redirect dev ${eth}; \
done
but of course since DSA does not register network interfaces for the CPU
port, this configuration would be impossible for the user to do. Also,
due to the same reason, it is impossible for the user to inadvertently
delete these rules using tc. These rules do not collide in any way with
tc-flower, they just consume some TCAM space, which is something we can
live with.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 09:00:09 +08:00
|
|
|
/* Standalone ports forward only to DSA tag_8021q CPU
|
|
|
|
* ports (if those exist), or to the hardware CPU port
|
|
|
|
* module otherwise.
|
|
|
|
*/
|
|
|
|
mask = cpu_fwd_mask;
|
2021-01-29 09:00:02 +08:00
|
|
|
}
|
net: dsa: felix: perform switch setup for tag_8021q
Unlike sja1105, the only other user of the software-defined tag_8021q.c
tagger format, the implementation we choose for the Felix DSA switch
driver preserves full functionality under a vlan_filtering bridge
(i.e. IP termination works through the DSA user ports under all
circumstances).
The tag_8021q protocol just wants:
- Identifying the ingress switch port based on the RX VLAN ID, as seen
by the CPU. We achieve this by using the TCAM engines (which are also
used for tc-flower offload) to push the RX VLAN as a second, outer
tag, on egress towards the CPU port.
- Steering traffic injected into the switch from the network stack
towards the correct front port based on the TX VLAN, and consuming
(popping) that header on the switch's egress.
A tc-flower pseudocode of the static configuration done by the driver
would look like this:
$ tc qdisc add dev <cpu-port> clsact
$ for eth in swp0 swp1 swp2 swp3; do \
tc filter add dev <cpu-port> egress flower indev ${eth} \
action vlan push id <rxvlan> protocol 802.1ad; \
tc filter add dev <cpu-port> ingress protocol 802.1Q flower
vlan_id <txvlan> action vlan pop \
action mirred egress redirect dev ${eth}; \
done
but of course since DSA does not register network interfaces for the CPU
port, this configuration would be impossible for the user to do. Also,
due to the same reason, it is impossible for the user to inadvertently
delete these rules using tc. These rules do not collide in any way with
tc-flower, they just consume some TCAM space, which is something we can
live with.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 09:00:09 +08:00
|
|
|
|
|
|
|
ocelot_write_rix(ocelot, mask, ANA_PGID_PGID, PGID_SRC + port);
|
2021-01-29 09:00:02 +08:00
|
|
|
}
|
|
|
|
}
|
net: dsa: felix: perform switch setup for tag_8021q
Unlike sja1105, the only other user of the software-defined tag_8021q.c
tagger format, the implementation we choose for the Felix DSA switch
driver preserves full functionality under a vlan_filtering bridge
(i.e. IP termination works through the DSA user ports under all
circumstances).
The tag_8021q protocol just wants:
- Identifying the ingress switch port based on the RX VLAN ID, as seen
by the CPU. We achieve this by using the TCAM engines (which are also
used for tc-flower offload) to push the RX VLAN as a second, outer
tag, on egress towards the CPU port.
- Steering traffic injected into the switch from the network stack
towards the correct front port based on the TX VLAN, and consuming
(popping) that header on the switch's egress.
A tc-flower pseudocode of the static configuration done by the driver
would look like this:
$ tc qdisc add dev <cpu-port> clsact
$ for eth in swp0 swp1 swp2 swp3; do \
tc filter add dev <cpu-port> egress flower indev ${eth} \
action vlan push id <rxvlan> protocol 802.1ad; \
tc filter add dev <cpu-port> ingress protocol 802.1Q flower
vlan_id <txvlan> action vlan pop \
action mirred egress redirect dev ${eth}; \
done
but of course since DSA does not register network interfaces for the CPU
port, this configuration would be impossible for the user to do. Also,
due to the same reason, it is impossible for the user to inadvertently
delete these rules using tc. These rules do not collide in any way with
tc-flower, they just consume some TCAM space, which is something we can
live with.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-29 09:00:09 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask);
|
2021-01-29 09:00:02 +08:00
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
2021-02-12 23:15:59 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
2021-03-19 07:36:36 +08:00
|
|
|
u32 learn_ena = 0;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-03-19 07:36:36 +08:00
|
|
|
ocelot_port->stp_state = state;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-03-19 07:36:36 +08:00
|
|
|
if ((state == BR_STATE_LEARNING || state == BR_STATE_FORWARDING) &&
|
|
|
|
ocelot_port->learn_ena)
|
|
|
|
learn_ena = ANA_PORT_PORT_CFG_LEARN_ENA;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-03-19 07:36:36 +08:00
|
|
|
ocelot_rmw_gix(ocelot, learn_ena, ANA_PORT_PORT_CFG_LEARN_ENA,
|
|
|
|
ANA_PORT_PORT_CFG, port);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-01-29 09:00:02 +08:00
|
|
|
ocelot_apply_bridge_fwd_mask(ocelot);
|
2019-11-09 21:02:51 +08:00
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_bridge_stp_state_set);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs)
|
2019-11-09 21:02:51 +08:00
|
|
|
{
|
2020-05-04 06:20:27 +08:00
|
|
|
unsigned int age_period = ANA_AUTOAGE_AGE_PERIOD(msecs / 2000);
|
|
|
|
|
|
|
|
/* Setting AGE_PERIOD to zero effectively disables automatic aging,
|
|
|
|
* which is clearly not what our intention is. So avoid that.
|
|
|
|
*/
|
|
|
|
if (!age_period)
|
|
|
|
age_period = 1;
|
|
|
|
|
|
|
|
ocelot_rmw(ocelot, age_period, ANA_AUTOAGE_AGE_PERIOD_M, ANA_AUTOAGE);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_set_ageing_time);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
|
|
|
static struct ocelot_multicast *ocelot_multicast_get(struct ocelot *ocelot,
|
|
|
|
const unsigned char *addr,
|
|
|
|
u16 vid)
|
|
|
|
{
|
|
|
|
struct ocelot_multicast *mc;
|
|
|
|
|
|
|
|
list_for_each_entry(mc, &ocelot->multicast, list) {
|
|
|
|
if (ether_addr_equal(mc->addr, addr) && mc->vid == vid)
|
|
|
|
return mc;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
static enum macaccess_entry_type ocelot_classify_mdb(const unsigned char *addr)
|
|
|
|
{
|
|
|
|
if (addr[0] == 0x01 && addr[1] == 0x00 && addr[2] == 0x5e)
|
|
|
|
return ENTRYTYPE_MACv4;
|
|
|
|
if (addr[0] == 0x33 && addr[1] == 0x33)
|
|
|
|
return ENTRYTYPE_MACv6;
|
2020-10-29 10:27:34 +08:00
|
|
|
return ENTRYTYPE_LOCKED;
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
static struct ocelot_pgid *ocelot_pgid_alloc(struct ocelot *ocelot, int index,
|
|
|
|
unsigned long ports)
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
{
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
struct ocelot_pgid *pgid;
|
|
|
|
|
|
|
|
pgid = kzalloc(sizeof(*pgid), GFP_KERNEL);
|
|
|
|
if (!pgid)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
pgid->ports = ports;
|
|
|
|
pgid->index = index;
|
|
|
|
refcount_set(&pgid->refcount, 1);
|
|
|
|
list_add_tail(&pgid->list, &ocelot->pgids);
|
|
|
|
|
|
|
|
return pgid;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ocelot_pgid_free(struct ocelot *ocelot, struct ocelot_pgid *pgid)
|
|
|
|
{
|
|
|
|
if (!refcount_dec_and_test(&pgid->refcount))
|
|
|
|
return;
|
|
|
|
|
|
|
|
list_del(&pgid->list);
|
|
|
|
kfree(pgid);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ocelot_pgid *ocelot_mdb_get_pgid(struct ocelot *ocelot,
|
|
|
|
const struct ocelot_multicast *mc)
|
|
|
|
{
|
|
|
|
struct ocelot_pgid *pgid;
|
|
|
|
int index;
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
|
|
|
|
/* According to VSC7514 datasheet 3.9.1.5 IPv4 Multicast Entries and
|
|
|
|
* 3.9.1.6 IPv6 Multicast Entries, "Instead of a lookup in the
|
|
|
|
* destination mask table (PGID), the destination set is programmed as
|
|
|
|
* part of the entry MAC address.", and the DEST_IDX is set to 0.
|
|
|
|
*/
|
2020-10-29 10:27:37 +08:00
|
|
|
if (mc->entry_type == ENTRYTYPE_MACv4 ||
|
|
|
|
mc->entry_type == ENTRYTYPE_MACv6)
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
return ocelot_pgid_alloc(ocelot, 0, mc->ports);
|
|
|
|
|
|
|
|
list_for_each_entry(pgid, &ocelot->pgids, list) {
|
|
|
|
/* When searching for a nonreserved multicast PGID, ignore the
|
|
|
|
* dummy PGID of zero that we have for MACv4/MACv6 entries
|
|
|
|
*/
|
|
|
|
if (pgid->index && pgid->ports == mc->ports) {
|
|
|
|
refcount_inc(&pgid->refcount);
|
|
|
|
return pgid;
|
|
|
|
}
|
|
|
|
}
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
/* Search for a free index in the nonreserved multicast PGID area */
|
|
|
|
for_each_nonreserved_multicast_dest_pgid(ocelot, index) {
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
bool used = false;
|
|
|
|
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
list_for_each_entry(pgid, &ocelot->pgids, list) {
|
|
|
|
if (pgid->index == index) {
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
used = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!used)
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
return ocelot_pgid_alloc(ocelot, index, mc->ports);
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
return ERR_PTR(-ENOSPC);
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void ocelot_encode_ports_to_mdb(unsigned char *addr,
|
2020-10-29 10:27:37 +08:00
|
|
|
struct ocelot_multicast *mc)
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
{
|
2020-10-29 10:27:35 +08:00
|
|
|
ether_addr_copy(addr, mc->addr);
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
|
2020-10-29 10:27:37 +08:00
|
|
|
if (mc->entry_type == ENTRYTYPE_MACv4) {
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
addr[0] = 0;
|
|
|
|
addr[1] = mc->ports >> 8;
|
|
|
|
addr[2] = mc->ports & 0xff;
|
2020-10-29 10:27:37 +08:00
|
|
|
} else if (mc->entry_type == ENTRYTYPE_MACv6) {
|
net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries
The current procedure for installing a multicast address is hardcoded
for IPv4. But, in the ocelot hardware, there are 3 different procedures
for IPv4, IPv6 and for regular L2 multicast.
For IPv6 (33-33-xx-xx-xx-xx), it's the same as for IPv4
(01-00-5e-xx-xx-xx), except that the destination port mask is stuffed
into first 2 bytes of the MAC address except into first 3 bytes.
For plain Ethernet multicast, there's no port-in-address stuffing going
on, instead the DEST_IDX (pointer to PGID) is used there, just as for
unicast. So we have to use one of the nonreserved multicast PGIDs that
the hardware has allocated for this purpose.
This patch classifies the type of multicast address based on its first
bytes, then redirects to one of the 3 different hardware procedures.
Note that this gives us a really better way of redirecting PTP frames
sent at 01-1b-19-00-00-00 to the CPU. Previously, Yangbo Lu tried to add
a trapping rule for PTP EtherType but got a lot of pushback:
https://patchwork.ozlabs.org/project/netdev/patch/20190813025214.18601-5-yangbo.lu@nxp.com/
But right now, that isn't needed at all. The application stack (ptp4l)
does this for the PTP multicast addresses it's interested in (which are
configurable, and include 01-1b-19-00-00-00):
memset(&mreq, 0, sizeof(mreq));
mreq.mr_ifindex = index;
mreq.mr_type = PACKET_MR_MULTICAST;
mreq.mr_alen = MAC_LEN;
memcpy(mreq.mr_address, addr1, MAC_LEN);
err1 = setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq,
sizeof(mreq));
Into the kernel, this translates into a dev_mc_add on the switch network
interfaces, and our drivers know that it means they should translate it
into a host MDB address (make the CPU port be the destination).
Previously, this was broken because all mdb addresses were treated as
IPv4 (which 01-1b-19-00-00-00 obviously is not).
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-06-21 19:46:03 +08:00
|
|
|
addr[0] = mc->ports >> 8;
|
|
|
|
addr[1] = mc->ports & 0xff;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-21 19:46:01 +08:00
|
|
|
int ocelot_port_mdb_add(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_port_mdb *mdb)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
|
|
|
unsigned char addr[ETH_ALEN];
|
2019-11-09 21:02:53 +08:00
|
|
|
struct ocelot_multicast *mc;
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
struct ocelot_pgid *pgid;
|
2018-05-15 04:04:57 +08:00
|
|
|
u16 vid = mdb->vid;
|
|
|
|
|
2020-06-21 19:46:00 +08:00
|
|
|
if (port == ocelot->npi)
|
|
|
|
port = ocelot->num_phys_ports;
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
|
|
|
|
if (!mc) {
|
2020-10-29 10:27:36 +08:00
|
|
|
/* New entry */
|
2020-10-29 10:27:37 +08:00
|
|
|
mc = devm_kzalloc(ocelot->dev, sizeof(*mc), GFP_KERNEL);
|
|
|
|
if (!mc)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
mc->entry_type = ocelot_classify_mdb(mdb->addr);
|
|
|
|
ether_addr_copy(mc->addr, mdb->addr);
|
|
|
|
mc->vid = vid;
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
list_add_tail(&mc->list, &ocelot->multicast);
|
2020-10-29 10:27:36 +08:00
|
|
|
} else {
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
/* Existing entry. Clean up the current port mask from
|
|
|
|
* hardware now, because we'll be modifying it.
|
|
|
|
*/
|
|
|
|
ocelot_pgid_free(ocelot, mc->pgid);
|
2020-10-29 10:27:37 +08:00
|
|
|
ocelot_encode_ports_to_mdb(addr, mc);
|
2018-05-15 04:04:57 +08:00
|
|
|
ocelot_mact_forget(ocelot, addr, vid);
|
|
|
|
}
|
|
|
|
|
2019-11-09 21:02:53 +08:00
|
|
|
mc->ports |= BIT(port);
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
|
|
|
|
pgid = ocelot_mdb_get_pgid(ocelot, mc);
|
|
|
|
if (IS_ERR(pgid)) {
|
|
|
|
dev_err(ocelot->dev,
|
|
|
|
"Cannot allocate PGID for mdb %pM vid %d\n",
|
|
|
|
mc->addr, mc->vid);
|
|
|
|
devm_kfree(ocelot->dev, mc);
|
|
|
|
return PTR_ERR(pgid);
|
|
|
|
}
|
|
|
|
mc->pgid = pgid;
|
|
|
|
|
2020-10-29 10:27:37 +08:00
|
|
|
ocelot_encode_ports_to_mdb(addr, mc);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
if (mc->entry_type != ENTRYTYPE_MACv4 &&
|
|
|
|
mc->entry_type != ENTRYTYPE_MACv6)
|
|
|
|
ocelot_write_rix(ocelot, pgid->ports, ANA_PGID_PGID,
|
|
|
|
pgid->index);
|
|
|
|
|
|
|
|
return ocelot_mact_learn(ocelot, pgid->index, addr, vid,
|
2020-10-29 10:27:37 +08:00
|
|
|
mc->entry_type);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2020-06-21 19:46:01 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_mdb_add);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2020-06-21 19:46:01 +08:00
|
|
|
int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
|
|
|
|
const struct switchdev_obj_port_mdb *mdb)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
|
|
|
unsigned char addr[ETH_ALEN];
|
2019-11-09 21:02:53 +08:00
|
|
|
struct ocelot_multicast *mc;
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
struct ocelot_pgid *pgid;
|
2018-05-15 04:04:57 +08:00
|
|
|
u16 vid = mdb->vid;
|
|
|
|
|
2020-06-21 19:46:00 +08:00
|
|
|
if (port == ocelot->npi)
|
|
|
|
port = ocelot->num_phys_ports;
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
mc = ocelot_multicast_get(ocelot, mdb->addr, vid);
|
|
|
|
if (!mc)
|
|
|
|
return -ENOENT;
|
|
|
|
|
2020-10-29 10:27:37 +08:00
|
|
|
ocelot_encode_ports_to_mdb(addr, mc);
|
2018-05-15 04:04:57 +08:00
|
|
|
ocelot_mact_forget(ocelot, addr, vid);
|
|
|
|
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
ocelot_pgid_free(ocelot, mc->pgid);
|
2019-11-09 21:02:53 +08:00
|
|
|
mc->ports &= ~BIT(port);
|
2018-05-15 04:04:57 +08:00
|
|
|
if (!mc->ports) {
|
|
|
|
list_del(&mc->list);
|
|
|
|
devm_kfree(ocelot->dev, mc);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
/* We have a PGID with fewer ports now */
|
|
|
|
pgid = ocelot_mdb_get_pgid(ocelot, mc);
|
|
|
|
if (IS_ERR(pgid))
|
|
|
|
return PTR_ERR(pgid);
|
|
|
|
mc->pgid = pgid;
|
|
|
|
|
2020-10-29 10:27:37 +08:00
|
|
|
ocelot_encode_ports_to_mdb(addr, mc);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
if (mc->entry_type != ENTRYTYPE_MACv4 &&
|
|
|
|
mc->entry_type != ENTRYTYPE_MACv6)
|
|
|
|
ocelot_write_rix(ocelot, pgid->ports, ANA_PGID_PGID,
|
|
|
|
pgid->index);
|
|
|
|
|
|
|
|
return ocelot_mact_learn(ocelot, pgid->index, addr, vid,
|
2020-10-29 10:27:37 +08:00
|
|
|
mc->entry_type);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2020-06-21 19:46:01 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_mdb_del);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-03-23 07:51:52 +08:00
|
|
|
void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
|
|
|
|
struct net_device *bridge)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
2021-03-19 07:36:36 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-03-19 07:36:36 +08:00
|
|
|
ocelot_port->bridge = bridge;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-03-23 07:51:52 +08:00
|
|
|
ocelot_apply_bridge_fwd_mask(ocelot);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_bridge_join);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2021-03-23 07:51:52 +08:00
|
|
|
void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
|
|
|
|
struct net_device *bridge)
|
2018-05-15 04:04:57 +08:00
|
|
|
{
|
2021-03-19 07:36:36 +08:00
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
2020-10-03 06:06:46 +08:00
|
|
|
|
2021-03-19 07:36:36 +08:00
|
|
|
ocelot_port->bridge = NULL;
|
2018-06-26 20:28:49 +08:00
|
|
|
|
2021-10-21 01:58:52 +08:00
|
|
|
ocelot_port_set_pvid(ocelot, port, NULL);
|
net: mscc: ocelot: allow a config where all bridge VLANs are egress-untagged
At present, the ocelot driver accepts a single egress-untagged bridge
VLAN, meaning that this sequence of operations:
ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
bridge vlan add dev swp0 vid 2 pvid untagged
fails because the bridge automatically installs VID 1 as a pvid & untagged
VLAN, and vid 2 would be the second untagged VLAN on this port. It is
necessary to delete VID 1 before proceeding to add VID 2.
This limitation comes from the fact that we operate the port tag, when
it has an egress-untagged VID, in the OCELOT_PORT_TAG_NATIVE mode.
The ocelot switches do not have full flexibility and can either have one
single VID as egress-untagged, or all of them.
There are use cases for having all VLANs as egress-untagged as well, and
this patch adds support for that.
The change rewrites ocelot_port_set_native_vlan() into a more generic
ocelot_port_manage_port_tag() function. Because the software bridge's
state, transmitted to us via switchdev, can become very complex, we
don't attempt to track all possible state transitions, but instead take
a more declarative approach and just make ocelot_port_manage_port_tag()
figure out which more to operate in:
- port is VLAN-unaware: the classified VLAN (internal, unrelated to the
802.1Q header) is not inserted into packets on egress
- port is VLAN-aware:
- port has tagged VLANs:
-> port has no untagged VLAN: set up as pure trunk
-> port has one untagged VLAN: set up as trunk port + native VLAN
-> port has more than one untagged VLAN: this is an invalid config
which is rejected by ocelot_vlan_prepare
- port has no tagged VLANs
-> set up as pure egress-untagged port
We don't keep the number of tagged and untagged VLANs, we just count the
structures we keep.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:50 +08:00
|
|
|
ocelot_port_manage_port_tag(ocelot, port);
|
2021-03-23 07:51:52 +08:00
|
|
|
ocelot_apply_bridge_fwd_mask(ocelot);
|
2018-05-15 04:04:57 +08:00
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_bridge_leave);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2018-06-26 20:28:48 +08:00
|
|
|
static void ocelot_set_aggr_pgids(struct ocelot *ocelot)
|
|
|
|
{
|
2021-02-06 06:02:17 +08:00
|
|
|
unsigned long visited = GENMASK(ocelot->num_phys_ports - 1, 0);
|
2018-06-26 20:28:48 +08:00
|
|
|
int i, port, lag;
|
|
|
|
|
|
|
|
/* Reset destination and aggregation PGIDS */
|
2020-06-21 19:46:02 +08:00
|
|
|
for_each_unicast_dest_pgid(ocelot, port)
|
2018-06-26 20:28:48 +08:00
|
|
|
ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, port);
|
|
|
|
|
2020-06-21 19:46:02 +08:00
|
|
|
for_each_aggr_pgid(ocelot, i)
|
2018-06-26 20:28:48 +08:00
|
|
|
ocelot_write_rix(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
|
|
|
|
ANA_PGID_PGID, i);
|
|
|
|
|
2021-02-06 06:02:17 +08:00
|
|
|
/* The visited ports bitmask holds the list of ports offloading any
|
|
|
|
* bonding interface. Initially we mark all these ports as unvisited,
|
|
|
|
* then every time we visit a port in this bitmask, we know that it is
|
|
|
|
* the lowest numbered port, i.e. the one whose logical ID == physical
|
|
|
|
* port ID == LAG ID. So we mark as visited all further ports in the
|
|
|
|
* bitmask that are offloading the same bonding interface. This way,
|
|
|
|
* we set up the aggregation PGIDs only once per bonding interface.
|
|
|
|
*/
|
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
|
|
|
|
if (!ocelot_port || !ocelot_port->bond)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
visited &= ~BIT(port);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now, set PGIDs for each active LAG */
|
2018-06-26 20:28:48 +08:00
|
|
|
for (lag = 0; lag < ocelot->num_phys_ports; lag++) {
|
2021-02-06 06:02:17 +08:00
|
|
|
struct net_device *bond = ocelot->ports[lag]->bond;
|
2021-02-06 06:02:19 +08:00
|
|
|
int num_active_ports = 0;
|
2018-06-26 20:28:48 +08:00
|
|
|
unsigned long bond_mask;
|
|
|
|
u8 aggr_idx[16];
|
|
|
|
|
2021-02-06 06:02:17 +08:00
|
|
|
if (!bond || (visited & BIT(lag)))
|
2018-06-26 20:28:48 +08:00
|
|
|
continue;
|
|
|
|
|
2021-02-06 06:02:19 +08:00
|
|
|
bond_mask = ocelot_get_bond_mask(ocelot, bond, true);
|
2021-02-06 06:02:17 +08:00
|
|
|
|
2018-06-26 20:28:48 +08:00
|
|
|
for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) {
|
|
|
|
// Destination mask
|
|
|
|
ocelot_write_rix(ocelot, bond_mask,
|
|
|
|
ANA_PGID_PGID, port);
|
2021-02-06 06:02:19 +08:00
|
|
|
aggr_idx[num_active_ports++] = port;
|
2018-06-26 20:28:48 +08:00
|
|
|
}
|
|
|
|
|
2020-06-21 19:46:02 +08:00
|
|
|
for_each_aggr_pgid(ocelot, i) {
|
2018-06-26 20:28:48 +08:00
|
|
|
u32 ac;
|
|
|
|
|
|
|
|
ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i);
|
|
|
|
ac &= ~bond_mask;
|
2021-02-06 06:02:19 +08:00
|
|
|
/* Don't do division by zero if there was no active
|
|
|
|
* port. Just make all aggregation codes zero.
|
|
|
|
*/
|
|
|
|
if (num_active_ports)
|
|
|
|
ac |= BIT(aggr_idx[i % num_active_ports]);
|
2018-06-26 20:28:48 +08:00
|
|
|
ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i);
|
|
|
|
}
|
2021-02-06 06:02:17 +08:00
|
|
|
|
|
|
|
/* Mark all ports in the same LAG as visited to avoid applying
|
|
|
|
* the same config again.
|
|
|
|
*/
|
|
|
|
for (port = lag; port < ocelot->num_phys_ports; port++) {
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
|
|
|
|
if (!ocelot_port)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (ocelot_port->bond == bond)
|
|
|
|
visited |= BIT(port);
|
|
|
|
}
|
2018-06-26 20:28:48 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-06 06:02:16 +08:00
|
|
|
/* When offloading a bonding interface, the switch ports configured under the
|
|
|
|
* same bond must have the same logical port ID, equal to the physical port ID
|
|
|
|
* of the lowest numbered physical port in that bond. Otherwise, in standalone/
|
|
|
|
* bridged mode, each port has a logical port ID equal to its physical port ID.
|
|
|
|
*/
|
|
|
|
static void ocelot_setup_logical_port_ids(struct ocelot *ocelot)
|
2018-06-26 20:28:48 +08:00
|
|
|
{
|
2021-02-06 06:02:16 +08:00
|
|
|
int port;
|
2018-06-26 20:28:48 +08:00
|
|
|
|
2021-02-06 06:02:16 +08:00
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
struct net_device *bond;
|
|
|
|
|
|
|
|
if (!ocelot_port)
|
|
|
|
continue;
|
2018-06-26 20:28:48 +08:00
|
|
|
|
2021-02-06 06:02:16 +08:00
|
|
|
bond = ocelot_port->bond;
|
|
|
|
if (bond) {
|
2021-02-06 06:02:19 +08:00
|
|
|
int lag = __ffs(ocelot_get_bond_mask(ocelot, bond,
|
|
|
|
false));
|
2018-06-26 20:28:48 +08:00
|
|
|
|
2021-02-06 06:02:16 +08:00
|
|
|
ocelot_rmw_gix(ocelot,
|
|
|
|
ANA_PORT_PORT_CFG_PORTID_VAL(lag),
|
|
|
|
ANA_PORT_PORT_CFG_PORTID_VAL_M,
|
|
|
|
ANA_PORT_PORT_CFG, port);
|
|
|
|
} else {
|
|
|
|
ocelot_rmw_gix(ocelot,
|
|
|
|
ANA_PORT_PORT_CFG_PORTID_VAL(port),
|
|
|
|
ANA_PORT_PORT_CFG_PORTID_VAL_M,
|
|
|
|
ANA_PORT_PORT_CFG, port);
|
|
|
|
}
|
2018-06-26 20:28:48 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-20 23:43:44 +08:00
|
|
|
int ocelot_port_lag_join(struct ocelot *ocelot, int port,
|
2021-02-06 06:02:12 +08:00
|
|
|
struct net_device *bond,
|
|
|
|
struct netdev_lag_upper_info *info)
|
2018-06-26 20:28:48 +08:00
|
|
|
{
|
2021-02-06 06:02:12 +08:00
|
|
|
if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
2021-02-06 06:02:14 +08:00
|
|
|
ocelot->ports[port]->bond = bond;
|
2018-06-26 20:28:48 +08:00
|
|
|
|
2021-02-06 06:02:16 +08:00
|
|
|
ocelot_setup_logical_port_ids(ocelot);
|
2021-01-29 09:00:02 +08:00
|
|
|
ocelot_apply_bridge_fwd_mask(ocelot);
|
2018-06-26 20:28:48 +08:00
|
|
|
ocelot_set_aggr_pgids(ocelot);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2020-06-20 23:43:44 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_lag_join);
|
2018-06-26 20:28:48 +08:00
|
|
|
|
2020-06-20 23:43:44 +08:00
|
|
|
void ocelot_port_lag_leave(struct ocelot *ocelot, int port,
|
|
|
|
struct net_device *bond)
|
2018-06-26 20:28:48 +08:00
|
|
|
{
|
2021-02-06 06:02:14 +08:00
|
|
|
ocelot->ports[port]->bond = NULL;
|
|
|
|
|
2021-02-06 06:02:16 +08:00
|
|
|
ocelot_setup_logical_port_ids(ocelot);
|
2021-01-29 09:00:02 +08:00
|
|
|
ocelot_apply_bridge_fwd_mask(ocelot);
|
2018-06-26 20:28:48 +08:00
|
|
|
ocelot_set_aggr_pgids(ocelot);
|
|
|
|
}
|
2020-06-20 23:43:44 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_lag_leave);
|
2018-11-23 07:30:11 +08:00
|
|
|
|
2021-02-06 06:02:19 +08:00
|
|
|
void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active)
|
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
|
|
|
|
ocelot_port->lag_tx_active = lag_tx_active;
|
|
|
|
|
|
|
|
/* Rebalance the LAGs */
|
|
|
|
ocelot_set_aggr_pgids(ocelot);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_port_lag_change);
|
|
|
|
|
2020-03-10 09:28:18 +08:00
|
|
|
/* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
|
|
|
|
* The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
|
2020-03-28 03:55:47 +08:00
|
|
|
* In the special case that it's the NPI port that we're configuring, the
|
|
|
|
* length of the tag and optional prefix needs to be accounted for privately,
|
|
|
|
* in order to be able to sustain communication at the requested @sdu.
|
2020-03-10 09:28:18 +08:00
|
|
|
*/
|
2020-03-28 03:55:47 +08:00
|
|
|
void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu)
|
2019-11-09 21:02:56 +08:00
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
2020-03-10 09:28:18 +08:00
|
|
|
int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN;
|
2020-07-14 00:57:05 +08:00
|
|
|
int pause_start, pause_stop;
|
2020-10-05 17:09:11 +08:00
|
|
|
int atop, atop_tot;
|
2019-11-09 21:02:56 +08:00
|
|
|
|
2020-03-28 03:55:47 +08:00
|
|
|
if (port == ocelot->npi) {
|
|
|
|
maxlen += OCELOT_TAG_LEN;
|
|
|
|
|
2021-01-29 09:00:03 +08:00
|
|
|
if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT)
|
2020-03-28 03:55:47 +08:00
|
|
|
maxlen += OCELOT_SHORT_PREFIX_LEN;
|
2021-01-29 09:00:03 +08:00
|
|
|
else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG)
|
2020-03-28 03:55:47 +08:00
|
|
|
maxlen += OCELOT_LONG_PREFIX_LEN;
|
|
|
|
}
|
|
|
|
|
2020-03-10 09:28:18 +08:00
|
|
|
ocelot_port_writel(ocelot_port, maxlen, DEV_MAC_MAXLEN_CFG);
|
2019-11-14 23:03:23 +08:00
|
|
|
|
2020-07-14 00:57:05 +08:00
|
|
|
/* Set Pause watermark hysteresis */
|
|
|
|
pause_start = 6 * maxlen / OCELOT_BUFFER_CELL_SZ;
|
|
|
|
pause_stop = 4 * maxlen / OCELOT_BUFFER_CELL_SZ;
|
2020-07-14 00:57:07 +08:00
|
|
|
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_START,
|
|
|
|
pause_start);
|
|
|
|
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_STOP,
|
|
|
|
pause_stop);
|
2019-11-14 23:03:23 +08:00
|
|
|
|
2020-10-05 17:09:11 +08:00
|
|
|
/* Tail dropping watermarks */
|
2021-01-15 10:11:11 +08:00
|
|
|
atop_tot = (ocelot->packet_buffer_size - 9 * maxlen) /
|
2020-03-10 09:28:18 +08:00
|
|
|
OCELOT_BUFFER_CELL_SZ;
|
2020-10-05 17:09:11 +08:00
|
|
|
atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ;
|
|
|
|
ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port);
|
|
|
|
ocelot_write(ocelot, ocelot->ops->wm_enc(atop_tot), SYS_ATOP_TOT_CFG);
|
2019-11-14 23:03:23 +08:00
|
|
|
}
|
2020-03-28 03:55:47 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_port_set_maxlen);
|
|
|
|
|
|
|
|
int ocelot_get_max_mtu(struct ocelot *ocelot, int port)
|
|
|
|
{
|
|
|
|
int max_mtu = 65535 - ETH_HLEN - ETH_FCS_LEN;
|
|
|
|
|
|
|
|
if (port == ocelot->npi) {
|
|
|
|
max_mtu -= OCELOT_TAG_LEN;
|
|
|
|
|
2021-01-29 09:00:03 +08:00
|
|
|
if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT)
|
2020-03-28 03:55:47 +08:00
|
|
|
max_mtu -= OCELOT_SHORT_PREFIX_LEN;
|
2021-01-29 09:00:03 +08:00
|
|
|
else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG)
|
2020-03-28 03:55:47 +08:00
|
|
|
max_mtu -= OCELOT_LONG_PREFIX_LEN;
|
|
|
|
}
|
|
|
|
|
|
|
|
return max_mtu;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_get_max_mtu);
|
2019-11-14 23:03:23 +08:00
|
|
|
|
2021-02-12 23:15:59 +08:00
|
|
|
static void ocelot_port_set_learning(struct ocelot *ocelot, int port,
|
|
|
|
bool enabled)
|
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
u32 val = 0;
|
|
|
|
|
|
|
|
if (enabled)
|
|
|
|
val = ANA_PORT_PORT_CFG_LEARN_ENA;
|
|
|
|
|
|
|
|
ocelot_rmw_gix(ocelot, val, ANA_PORT_PORT_CFG_LEARN_ENA,
|
|
|
|
ANA_PORT_PORT_CFG, port);
|
|
|
|
|
|
|
|
ocelot_port->learn_ena = enabled;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ocelot_port_set_ucast_flood(struct ocelot *ocelot, int port,
|
|
|
|
bool enabled)
|
|
|
|
{
|
|
|
|
u32 val = 0;
|
|
|
|
|
|
|
|
if (enabled)
|
|
|
|
val = BIT(port);
|
|
|
|
|
|
|
|
ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_UC);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ocelot_port_set_mcast_flood(struct ocelot *ocelot, int port,
|
|
|
|
bool enabled)
|
|
|
|
{
|
|
|
|
u32 val = 0;
|
|
|
|
|
|
|
|
if (enabled)
|
|
|
|
val = BIT(port);
|
|
|
|
|
|
|
|
ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_MC);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ocelot_port_set_bcast_flood(struct ocelot *ocelot, int port,
|
|
|
|
bool enabled)
|
|
|
|
{
|
|
|
|
u32 val = 0;
|
|
|
|
|
|
|
|
if (enabled)
|
|
|
|
val = BIT(port);
|
|
|
|
|
|
|
|
ocelot_rmw_rix(ocelot, val, BIT(port), ANA_PGID_PGID, PGID_BC);
|
|
|
|
}
|
|
|
|
|
|
|
|
int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port,
|
|
|
|
struct switchdev_brport_flags flags)
|
|
|
|
{
|
|
|
|
if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
|
|
|
|
BR_BCAST_FLOOD))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_port_pre_bridge_flags);
|
|
|
|
|
|
|
|
void ocelot_port_bridge_flags(struct ocelot *ocelot, int port,
|
|
|
|
struct switchdev_brport_flags flags)
|
|
|
|
{
|
|
|
|
if (flags.mask & BR_LEARNING)
|
|
|
|
ocelot_port_set_learning(ocelot, port,
|
|
|
|
!!(flags.val & BR_LEARNING));
|
|
|
|
|
|
|
|
if (flags.mask & BR_FLOOD)
|
|
|
|
ocelot_port_set_ucast_flood(ocelot, port,
|
|
|
|
!!(flags.val & BR_FLOOD));
|
|
|
|
|
|
|
|
if (flags.mask & BR_MCAST_FLOOD)
|
|
|
|
ocelot_port_set_mcast_flood(ocelot, port,
|
|
|
|
!!(flags.val & BR_MCAST_FLOOD));
|
|
|
|
|
|
|
|
if (flags.mask & BR_BCAST_FLOOD)
|
|
|
|
ocelot_port_set_bcast_flood(ocelot, port,
|
|
|
|
!!(flags.val & BR_BCAST_FLOOD));
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_port_bridge_flags);
|
|
|
|
|
2019-11-14 23:03:27 +08:00
|
|
|
void ocelot_init_port(struct ocelot *ocelot, int port)
|
2019-11-14 23:03:23 +08:00
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
|
2019-11-27 15:27:57 +08:00
|
|
|
skb_queue_head_init(&ocelot_port->tx_skbs);
|
2019-11-09 21:02:56 +08:00
|
|
|
|
|
|
|
/* Basic L2 initialization */
|
|
|
|
|
2019-11-14 23:03:22 +08:00
|
|
|
/* Set MAC IFG Gaps
|
|
|
|
* FDX: TX_IFG = 5, RX_IFG1 = RX_IFG2 = 0
|
|
|
|
* !FDX: TX_IFG = 5, RX_IFG1 = RX_IFG2 = 5
|
|
|
|
*/
|
|
|
|
ocelot_port_writel(ocelot_port, DEV_MAC_IFG_CFG_TX_IFG(5),
|
|
|
|
DEV_MAC_IFG_CFG);
|
|
|
|
|
|
|
|
/* Load seed (0) and set MAC HDX late collision */
|
|
|
|
ocelot_port_writel(ocelot_port, DEV_MAC_HDX_CFG_LATE_COL_POS(67) |
|
|
|
|
DEV_MAC_HDX_CFG_SEED_LOAD,
|
|
|
|
DEV_MAC_HDX_CFG);
|
|
|
|
mdelay(1);
|
|
|
|
ocelot_port_writel(ocelot_port, DEV_MAC_HDX_CFG_LATE_COL_POS(67),
|
|
|
|
DEV_MAC_HDX_CFG);
|
|
|
|
|
|
|
|
/* Set Max Length and maximum tags allowed */
|
2020-03-10 09:28:18 +08:00
|
|
|
ocelot_port_set_maxlen(ocelot, port, ETH_DATA_LEN);
|
2019-11-14 23:03:22 +08:00
|
|
|
ocelot_port_writel(ocelot_port, DEV_MAC_TAGS_CFG_TAG_ID(ETH_P_8021AD) |
|
|
|
|
DEV_MAC_TAGS_CFG_VLAN_AWR_ENA |
|
2020-03-10 09:28:18 +08:00
|
|
|
DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA |
|
2019-11-14 23:03:22 +08:00
|
|
|
DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA,
|
|
|
|
DEV_MAC_TAGS_CFG);
|
|
|
|
|
|
|
|
/* Set SMAC of Pause frame (00:00:00:00:00:00) */
|
|
|
|
ocelot_port_writel(ocelot_port, 0, DEV_MAC_FC_MAC_HIGH_CFG);
|
|
|
|
ocelot_port_writel(ocelot_port, 0, DEV_MAC_FC_MAC_LOW_CFG);
|
|
|
|
|
2020-07-14 00:57:05 +08:00
|
|
|
/* Enable transmission of pause frames */
|
2020-07-14 00:57:07 +08:00
|
|
|
ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 1);
|
2020-07-14 00:57:05 +08:00
|
|
|
|
2019-11-09 21:02:56 +08:00
|
|
|
/* Drop frames with multicast source address */
|
|
|
|
ocelot_rmw_gix(ocelot, ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA,
|
|
|
|
ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA,
|
|
|
|
ANA_PORT_DROP_CFG, port);
|
|
|
|
|
|
|
|
/* Set default VLAN and tag type to 8021Q. */
|
|
|
|
ocelot_rmw_gix(ocelot, REW_PORT_VLAN_CFG_PORT_TPID(ETH_P_8021Q),
|
|
|
|
REW_PORT_VLAN_CFG_PORT_TPID_M,
|
|
|
|
REW_PORT_VLAN_CFG, port);
|
|
|
|
|
2021-02-12 23:15:59 +08:00
|
|
|
/* Disable source address learning for standalone mode */
|
|
|
|
ocelot_port_set_learning(ocelot, port, false);
|
|
|
|
|
net: dsa: felix: stop calling ocelot_port_{enable,disable}
ocelot_port_enable touches ANA_PORT_PORT_CFG, which has the following
fields:
- LOCKED_PORTMOVE_CPU, LEARNDROP, LEARNCPU, LEARNAUTO, RECV_ENA, all of
which are written with their hardware default values, also runtime
invariants. So it makes no sense to write these during every .ndo_open.
- PORTID_VAL: this field has an out-of-reset value of zero for all ports
and must be initialized by software. Additionally, the
ocelot_setup_logical_port_ids() code path sets up different logical
port IDs for the ports in a hardware LAG, and we absolutely don't want
.ndo_open to interfere there and reset those values.
So in fact the write from ocelot_port_enable can better be moved to
ocelot_init_port, and the .ndo_open hook deleted.
ocelot_port_disable touches DEV_MAC_ENA_CFG and QSYS_SWITCH_PORT_MODE_PORT_ENA,
in an attempt to undo what ocelot_adjust_link did. But since .ndo_stop
does not get called each time the link falls (i.e. this isn't a
substitute for .phylink_mac_link_down), felix already does better at
this by writing those registers already in felix_phylink_mac_link_down.
So keep ocelot_port_disable (for now, until ocelot is converted to
phylink too), and just delete the felix call to it, which is not
necessary.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-15 09:47:47 +08:00
|
|
|
/* Set the port's initial logical port ID value, enable receiving
|
|
|
|
* frames on it, and configure the MAC address learning type to
|
|
|
|
* automatic.
|
|
|
|
*/
|
|
|
|
ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
|
|
|
|
ANA_PORT_PORT_CFG_RECV_ENA |
|
|
|
|
ANA_PORT_PORT_CFG_PORTID_VAL(port),
|
|
|
|
ANA_PORT_PORT_CFG, port);
|
|
|
|
|
2019-11-09 21:02:56 +08:00
|
|
|
/* Enable vcap lookups */
|
|
|
|
ocelot_vcap_enable(ocelot, port);
|
|
|
|
}
|
2019-11-14 23:03:27 +08:00
|
|
|
EXPORT_SYMBOL(ocelot_init_port);
|
2019-11-09 21:02:56 +08:00
|
|
|
|
2020-09-27 03:32:01 +08:00
|
|
|
/* Configure and enable the CPU port module, which is a set of queues
|
|
|
|
* accessible through register MMIO, frame DMA or Ethernet (in case
|
|
|
|
* NPI mode is used).
|
net: mscc: ocelot: eliminate confusion between CPU and NPI port
Ocelot has the concept of a CPU port. The CPU port is represented in the
forwarding and the queueing system, but it is not a physical device. The
CPU port can either be accessed via register-based injection/extraction
(which is the case of Ocelot), via Frame-DMA (similar to the first one),
or "connected" to a physical Ethernet port (called NPI in the datasheet)
which is the case of the Felix DSA switch.
In Ocelot the CPU port is at index 11.
In Felix the CPU port is at index 6.
The CPU bit is treated special in the forwarding, as it is never cleared
from the forwarding port mask (once added to it). Other than that, it is
treated the same as a normal front port.
Both Felix and Ocelot should use the CPU port in the same way. This
means that Felix should not use the NPI port directly when forwarding to
the CPU, but instead use the CPU port.
This patch is fixing this such that Felix will use port 6 as its CPU
port, and just use the NPI port to carry the traffic.
Therefore, eliminate the "ocelot->cpu" variable which was holding the
index of the NPI port for Felix, and the index of the CPU port module
for Ocelot, so the variable was actually configuring different things
for different drivers and causing at least part of the confusion.
Also remove the "ocelot->num_cpu_ports" variable, which is the result of
another confusion. The 2 CPU ports mentioned in the datasheet are
because there are two frame extraction channels (register based or DMA
based). This is of no relevance to the driver at the moment, and
invisible to the analyzer module.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
|
|
|
*/
|
2020-09-27 03:32:01 +08:00
|
|
|
static void ocelot_cpu_port_init(struct ocelot *ocelot)
|
2019-11-09 21:03:00 +08:00
|
|
|
{
|
net: mscc: ocelot: eliminate confusion between CPU and NPI port
Ocelot has the concept of a CPU port. The CPU port is represented in the
forwarding and the queueing system, but it is not a physical device. The
CPU port can either be accessed via register-based injection/extraction
(which is the case of Ocelot), via Frame-DMA (similar to the first one),
or "connected" to a physical Ethernet port (called NPI in the datasheet)
which is the case of the Felix DSA switch.
In Ocelot the CPU port is at index 11.
In Felix the CPU port is at index 6.
The CPU bit is treated special in the forwarding, as it is never cleared
from the forwarding port mask (once added to it). Other than that, it is
treated the same as a normal front port.
Both Felix and Ocelot should use the CPU port in the same way. This
means that Felix should not use the NPI port directly when forwarding to
the CPU, but instead use the CPU port.
This patch is fixing this such that Felix will use port 6 as its CPU
port, and just use the NPI port to carry the traffic.
Therefore, eliminate the "ocelot->cpu" variable which was holding the
index of the NPI port for Felix, and the index of the CPU port module
for Ocelot, so the variable was actually configuring different things
for different drivers and causing at least part of the confusion.
Also remove the "ocelot->num_cpu_ports" variable, which is the result of
another confusion. The 2 CPU ports mentioned in the datasheet are
because there are two frame extraction channels (register based or DMA
based). This is of no relevance to the driver at the moment, and
invisible to the analyzer module.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
|
|
|
int cpu = ocelot->num_phys_ports;
|
|
|
|
|
|
|
|
/* The unicast destination PGID for the CPU port module is unused */
|
2019-11-09 21:03:00 +08:00
|
|
|
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, cpu);
|
net: mscc: ocelot: eliminate confusion between CPU and NPI port
Ocelot has the concept of a CPU port. The CPU port is represented in the
forwarding and the queueing system, but it is not a physical device. The
CPU port can either be accessed via register-based injection/extraction
(which is the case of Ocelot), via Frame-DMA (similar to the first one),
or "connected" to a physical Ethernet port (called NPI in the datasheet)
which is the case of the Felix DSA switch.
In Ocelot the CPU port is at index 11.
In Felix the CPU port is at index 6.
The CPU bit is treated special in the forwarding, as it is never cleared
from the forwarding port mask (once added to it). Other than that, it is
treated the same as a normal front port.
Both Felix and Ocelot should use the CPU port in the same way. This
means that Felix should not use the NPI port directly when forwarding to
the CPU, but instead use the CPU port.
This patch is fixing this such that Felix will use port 6 as its CPU
port, and just use the NPI port to carry the traffic.
Therefore, eliminate the "ocelot->cpu" variable which was holding the
index of the NPI port for Felix, and the index of the CPU port module
for Ocelot, so the variable was actually configuring different things
for different drivers and causing at least part of the confusion.
Also remove the "ocelot->num_cpu_ports" variable, which is the result of
another confusion. The 2 CPU ports mentioned in the datasheet are
because there are two frame extraction channels (register based or DMA
based). This is of no relevance to the driver at the moment, and
invisible to the analyzer module.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
|
|
|
/* Instead set up a multicast destination PGID for traffic copied to
|
|
|
|
* the CPU. Whitelisted MAC addresses like the port netdevice MAC
|
|
|
|
* addresses will be copied to the CPU via this PGID.
|
|
|
|
*/
|
2019-11-09 21:03:00 +08:00
|
|
|
ocelot_write_rix(ocelot, BIT(cpu), ANA_PGID_PGID, PGID_CPU);
|
|
|
|
ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_RECV_ENA |
|
|
|
|
ANA_PORT_PORT_CFG_PORTID_VAL(cpu),
|
|
|
|
ANA_PORT_PORT_CFG, cpu);
|
|
|
|
|
net: mscc: ocelot: eliminate confusion between CPU and NPI port
Ocelot has the concept of a CPU port. The CPU port is represented in the
forwarding and the queueing system, but it is not a physical device. The
CPU port can either be accessed via register-based injection/extraction
(which is the case of Ocelot), via Frame-DMA (similar to the first one),
or "connected" to a physical Ethernet port (called NPI in the datasheet)
which is the case of the Felix DSA switch.
In Ocelot the CPU port is at index 11.
In Felix the CPU port is at index 6.
The CPU bit is treated special in the forwarding, as it is never cleared
from the forwarding port mask (once added to it). Other than that, it is
treated the same as a normal front port.
Both Felix and Ocelot should use the CPU port in the same way. This
means that Felix should not use the NPI port directly when forwarding to
the CPU, but instead use the CPU port.
This patch is fixing this such that Felix will use port 6 as its CPU
port, and just use the NPI port to carry the traffic.
Therefore, eliminate the "ocelot->cpu" variable which was holding the
index of the NPI port for Felix, and the index of the CPU port module
for Ocelot, so the variable was actually configuring different things
for different drivers and causing at least part of the confusion.
Also remove the "ocelot->num_cpu_ports" variable, which is the result of
another confusion. The 2 CPU ports mentioned in the datasheet are
because there are two frame extraction channels (register based or DMA
based). This is of no relevance to the driver at the moment, and
invisible to the analyzer module.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
|
|
|
/* Enable CPU port module */
|
net: mscc: ocelot: convert QSYS_SWITCH_PORT_MODE and SYS_PORT_MODE to regfields
Currently Felix and Ocelot share the same bit layout in these per-port
registers, but Seville does not. So we need reg_fields for that.
Actually since these are per-port registers, we need to also specify the
number of ports, and register size per port, and use the regmap API for
multiple ports.
There's a more subtle point to be made about the other 2 register
fields:
- QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG
- QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE
which we are not writing any longer, for 2 reasons:
- Using the previous API (ocelot_write_rix), we were only writing 1 for
Felix and Ocelot, which was their hardware-default value, and which
there wasn't any intention in changing.
- In the case of SCH_NEXT_CFG, in fact Seville does not have this
register field at all, and therefore, if we want to have common code
we would be required to not write to it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-14 00:57:03 +08:00
|
|
|
ocelot_fields_write(ocelot, cpu, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
|
net: mscc: ocelot: eliminate confusion between CPU and NPI port
Ocelot has the concept of a CPU port. The CPU port is represented in the
forwarding and the queueing system, but it is not a physical device. The
CPU port can either be accessed via register-based injection/extraction
(which is the case of Ocelot), via Frame-DMA (similar to the first one),
or "connected" to a physical Ethernet port (called NPI in the datasheet)
which is the case of the Felix DSA switch.
In Ocelot the CPU port is at index 11.
In Felix the CPU port is at index 6.
The CPU bit is treated special in the forwarding, as it is never cleared
from the forwarding port mask (once added to it). Other than that, it is
treated the same as a normal front port.
Both Felix and Ocelot should use the CPU port in the same way. This
means that Felix should not use the NPI port directly when forwarding to
the CPU, but instead use the CPU port.
This patch is fixing this such that Felix will use port 6 as its CPU
port, and just use the NPI port to carry the traffic.
Therefore, eliminate the "ocelot->cpu" variable which was holding the
index of the NPI port for Felix, and the index of the CPU port module
for Ocelot, so the variable was actually configuring different things
for different drivers and causing at least part of the confusion.
Also remove the "ocelot->num_cpu_ports" variable, which is the result of
another confusion. The 2 CPU ports mentioned in the datasheet are
because there are two frame extraction channels (register based or DMA
based). This is of no relevance to the driver at the moment, and
invisible to the analyzer module.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Suggested-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-02-29 22:50:02 +08:00
|
|
|
/* CPU port Injection/Extraction configuration */
|
net: mscc: ocelot: convert QSYS_SWITCH_PORT_MODE and SYS_PORT_MODE to regfields
Currently Felix and Ocelot share the same bit layout in these per-port
registers, but Seville does not. So we need reg_fields for that.
Actually since these are per-port registers, we need to also specify the
number of ports, and register size per port, and use the regmap API for
multiple ports.
There's a more subtle point to be made about the other 2 register
fields:
- QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG
- QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE
which we are not writing any longer, for 2 reasons:
- Using the previous API (ocelot_write_rix), we were only writing 1 for
Felix and Ocelot, which was their hardware-default value, and which
there wasn't any intention in changing.
- In the case of SCH_NEXT_CFG, in fact Seville does not have this
register field at all, and therefore, if we want to have common code
we would be required to not write to it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-14 00:57:03 +08:00
|
|
|
ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_XTR_HDR,
|
2021-01-29 09:00:03 +08:00
|
|
|
OCELOT_TAG_PREFIX_NONE);
|
net: mscc: ocelot: convert QSYS_SWITCH_PORT_MODE and SYS_PORT_MODE to regfields
Currently Felix and Ocelot share the same bit layout in these per-port
registers, but Seville does not. So we need reg_fields for that.
Actually since these are per-port registers, we need to also specify the
number of ports, and register size per port, and use the regmap API for
multiple ports.
There's a more subtle point to be made about the other 2 register
fields:
- QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG
- QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE
which we are not writing any longer, for 2 reasons:
- Using the previous API (ocelot_write_rix), we were only writing 1 for
Felix and Ocelot, which was their hardware-default value, and which
there wasn't any intention in changing.
- In the case of SCH_NEXT_CFG, in fact Seville does not have this
register field at all, and therefore, if we want to have common code
we would be required to not write to it.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-07-14 00:57:03 +08:00
|
|
|
ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_INJ_HDR,
|
2021-01-29 09:00:03 +08:00
|
|
|
OCELOT_TAG_PREFIX_NONE);
|
2019-11-09 21:03:00 +08:00
|
|
|
|
|
|
|
/* Configure the CPU port to be VLAN aware */
|
net: mscc: ocelot: add the local station MAC addresses in VID 0
The ocelot switchdev driver does not include the CPU port in the list of
flooding destinations for unknown traffic, instead that traffic is
supposed to match FDB entries to reach the CPU.
The addresses it installs are:
(a) the station MAC address, in ocelot_probe_port() and later during
runtime in ocelot_port_set_mac_address(). These are the VLAN-unaware
addresses. The VLAN-aware addresses are in ocelot_vlan_vid_add().
(b) multicast addresses added with dev_mc_add() (not bridge host MDB
entries) in ocelot_mc_sync()
(c) multicast destination MAC addresses for MRP in ocelot_mrp_save_mac(),
to make sure those are dropped (not forwarded) by the bridging
service, just trapped to the CPU
So we can see that the logic is slightly buggy ever since the initial
commit a556c76adc05 ("net: mscc: Add initial Ocelot switch support").
This is because, when ocelot_probe_port() runs, the port pvid is 0.
Then we join a VLAN-aware bridge, the pvid becomes 1, we call
ocelot_port_set_mac_address(), this learns the new MAC address in VID 1
(also fails to forget the old one, since it thinks it's in VID 1, but
that's not so important). Then when we leave the VLAN-aware bridge,
outside world is unable to ping our new MAC address because it isn't
learned in VID 0, the VLAN-unaware pvid.
[ note: this is strictly based on static analysis, I don't have hardware
to test. But there are also many more corner cases ]
The basic idea is that we should have a separation of concerns, and the
FDB entries used for standalone operation should be managed by the
driver, and the FDB entries used by the bridging service should be
managed by the bridge. So the standalone and VLAN-unaware bridge FDB
entries should not follow the bridge PVID, because that will only be
active when the bridge is VLAN-aware. So since the port pvid is
coincidentally zero during probe time, just make those entries
statically go to VID 0.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:51 +08:00
|
|
|
ocelot_write_gix(ocelot,
|
|
|
|
ANA_PORT_VLAN_CFG_VLAN_VID(OCELOT_VLAN_UNAWARE_PVID) |
|
|
|
|
ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA |
|
|
|
|
ANA_PORT_VLAN_CFG_VLAN_POP_CNT(1),
|
2019-11-09 21:03:00 +08:00
|
|
|
ANA_PORT_VLAN_CFG, cpu);
|
|
|
|
}
|
|
|
|
|
2021-01-15 10:11:11 +08:00
|
|
|
static void ocelot_detect_features(struct ocelot *ocelot)
|
|
|
|
{
|
|
|
|
int mmgt, eq_ctrl;
|
|
|
|
|
|
|
|
/* For Ocelot, Felix, Seville, Serval etc, SYS:MMGT:MMGT:FREECNT holds
|
|
|
|
* the number of 240-byte free memory words (aka 4-cell chunks) and not
|
|
|
|
* 192 bytes as the documentation incorrectly says.
|
|
|
|
*/
|
|
|
|
mmgt = ocelot_read(ocelot, SYS_MMGT);
|
|
|
|
ocelot->packet_buffer_size = 240 * SYS_MMGT_FREECNT(mmgt);
|
|
|
|
|
|
|
|
eq_ctrl = ocelot_read(ocelot, QSYS_EQ_CTRL);
|
|
|
|
ocelot->num_frame_refs = QSYS_MMGT_EQ_CTRL_FP_FREE_CNT(eq_ctrl);
|
|
|
|
}
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
int ocelot_init(struct ocelot *ocelot)
|
|
|
|
{
|
|
|
|
char queue_name[32];
|
2019-11-09 21:03:00 +08:00
|
|
|
int i, ret;
|
|
|
|
u32 port;
|
2018-05-15 04:04:57 +08:00
|
|
|
|
2019-11-14 23:03:26 +08:00
|
|
|
if (ocelot->ops->reset) {
|
|
|
|
ret = ocelot->ops->reset(ocelot);
|
|
|
|
if (ret) {
|
|
|
|
dev_err(ocelot->dev, "Switch reset failed\n");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
ocelot->stats = devm_kcalloc(ocelot->dev,
|
|
|
|
ocelot->num_phys_ports * ocelot->num_stats,
|
|
|
|
sizeof(u64), GFP_KERNEL);
|
|
|
|
if (!ocelot->stats)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
mutex_init(&ocelot->stats_lock);
|
2019-08-12 22:45:37 +08:00
|
|
|
mutex_init(&ocelot->ptp_lock);
|
|
|
|
spin_lock_init(&ocelot->ptp_clock_lock);
|
2021-10-12 19:40:36 +08:00
|
|
|
spin_lock_init(&ocelot->ts_id_lock);
|
2018-05-15 04:04:57 +08:00
|
|
|
snprintf(queue_name, sizeof(queue_name), "%s-stats",
|
|
|
|
dev_name(ocelot->dev));
|
|
|
|
ocelot->stats_queue = create_singlethread_workqueue(queue_name);
|
|
|
|
if (!ocelot->stats_queue)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2020-12-13 03:16:12 +08:00
|
|
|
ocelot->owq = alloc_ordered_workqueue("ocelot-owq", 0);
|
|
|
|
if (!ocelot->owq) {
|
|
|
|
destroy_workqueue(ocelot->stats_queue);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2019-11-09 21:02:58 +08:00
|
|
|
INIT_LIST_HEAD(&ocelot->multicast);
|
net: mscc: ocelot: support L2 multicast entries
There is one main difference in mscc_ocelot between IP multicast and L2
multicast. With IP multicast, destination ports are encoded into the
upper bytes of the multicast MAC address. Example: to deliver the
address 01:00:5E:11:22:33 to ports 3, 8, and 9, one would need to
program the address of 00:03:08:11:22:33 into hardware. Whereas for L2
multicast, the MAC table entry points to a Port Group ID (PGID), and
that PGID contains the port mask that the packet will be forwarded to.
As to why it is this way, no clue. My guess is that not all port
combinations can be supported simultaneously with the limited number of
PGIDs, and this was somehow an issue for IP multicast but not for L2
multicast. Anyway.
Prior to this change, the raw L2 multicast code was bogus, due to the
fact that there wasn't really any way to test it using the bridge code.
There were 2 issues:
- A multicast PGID was allocated for each MDB entry, but it wasn't in
fact programmed to hardware. It was dummy.
- In fact we don't want to reserve a multicast PGID for every single MDB
entry. That would be odd because we can only have ~60 PGIDs, but
thousands of MDB entries. So instead, we want to reserve a multicast
PGID for every single port combination for multicast traffic. And
since we can have 2 (or more) MDB entries delivered to the same port
group (and therefore PGID), we need to reference-count the PGIDs.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-10-29 10:27:38 +08:00
|
|
|
INIT_LIST_HEAD(&ocelot->pgids);
|
net: mscc: ocelot: convert the VLAN masks to a list
First and foremost, the driver currently allocates a constant sized
4K * u32 (16KB memory) array for the VLAN masks. However, a typical
application might not need so many VLANs, so if we dynamically allocate
the memory as needed, we might actually save some space.
Secondly, we'll need to keep more advanced bookkeeping of the VLANs we
have, notably we'll have to check how many untagged and how many tagged
VLANs we have. This will have to stay in a structure, and allocating
another 16 KB array for that is again a bit too much.
So refactor the bridge VLANs in a linked list of structures.
The hook points inside the driver are ocelot_vlan_member_add() and
ocelot_vlan_member_del(), which previously used to operate on the
ocelot->vlan_mask[vid] array element.
ocelot_vlan_member_add() and ocelot_vlan_member_del() used to call
ocelot_vlan_member_set() to commit to the ocelot->vlan_mask.
Additionally, we had two calls to ocelot_vlan_member_set() from outside
those callers, and those were directly from ocelot_vlan_init().
Those calls do not set up bridging service VLANs, instead they:
- clear the VLAN table on reset
- set the port pvid to the value used by this driver for VLAN-unaware
standalone port operation (VID 0)
So now, when we have a structure which represents actual bridge VLANs,
VID 0 doesn't belong in that structure, since it is not part of the
bridging layer.
So delete the middle man, ocelot_vlan_member_set(), and let
ocelot_vlan_init() call directly ocelot_vlant_set_mask() which forgoes
any data structure and writes directly to hardware, which is all that we
need.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-10-21 01:58:49 +08:00
|
|
|
INIT_LIST_HEAD(&ocelot->vlans);
|
2021-01-15 10:11:11 +08:00
|
|
|
ocelot_detect_features(ocelot);
|
2018-05-15 04:04:57 +08:00
|
|
|
ocelot_mact_init(ocelot);
|
|
|
|
ocelot_vlan_init(ocelot);
|
2020-06-20 23:43:46 +08:00
|
|
|
ocelot_vcap_init(ocelot);
|
2020-09-27 03:32:01 +08:00
|
|
|
ocelot_cpu_port_init(ocelot);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
|
|
|
/* Clear all counters (5 groups) */
|
|
|
|
ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(port) |
|
|
|
|
SYS_STAT_CFG_STAT_CLEAR_SHOT(0x7f),
|
|
|
|
SYS_STAT_CFG);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Only use S-Tag */
|
|
|
|
ocelot_write(ocelot, ETH_P_8021AD, SYS_VLAN_ETYPE_CFG);
|
|
|
|
|
|
|
|
/* Aggregation mode */
|
|
|
|
ocelot_write(ocelot, ANA_AGGR_CFG_AC_SMAC_ENA |
|
|
|
|
ANA_AGGR_CFG_AC_DMAC_ENA |
|
|
|
|
ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA |
|
2021-02-06 06:02:13 +08:00
|
|
|
ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA |
|
|
|
|
ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA |
|
|
|
|
ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA,
|
|
|
|
ANA_AGGR_CFG);
|
2018-05-15 04:04:57 +08:00
|
|
|
|
|
|
|
/* Set MAC age time to default value. The entry is aged after
|
|
|
|
* 2*AGE_PERIOD
|
|
|
|
*/
|
|
|
|
ocelot_write(ocelot,
|
|
|
|
ANA_AUTOAGE_AGE_PERIOD(BR_DEFAULT_AGEING_TIME / 2 / HZ),
|
|
|
|
ANA_AUTOAGE);
|
|
|
|
|
|
|
|
/* Disable learning for frames discarded by VLAN ingress filtering */
|
|
|
|
regmap_field_write(ocelot->regfields[ANA_ADVLEARN_VLAN_CHK], 1);
|
|
|
|
|
|
|
|
/* Setup frame ageing - fixed value "2 sec" - in 6.5 us units */
|
|
|
|
ocelot_write(ocelot, SYS_FRM_AGING_AGE_TX_ENA |
|
|
|
|
SYS_FRM_AGING_MAX_AGE(307692), SYS_FRM_AGING);
|
|
|
|
|
|
|
|
/* Setup flooding PGIDs */
|
net: mscc: ocelot: fix dropping of unknown IPv4 multicast on Seville
The current assumption is that the felix DSA driver has flooding knobs
per traffic class, while ocelot switchdev has a single flooding knob.
This was correct for felix VSC9959 and ocelot VSC7514, but with the
introduction of seville VSC9953, we see a switch driven by felix.c which
has a single flooding knob.
So it is clear that we must do what should have been done from the
beginning, which is not to overwrite the configuration done by ocelot.c
in felix, but instead to teach the common ocelot library about the
differences in our switches, and set up the flooding PGIDs centrally.
The effect that the bogus iteration through FELIX_NUM_TC has upon
seville is quite dramatic. ANA_FLOODING is located at 0x00b548, and
ANA_FLOODING_IPMC is located at 0x00b54c. So the bogus iteration will
actually overwrite ANA_FLOODING_IPMC when attempting to write
ANA_FLOODING[1]. There is no ANA_FLOODING[1] in sevile, just ANA_FLOODING.
And when ANA_FLOODING_IPMC is overwritten with a bogus value, the effect
is that ANA_FLOODING_IPMC gets the value of 0x0003CF7D:
MC6_DATA = 61,
MC6_CTRL = 61,
MC4_DATA = 60,
MC4_CTRL = 0.
Because MC4_CTRL is zero, this means that IPv4 multicast control packets
are not flooded, but dropped. An invalid configuration, and this is how
the issue was actually spotted.
Reported-by: Eldar Gasanov <eldargasanov2@gmail.com>
Reported-by: Maxim Kochetkov <fido_max@inbox.ru>
Tested-by: Eldar Gasanov <eldargasanov2@gmail.com>
Fixes: 84705fc16552 ("net: dsa: felix: introduce support for Seville VSC9953 switch")
Fixes: 3c7b51bd39b2 ("net: dsa: felix: allow flooding for all traffic classes")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20201204175416.1445937-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-05 01:54:16 +08:00
|
|
|
for (i = 0; i < ocelot->num_flooding_pgids; i++)
|
|
|
|
ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) |
|
2021-02-12 23:15:58 +08:00
|
|
|
ANA_FLOODING_FLD_BROADCAST(PGID_BC) |
|
net: mscc: ocelot: fix dropping of unknown IPv4 multicast on Seville
The current assumption is that the felix DSA driver has flooding knobs
per traffic class, while ocelot switchdev has a single flooding knob.
This was correct for felix VSC9959 and ocelot VSC7514, but with the
introduction of seville VSC9953, we see a switch driven by felix.c which
has a single flooding knob.
So it is clear that we must do what should have been done from the
beginning, which is not to overwrite the configuration done by ocelot.c
in felix, but instead to teach the common ocelot library about the
differences in our switches, and set up the flooding PGIDs centrally.
The effect that the bogus iteration through FELIX_NUM_TC has upon
seville is quite dramatic. ANA_FLOODING is located at 0x00b548, and
ANA_FLOODING_IPMC is located at 0x00b54c. So the bogus iteration will
actually overwrite ANA_FLOODING_IPMC when attempting to write
ANA_FLOODING[1]. There is no ANA_FLOODING[1] in sevile, just ANA_FLOODING.
And when ANA_FLOODING_IPMC is overwritten with a bogus value, the effect
is that ANA_FLOODING_IPMC gets the value of 0x0003CF7D:
MC6_DATA = 61,
MC6_CTRL = 61,
MC4_DATA = 60,
MC4_CTRL = 0.
Because MC4_CTRL is zero, this means that IPv4 multicast control packets
are not flooded, but dropped. An invalid configuration, and this is how
the issue was actually spotted.
Reported-by: Eldar Gasanov <eldargasanov2@gmail.com>
Reported-by: Maxim Kochetkov <fido_max@inbox.ru>
Tested-by: Eldar Gasanov <eldargasanov2@gmail.com>
Fixes: 84705fc16552 ("net: dsa: felix: introduce support for Seville VSC9953 switch")
Fixes: 3c7b51bd39b2 ("net: dsa: felix: allow flooding for all traffic classes")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20201204175416.1445937-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-12-05 01:54:16 +08:00
|
|
|
ANA_FLOODING_FLD_UNICAST(PGID_UC),
|
|
|
|
ANA_FLOODING, i);
|
2018-05-15 04:04:57 +08:00
|
|
|
ocelot_write(ocelot, ANA_FLOODING_IPMC_FLD_MC6_DATA(PGID_MCIPV6) |
|
|
|
|
ANA_FLOODING_IPMC_FLD_MC6_CTRL(PGID_MC) |
|
|
|
|
ANA_FLOODING_IPMC_FLD_MC4_DATA(PGID_MCIPV4) |
|
|
|
|
ANA_FLOODING_IPMC_FLD_MC4_CTRL(PGID_MC),
|
|
|
|
ANA_FLOODING_IPMC);
|
|
|
|
|
|
|
|
for (port = 0; port < ocelot->num_phys_ports; port++) {
|
|
|
|
/* Transmit the frame to the local port. */
|
|
|
|
ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, port);
|
|
|
|
/* Do not forward BPDU frames to the front ports. */
|
|
|
|
ocelot_write_gix(ocelot,
|
|
|
|
ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0xffff),
|
|
|
|
ANA_PORT_CPU_FWD_BPDU_CFG,
|
|
|
|
port);
|
|
|
|
/* Ensure bridging is disabled */
|
|
|
|
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_SRC + port);
|
|
|
|
}
|
|
|
|
|
2020-06-21 19:46:02 +08:00
|
|
|
for_each_nonreserved_multicast_dest_pgid(ocelot, i) {
|
2018-05-15 04:04:57 +08:00
|
|
|
u32 val = ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports - 1, 0));
|
|
|
|
|
|
|
|
ocelot_write_rix(ocelot, val, ANA_PGID_PGID, i);
|
|
|
|
}
|
2021-03-17 04:10:17 +08:00
|
|
|
|
|
|
|
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_BLACKHOLE);
|
|
|
|
|
2021-02-12 23:15:58 +08:00
|
|
|
/* Allow broadcast and unknown L2 multicast to the CPU. */
|
|
|
|
ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
|
|
|
|
ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
|
|
|
|
ANA_PGID_PGID, PGID_MC);
|
|
|
|
ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
|
|
|
|
ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
|
|
|
|
ANA_PGID_PGID, PGID_BC);
|
2018-05-15 04:04:57 +08:00
|
|
|
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV4);
|
|
|
|
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_MCIPV6);
|
|
|
|
|
|
|
|
/* Allow manual injection via DEVCPU_QS registers, and byte swap these
|
|
|
|
* registers endianness.
|
|
|
|
*/
|
|
|
|
ocelot_write_rix(ocelot, QS_INJ_GRP_CFG_BYTE_SWAP |
|
|
|
|
QS_INJ_GRP_CFG_MODE(1), QS_INJ_GRP_CFG, 0);
|
|
|
|
ocelot_write_rix(ocelot, QS_XTR_GRP_CFG_BYTE_SWAP |
|
|
|
|
QS_XTR_GRP_CFG_MODE(1), QS_XTR_GRP_CFG, 0);
|
|
|
|
ocelot_write(ocelot, ANA_CPUQ_CFG_CPUQ_MIRROR(2) |
|
|
|
|
ANA_CPUQ_CFG_CPUQ_LRN(2) |
|
|
|
|
ANA_CPUQ_CFG_CPUQ_MAC_COPY(2) |
|
|
|
|
ANA_CPUQ_CFG_CPUQ_SRC_COPY(2) |
|
|
|
|
ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE(2) |
|
|
|
|
ANA_CPUQ_CFG_CPUQ_ALLBRIDGE(6) |
|
|
|
|
ANA_CPUQ_CFG_CPUQ_IPMC_CTRL(6) |
|
|
|
|
ANA_CPUQ_CFG_CPUQ_IGMP(6) |
|
|
|
|
ANA_CPUQ_CFG_CPUQ_MLD(6), ANA_CPUQ_CFG);
|
|
|
|
for (i = 0; i < 16; i++)
|
|
|
|
ocelot_write_rix(ocelot, ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL(6) |
|
|
|
|
ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
|
|
|
|
ANA_CPUQ_8021_CFG, i);
|
|
|
|
|
2019-04-16 22:51:59 +08:00
|
|
|
INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
|
2018-05-15 04:04:57 +08:00
|
|
|
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
|
|
|
|
OCELOT_STATS_CHECK_DELAY);
|
2019-08-12 22:45:37 +08:00
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_init);
|
|
|
|
|
|
|
|
void ocelot_deinit(struct ocelot *ocelot)
|
|
|
|
{
|
2019-07-25 21:33:18 +08:00
|
|
|
cancel_delayed_work(&ocelot->stats_work);
|
2018-05-15 04:04:57 +08:00
|
|
|
destroy_workqueue(ocelot->stats_queue);
|
2020-12-13 03:16:12 +08:00
|
|
|
destroy_workqueue(ocelot->owq);
|
2018-05-15 04:04:57 +08:00
|
|
|
mutex_destroy(&ocelot->stats_lock);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_deinit);
|
|
|
|
|
2020-09-18 09:07:30 +08:00
|
|
|
void ocelot_deinit_port(struct ocelot *ocelot, int port)
|
|
|
|
{
|
|
|
|
struct ocelot_port *ocelot_port = ocelot->ports[port];
|
|
|
|
|
|
|
|
skb_queue_purge(&ocelot_port->tx_skbs);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(ocelot_deinit_port);
|
|
|
|
|
2018-05-15 04:04:57 +08:00
|
|
|
MODULE_LICENSE("Dual MIT/GPL");
|