Merge branch 'mlxsw-Add-VxLAN-learning-support'

Ido Schimmel says:

====================
mlxsw: Add VxLAN learning support

This patchset adds VxLAN learning support in the mlxsw driver.

The first five patches from Petr add the required switchdev APIs which
allow device drivers to notify the VxLAN driver about learned / aged-out
FDB entries.

First in patch #1, an unnecessary argument is dropped from
__vxlan_fdb_delete().

In patches #2-#4, the VxLAN FDB handling code is extended to make
sending the switchdev events configurable; to mark user-added entries as
such; and to make sure HW-learned FDB entries do not take over
user-added ones.

Finally in patch #5, the necessary switchdev notifications are added and
handled by VxLAN, similarly to how this is handled in the bridge driver.

Patch #6 allows changing of the VxLAN's device ageing time since it is
useful for the selftest in the last patch.

Patch #7 adds support for querying bridge port flags of a given
netdevice, as a new entry should not be learned and notified to the
bridge driver in case learning is disabled on the bridge port.

Next patches gradually add learning support in mlxsw.

The last patch adds a new test case for VxLAN learning.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-11-21 17:10:32 -08:00
commit f072df95f8
13 changed files with 606 additions and 75 deletions

View File

@ -641,6 +641,10 @@ enum mlxsw_reg_sfn_rec_type {
MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7,
/* Aged-out MAC address on a LAG port. */
MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8,
/* Learned unicast tunnel record. */
MLXSW_REG_SFN_REC_TYPE_LEARNED_UNICAST_TUNNEL = 0xD,
/* Aged-out unicast tunnel record. */
MLXSW_REG_SFN_REC_TYPE_AGED_OUT_UNICAST_TUNNEL = 0xE,
};
/* reg_sfn_rec_type
@ -704,6 +708,66 @@ static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index,
*p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index);
}
/* reg_sfn_uc_tunnel_uip_msb
* When protocol is IPv4, the most significant byte of the underlay IPv4
* address of the remote VTEP.
* When protocol is IPv6, reserved.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_uip_msb, MLXSW_REG_SFN_BASE_LEN, 24,
8, MLXSW_REG_SFN_REC_LEN, 0x08, false);
/* Underlay IP protocol reported in a unicast tunnel SFN record.
 * NOTE(review): the tunnel record handler casts this value directly to
 * enum mlxsw_sp_l3proto, so the numeric values presumably have to stay in
 * sync with that enum - confirm before reordering.
 */
enum mlxsw_reg_sfn_uc_tunnel_protocol {
MLXSW_REG_SFN_UC_TUNNEL_PROTOCOL_IPV4,
MLXSW_REG_SFN_UC_TUNNEL_PROTOCOL_IPV6,
};
/* reg_sfn_uc_tunnel_protocol
* IP protocol.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_protocol, MLXSW_REG_SFN_BASE_LEN, 27,
1, MLXSW_REG_SFN_REC_LEN, 0x0C, false);
/* reg_sfn_uc_tunnel_uip_lsb
* When protocol is IPv4, the least significant bytes of the underlay
* IPv4 address of the remote VTEP.
* When protocol is IPv6, ipv6_id to be queried from TNIPSD.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_uip_lsb, MLXSW_REG_SFN_BASE_LEN, 0,
24, MLXSW_REG_SFN_REC_LEN, 0x0C, false);
/* Tunnel port types.
 * NOTE(review): presumably the encoding of the tunnel_port record field
 * defined right after this enum - confirm against the register
 * specification.
 */
enum mlxsw_reg_sfn_tunnel_port {
MLXSW_REG_SFN_TUNNEL_PORT_NVE,
MLXSW_REG_SFN_TUNNEL_PORT_VPLS,
MLXSW_REG_SFN_TUNNEL_FLEX_TUNNEL0,
MLXSW_REG_SFN_TUNNEL_FLEX_TUNNEL1,
};
/* reg_sfn_tunnel_port
* Tunnel port.
* Reserved on Spectrum.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, sfn, tunnel_port, MLXSW_REG_SFN_BASE_LEN, 0, 4,
MLXSW_REG_SFN_REC_LEN, 0x10, false);
/* Extract the fields of the unicast tunnel record at @rec_index from an SFN
 * payload: the MAC address, the FID, the underlay IP value and the underlay
 * protocol.
 *
 * The underlay IP value is stored split across two fields; it is reassembled
 * here with uip_msb as bits 31:24 and uip_lsb as bits 23:0.
 */
static inline void
mlxsw_reg_sfn_uc_tunnel_unpack(char *payload, int rec_index, char *mac,
			       u16 *p_fid, u32 *p_uip,
			       enum mlxsw_reg_sfn_uc_tunnel_protocol *p_proto)
{
	u32 hi, lo;

	mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac);
	*p_fid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index);

	hi = mlxsw_reg_sfn_uc_tunnel_uip_msb_get(payload, rec_index);
	lo = mlxsw_reg_sfn_uc_tunnel_uip_lsb_get(payload, rec_index);
	*p_uip = (hi << 24) | lo;

	*p_proto = mlxsw_reg_sfn_uc_tunnel_protocol_get(payload, rec_index);
}
/* SPMS - Switch Port MSTP/RSTP State Register
* -------------------------------------------
* Configures the spanning tree state of a physical port.

View File

@ -721,6 +721,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
/* spectrum_fid.c */
struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp,
u16 fid_index);
int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex);
struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
__be32 vni);
int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni);
@ -728,7 +731,7 @@ int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
u32 nve_flood_index);
void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid);
bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid);
int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni);
int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni, int nve_ifindex);
void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid);
bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid);
int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
@ -810,6 +813,9 @@ struct mlxsw_sp_nve_params {
extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[];
extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[];
int mlxsw_sp_nve_learned_ip_resolve(struct mlxsw_sp *mlxsw_sp, u32 uip,
enum mlxsw_sp_l3proto proto,
union mlxsw_sp_l3addr *addr);
int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fid *fid,
enum mlxsw_sp_l3proto proto,

View File

@ -15,6 +15,7 @@
struct mlxsw_sp_fid_family;
struct mlxsw_sp_fid_core {
struct rhashtable fid_ht;
struct rhashtable vni_ht;
struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX];
unsigned int *port_fid_mappings;
@ -26,10 +27,12 @@ struct mlxsw_sp_fid {
unsigned int ref_count;
u16 fid_index;
struct mlxsw_sp_fid_family *fid_family;
struct rhash_head ht_node;
struct rhash_head vni_ht_node;
__be32 vni;
u32 nve_flood_index;
int nve_ifindex;
u8 vni_valid:1,
nve_flood_index_valid:1;
};
@ -44,6 +47,12 @@ struct mlxsw_sp_fid_8021d {
int br_ifindex;
};
/* Hash table parameters for the table that indexes FIDs by their fid_index;
 * entries are linked through the ht_node member of struct mlxsw_sp_fid.
 */
static const struct rhashtable_params mlxsw_sp_fid_ht_params = {
.key_len = sizeof_field(struct mlxsw_sp_fid, fid_index),
.key_offset = offsetof(struct mlxsw_sp_fid, fid_index),
.head_offset = offsetof(struct mlxsw_sp_fid, ht_node),
};
static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = {
.key_len = sizeof_field(struct mlxsw_sp_fid, vni),
.key_offset = offsetof(struct mlxsw_sp_fid, vni),
@ -113,6 +122,29 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = {
[MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types,
};
/* Find the FID whose index is @fid_index.
 *
 * Returns the FID with its reference count incremented, or NULL when no FID
 * with that index is present in the hash table. The caller owns the extra
 * reference taken here.
 */
struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp,
						  u16 fid_index)
{
	struct rhashtable *fid_ht = &mlxsw_sp->fid_core->fid_ht;
	struct mlxsw_sp_fid *found;

	found = rhashtable_lookup_fast(fid_ht, &fid_index,
				       mlxsw_sp_fid_ht_params);
	if (!found)
		return NULL;

	found->ref_count++;
	return found;
}
/* Report the ifindex of the NVE device recorded on @fid.
 *
 * Returns 0 and stores the ifindex in @nve_ifindex when the FID has a valid
 * VNI; returns -EINVAL and leaves @nve_ifindex untouched otherwise.
 */
int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex)
{
	if (fid->vni_valid) {
		*nve_ifindex = fid->nve_ifindex;
		return 0;
	}

	return -EINVAL;
}
struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
__be32 vni)
{
@ -173,7 +205,7 @@ bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid)
return fid->nve_flood_index_valid;
}
int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni, int nve_ifindex)
{
struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
@ -183,6 +215,7 @@ int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
if (WARN_ON(!ops->vni_set || fid->vni_valid))
return -EINVAL;
fid->nve_ifindex = nve_ifindex;
fid->vni = vni;
err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht,
&fid->vni_ht_node,
@ -944,10 +977,17 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_configure;
err = rhashtable_insert_fast(&mlxsw_sp->fid_core->fid_ht, &fid->ht_node,
mlxsw_sp_fid_ht_params);
if (err)
goto err_rhashtable_insert;
list_add(&fid->list, &fid_family->fids_list);
fid->ref_count++;
return fid;
err_rhashtable_insert:
fid->fid_family->ops->deconfigure(fid);
err_configure:
__clear_bit(fid_index - fid_family->start_index,
fid_family->fids_bitmap);
@ -959,6 +999,7 @@ err_index_alloc:
void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid)
{
struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
if (--fid->ref_count == 1 && fid->rif) {
/* Destroy the associated RIF and let it drop the last
@ -967,6 +1008,8 @@ void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid)
return mlxsw_sp_rif_destroy(fid->rif);
} else if (fid->ref_count == 0) {
list_del(&fid->list);
rhashtable_remove_fast(&mlxsw_sp->fid_core->fid_ht,
&fid->ht_node, mlxsw_sp_fid_ht_params);
fid->fid_family->ops->deconfigure(fid);
__clear_bit(fid->fid_index - fid_family->start_index,
fid_family->fids_bitmap);
@ -1126,9 +1169,13 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp)
return -ENOMEM;
mlxsw_sp->fid_core = fid_core;
err = rhashtable_init(&fid_core->fid_ht, &mlxsw_sp_fid_ht_params);
if (err)
goto err_rhashtable_fid_init;
err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params);
if (err)
goto err_rhashtable_init;
goto err_rhashtable_vni_init;
fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int),
GFP_KERNEL);
@ -1157,7 +1204,9 @@ err_fid_ops_register:
kfree(fid_core->port_fid_mappings);
err_alloc_port_fid_mappings:
rhashtable_destroy(&fid_core->vni_ht);
err_rhashtable_init:
err_rhashtable_vni_init:
rhashtable_destroy(&fid_core->fid_ht);
err_rhashtable_fid_init:
kfree(fid_core);
return err;
}
@ -1172,5 +1221,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp)
fid_core->fid_family_arr[i]);
kfree(fid_core->port_fid_mappings);
rhashtable_destroy(&fid_core->vni_ht);
rhashtable_destroy(&fid_core->fid_ht);
kfree(fid_core);
}

View File

@ -174,6 +174,20 @@ mlxsw_sp_nve_mc_record_ops_arr[] = {
[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops,
};
/* Convert the underlay IP value reported by the device for a learned tunnel
 * entry into a driver L3 address.
 *
 * Only IPv4 is handled: @uip is stored in @addr in network byte order and 0
 * is returned. Any other protocol triggers a warning and returns -EINVAL
 * without writing to @addr.
 */
int mlxsw_sp_nve_learned_ip_resolve(struct mlxsw_sp *mlxsw_sp, u32 uip,
				    enum mlxsw_sp_l3proto proto,
				    union mlxsw_sp_l3addr *addr)
{
	if (proto != MLXSW_SP_L3_PROTO_IPV4) {
		WARN_ON(1);
		return -EINVAL;
	}

	addr->addr4 = cpu_to_be32(uip);
	return 0;
}
static struct mlxsw_sp_nve_mc_list *
mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_nve_mc_list_key *key)
@ -803,7 +817,7 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
return err;
}
err = mlxsw_sp_fid_vni_set(fid, params->vni);
err = mlxsw_sp_fid_vni_set(fid, params->vni, params->dev->ifindex);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID");
goto err_fid_vni_set;

View File

@ -17,7 +17,8 @@
#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96
#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX
#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS (VXLAN_F_UDP_ZERO_CSUM_TX | \
VXLAN_F_LEARN)
static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
const struct net_device *dev,
@ -61,11 +62,6 @@ static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
return false;
}
if (cfg->flags & VXLAN_F_LEARN) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported");
return false;
}
if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) {
NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported");
return false;

View File

@ -2311,6 +2311,71 @@ void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
bridge_device->ops->vxlan_leave(bridge_device, vxlan_dev);
}
/* Translate a VxLAN address into the driver's protocol / address pair.
 * AF_INET maps to IPv4; every other address family is treated as IPv6.
 */
static void
mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr,
				      enum mlxsw_sp_l3proto *proto,
				      union mlxsw_sp_l3addr *addr)
{
	switch (vxlan_addr->sa.sa_family) {
	case AF_INET:
		addr->addr4 = vxlan_addr->sin.sin_addr.s_addr;
		*proto = MLXSW_SP_L3_PROTO_IPV4;
		break;
	default:
		addr->addr6 = vxlan_addr->sin6.sin6_addr;
		*proto = MLXSW_SP_L3_PROTO_IPV6;
		break;
	}
}
/* Translate a driver protocol / address pair into a VxLAN address.
 * Only the address family and the IP address are written; @vxlan_addr is
 * left untouched for a protocol that is neither IPv4 nor IPv6.
 */
static void
mlxsw_sp_switchdev_addr_vxlan_convert(enum mlxsw_sp_l3proto proto,
				      const union mlxsw_sp_l3addr *addr,
				      union vxlan_addr *vxlan_addr)
{
	if (proto == MLXSW_SP_L3_PROTO_IPV4) {
		vxlan_addr->sa.sa_family = AF_INET;
		vxlan_addr->sin.sin_addr.s_addr = addr->addr4;
	} else if (proto == MLXSW_SP_L3_PROTO_IPV6) {
		vxlan_addr->sa.sa_family = AF_INET6;
		vxlan_addr->sin6.sin6_addr = addr->addr6;
	}
}
/* Notify the VxLAN driver about an FDB entry the device learned (@adding is
 * true) or aged out (@adding is false) behind the remote VTEP @addr.
 *
 * The remote port is taken from the VxLAN device configuration and @vni is
 * used both as the entry's VNI and as the remote VNI.
 */
static void mlxsw_sp_fdb_vxlan_call_notifiers(struct net_device *dev,
					      const char *mac,
					      enum mlxsw_sp_l3proto proto,
					      union mlxsw_sp_l3addr *addr,
					      __be32 vni, bool adding)
{
	/* Start from an all-zero structure: the address conversion below only
	 * fills in the family and the IP address, and added_by_user is not
	 * set here at all, so without this the notifier chain would be handed
	 * uninitialised stack contents in those fields.
	 */
	struct switchdev_notifier_vxlan_fdb_info info = {};
	struct vxlan_dev *vxlan = netdev_priv(dev);
	enum switchdev_notifier_type type;

	type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE :
			SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE;
	mlxsw_sp_switchdev_addr_vxlan_convert(proto, addr, &info.remote_ip);
	info.remote_port = vxlan->cfg.dst_port;
	info.remote_vni = vni;
	info.remote_ifindex = 0;
	ether_addr_copy(info.eth_addr, mac);
	info.vni = vni;
	info.offloaded = adding;
	call_switchdev_notifiers(type, dev, &info.info);
}
/* Forward a learned / aged-out tunnel FDB entry to the driver that owns the
 * NVE device. Only VxLAN devices are handled; for any other device this is
 * a no-op.
 */
static void mlxsw_sp_fdb_nve_call_notifiers(struct net_device *dev,
					    const char *mac,
					    enum mlxsw_sp_l3proto proto,
					    union mlxsw_sp_l3addr *addr,
					    __be32 vni,
					    bool adding)
{
	if (!netif_is_vxlan(dev))
		return;

	mlxsw_sp_fdb_vxlan_call_notifiers(dev, mac, proto, addr, vni, adding);
}
static void
mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
const char *mac, u16 vid,
@ -2442,6 +2507,122 @@ just_remove:
goto do_fdb_op;
}
/* Validate that a learned / aged-out tunnel FDB record on @fid can be
 * processed and collect what the caller needs to send notifications.
 *
 * On success returns 0 and fills in @nve_dev (the NVE device recorded on the
 * FID), @p_vid (as reported by the bridge device's fid_vid() operation) and
 * @p_vni. Otherwise returns the error from the FID helpers or -EINVAL.
 * Note that @p_vni, and @nve_dev once the device has been found, may already
 * be written when a later check fails.
 */
static int
__mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_fid *fid,
bool adding,
struct net_device **nve_dev,
u16 *p_vid, __be32 *p_vni)
{
struct mlxsw_sp_bridge_device *bridge_device;
struct net_device *br_dev, *dev;
int nve_ifindex;
int err;
err = mlxsw_sp_fid_nve_ifindex(fid, &nve_ifindex);
if (err)
return err;
err = mlxsw_sp_fid_vni(fid, p_vni);
if (err)
return err;
/* NOTE(review): the device is only looked up in init_net, and the
 * double-underscore lookup presumably relies on the caller holding
 * RTNL - confirm both.
 */
dev = __dev_get_by_index(&init_net, nve_ifindex);
if (!dev)
return -EINVAL;
*nve_dev = dev;
if (!netif_running(dev))
return -EINVAL;
/* The learning checks apply to new entries only; an aged-out entry is
 * processed even if learning has been disabled in the meantime.
 */
if (adding && !br_port_flag_is_set(dev, BR_LEARNING))
return -EINVAL;
if (adding && netif_is_vxlan(dev)) {
struct vxlan_dev *vxlan = netdev_priv(dev);
if (!(vxlan->cfg.flags & VXLAN_F_LEARN))
return -EINVAL;
}
/* The NVE device must be enslaved to a bridge this driver tracks. */
br_dev = netdev_master_upper_dev_get(dev);
if (!br_dev)
return -EINVAL;
bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
if (!bridge_device)
return -EINVAL;
*p_vid = bridge_device->ops->fid_vid(bridge_device, fid);
return 0;
}
/* Handle one learned (@adding is true) or aged-out (@adding is false) unicast
 * tunnel record from an SFN payload.
 *
 * On success the entry is written to / removed from the device FDB and both
 * the NVE driver and the bridge are notified. If the record cannot be
 * processed, the entry is removed from the device instead.
 */
static void mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp,
						      char *sfn_pl,
						      int rec_index,
						      bool adding)
{
	enum mlxsw_reg_sfn_uc_tunnel_protocol sfn_proto;
	enum switchdev_notifier_type type;
	/* Zero-initialised because the error path below passes &addr to the
	 * removal operation even when the FID lookup or the IP resolution
	 * failed, i.e. before anything was written to it.
	 */
	union mlxsw_sp_l3addr addr = {};
	enum mlxsw_sp_l3proto proto;
	struct net_device *nve_dev;
	struct mlxsw_sp_fid *fid;
	char mac[ETH_ALEN];
	u16 fid_index, vid;
	__be32 vni;
	u32 uip;
	int err;

	mlxsw_reg_sfn_uc_tunnel_unpack(sfn_pl, rec_index, mac, &fid_index,
				       &uip, &sfn_proto);
	/* The register's protocol value is used directly as the driver's L3
	 * protocol.
	 */
	proto = (enum mlxsw_sp_l3proto) sfn_proto;

	/* Takes a reference on the FID; dropped on every path below. */
	fid = mlxsw_sp_fid_lookup_by_index(mlxsw_sp, fid_index);
	if (!fid)
		goto err_fid_lookup;

	err = mlxsw_sp_nve_learned_ip_resolve(mlxsw_sp, uip, proto, &addr);
	if (err)
		goto err_ip_resolve;

	err = __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, fid, adding,
							  &nve_dev, &vid, &vni);
	if (err)
		goto err_fdb_process;

	err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, mac, fid_index, proto,
					     &addr, adding, true);
	if (err)
		goto err_fdb_op;

	mlxsw_sp_fdb_nve_call_notifiers(nve_dev, mac, proto, &addr, vni,
					adding);

	type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE :
			SWITCHDEV_FDB_DEL_TO_BRIDGE;
	mlxsw_sp_fdb_call_notifiers(type, mac, vid, nve_dev, adding);

	mlxsw_sp_fid_put(fid);

	return;

err_fdb_op:
err_fdb_process:
err_ip_resolve:
	mlxsw_sp_fid_put(fid);
err_fid_lookup:
	/* Remove an FDB entry in case we cannot process it. Otherwise the
	 * device will keep sending the same notification over and over again.
	 */
	mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, mac, fid_index, proto, &addr,
				       false, true);
}
static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
char *sfn_pl, int rec_index)
{
@ -2462,6 +2643,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl,
rec_index, false);
break;
case MLXSW_REG_SFN_REC_TYPE_LEARNED_UNICAST_TUNNEL:
mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, sfn_pl,
rec_index, true);
break;
case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_UNICAST_TUNNEL:
mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, sfn_pl,
rec_index, false);
break;
}
}
@ -2516,20 +2705,6 @@ struct mlxsw_sp_switchdev_event_work {
unsigned long event;
};
/* Translate a VxLAN address into the driver's protocol / address pair.
 * AF_INET maps to IPv4; every other address family is treated as IPv6.
 */
static void
mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr,
enum mlxsw_sp_l3proto *proto,
union mlxsw_sp_l3addr *addr)
{
if (vxlan_addr->sa.sa_family == AF_INET) {
addr->addr4 = vxlan_addr->sin.sin_addr.s_addr;
*proto = MLXSW_SP_L3_PROTO_IPV4;
} else {
addr->addr6 = vxlan_addr->sin6.sin6_addr;
*proto = MLXSW_SP_L3_PROTO_IPV6;
}
}
static void
mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_switchdev_event_work *
@ -2595,7 +2770,8 @@ mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work *
switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE)
return;
if (!switchdev_work->fdb_info.added_by_user)
if (switchdev_work->event == SWITCHDEV_FDB_ADD_TO_DEVICE &&
!switchdev_work->fdb_info.added_by_user)
return;
if (!netif_running(dev))

View File

@ -79,9 +79,11 @@ struct vxlan_fdb {
u8 eth_addr[ETH_ALEN];
u16 state; /* see ndm_state */
__be32 vni;
u8 flags; /* see ndm_flags */
u16 flags; /* see ndm_flags and below */
};
#define NTF_VXLAN_ADDED_BY_USER 0x100
/* salt for hash table */
static u32 vxlan_salt __read_mostly;
@ -376,6 +378,7 @@ static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
.remote_ifindex = rd->remote_ifindex,
.vni = fdb->vni,
.offloaded = rd->offloaded,
.added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER,
};
memcpy(info.eth_addr, fdb->eth_addr, ETH_ALEN);
@ -384,15 +387,19 @@ static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
}
static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
struct vxlan_rdst *rd, int type)
struct vxlan_rdst *rd, int type, bool swdev_notify)
{
switch (type) {
case RTM_NEWNEIGH:
vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, true);
break;
case RTM_DELNEIGH:
vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, false);
break;
if (swdev_notify) {
switch (type) {
case RTM_NEWNEIGH:
vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
true);
break;
case RTM_DELNEIGH:
vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
false);
break;
}
}
__vxlan_fdb_notify(vxlan, fdb, rd, type);
@ -409,7 +416,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
.remote_vni = cpu_to_be32(VXLAN_N_VID),
};
vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true);
}
static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
@ -421,7 +428,7 @@ static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
memcpy(f.eth_addr, eth_addr, ETH_ALEN);
vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true);
}
/* Hash Ethernet address */
@ -540,6 +547,7 @@ int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
fdb_info->remote_ifindex = rdst->remote_ifindex;
fdb_info->vni = vni;
fdb_info->offloaded = rdst->offloaded;
fdb_info->added_by_user = f->flags & NTF_VXLAN_ADDED_BY_USER;
ether_addr_copy(fdb_info->eth_addr, mac);
out:
@ -700,7 +708,7 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
const u8 *mac, __u16 state,
__be32 src_vni, __u8 ndm_flags)
__be32 src_vni, __u16 ndm_flags)
{
struct vxlan_fdb *f;
@ -720,7 +728,7 @@ static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __be16 port, __be32 src_vni,
__be32 vni, __u32 ifindex, __u8 ndm_flags,
__be32 vni, __u32 ifindex, __u16 ndm_flags,
struct vxlan_fdb **fdb)
{
struct vxlan_rdst *rd = NULL;
@ -756,9 +764,10 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags,
__be16 port, __be32 src_vni, __be32 vni,
__u32 ifindex, __u8 ndm_flags)
__u32 ifindex, __u16 ndm_flags,
bool swdev_notify)
{
__u8 fdb_flags = (ndm_flags & ~NTF_USE);
__u16 fdb_flags = (ndm_flags & ~NTF_USE);
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
int notify = 0;
@ -771,16 +780,24 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,
"lost race to create %pM\n", mac);
return -EEXIST;
}
if (f->state != state) {
f->state = state;
f->updated = jiffies;
notify = 1;
}
if (f->flags != fdb_flags) {
f->flags = fdb_flags;
f->updated = jiffies;
notify = 1;
/* Do not allow an externally learned entry to take over an
* entry added by the user.
*/
if (!(fdb_flags & NTF_EXT_LEARNED) ||
!(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
if (f->state != state) {
f->state = state;
f->updated = jiffies;
notify = 1;
}
if (f->flags != fdb_flags) {
f->flags = fdb_flags;
f->updated = jiffies;
notify = 1;
}
}
if ((flags & NLM_F_REPLACE)) {
/* Only change unicasts */
if (!(is_multicast_ether_addr(f->eth_addr) ||
@ -822,7 +839,7 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,
if (notify) {
if (rd == NULL)
rd = first_remote_rtnl(f);
vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH);
vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH, swdev_notify);
}
return 0;
@ -841,7 +858,7 @@ static void vxlan_fdb_free(struct rcu_head *head)
}
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
bool do_notify)
bool do_notify, bool swdev_notify)
{
struct vxlan_rdst *rd;
@ -851,7 +868,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
--vxlan->addrcnt;
if (do_notify)
list_for_each_entry(rd, &f->remotes, list)
vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
swdev_notify);
hlist_del_rcu(&f->hlist);
call_rcu(&f->rcu, vxlan_fdb_free);
@ -866,10 +884,10 @@ static void vxlan_dst_free(struct rcu_head *head)
}
static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
struct vxlan_rdst *rd)
struct vxlan_rdst *rd, bool swdev_notify)
{
list_del_rcu(&rd->list);
vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify);
call_rcu(&rd->rcu, vxlan_dst_free);
}
@ -968,7 +986,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
port, src_vni, vni, ifindex, ndm->ndm_flags);
port, src_vni, vni, ifindex,
ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
true);
spin_unlock_bh(&vxlan->hash_lock);
return err;
@ -977,7 +997,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
const unsigned char *addr, union vxlan_addr ip,
__be16 port, __be32 src_vni, __be32 vni,
u32 ifindex, u16 vid)
u32 ifindex, bool swdev_notify)
{
struct vxlan_fdb *f;
struct vxlan_rdst *rd = NULL;
@ -997,11 +1017,11 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
* otherwise destroy the fdb entry
*/
if (rd && !list_is_singular(&f->remotes)) {
vxlan_fdb_dst_destroy(vxlan, f, rd);
vxlan_fdb_dst_destroy(vxlan, f, rd, swdev_notify);
goto out;
}
vxlan_fdb_destroy(vxlan, f, true);
vxlan_fdb_destroy(vxlan, f, true, swdev_notify);
out:
return 0;
@ -1025,7 +1045,7 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
spin_lock_bh(&vxlan->hash_lock);
err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
vid);
true);
spin_unlock_bh(&vxlan->hash_lock);
return err;
@ -1103,7 +1123,7 @@ static bool vxlan_snoop(struct net_device *dev,
rdst->remote_ip = *src_ip;
f->updated = jiffies;
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH);
vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true);
} else {
/* learned new entry */
spin_lock(&vxlan->hash_lock);
@ -1116,7 +1136,7 @@ static bool vxlan_snoop(struct net_device *dev,
vxlan->cfg.dst_port,
vni,
vxlan->default_dst.remote_vni,
ifindex, NTF_SELF);
ifindex, NTF_SELF, true);
spin_unlock(&vxlan->hash_lock);
}
@ -2500,7 +2520,7 @@ static void vxlan_cleanup(struct timer_list *t)
"garbage collect %pM\n",
f->eth_addr);
f->state = NUD_STALE;
vxlan_fdb_destroy(vxlan, f, true);
vxlan_fdb_destroy(vxlan, f, true, true);
} else if (time_before(timeout, next_timer))
next_timer = timeout;
}
@ -2551,7 +2571,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
spin_lock_bh(&vxlan->hash_lock);
f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f)
vxlan_fdb_destroy(vxlan, f, true);
vxlan_fdb_destroy(vxlan, f, true, true);
spin_unlock_bh(&vxlan->hash_lock);
}
@ -2605,7 +2625,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
continue;
/* the all_zeros_mac entry is deleted at vxlan_uninit */
if (!is_zero_ether_addr(f->eth_addr))
vxlan_fdb_destroy(vxlan, f, true);
vxlan_fdb_destroy(vxlan, f, true, true);
}
}
spin_unlock_bh(&vxlan->hash_lock);
@ -3343,13 +3363,14 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
/* notify default fdb entry */
if (f)
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
true);
list_add(&vxlan->next, &vn->vxlan_list);
return 0;
errout:
if (f)
vxlan_fdb_destroy(vxlan, f, false);
vxlan_fdb_destroy(vxlan, f, false, false);
return err;
}
@ -3437,11 +3458,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
conf->flags |= VXLAN_F_LEARN;
}
if (data[IFLA_VXLAN_AGEING]) {
if (changelink)
return -EOPNOTSUPP;
if (data[IFLA_VXLAN_AGEING])
conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
}
if (data[IFLA_VXLAN_PROXY]) {
if (changelink)
@ -3586,6 +3604,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *dst = &vxlan->default_dst;
unsigned long old_age_interval;
struct vxlan_rdst old_dst;
struct vxlan_config conf;
struct vxlan_fdb *f = NULL;
@ -3596,12 +3615,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (err)
return err;
old_age_interval = vxlan->cfg.age_interval;
memcpy(&old_dst, dst, sizeof(struct vxlan_rdst));
err = vxlan_dev_configure(vxlan->net, dev, &conf, true, extack);
if (err)
return err;
if (old_age_interval != vxlan->cfg.age_interval)
mod_timer(&vxlan->age_timer, jiffies);
/* handle default dst entry */
if (!vxlan_addr_equal(&dst->remote_ip, &old_dst.remote_ip)) {
spin_lock_bh(&vxlan->hash_lock);
@ -3611,7 +3634,8 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
vxlan->cfg.dst_port,
old_dst.remote_vni,
old_dst.remote_vni,
old_dst.remote_ifindex, 0);
old_dst.remote_ifindex,
true);
if (!vxlan_addr_any(&dst->remote_ip)) {
err = vxlan_fdb_create(vxlan, all_zeros_mac,
@ -3626,7 +3650,8 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
spin_unlock_bh(&vxlan->hash_lock);
return err;
}
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
RTM_NEWNEIGH, true);
}
spin_unlock_bh(&vxlan->hash_lock);
}
@ -3900,18 +3925,89 @@ out:
spin_unlock_bh(&vxlan->hash_lock);
}
static int
vxlan_fdb_external_learn_add(struct net_device *dev,
struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
int err;
spin_lock_bh(&vxlan->hash_lock);
err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
NUD_REACHABLE,
NLM_F_CREATE | NLM_F_REPLACE,
fdb_info->remote_port,
fdb_info->vni,
fdb_info->remote_vni,
fdb_info->remote_ifindex,
NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
false);
spin_unlock_bh(&vxlan->hash_lock);
return err;
}
/* Remove an externally learned FDB entry, under the FDB hash lock.
 *
 * Returns -ENOENT when no entry exists for the MAC / VNI pair. An entry that
 * is not flagged NTF_EXT_LEARNED is left in place and 0 is returned.
 * Otherwise the result of __vxlan_fdb_delete() is returned; switchdev
 * notifications are suppressed for that deletion.
 */
static int
vxlan_fdb_external_learn_del(struct net_device *dev,
			     struct switchdev_notifier_vxlan_fdb_info *fdb_info)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb *entry;
	int ret = 0;

	spin_lock_bh(&vxlan->hash_lock);

	entry = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
	if (!entry) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Entries that were not learned externally are not ours to remove. */
	if (!(entry->flags & NTF_EXT_LEARNED))
		goto unlock;

	ret = __vxlan_fdb_delete(vxlan, fdb_info->eth_addr,
				 fdb_info->remote_ip,
				 fdb_info->remote_port,
				 fdb_info->vni,
				 fdb_info->remote_vni,
				 fdb_info->remote_ifindex,
				 false);

unlock:
	spin_unlock_bh(&vxlan->hash_lock);

	return ret;
}
static int vxlan_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct switchdev_notifier_vxlan_fdb_info *fdb_info;
int err = 0;
switch (event) {
case SWITCHDEV_VXLAN_FDB_OFFLOADED:
vxlan_fdb_offloaded_set(dev, ptr);
break;
case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
fdb_info = ptr;
err = vxlan_fdb_external_learn_add(dev, fdb_info);
if (err) {
err = notifier_from_errno(err);
break;
}
fdb_info->offloaded = true;
vxlan_fdb_offloaded_set(dev, fdb_info);
break;
case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
fdb_info = ptr;
err = vxlan_fdb_external_learn_del(dev, fdb_info);
if (err) {
err = notifier_from_errno(err);
break;
}
fdb_info->offloaded = false;
vxlan_fdb_offloaded_set(dev, fdb_info);
break;
}
return 0;
return err;
}
static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {

View File

@ -119,6 +119,7 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
struct net_device *br_fdb_find_port(const struct net_device *br_dev,
const unsigned char *addr,
__u16 vid);
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
#else
static inline struct net_device *
br_fdb_find_port(const struct net_device *br_dev,
@ -127,6 +128,11 @@ br_fdb_find_port(const struct net_device *br_dev,
{
return NULL;
}
/* Fallback used in the #else branch, where the real implementation is not
 * declared: every flag is reported as not set.
 */
static inline bool
br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
{
return false;
}
#endif
#endif

View File

@ -146,6 +146,8 @@ enum switchdev_notifier_type {
SWITCHDEV_FDB_DEL_TO_DEVICE,
SWITCHDEV_FDB_OFFLOADED,
SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE,
SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE,
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_OFFLOADED,

View File

@ -421,6 +421,7 @@ struct switchdev_notifier_vxlan_fdb_info {
u8 eth_addr[ETH_ALEN];
__be32 vni;
bool offloaded;
bool added_by_user;
};
#if IS_ENABLED(CONFIG_VXLAN)

View File

@ -741,3 +741,15 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
if (mask & BR_NEIGH_SUPPRESS)
br_recalculate_neigh_suppress_enabled(br);
}
/* Test whether any bit of @flag is set in the flags of the bridge port
 * associated with @dev. Returns false when no bridge port is found for @dev.
 */
bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
{
	struct net_bridge_port *port = br_port_get_rtnl_rcu(dev);

	if (port)
		return port->flags & flag;

	return false;
}
EXPORT_SYMBOL_GPL(br_port_flag_is_set);

View File

@ -167,7 +167,7 @@ sanitization_single_dev_learning_enabled_test()
ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \
ttl 20 tos inherit local 198.51.100.1 dstport 4789
sanitization_single_dev_test_fail
sanitization_single_dev_test_pass
ip link del dev vxlan0
ip link del dev br0

View File

@ -76,6 +76,7 @@ export VXPORT
ping_ipv4
test_flood
test_unicast
test_learning
"}
NUM_NETIFS=6
@ -663,6 +664,113 @@ test_ecn_decap()
test_ecn_decap_error
}
# Exercise FDB learning on VxLAN device vx1. In order, the sub-tests check:
# flooding before anything is learned, appearance of the learned entry,
# forwarding through the learned entry, manual deletion, ageing, and the
# effect of toggling learning on the bridge port. The learning and ageing
# settings changed at the start are restored at the end.
test_learning()
{
local mac=de:ad:be:ef:13:37
local dst=192.0.2.100
# Enable learning on the VxLAN device and set ageing time to 10 seconds
ip link set dev br1 type bridge ageing_time 1000
ip link set dev vx1 type vxlan ageing 10
ip link set dev vx1 type vxlan learning
reapply_config
# Check that flooding works
RET=0
vxlan_flood_test $mac $dst 10 10 10
log_test "VXLAN: flood before learning"
# Send a packet with source mac set to $mac from host w2 and check that
# a corresponding entry is created in VxLAN device vx1
RET=0
in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \
-t icmp -q
sleep 1
# The entry is expected twice: once with the "self" keyword and once
# without it
bridge fdb show brport vx1 | grep $mac | grep -q self
check_err $?
bridge fdb show brport vx1 | grep $mac | grep -q -v self
check_err $?
log_test "VXLAN: show learned FDB entry"
# Repeat first test and check that packets only reach host w2 in ns1
RET=0
vxlan_flood_test $mac $dst 0 10 0
log_test "VXLAN: learned FDB entry"
# Delete the learned FDB entry from the VxLAN and bridge devices and
# check that packets are flooded
RET=0
bridge fdb del dev vx1 $mac master self
sleep 1
vxlan_flood_test $mac $dst 10 10 10
log_test "VXLAN: deletion of learned FDB entry"
# Re-learn the first FDB entry and check that it is correctly aged-out
RET=0
in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \
-t icmp -q
sleep 1
bridge fdb show brport vx1 | grep $mac | grep -q self
check_err $?
bridge fdb show brport vx1 | grep $mac | grep -q -v self
check_err $?
vxlan_flood_test $mac $dst 0 10 0
# Wait longer than the 10 second ageing time configured at the top of
# this function
sleep 20
bridge fdb show brport vx1 | grep $mac | grep -q self
check_fail $?
bridge fdb show brport vx1 | grep $mac | grep -q -v self
check_fail $?
vxlan_flood_test $mac $dst 10 10 10
log_test "VXLAN: Ageing of learned FDB entry"
# Toggle learning on the bridge port and check that the bridge's FDB
# is populated only when it should
RET=0
ip link set dev vx1 type bridge_slave learning off
in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \
-t icmp -q
sleep 1
bridge fdb show brport vx1 | grep $mac | grep -q -v self
check_fail $?
ip link set dev vx1 type bridge_slave learning on
in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \
-t icmp -q
sleep 1
bridge fdb show brport vx1 | grep $mac | grep -q -v self
check_err $?
log_test "VXLAN: learning toggling on bridge port"
# Restore previous settings
ip link set dev vx1 type vxlan nolearning
ip link set dev vx1 type vxlan ageing 300
ip link set dev br1 type bridge ageing_time 30000
reapply_config
}
test_all()
{
echo "Running tests with UDP port $VXPORT"