Merge branch 'mlxsw-Add-one-armed-router-support'
Ido Schimmel says: ==================== mlxsw: Add one-armed router support Up until now, when a packet was routed by the ASIC through the same router interface (RIF) from which it ingressed, the ASIC passed the sole copy of the packet to the kernel. This allowed the kernel to route the packet and also potentially generate an ICMP redirect. There are scenarios (e.g., "one-armed router") where packets are intentionally routed this way and are therefore not deemed exceptions. In such scenarios the current method of trapping packets to the CPU is problematic, as it results in major packet loss. This patchset solves the problem by having the ASIC forward the packet, but also send a copy to the CPU, which gives the kernel the opportunity to generate the required exceptions. To prevent the kernel from forwarding such packets again, the driver marks them with 'offload_l3_fwd_mark', which causes the kernel to consume them in ip{,6}_forward_finish(). Patch #1 renames 'offload_mr_fwd_mark' to 'offload_l3_fwd_mark'. When set, the field indicates that a packet was already forwarded in L3 (unicast / multicast) by a capable device. Patch #2 teaches the kernel to consume unicast packets that have 'offload_l3_fwd_mark' set. Patch #3 changes mlxsw to mirror loopbacked (iRIF == eRIF) packets, instead of trapping them. Patch #4 adds a test case for the above-mentioned scenario. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
55827458e0
|
@ -5072,6 +5072,7 @@ enum mlxsw_reg_htgt_trap_group {
|
|||
MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
|
||||
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
|
||||
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
|
||||
MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
|
||||
};
|
||||
|
||||
/* reg_htgt_trap_group
|
||||
|
|
|
@ -3554,10 +3554,10 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
|
|||
return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
|
||||
}
|
||||
|
||||
static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb,
|
||||
static void mlxsw_sp_rx_listener_l3_mark_func(struct sk_buff *skb,
|
||||
u8 local_port, void *priv)
|
||||
{
|
||||
skb->offload_mr_fwd_mark = 1;
|
||||
skb->offload_l3_fwd_mark = 1;
|
||||
skb->offload_fwd_mark = 1;
|
||||
return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
|
||||
}
|
||||
|
@ -3605,8 +3605,8 @@ out:
|
|||
MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \
|
||||
_is_ctrl, SP_##_trap_group, DISCARD)
|
||||
|
||||
#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
|
||||
MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \
|
||||
#define MLXSW_SP_RXL_L3_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
|
||||
MLXSW_RXL(mlxsw_sp_rx_listener_l3_mark_func, _trap_id, _action, \
|
||||
_is_ctrl, SP_##_trap_group, DISCARD)
|
||||
|
||||
#define MLXSW_SP_EVENTL(_func, _trap_id) \
|
||||
|
@ -3639,7 +3639,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
|
|||
/* L3 traps */
|
||||
MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
|
||||
MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
|
||||
MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
|
||||
MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false),
|
||||
MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
|
||||
MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
|
||||
false),
|
||||
|
@ -3683,7 +3683,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
|
|||
MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
|
||||
MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
|
||||
MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
|
||||
MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
|
||||
MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
|
||||
/* NVE traps */
|
||||
MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false),
|
||||
};
|
||||
|
@ -3713,6 +3713,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
|
|||
case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
|
||||
case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
|
||||
case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
|
||||
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
|
||||
rate = 128;
|
||||
burst_size = 7;
|
||||
break;
|
||||
|
@ -3798,6 +3799,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
|
|||
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
|
||||
case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
|
||||
case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
|
||||
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
|
||||
priority = 1;
|
||||
tc = 1;
|
||||
break;
|
||||
|
|
|
@ -616,6 +616,8 @@ typedef unsigned char *sk_buff_data_t;
|
|||
* @pkt_type: Packet class
|
||||
* @fclone: skbuff clone status
|
||||
* @ipvs_property: skbuff is owned by ipvs
|
||||
* @offload_fwd_mark: Packet was L2-forwarded in hardware
|
||||
* @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
|
||||
* @tc_skip_classify: do not classify packet. set by IFB device
|
||||
* @tc_at_ingress: used within tc_classify to distinguish in/egress
|
||||
* @tc_redirected: packet was redirected by a tc action
|
||||
|
@ -799,7 +801,7 @@ struct sk_buff {
|
|||
__u8 remcsum_offload:1;
|
||||
#ifdef CONFIG_NET_SWITCHDEV
|
||||
__u8 offload_fwd_mark:1;
|
||||
__u8 offload_mr_fwd_mark:1;
|
||||
__u8 offload_l3_fwd_mark:1;
|
||||
#endif
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
__u8 tc_skip_classify:1;
|
||||
|
|
|
@ -4885,7 +4885,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
|
|||
|
||||
#ifdef CONFIG_NET_SWITCHDEV
|
||||
skb->offload_fwd_mark = 0;
|
||||
skb->offload_mr_fwd_mark = 0;
|
||||
skb->offload_l3_fwd_mark = 0;
|
||||
#endif
|
||||
|
||||
if (!xnet)
|
||||
|
|
|
@ -69,6 +69,13 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
|
|||
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
|
||||
__IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
|
||||
|
||||
#ifdef CONFIG_NET_SWITCHDEV
|
||||
if (skb->offload_l3_fwd_mark) {
|
||||
consume_skb(skb);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (unlikely(opt->optlen))
|
||||
ip_forward_options(skb);
|
||||
|
||||
|
|
|
@ -1802,7 +1802,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
|
|||
struct vif_device *out_vif = &mrt->vif_table[out_vifi];
|
||||
struct vif_device *in_vif = &mrt->vif_table[in_vifi];
|
||||
|
||||
if (!skb->offload_mr_fwd_mark)
|
||||
if (!skb->offload_l3_fwd_mark)
|
||||
return false;
|
||||
if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
|
||||
return false;
|
||||
|
|
|
@ -378,6 +378,13 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
|
|||
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
|
||||
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
|
||||
|
||||
#ifdef CONFIG_NET_SWITCHDEV
|
||||
if (skb->offload_l3_fwd_mark) {
|
||||
consume_skb(skb);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return dst_output(net, sk, skb);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,259 @@
|
|||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2
|
||||
# should be forwarded by the ASIC, but also trapped so that ICMP redirect
|
||||
# packets could be potentially generated.
|
||||
#
|
||||
# 1. https://en.wikipedia.org/wiki/One-armed_router
|
||||
#
|
||||
# +---------------------------------+
|
||||
# | H1 (vrf) |
|
||||
# | + $h1 |
|
||||
# | | 192.0.2.1/24 |
|
||||
# | | 2001:db8:1::1/64 |
|
||||
# | | |
|
||||
# | | default via 192.0.2.2 |
|
||||
# | | default via 2001:db8:1::2 |
|
||||
# +----|----------------------------+
|
||||
# |
|
||||
# +----|----------------------------------------------------------------------+
|
||||
# | SW | |
|
||||
# | +--|--------------------------------------------------------------------+ |
|
||||
# | | + $swp1 BR0 (802.1d) | |
|
||||
# | | | |
|
||||
# | | 192.0.2.2/24 | |
|
||||
# | | 2001:db8:1::2/64 | |
|
||||
# | | 198.51.100.2/24 | |
|
||||
# | | 2001:db8:2::2/64 | |
|
||||
# | | | |
|
||||
# | | + $swp2 | |
|
||||
# | +--|--------------------------------------------------------------------+ |
|
||||
# | | |
|
||||
# +----|----------------------------------------------------------------------+
|
||||
# |
|
||||
# +----|----------------------------+
|
||||
# | | default via 198.51.100.2 |
|
||||
# | | default via 2001:db8:2::2 |
|
||||
# | | |
|
||||
# | | 2001:db8:2::1/64 |
|
||||
# | | 198.51.100.1/24 |
|
||||
# | + $h2 |
|
||||
# | H2 (vrf) |
|
||||
# +---------------------------------+
|
||||
|
||||
lib_dir=$(dirname $0)/../../../net/forwarding
|
||||
|
||||
ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6"
|
||||
NUM_NETIFS=4
|
||||
source $lib_dir/tc_common.sh
|
||||
source $lib_dir/lib.sh
|
||||
|
||||
h1_create()
|
||||
{
|
||||
simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
|
||||
|
||||
ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
|
||||
ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
|
||||
}
|
||||
|
||||
h1_destroy()
|
||||
{
|
||||
ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
|
||||
ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
|
||||
|
||||
simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
|
||||
}
|
||||
|
||||
h2_create()
|
||||
{
|
||||
simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
|
||||
|
||||
ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
|
||||
ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
|
||||
}
|
||||
|
||||
h2_destroy()
|
||||
{
|
||||
ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
|
||||
ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
|
||||
|
||||
simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
|
||||
}
|
||||
|
||||
switch_create()
|
||||
{
|
||||
ip link add name br0 type bridge mcast_snooping 0
|
||||
ip link set dev br0 up
|
||||
|
||||
ip link set dev $swp1 master br0
|
||||
ip link set dev $swp1 up
|
||||
ip link set dev $swp2 master br0
|
||||
ip link set dev $swp2 up
|
||||
|
||||
tc qdisc add dev $swp1 clsact
|
||||
tc qdisc add dev $swp2 clsact
|
||||
|
||||
__addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64
|
||||
__addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64
|
||||
}
|
||||
|
||||
switch_destroy()
|
||||
{
|
||||
__addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64
|
||||
__addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64
|
||||
|
||||
tc qdisc del dev $swp2 clsact
|
||||
tc qdisc del dev $swp1 clsact
|
||||
|
||||
ip link set dev $swp2 down
|
||||
ip link set dev $swp2 nomaster
|
||||
ip link set dev $swp1 down
|
||||
ip link set dev $swp1 nomaster
|
||||
|
||||
ip link set dev br0 down
|
||||
ip link del dev br0
|
||||
}
|
||||
|
||||
ping_ipv4()
|
||||
{
|
||||
ping_test $h1 198.51.100.1 ": h1->h2"
|
||||
}
|
||||
|
||||
ping_ipv6()
|
||||
{
|
||||
ping6_test $h1 2001:db8:2::1 ": h1->h2"
|
||||
}
|
||||
|
||||
fwd_mark_ipv4()
|
||||
{
|
||||
# Transmit packets from H1 to H2 and make sure they are trapped at
|
||||
# swp1 due to loopback error, but only forwarded by the ASIC through
|
||||
# swp2
|
||||
|
||||
tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
|
||||
skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
|
||||
action pass
|
||||
|
||||
tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \
|
||||
skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
|
||||
action pass
|
||||
|
||||
tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \
|
||||
skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
|
||||
action pass
|
||||
|
||||
ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \
|
||||
-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
|
||||
|
||||
RET=0
|
||||
|
||||
tc_check_packets "dev $swp1 ingress" 101 10
|
||||
check_err $?
|
||||
|
||||
log_test "fwd mark: trapping IPv4 packets due to LBERROR"
|
||||
|
||||
RET=0
|
||||
|
||||
tc_check_packets "dev $swp2 egress" 101 0
|
||||
check_err $?
|
||||
|
||||
log_test "fwd mark: forwarding IPv4 packets in software"
|
||||
|
||||
RET=0
|
||||
|
||||
tc_check_packets "dev $swp2 egress" 102 10
|
||||
check_err $?
|
||||
|
||||
log_test "fwd mark: forwarding IPv4 packets in hardware"
|
||||
|
||||
tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower
|
||||
tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
|
||||
tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
|
||||
}
|
||||
|
||||
fwd_mark_ipv6()
|
||||
{
|
||||
tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \
|
||||
skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
|
||||
action pass
|
||||
|
||||
tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \
|
||||
skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
|
||||
action pass
|
||||
|
||||
tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \
|
||||
skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
|
||||
action pass
|
||||
|
||||
ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \
|
||||
-B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q
|
||||
|
||||
RET=0
|
||||
|
||||
tc_check_packets "dev $swp1 ingress" 101 10
|
||||
check_err $?
|
||||
|
||||
log_test "fwd mark: trapping IPv6 packets due to LBERROR"
|
||||
|
||||
RET=0
|
||||
|
||||
tc_check_packets "dev $swp2 egress" 101 0
|
||||
check_err $?
|
||||
|
||||
log_test "fwd mark: forwarding IPv6 packets in software"
|
||||
|
||||
RET=0
|
||||
|
||||
tc_check_packets "dev $swp2 egress" 102 10
|
||||
check_err $?
|
||||
|
||||
log_test "fwd mark: forwarding IPv6 packets in hardware"
|
||||
|
||||
tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower
|
||||
tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower
|
||||
tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower
|
||||
}
|
||||
|
||||
setup_prepare()
|
||||
{
|
||||
h1=${NETIFS[p1]}
|
||||
swp1=${NETIFS[p2]}
|
||||
|
||||
swp2=${NETIFS[p3]}
|
||||
h2=${NETIFS[p4]}
|
||||
|
||||
vrf_prepare
|
||||
forwarding_enable
|
||||
|
||||
sysctl_set net.ipv4.conf.all.accept_redirects 0
|
||||
sysctl_set net.ipv6.conf.all.accept_redirects 0
|
||||
|
||||
h1_create
|
||||
h2_create
|
||||
switch_create
|
||||
}
|
||||
|
||||
cleanup()
|
||||
{
|
||||
pre_cleanup
|
||||
|
||||
switch_destroy
|
||||
h2_destroy
|
||||
h1_destroy
|
||||
|
||||
sysctl_restore net.ipv6.conf.all.accept_redirects
|
||||
sysctl_restore net.ipv4.conf.all.accept_redirects
|
||||
|
||||
forwarding_restore
|
||||
vrf_cleanup
|
||||
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
setup_prepare
|
||||
setup_wait
|
||||
|
||||
tests_run
|
||||
|
||||
exit $EXIT_STATUS
|
Loading…
Reference in New Issue