net/mlx5e: Add support for hw encapsulation of MPLS over UDP
MPLS over UDP is supported by adding a rule on a representor net device which does tunnel_key set, push mpls and forwards to a bareudp device. At the hardware level we use a packet_reformat_context object to do the encapsulation of the packet. The resulting packet looks as follows (left side transmitted first): outer L2 | outer IP | UDP | MPLS | inner L3 and data | Example usage: tc filter add dev $rep0 protocol ip prio 1 root flower skip_sw \ action tunnel_key set src_ip 8.8.8.21 dst_ip 8.8.8.24 id 555 \ dst_port 6635 tos 4 ttl 6 csum action mpls push protocol 0x8847 \ label 555 tc 3 action mirred egress redirect dev bareudp0 This is how the filter is shown with tc filter show: tc filter show dev enp59s0f0_0 ingress filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 skip_sw in_hw in_hw_count 1 action order 1: tunnel_key set src_ip 8.8.8.21 dst_ip 8.8.8.24 key_id 555 dst_port 6635 csum tos 0x4 ttl 6 pipe index 1 ref 1 bind 1 action order 2: mpls push protocol mpls_uc label 555 tc 3 ttl 255 pipe index 1 ref 1 bind 1 action order 3: mirred (Egress Redirect to device bareudp0) stolen index 1 ref 1 bind 1 Signed-off-by: Eli Cohen <eli@mellanox.com> Reviewed-by: Roi Dayan <roid@mellanox.com> Reviewed-by: Eli Britstein <elibr@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
parent
d43600c01c
commit
f828ca6a2f
|
@ -38,7 +38,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o lib/geneve.o lib/port_tun.o lag
|
|||
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
|
||||
en/mapping.o esw/chains.o en/tc_tun.o \
|
||||
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
|
||||
diag/en_tc_tracepoint.o
|
||||
en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
|
||||
mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
|
||||
|
||||
#
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <net/vxlan.h>
|
||||
#include <net/gre.h>
|
||||
#include <net/geneve.h>
|
||||
#include <net/bareudp.h>
|
||||
#include "en/tc_tun.h"
|
||||
#include "en_tc.h"
|
||||
#include "rep/tc.h"
|
||||
|
@ -18,6 +19,8 @@ struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
|
|||
else if (netif_is_gretap(tunnel_dev) ||
|
||||
netif_is_ip6gretap(tunnel_dev))
|
||||
return &gre_tunnel;
|
||||
else if (netif_is_bareudp(tunnel_dev))
|
||||
return &mplsoudp_tunnel;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ enum {
|
|||
MLX5E_TC_TUNNEL_TYPE_VXLAN,
|
||||
MLX5E_TC_TUNNEL_TYPE_GENEVE,
|
||||
MLX5E_TC_TUNNEL_TYPE_GRETAP,
|
||||
MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
|
||||
};
|
||||
|
||||
struct mlx5e_tc_tunnel {
|
||||
|
@ -46,6 +47,7 @@ struct mlx5e_tc_tunnel {
|
|||
extern struct mlx5e_tc_tunnel vxlan_tunnel;
|
||||
extern struct mlx5e_tc_tunnel geneve_tunnel;
|
||||
extern struct mlx5e_tc_tunnel gre_tunnel;
|
||||
extern struct mlx5e_tc_tunnel mplsoudp_tunnel;
|
||||
|
||||
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
|
||||
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
||||
/* Copyright (c) 2018 Mellanox Technologies. */
|
||||
|
||||
#include <net/bareudp.h>
|
||||
#include <net/mpls.h>
|
||||
#include "en/tc_tun.h"
|
||||
|
||||
static bool can_offload(struct mlx5e_priv *priv)
|
||||
{
|
||||
return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2);
|
||||
}
|
||||
|
||||
static int calc_hlen(struct mlx5e_encap_entry *e)
|
||||
{
|
||||
return sizeof(struct udphdr) + MPLS_HLEN;
|
||||
}
|
||||
|
||||
static int init_encap_attr(struct net_device *tunnel_dev,
|
||||
struct mlx5e_priv *priv,
|
||||
struct mlx5e_encap_entry *e,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
e->tunnel = &mplsoudp_tunnel;
|
||||
e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Build a single 32-bit MPLS label stack entry in network byte order.
 *
 * Layout of the entry, most significant bit first:
 *   20 bits  label            (taken from @label, big endian on input)
 *    3 bits  traffic class    (low three bits of @tos)
 *    1 bit   bottom of stack  (always set: only one label is supported)
 *    8 bits  TTL              (@ttl)
 */
static inline __be32 mpls_label_id_field(__be32 label, u8 tos, u8 ttl)
{
	u32 entry;

	entry = be32_to_cpu(label) << 12;	/* label: bits 31..12 */
	entry |= (u32)(tos & 7) << 9;		/* traffic class: bits 11..9 */
	entry |= 1u << 8;			/* bottom-of-stack: bit 8 */
	entry |= ttl;				/* TTL: bits 7..0 */

	return cpu_to_be32(entry);
}
|
||||
|
||||
static int generate_ip_tun_hdr(char buf[],
|
||||
__u8 *ip_proto,
|
||||
struct mlx5e_encap_entry *r)
|
||||
{
|
||||
const struct ip_tunnel_key *tun_key = &r->tun_info->key;
|
||||
__be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
|
||||
struct udphdr *udp = (struct udphdr *)(buf);
|
||||
struct mpls_shim_hdr *mpls;
|
||||
|
||||
mpls = (struct mpls_shim_hdr *)(udp + 1);
|
||||
*ip_proto = IPPROTO_UDP;
|
||||
|
||||
udp->dest = tun_key->tp_dst;
|
||||
mpls->label_stack_entry = mpls_label_id_field(tun_id, tun_key->tos, tun_key->ttl);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Tunnel callback: match on the outer UDP ports.
 *
 * Pure pass-through to the shared mlx5e_tc_tun_parse_udp_ports() helper;
 * its return value is handed back to the caller unchanged.
 */
static int parse_udp_ports(struct mlx5e_priv *priv,
			   struct mlx5_flow_spec *spec,
			   struct flow_cls_offload *f,
			   void *headers_c,
			   void *headers_v)
{
	int err;

	err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f,
					   headers_c, headers_v);

	return err;
}
|
||||
|
||||
/* Tunnel callback: tunnel-specific match parsing.
 *
 * Nothing is parsed for MPLS over UDP in this implementation: none of the
 * arguments are read or modified and 0 is always returned.
 */
static int parse_tunnel(struct mlx5e_priv *priv,
			struct mlx5_flow_spec *spec,
			struct flow_cls_offload *f,
			void *headers_c,
			void *headers_v)
{
	/* No tunnel-specific fields are matched. */
	return 0;
}
|
||||
|
||||
struct mlx5e_tc_tunnel mplsoudp_tunnel = {
|
||||
.tunnel_type = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
|
||||
.match_level = MLX5_MATCH_L4,
|
||||
.can_offload = can_offload,
|
||||
.calc_hlen = calc_hlen,
|
||||
.init_encap_attr = init_encap_attr,
|
||||
.generate_ip_tun_hdr = generate_ip_tun_hdr,
|
||||
.parse_udp_ports = parse_udp_ports,
|
||||
.parse_tunnel = parse_tunnel,
|
||||
};
|
|
@ -48,6 +48,7 @@
|
|||
#include <net/tc_act/tc_csum.h>
|
||||
#include <net/arp.h>
|
||||
#include <net/ipv6_stubs.h>
|
||||
#include <net/bareudp.h>
|
||||
#include "en.h"
|
||||
#include "en_rep.h"
|
||||
#include "en/rep/tc.h"
|
||||
|
@ -3685,6 +3686,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
|
|||
bool encap = false, decap = false;
|
||||
u32 action = attr->action;
|
||||
int err, i, if_count = 0;
|
||||
bool mpls_push = false;
|
||||
|
||||
if (!flow_action_has_entries(flow_action))
|
||||
return -EINVAL;
|
||||
|
@ -3699,6 +3701,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
|
|||
action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
|
||||
MLX5_FLOW_CONTEXT_ACTION_COUNT;
|
||||
break;
|
||||
case FLOW_ACTION_MPLS_PUSH:
|
||||
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
|
||||
reformat_l2_to_l3_tunnel) ||
|
||||
act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
|
||||
NL_SET_ERR_MSG_MOD(extack,
|
||||
"mpls push is supported only for mpls_uc protocol");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
mpls_push = true;
|
||||
break;
|
||||
case FLOW_ACTION_MANGLE:
|
||||
case FLOW_ACTION_ADD:
|
||||
err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
|
||||
|
@ -3729,6 +3741,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mpls_push && !netif_is_bareudp(out_dev)) {
|
||||
NL_SET_ERR_MSG_MOD(extack,
|
||||
"mpls is supported only through a bareudp device");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (ft_flow && out_dev == priv->netdev) {
|
||||
/* Ignore forward to self rules generated
|
||||
* by adding both mlx5 devs to the flow table
|
||||
|
|
|
@ -144,11 +144,11 @@ static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
|
|||
int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
|
||||
int reformat_type)
|
||||
{
|
||||
/* the default is error for unknown (non VXLAN/GRE tunnel types) */
|
||||
int err = -EOPNOTSUPP;
|
||||
|
||||
mutex_lock(&tun_entropy->lock);
|
||||
if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN &&
|
||||
if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN ||
|
||||
reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) &&
|
||||
tun_entropy->enabled) {
|
||||
/* in case entropy calculation is enabled for all tunneling
|
||||
* types, it is ok for VXLAN, so approve.
|
||||
|
|
Loading…
Reference in New Issue