net/mlx5e: CT: Fix offload with CT action after CT NAT action

A chain of rules may perform a CT action again after a CT NAT action.
Before this fix, matching would break in that case because the packet
enters the CT table, rather than the CT NAT table, after NAT has already
changed it.
Fix this by adding pre_ct and pre_ct_nat tables that skip the ct/ct_nat
tables and go straight to the post_ct table if ct/nat was already done.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
Roi Dayan 2020-04-12 15:39:15 +03:00 committed by Saeed Mahameed
parent 90bf1c8dbd
commit 9102d836d2
1 changed files with 286 additions and 22 deletions

View File

@ -24,6 +24,7 @@
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2) #define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
@ -61,6 +62,15 @@ struct mlx5_ct_zone_rule {
bool nat; bool nat;
}; };
/* A pre_ct steering table together with every object installed in it.
 * struct mlx5_ct_ft embeds two of these: pre_ct and pre_ct_nat.
 */
struct mlx5_tc_ct_pre {
/* flow table created by mlx5_tc_ct_alloc_pre_ct() */
struct mlx5_flow_table *fdb;
/* group at flow index 0, matching on metadata_reg_c_2 (zone + ct state bits) */
struct mlx5_flow_group *flow_grp;
/* group at flow index 1, created without match criteria */
struct mlx5_flow_group *miss_grp;
/* rule forwarding to post_ct when the zone and ct state registers match */
struct mlx5_flow_handle *flow_rule;
/* match-all rule forwarding to the ct or ct_nat table */
struct mlx5_flow_handle *miss_rule;
/* modify header writing the zone register; attached to both rules */
struct mlx5_modify_hdr *modify_hdr;
};
struct mlx5_ct_ft { struct mlx5_ct_ft {
struct rhash_head node; struct rhash_head node;
u16 zone; u16 zone;
@ -68,6 +78,8 @@ struct mlx5_ct_ft {
struct nf_flowtable *nf_ft; struct nf_flowtable *nf_ft;
struct mlx5_tc_ct_priv *ct_priv; struct mlx5_tc_ct_priv *ct_priv;
struct rhashtable ct_entries_ht; struct rhashtable ct_entries_ht;
struct mlx5_tc_ct_pre pre_ct;
struct mlx5_tc_ct_pre pre_ct_nat;
}; };
struct mlx5_ct_entry { struct mlx5_ct_entry {
@ -426,6 +438,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_eswitch *esw = ct_priv->esw;
struct mlx5_modify_hdr *mod_hdr; struct mlx5_modify_hdr *mod_hdr;
struct flow_action_entry *meta; struct flow_action_entry *meta;
u16 ct_state = 0;
int err; int err;
meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
@ -444,11 +457,13 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
&mod_acts); &mod_acts);
if (err) if (err)
goto err_mapping; goto err_mapping;
ct_state |= MLX5_CT_STATE_NAT_BIT;
} }
ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
(MLX5_CT_STATE_ESTABLISHED_BIT | ct_state,
MLX5_CT_STATE_TRK_BIT),
meta->ct_metadata.mark, meta->ct_metadata.mark,
meta->ct_metadata.labels[0], meta->ct_metadata.labels[0],
tupleid); tupleid);
@ -791,6 +806,238 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
return 0; return 0;
} }
/* Install the two rules of a pre_ct table.
 *
 * Both rules carry the same modify header, which writes this table's zone
 * (ct_ft->zone masked with MLX5_CT_ZONE_MASK) to ZONE_TO_REG:
 *  - the flow rule matches packets whose zone register already holds this
 *    zone and whose ct state register has the TRK bit set (and the NAT bit
 *    as well when @nat is true); it forwards them to ct_priv->post_ct;
 *  - the miss rule uses an all-zero match spec and forwards to
 *    ct_priv->ct_nat when @nat is true, otherwise to ct_priv->ct.
 *
 * On success pre_ct->modify_hdr, pre_ct->flow_rule and pre_ct->miss_rule are
 * filled in. Returns 0 on success or a negative errno, in which case
 * everything created by this function has been released again.
 */
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
				  struct mlx5_tc_ct_pre *pre_ct,
				  bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->esw->dev;
	u16 zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
	struct mlx5e_tc_mod_hdr_acts zone_acts = {};
	struct mlx5_flow_destination fwd = {};
	struct mlx5_flow_act act = {};
	struct mlx5_modify_hdr *set_zone;
	struct mlx5_flow_handle *handle;
	struct mlx5_flow_spec *match;
	u32 ctstate;
	int err;

	match = kvzalloc(sizeof(*match), GFP_KERNEL);
	if (!match)
		return -ENOMEM;

	err = mlx5e_tc_match_to_reg_set(dev, &zone_acts, ZONE_TO_REG, zone);
	if (err) {
		ct_dbg("Failed to set zone register mapping");
		goto out;
	}

	set_zone = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_FDB,
					    zone_acts.num_actions,
					    zone_acts.actions);
	if (IS_ERR(set_zone)) {
		err = PTR_ERR(set_zone);
		ct_dbg("Failed to create pre ct mod hdr");
		goto out;
	}
	pre_ct->modify_hdr = set_zone;

	/* Action and destination type are shared by both rules below. */
	act.modify_hdr = set_zone;
	act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
	act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	fwd.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

	/* Flow rule: zone and ct state already set, go to post_ct. */
	ctstate = MLX5_CT_STATE_TRK_BIT;
	if (nat)
		ctstate |= MLX5_CT_STATE_NAT_BIT;
	mlx5e_tc_match_to_reg_match(match, ZONE_TO_REG,
				    zone, MLX5_CT_ZONE_MASK);
	mlx5e_tc_match_to_reg_match(match, CTSTATE_TO_REG, ctstate, ctstate);

	fwd.ft = ct_priv->post_ct;
	handle = mlx5_add_flow_rules(pre_ct->fdb, match, &act, &fwd, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
		goto err_flow_rule;
	}
	pre_ct->flow_rule = handle;

	/* Miss rule: empty match, go to the ct or ct_nat table. */
	memset(match, 0, sizeof(*match));
	if (nat)
		fwd.ft = ct_priv->ct_nat;
	else
		fwd.ft = ct_priv->ct;
	handle = mlx5_add_flow_rules(pre_ct->fdb, match, &act, &fwd, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
		goto err_miss_rule;
	}
	pre_ct->miss_rule = handle;

	/* Success: only the temporary action list and spec are released. */
	err = 0;
	goto out;

err_miss_rule:
	mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
out:
	dealloc_mod_hdr_actions(&zone_acts);
	kvfree(match);
	return err;
}
/* Remove what tc_ct_pre_ct_add_rules() installed in @pre_ct: the flow rule,
 * the miss rule, and then the modify header that both rules referenced.
 */
static void tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
				   struct mlx5_tc_ct_pre *pre_ct)
{
	struct mlx5_core_dev *dev = ct_ft->ct_priv->esw->dev;

	mlx5_del_flow_rules(pre_ct->flow_rule);
	mlx5_del_flow_rules(pre_ct->miss_rule);
	/* The header is released only after both rules using it are gone. */
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}
/* Create one pre_ct table for @ct_ft and populate it.
 *
 * The table is an unmanaged FDB table (prio FDB_TC_OFFLOAD, level 1) with
 * room for two entries, split into two single-entry groups:
 *  - index 0: matches metadata_reg_c_2 on the zone mask plus the TRK ct state
 *    bit (and the NAT bit when @nat is true), with the state bits shifted
 *    into the upper 16 bits of the register;
 *  - index 1: no match criteria, used for the miss rule.
 * The rules themselves are added by tc_ct_pre_ct_add_rules().
 *
 * On success pre_ct->fdb, pre_ct->flow_grp and pre_ct->miss_grp are set (in
 * addition to what tc_ct_pre_ct_add_rules() sets). Returns 0 on success or a
 * negative errno after undoing whatever had been created.
 */
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
			struct mlx5_tc_ct_pre *pre_ct,
			bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->esw->dev;
	int in_sz = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_table_attr tbl_attr = {
		.flags = MLX5_FLOW_TABLE_UNMANAGED,
		.prio = FDB_TC_OFFLOAD,
		.max_fte = 2,
		.level = 1,
	};
	u32 ctstate_bits = MLX5_CT_STATE_TRK_BIT;
	struct mlx5_flow_namespace *fdb_ns;
	struct mlx5_flow_group *grp;
	struct mlx5_flow_table *tbl;
	u32 reg_c2_mask;
	void *misc2;
	u32 *grp_in;
	int err;

	fdb_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!fdb_ns) {
		ct_dbg("Failed to get FDB flow namespace");
		return -EOPNOTSUPP;
	}

	grp_in = kvzalloc(in_sz, GFP_KERNEL);
	if (!grp_in)
		return -ENOMEM;

	tbl = mlx5_create_flow_table(fdb_ns, &tbl_attr);
	if (IS_ERR(tbl)) {
		err = PTR_ERR(tbl);
		ct_dbg("Failed to create pre ct table");
		goto out;
	}
	pre_ct->fdb = tbl;

	/* Group for the flow rule: entry 0, zone + ct state in reg_c_2. */
	if (nat)
		ctstate_bits |= MLX5_CT_STATE_NAT_BIT;
	reg_c2_mask = MLX5_CT_ZONE_MASK | (ctstate_bits << 16);

	MLX5_SET(create_flow_group_in, grp_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, grp_in, end_flow_index, 0);
	MLX5_SET(create_flow_group_in, grp_in, match_criteria_enable,
		 MLX5_MATCH_MISC_PARAMETERS_2);
	misc2 = MLX5_ADDR_OF(create_flow_group_in, grp_in,
			     match_criteria.misc_parameters_2);
	MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_2, reg_c2_mask);

	grp = mlx5_create_flow_group(tbl, grp_in);
	if (IS_ERR(grp)) {
		err = PTR_ERR(grp);
		ct_dbg("Failed to create pre ct group");
		goto err_flow_grp;
	}
	pre_ct->flow_grp = grp;

	/* Group for the miss rule: entry 1, no match criteria. */
	memset(grp_in, 0, in_sz);
	MLX5_SET(create_flow_group_in, grp_in, start_flow_index, 1);
	MLX5_SET(create_flow_group_in, grp_in, end_flow_index, 1);

	grp = mlx5_create_flow_group(tbl, grp_in);
	if (IS_ERR(grp)) {
		err = PTR_ERR(grp);
		ct_dbg("Failed to create pre ct miss group");
		goto err_miss_grp;
	}
	pre_ct->miss_grp = grp;

	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
	if (err)
		goto err_add_rules;

	/* Success: only the scratch group buffer is released. */
	goto out;

err_add_rules:
	mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
	mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
	mlx5_destroy_flow_table(tbl);
out:
	kvfree(grp_in);
	return err;
}
/* Tear down a pre_ct table built by mlx5_tc_ct_alloc_pre_ct(), in the
 * reverse order of its construction: rules, miss group, flow group, table.
 */
static void mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
				   struct mlx5_tc_ct_pre *pre_ct)
{
	/* rules and their modify header */
	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);

	/* groups, last created first */
	mlx5_destroy_flow_group(pre_ct->miss_grp);
	mlx5_destroy_flow_group(pre_ct->flow_grp);

	/* and finally the table itself */
	mlx5_destroy_flow_table(pre_ct->fdb);
}
/* Allocate both pre_ct tables of @ft: ft->pre_ct first, then ft->pre_ct_nat.
 * If the second allocation fails, the first table is freed again.
 * Returns 0 on success or the negative errno of the failing allocation.
 */
static int mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	int err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);

	if (err)
		return err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
	if (err)
		mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);

	return err;
}
/* Free both pre_ct tables of @ft, the NAT one first (reverse of the order
 * used by mlx5_tc_ct_alloc_pre_ct_tables()).
 */
static void mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	struct mlx5_tc_ct_pre *nat_tbl = &ft->pre_ct_nat;
	struct mlx5_tc_ct_pre *plain_tbl = &ft->pre_ct;

	mlx5_tc_ct_free_pre_ct(ft, nat_tbl);
	mlx5_tc_ct_free_pre_ct(ft, plain_tbl);
}
static struct mlx5_ct_ft * static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
struct nf_flowtable *nf_ft) struct nf_flowtable *nf_ft)
@ -813,6 +1060,10 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
ft->ct_priv = ct_priv; ft->ct_priv = ct_priv;
refcount_set(&ft->refcount, 1); refcount_set(&ft->refcount, 1);
err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
if (err)
goto err_alloc_pre_ct;
err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
if (err) if (err)
goto err_init; goto err_init;
@ -834,6 +1085,8 @@ err_add_cb:
err_insert: err_insert:
rhashtable_destroy(&ft->ct_entries_ht); rhashtable_destroy(&ft->ct_entries_ht);
err_init: err_init:
mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
kfree(ft); kfree(ft);
return ERR_PTR(err); return ERR_PTR(err);
} }
@ -859,21 +1112,40 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
rhashtable_free_and_destroy(&ft->ct_entries_ht, rhashtable_free_and_destroy(&ft->ct_entries_ht,
mlx5_tc_ct_flush_ft_entry, mlx5_tc_ct_flush_ft_entry,
ct_priv); ct_priv);
mlx5_tc_ct_free_pre_ct_tables(ft);
kfree(ft); kfree(ft);
} }
/* We translate the tc filter with CT action to the following HW model: /* We translate the tc filter with CT action to the following HW model:
* *
* +-------------------+ +--------------------+ +--------------+ * +---------------------+
* + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +-----> * + fdb prio (tc chain) +
* + original match + | + tuple + zone match + | + fte_id match + | * + original match +
* +-------------------+ | +--------------------+ | +--------------+ | * +---------------------+
* v v v * | set chain miss mapping
* set chain miss mapping set mark original * | set fte_id
* set fte_id set label filter * | set tunnel_id
* set zone set established actions * | do decap
* set tunnel_id do nat (if needed) * v
* do decap * +---------------------+
* + pre_ct/pre_ct_nat + if matches +---------------------+
* + zone+nat match +---------------->+ post_ct (see below) +
* +---------------------+ set zone +---------------------+
* | set zone
* v
* +--------------------+
* + CT (nat or no nat) +
* + tuple + zone match +
* +--------------------+
* | set mark
* | set label
* | set established
* | do nat (if needed)
* v
* +--------------+
* + post_ct + original filter actions
* + fte_id match +------------------------>
* +--------------+
*/ */
static int static int
__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
@ -951,14 +1223,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
goto err_mapping; goto err_mapping;
} }
err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG,
attr->ct_attr.zone &
MLX5_CT_ZONE_MASK);
if (err) {
ct_dbg("Failed to set zone register mapping");
goto err_mapping;
}
err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
FTEID_TO_REG, fte_id); FTEID_TO_REG, fte_id);
if (err) { if (err) {
@ -1018,7 +1282,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
/* Change original rule point to ct table */ /* Change original rule point to ct table */
pre_ct_attr->dest_chain = 0; pre_ct_attr->dest_chain = 0;
pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb;
ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
orig_spec, orig_spec,
pre_ct_attr); pre_ct_attr);