From a8eb919ba659adcbed8fd782b3e9a949c3a65b9c Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 29 Mar 2020 13:50:47 +0300 Subject: [PATCH] net/mlx5e: CT: Restore ct state from lookup in zone instead of tupleid Remove tupleid, and replace it with zone_restore, which is the zone an established tuple sets after match. On miss, Use this zone + tuple taken from the skb, to lookup the ct entry and restore it. This improves flow insertion rate by avoiding the allocation of a header rewrite context. Signed-off-by: Paul Blakey Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/tc.c | 6 +- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 113 ++++++++++++------ .../ethernet/mellanox/mlx5/core/en/tc_ct.h | 17 +-- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_tc.h | 2 +- 5 files changed, 90 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index eefeb1cdc2ee..e27963b80c11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -594,7 +594,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct mlx5e_tc_update_priv *tc_priv) { #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id; + u32 chain = 0, reg_c0, reg_c1, tunnel_id, zone; struct mlx5_rep_uplink_priv *uplink_priv; struct mlx5e_rep_priv *uplink_rpriv; struct tc_skb_ext *tc_skb_ext; @@ -631,11 +631,11 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, tc_skb_ext->chain = chain; - tuple_id = reg_c1 & TUPLE_ID_MAX; + zone = reg_c1 & ZONE_RESTORE_MAX; uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &uplink_rpriv->uplink_priv; - if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id)) + if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, zone)) return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 08ebce35b2fc..c6e92f818ecf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -60,7 +60,6 @@ struct mlx5_ct_flow { struct mlx5_ct_zone_rule { struct mlx5_flow_handle *rule; struct mlx5_esw_flow_attr attr; - int tupleid; bool nat; }; @@ -107,7 +106,6 @@ struct mlx5_ct_tuple { }; struct mlx5_ct_entry { - u16 zone; struct rhash_head node; struct rhash_head tuple_node; struct rhash_head tuple_nat_node; @@ -396,11 +394,10 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_esw_flow_attr *attr = &zone_rule->attr; struct mlx5_eswitch *esw = ct_priv->esw; - ct_dbg("Deleting ct entry rule in zone %d", entry->zone); + ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone); mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr); mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); - xa_erase(&ct_priv->tuple_ids, zone_rule->tupleid); } static void @@ -434,7 +431,7 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, u8 ct_state, u32 mark, u32 label, - u32 tupleid) + u16 zone) { struct mlx5_eswitch *esw = ct_priv->esw; int err; @@ -455,7 +452,7 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, return err; err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts, - TUPLEID_TO_REG, tupleid); + ZONE_RESTORE_TO_REG, zone + 1); if (err) return err; @@ -582,8 +579,7 @@ static int mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_esw_flow_attr *attr, struct flow_rule *flow_rule, - u32 tupleid, - bool nat) + u16 zone, bool nat) { struct mlx5e_tc_mod_hdr_acts mod_acts = {}; struct mlx5_eswitch *esw = ct_priv->esw; @@ -617,7 +613,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, ct_state, meta->ct_metadata.mark, meta->ct_metadata.labels[0], - tupleid); + zone); if (err) goto err_mapping; @@ -648,7 +644,6 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_esw_flow_attr *attr = &zone_rule->attr; struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_flow_spec *spec = NULL; - u32 tupleid; int err; zone_rule->nat = nat; @@ -657,18 +652,8 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, if (!spec) return -ENOMEM; - /* Get tuple unique id */ - err = xa_alloc(&ct_priv->tuple_ids, &tupleid, zone_rule, - XA_LIMIT(1, TUPLE_ID_MAX), GFP_KERNEL); - if (err) { - netdev_warn(ct_priv->netdev, - "Failed to allocate tuple id, err: %d\n", err); - goto err_xa_alloc; - } - zone_rule->tupleid = tupleid; - err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, - tupleid, nat); + entry->tuple.zone, nat); if (err) { ct_dbg("Failed to create ct entry mod hdr"); goto err_mod_hdr; @@ -686,7 +671,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, - entry->zone & MLX5_CT_ZONE_MASK, + entry->tuple.zone & MLX5_CT_ZONE_MASK, MLX5_CT_ZONE_MASK); zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); @@ -697,15 +682,13 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, } kfree(spec); - ct_dbg("Offloaded ct entry rule in zone %d", entry->zone); + ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); return 0; err_rule: mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr); err_mod_hdr: - xa_erase(&ct_priv->tuple_ids, zone_rule->tupleid); -err_xa_alloc: kfree(spec); return err; } @@ -766,7 +749,6 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (!entry) return -ENOMEM; - entry->zone = ft->zone; entry->tuple.zone = ft->zone; entry->cookie = flow->cookie; entry->restore_cookie = meta_action->ct_metadata.cookie; @@ -894,6 +876,48 @@ mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, return -EOPNOTSUPP; } +static bool +mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple, + u16 zone) +{ + struct flow_keys flow_keys; + + skb_reset_network_header(skb); + skb_flow_dissect_flow_keys(skb, &flow_keys, 0); + + tuple->zone = zone; + + if (flow_keys.basic.ip_proto != IPPROTO_TCP && + flow_keys.basic.ip_proto != IPPROTO_UDP) + return false; + + tuple->port.src = flow_keys.ports.src; + tuple->port.dst = flow_keys.ports.dst; + tuple->n_proto = flow_keys.basic.n_proto; + tuple->ip_proto = flow_keys.basic.ip_proto; + + switch (flow_keys.basic.n_proto) { + case htons(ETH_P_IP): + tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src; + tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst; + break; + + case htons(ETH_P_IPV6): + tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src; + tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst; + break; + default: + goto out; + } + + return true; + +out: + return false; +} + int mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec) @@ -978,7 +1002,7 @@ mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, } if (mask->ct_zone) - mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + mlx5e_tc_match_to_reg_match(spec, ZONE_RESTORE_TO_REG, key->ct_zone, MLX5_CT_ZONE_MASK); if (ctstate_mask) mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, @@ -1008,6 +1032,18 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + /* To mark that the need restore ct state on a skb, we mark the + * packet with the zone restore register. To distinguise from an + * uninitalized 0 value for this register, we write zone + 1 on the + * packet. + * + * This restricts us to a max zone of 0xFFFE. + */ + if (act->ct.zone == (u16)~0) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ct zone"); + return -EOPNOTSUPP; + } + attr->ct_attr.zone = act->ct.zone; attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; @@ -1349,6 +1385,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * | set mark * | set label * | set established + * | set zone_restore * | do nat (if needed) * v * +--------------+ @@ -1770,7 +1807,6 @@ mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv) } idr_init(&ct_priv->fte_ids); - xa_init_flags(&ct_priv->tuple_ids, XA_FLAGS_ALLOC1); mutex_init(&ct_priv->control_lock); rhashtable_init(&ct_priv->zone_ht, &zone_params); rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); @@ -1809,7 +1845,6 @@ mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); rhashtable_destroy(&ct_priv->zone_ht); mutex_destroy(&ct_priv->control_lock); - xa_destroy(&ct_priv->tuple_ids); idr_destroy(&ct_priv->fte_ids); kfree(ct_priv); @@ -1818,22 +1853,26 @@ mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) bool mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, - struct sk_buff *skb, u32 tupleid) + struct sk_buff *skb, u16 zone) { struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv; - struct mlx5_ct_zone_rule *zone_rule; + struct mlx5_ct_tuple tuple = {}; struct mlx5_ct_entry *entry; - if (!ct_priv || !tupleid) + if (!ct_priv || !zone) return true; - zone_rule = xa_load(&ct_priv->tuple_ids, tupleid); - if (!zone_rule) + if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone - 1)) return false; - entry = container_of(zone_rule, struct mlx5_ct_entry, - zone_rules[zone_rule->nat]); - tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple, + tuples_ht_params); + if (!entry) + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, + &tuple, tuples_nat_ht_params); + if (!entry) + return false; + tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 94f74cf71ce4..b5e07bff843b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -67,16 +67,17 @@ struct mlx5_ct_attr { misc_parameters_2.metadata_reg_c_5),\ } -#define tupleid_to_reg_ct {\ +#define zone_restore_to_reg_ct {\ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ .moffset = 0,\ - .mlen = 3,\ + .mlen = 2,\ .soffset = MLX5_BYTE_OFF(fte_match_param,\ misc_parameters_2.metadata_reg_c_1),\ } -#define TUPLE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[TUPLEID_TO_REG].mlen * 8) -#define TUPLE_ID_MAX GENMASK(TUPLE_ID_BITS - 1, 0) +#define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen) +#define ZONE_RESTORE_BITS (REG_MAPPING_MLEN(ZONE_RESTORE_TO_REG) * 8) +#define ZONE_RESTORE_MAX GENMASK(ZONE_RESTORE_BITS - 1, 0) #if IS_ENABLED(CONFIG_MLX5_TC_CT) @@ -112,7 +113,7 @@ mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, bool mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, - struct sk_buff *skb, u32 tupleid); + struct sk_buff *skb, u16 zone); #else /* CONFIG_MLX5_TC_CT */ @@ -180,10 +181,10 @@ mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, static inline bool mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv, - struct sk_buff *skb, u32 tupleid) + struct sk_buff *skb, u16 zone) { - if (!tupleid) - return true; + if (!zone) + return true; return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fd984ef234b2..090069e936b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -186,11 +186,11 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { misc_parameters_2.metadata_reg_c_1), }, [ZONE_TO_REG] = zone_to_reg_ct, + [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct, [CTSTATE_TO_REG] = ctstate_to_reg_ct, [MARK_TO_REG] = mark_to_reg_ct, [LABELS_TO_REG] = labels_to_reg_ct, [FTEID_TO_REG] = fteid_to_reg_ct, - [TUPLEID_TO_REG] = tupleid_to_reg_ct, }; static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 68d49b945184..856e034b92f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -129,10 +129,10 @@ enum mlx5e_tc_attr_to_reg { TUNNEL_TO_REG, CTSTATE_TO_REG, ZONE_TO_REG, + ZONE_RESTORE_TO_REG, MARK_TO_REG, LABELS_TO_REG, FTEID_TO_REG, - TUPLEID_TO_REG, }; struct mlx5e_tc_attr_to_reg_mapping {