Merge branch 'act_ct-UDP-NEW'

Vlad Buslov says:

====================
net: Allow offloading of UDP NEW connections via act_ct

Currently only bidirectional established connections can be offloaded
via act_ct. Such approach allows to hardcode a lot of assumptions into
act_ct, flow_table and flow_offload intermediate layer codes. In order
to enabled offloading of unidirectional UDP NEW connections start with
incrementally changing the following assumptions:

- Drivers assume that only established connections are offloaded and
  don't support updating existing connections. Extract ctinfo from meta
  action cookie and refuse offloading of new connections in the drivers.

- Fix flow_table offload fixup algorithm to calculate flow timeout
  according to current connection state instead of hardcoded
  "established" value.

- Add new flow_table flow flag that designates bidirectional connections
  instead of assuming it and hardcoding hardware offload of every flow
  in both directions.

- Add new flow_table flow flag that designates connections that are
  offloaded to hardware as "established" instead of assuming it. This
  allows some optimizations in act_ct and prevents spamming the
  flow_table workqueue with redundant tasks.

With all the necessary infrastructure in place modify act_ct to offload
UDP NEW as unidirectional connection. Pass reply direction traffic to CT
and promote connection to bidirectional when UDP connection state
changes to "assured". Rely on refresh mechanism to propagate connection
state change to supporting drivers.

Note that early drop algorithm that is designed to free up some space in
connection tracking table when it becomes full (by randomly deleting up
to 5% of non-established connections) currently ignores connections
marked as "offloaded". Now, with UDP NEW connections becoming
"offloaded" it could allow malicious user to perform DoS attack by
filling the table with non-droppable UDP NEW connections by sending just
one packet in single direction. To prevent such scenario change early
drop algorithm to also consider "offloaded" connections for deletion.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2023-02-03 09:31:25 +00:00
commit 18390581d0
8 changed files with 103 additions and 34 deletions

View File

@ -1073,12 +1073,16 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
struct flow_action_entry *meta_action;
unsigned long cookie = flow->cookie;
enum ip_conntrack_info ctinfo;
struct mlx5_ct_entry *entry;
int err;
meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
if (!meta_action)
return -EOPNOTSUPP;
ctinfo = meta_action->ct_metadata.cookie & NFCT_INFOMASK;
if (ctinfo == IP_CT_NEW)
return -EOPNOTSUPP;
spin_lock_bh(&ct_priv->ht_lock);
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);

View File

@ -1964,6 +1964,27 @@ int nfp_fl_ct_stats(struct flow_cls_offload *flow,
return 0;
}
static bool
nfp_fl_ct_offload_nft_supported(struct flow_cls_offload *flow)
{
struct flow_rule *flow_rule = flow->rule;
struct flow_action *flow_action =
&flow_rule->action;
struct flow_action_entry *act;
int i;
flow_action_for_each(i, act, flow_action) {
if (act->id == FLOW_ACTION_CT_METADATA) {
enum ip_conntrack_info ctinfo =
act->ct_metadata.cookie & NFCT_INFOMASK;
return ctinfo != IP_CT_NEW;
}
}
return false;
}
static int
nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
{
@ -1976,6 +1997,9 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl
extack = flow->common.extack;
switch (flow->command) {
case FLOW_CLS_REPLACE:
if (!nfp_fl_ct_offload_nft_supported(flow))
return -EOPNOTSUPP;
/* Netfilter can request offload multiple times for the same
* flow - protect against adding duplicates.
*/

View File

@ -57,7 +57,7 @@ struct nf_flowtable_type {
struct net_device *dev,
enum flow_block_command cmd);
int (*action)(struct net *net,
const struct flow_offload *flow,
struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule);
void (*free)(struct nf_flowtable *ft);
@ -164,6 +164,8 @@ enum nf_flow_flags {
NF_FLOW_HW_DYING,
NF_FLOW_HW_DEAD,
NF_FLOW_HW_PENDING,
NF_FLOW_HW_BIDIRECTIONAL,
NF_FLOW_HW_ESTABLISHED,
};
enum flow_offload_type {
@ -312,10 +314,10 @@ void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct net_device *dev,
enum flow_block_command cmd);
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule);
int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule);

View File

@ -1371,9 +1371,6 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
continue;
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
continue;
@ -1443,11 +1440,14 @@ static bool gc_worker_skip_ct(const struct nf_conn *ct)
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
u8 protonum = nf_ct_protonum(ct);
if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
return false;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
l4proto = nf_ct_l4proto_find(protonum);
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
return true;
@ -1504,7 +1504,8 @@ static void gc_worker(struct work_struct *work)
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
continue;
if (!nf_conntrack_max95)
continue;
}
if (expired_count > GC_SCAN_EXPIRED_MAX) {

View File

@ -193,8 +193,11 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
enum udp_conntrack state =
test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
UDP_CT_REPLIED : UDP_CT_UNREPLIED;
timeout = tn->timeouts[UDP_CT_REPLIED];
timeout = tn->timeouts[state];
timeout -= tn->offload_timeout;
} else {
return;

View File

@ -39,7 +39,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
}
static int nf_flow_rule_route_inet(struct net *net,
const struct flow_offload *flow,
struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{

View File

@ -679,7 +679,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
return 0;
}
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@ -704,7 +704,7 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@ -735,7 +735,7 @@ nf_flow_offload_rule_alloc(struct net *net,
{
const struct nf_flowtable *flowtable = offload->flowtable;
const struct flow_offload_tuple *tuple, *other_tuple;
const struct flow_offload *flow = offload->flow;
struct flow_offload *flow = offload->flow;
struct dst_entry *other_dst = NULL;
struct nf_flow_rule *flow_rule;
int err = -ENOMEM;
@ -895,8 +895,9 @@ static int flow_offload_rule_add(struct flow_offload_work *offload,
ok_count += flow_offload_tuple_add(offload, flow_rule[0],
FLOW_OFFLOAD_DIR_ORIGINAL);
ok_count += flow_offload_tuple_add(offload, flow_rule[1],
FLOW_OFFLOAD_DIR_REPLY);
if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
ok_count += flow_offload_tuple_add(offload, flow_rule[1],
FLOW_OFFLOAD_DIR_REPLY);
if (ok_count == 0)
return -ENOENT;
@ -926,7 +927,8 @@ static void flow_offload_work_del(struct flow_offload_work *offload)
{
clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}
@ -946,7 +948,9 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
u64 lastused;
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
&stats[1]);
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
offload->flow->timeout = max_t(u64, offload->flow->timeout,

View File

@ -170,11 +170,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
enum ip_conntrack_dir dir,
enum ip_conntrack_info ctinfo,
struct flow_action *action)
{
struct nf_conn_labels *ct_labels;
struct flow_action_entry *entry;
enum ip_conntrack_info ctinfo;
u32 *act_ct_labels;
entry = tcf_ct_flow_table_flow_action_get_next(action);
@ -182,8 +182,6 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
IP_CT_ESTABLISHED_REPLY;
/* aligns with the CT reference on the SKB nf_ct_set */
entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
@ -237,22 +235,28 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
}
static int tcf_ct_flow_table_fill_actions(struct net *net,
const struct flow_offload *flow,
struct flow_offload *flow,
enum flow_offload_tuple_dir tdir,
struct nf_flow_rule *flow_rule)
{
struct flow_action *action = &flow_rule->rule->action;
int num_entries = action->num_entries;
struct nf_conn *ct = flow->ct;
enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
int i, err;
switch (tdir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
dir = IP_CT_DIR_ORIGINAL;
ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
IP_CT_ESTABLISHED : IP_CT_NEW;
if (ctinfo == IP_CT_ESTABLISHED)
set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
break;
case FLOW_OFFLOAD_DIR_REPLY:
dir = IP_CT_DIR_REPLY;
ctinfo = IP_CT_ESTABLISHED_REPLY;
break;
default:
return -EOPNOTSUPP;
@ -262,7 +266,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
if (err)
goto err_nat;
tcf_ct_flow_table_add_action_meta(ct, dir, action);
tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
return 0;
err_nat:
@ -365,7 +369,7 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
bool tcp)
bool tcp, bool bidirectional)
{
struct nf_conn_act_ct_ext *act_ct_ext;
struct flow_offload *entry;
@ -384,6 +388,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
if (bidirectional)
__set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
@ -407,26 +413,34 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
bool tcp = false;
if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
!test_bit(IPS_ASSURED_BIT, &ct->status))
return;
bool tcp = false, bidirectional = true;
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
tcp = true;
if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
if ((ctinfo != IP_CT_ESTABLISHED &&
ctinfo != IP_CT_ESTABLISHED_REPLY) ||
!test_bit(IPS_ASSURED_BIT, &ct->status) ||
ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
return;
tcp = true;
break;
case IPPROTO_UDP:
if (!nf_ct_is_confirmed(ct))
return;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
bidirectional = false;
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE: {
struct nf_conntrack_tuple *tuple;
if (ct->status & IPS_NAT_MASK)
if ((ctinfo != IP_CT_ESTABLISHED &&
ctinfo != IP_CT_ESTABLISHED_REPLY) ||
!test_bit(IPS_ASSURED_BIT, &ct->status) ||
ct->status & IPS_NAT_MASK)
return;
tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* No support for GRE v1 */
if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
@ -442,7 +456,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
ct->status & IPS_SEQ_ADJUST)
return;
tcf_ct_flow_table_add(ct_ft, ct, tcp);
tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
}
static bool
@ -621,13 +635,30 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
ct = flow->ct;
if (dir == FLOW_OFFLOAD_DIR_REPLY &&
!test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
/* Only offload reply direction after connection became
* assured.
*/
if (test_bit(IPS_ASSURED_BIT, &ct->status))
set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
/* If flow_table flow has already been updated to the
* established state, then don't refresh.
*/
return false;
}
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
flow_offload_teardown(flow);
return false;
}
ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
IP_CT_ESTABLISHED_REPLY;
if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
IP_CT_ESTABLISHED : IP_CT_NEW;
else
ctinfo = IP_CT_ESTABLISHED_REPLY;
flow_offload_refresh(nf_ft, flow);
nf_conntrack_get(&ct->ct_general);