nf pull request 2023-08-16

-----BEGIN PGP SIGNATURE-----
 
 iQJBBAABCAArFiEEgKkgxbID4Gn1hq6fcJGo2a1f9gAFAmTb9twNHGZ3QHN0cmxl
 bi5kZQAKCRBwkajZrV/2ANr7D/wN/XTDG3JxLl4VUtmSOBrD6y6QwpqfUYBD11Ev
 eAXpP69wxh2J9gPqtVsPAJwbc0F2eca38ziyJ9+4hmNWNBc3Hh1oXj/9e0IqdPUP
 9AEHu73jLmeb6bN0RU8guxmipwZq/a4Q6y/OYPhf+c0uULYEWdH6AAHs3WGRCWHI
 a0gedUU3ChKDueObHWfaZSqGuMVKS+eCfT57oKc/l2J2b2064JAksKMDglsDmsA/
 VQw8Ko+l1PO7t8mOswPufGYyg5tKUXpq8AJ3Dlg2l2Qzws29FEIfzLLbu89GHlvP
 FYSjgOuVdwVra/Kt7jQzUxeGrBXZC8MuaGnEOi/tMXidw0uc+N1y2Bg2N6eFkmxW
 AN5e4p0S3ddfbGaEVoDx5aS6kKTDCQAvgeaM+KIExmbMJQ77FEqnq/WqyDd/eHJL
 5Su1nPoIGzswSzcYC6eh6AEnesx01OdKCZtMfF1LxutopMLItsBe3UxPRglJfvvX
 XxuosrZe1aIOgCSQGTeP6DqpTVEOsvNThhxapKt1AeW+YfnzGwZD0hU6vOq9ZSHS
 K+wRWfhipS5nt8zTv8SbM2DgOmD0pPcsiueAXNQUW7gUUXleLOvP8tden91M/37C
 eGMuZqdWRAjJtU0q4QvhiLSvlI7Awh2dsr0Vgh4k6b1oyuk3UrwKJeoZW07g9MR4
 VLRIUg==
 =5ePi
 -----END PGP SIGNATURE-----

Merge tag 'nf-23-08-16' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florisn Westphal says:

====================
These are netfilter fixes for the *net* tree.

First patch resolves a false-positive lockdep splat:
rcu_dereference is used outside of rcu read lock.  Let lockdep
validate that the transaction mutex is locked.

Second patch fixes a kdoc warning added in previous PR.

Third patch fixes a memory leak:
The catchall element isn't disabled correctly, this allows
userspace to deactivate the element again. This results in refcount
underflow which in turn prevents memory release. This was always
broken since the feature was added in 5.13.

Patch 4 fixes an incorrect change in the previous pull request:
Adding a duplicate key to a set should work if the duplicate key
has expired, restore this behaviour. All from myself.

Patch #5 resolves an old historic artifact in sctp conntrack:
a 300ms timeout for shutdown_ack. Increase this to 3s.  From Xin Long.

Patch #6 fixes a sysctl data race in ipvs, two threads can clobber the
sysctl value, from Sishuai Gong. This is a day-0 bug that predates git
history.

Patches 7, 8 and 9, from Pablo Neira Ayuso, are also followups
for the previous GC rework in nf_tables: The netlink notifier and the
netns exit path must both increment the gc worker seqcount, else worker
may encounter stale (free'd) pointers.
================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2023-08-16 11:11:24 +01:00
commit de4c5efeec
7 changed files with 69 additions and 31 deletions

View File

@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds)
Default is set to (hb_interval * path_max_retrans + rto_max)
nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
default 0.3
default 3
nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
default 0.3
default 3
nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
default 3

View File

@ -534,6 +534,7 @@ struct nft_set_elem_expr {
* @expr: stateful expression
* @ops: set ops
* @flags: set flags
* @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length

View File

@ -1876,6 +1876,7 @@ static int
proc_do_sync_threshold(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val[2];
int rc;
@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
.mode = table->mode,
};
mutex_lock(&ipvs->sync_mutex);
memcpy(val, valp, sizeof(val));
rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write) {
@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
else
memcpy(valp, val, sizeof(val));
}
mutex_unlock(&ipvs->sync_mutex);
return rc;
}
@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
tbl[idx].extra2 = ipvs;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;

View File

@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
[SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
};
@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},

View File

@ -7091,6 +7091,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
ret = __nft_set_catchall_flush(ctx, set, &elem);
if (ret < 0)
break;
nft_set_elem_change_active(ctx->net, set, ext);
}
return ret;
@ -9480,9 +9481,14 @@ struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
if (!trans)
return NULL;
trans->net = maybe_get_net(net);
if (!trans->net) {
kfree(trans);
return NULL;
}
refcount_inc(&set->refs);
trans->set = set;
trans->net = get_net(net);
trans->seq = gc_seq;
return trans;
@ -9738,6 +9744,22 @@ static void nft_set_commit_update(struct list_head *set_update_list)
}
}
static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
{
unsigned int gc_seq;
/* Bump gc counter, it becomes odd, this is the busy mark. */
gc_seq = READ_ONCE(nft_net->gc_seq);
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
return gc_seq;
}
static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
{
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
}
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@ -9823,9 +9845,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
WRITE_ONCE(nft_net->base_seq, base_seq);
/* Bump gc counter, it becomes odd, this is the busy mark. */
gc_seq = READ_ONCE(nft_net->gc_seq);
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
gc_seq = nft_gc_seq_begin(nft_net);
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@ -10038,7 +10058,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
nft_gc_seq_end(nft_net, gc_seq);
nf_tables_commit_release(net);
return 0;
@ -11039,6 +11059,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
struct net *net = n->net;
unsigned int deleted;
bool restart = false;
unsigned int gc_seq;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
@ -11046,6 +11067,9 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
nft_net = nft_pernet(net);
deleted = 0;
mutex_lock(&nft_net->commit_mutex);
gc_seq = nft_gc_seq_begin(nft_net);
if (!list_empty(&nf_tables_destroy_list))
rcu_barrier();
again:
@ -11068,6 +11092,8 @@ again:
if (restart)
goto again;
}
nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
return NOTIFY_DONE;
@ -11105,12 +11131,20 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
unsigned int gc_seq;
mutex_lock(&nft_net->commit_mutex);
gc_seq = nft_gc_seq_begin(nft_net);
if (!list_empty(&nft_net->commit_list) ||
!list_empty(&nft_net->module_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
__nft_release_tables(net);
nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));

View File

@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
if (set->flags & NFT_SET_OBJECT)
return -EOPNOTSUPP;
if (set->ops->update == NULL)
return -EOPNOTSUPP;

View File

@ -566,6 +566,8 @@ next_match:
goto out;
if (last) {
if (nft_set_elem_expired(&f->mt[b].e->ext))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
@ -600,17 +602,8 @@ out:
static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
struct nft_pipapo_elem *ret;
ret = pipapo_get(net, set, (const u8 *)elem->key.val.data,
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
nft_genmask_cur(net));
if (IS_ERR(ret))
return ret;
if (nft_set_elem_expired(&ret->ext))
return ERR_PTR(-ENOENT);
return ret;
}
/**
@ -1549,7 +1542,7 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
* @set: nftables API set representation
* @_set: nftables API set representation
* @m: Matching data
*/
static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
@ -1697,6 +1690,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}
static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
{
#ifdef CONFIG_PROVE_LOCKING
const struct net *net = read_pnet(&set->net);
return lockdep_is_held(&nft_pernet(net)->commit_mutex);
#else
return true;
#endif
}
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
@ -1705,7 +1709,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
if (!priv->dirty)
return;
m = rcu_dereference(priv->match);
m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
new_clone = pipapo_clone(m);
if (IS_ERR(new_clone))
@ -1732,11 +1736,7 @@ static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_pipapo_elem *e;
e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
if (IS_ERR(e))
return;
struct nft_pipapo_elem *e = elem->priv;
nft_set_elem_change_active(net, set, &e->ext);
}
@ -1950,10 +1950,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
data = (const u8 *)nft_set_ext_key(&e->ext);
e = pipapo_get(net, set, data, 0);
if (IS_ERR(e))
return;
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *match_start, *match_end;