netfilter: nf_conncount: speculative garbage collection on empty lists
Instead of removing an empty list node that might be reintroduced soon
thereafter, tentatively place the empty list node on the list passed to
tree_nodes_free(), then re-check whether the list is still empty before
erasing it from the tree.
[ Florian: rebase on top of pending nf_conncount fixes ]
Fixes: 5c789e131c ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
commit c80f10bc97
parent 2f971a8f42
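A note on the pattern the patch applies: an emptiness test made before
taking the owner's lock is only a hint, so the decision to delete must be
re-validated under that lock. Below is a minimal user-space sketch of this
two-phase scheme, assuming POSIX threads; struct node, collect_candidates()
and free_candidates() are illustrative names, not kernel API.

/* Sketch of speculative collection: nominate nodes that look empty,
 * then re-check under each node's lock before actually erasing it.
 * Illustrative only; not the kernel implementation.
 */
#include <pthread.h>
#include <stdio.h>

struct node {
	pthread_mutex_t lock;
	unsigned int count;	/* entries attached to this node */
};

/* Phase 1: scan without the node locks; emptiness here is speculative. */
static unsigned int collect_candidates(struct node *nodes, unsigned int n,
				       struct node **cand, unsigned int max)
{
	unsigned int i, found = 0;

	for (i = 0; i < n && found < max; i++)
		if (nodes[i].count == 0)
			cand[found++] = &nodes[i];
	return found;
}

/* Phase 2: re-check each candidate under its own lock before erasing. */
static void free_candidates(struct node **cand, unsigned int n_cand)
{
	while (n_cand) {
		struct node *node = cand[--n_cand];

		pthread_mutex_lock(&node->lock);
		if (node->count == 0)	/* still empty: safe to erase */
			printf("erase %p\n", (void *)node);
		else			/* re-populated meanwhile: keep */
			printf("keep %p\n", (void *)node);
		pthread_mutex_unlock(&node->lock);
	}
}

int main(void)
{
	struct node nodes[3] = {
		{ PTHREAD_MUTEX_INITIALIZER, 0 },
		{ PTHREAD_MUTEX_INITIALIZER, 2 },
		{ PTHREAD_MUTEX_INITIALIZER, 0 },
	};
	struct node *cand[3];
	unsigned int n = collect_candidates(nodes, 3, cand, 3);

	nodes[2].count = 1;	/* simulate an insertion racing with gc */
	free_candidates(cand, n);
	return 0;
}

In the patch below, phase two corresponds to the new
"if (!rbconn->list.count)" check that tree_nodes_free() performs under
list.list_lock before rb_erase(); that re-check is what makes it safe to
merely nominate empty nodes during the scan.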
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -9,7 +9,6 @@ struct nf_conncount_list {
 	spinlock_t list_lock;
 	struct list_head head;	/* connections with the same filtering key */
 	unsigned int count;	/* length of list */
-	bool dead;
 };
 
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -81,27 +81,20 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
 	return memcmp(a, b, klen * sizeof(u32));
 }
 
-static bool conn_free(struct nf_conncount_list *list,
+static void conn_free(struct nf_conncount_list *list,
 		      struct nf_conncount_tuple *conn)
 {
-	bool free_entry = false;
-
 	lockdep_assert_held(&list->list_lock);
 
 	list->count--;
 	list_del(&conn->node);
-	if (list->count == 0) {
-		list->dead = true;
-		free_entry = true;
-	}
 
 	kmem_cache_free(conncount_conn_cachep, conn);
-	return free_entry;
 }
 
 static const struct nf_conntrack_tuple_hash *
 find_or_evict(struct net *net, struct nf_conncount_list *list,
-	      struct nf_conncount_tuple *conn, bool *free_entry)
+	      struct nf_conncount_tuple *conn)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	unsigned long a, b;
@@ -121,7 +114,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
 	 */
 	age = a - b;
 	if (conn->cpu == cpu || age >= 2) {
-		*free_entry = conn_free(list, conn);
+		conn_free(list, conn);
 		return ERR_PTR(-ENOENT);
 	}
 
@@ -137,14 +130,13 @@ static int __nf_conncount_add(struct net *net,
 	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
 	unsigned int collect = 0;
-	bool free_entry = false;
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
 		if (collect > CONNCOUNT_GC_MAX_NODES)
 			break;
 
-		found = find_or_evict(net, list, conn, &free_entry);
+		found = find_or_evict(net, list, conn);
 		if (IS_ERR(found)) {
 			/* Not found, but might be about to be confirmed */
 			if (PTR_ERR(found) == -EAGAIN) {
@@ -221,7 +213,6 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
 	spin_lock_init(&list->list_lock);
 	INIT_LIST_HEAD(&list->head);
 	list->count = 0;
-	list->dead = false;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
 
@@ -233,7 +224,6 @@ bool nf_conncount_gc_list(struct net *net,
 	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
 	unsigned int collected = 0;
-	bool free_entry = false;
 	bool ret = false;
 
 	/* don't bother if other cpu is already doing GC */
@@ -241,15 +231,10 @@ bool nf_conncount_gc_list(struct net *net,
 		return false;
 
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
-		found = find_or_evict(net, list, conn, &free_entry);
+		found = find_or_evict(net, list, conn);
 		if (IS_ERR(found)) {
-			if (PTR_ERR(found) == -ENOENT) {
-				if (free_entry) {
-					spin_unlock(&list->list_lock);
-					return true;
-				}
+			if (PTR_ERR(found) == -ENOENT)
 				collected++;
-			}
 			continue;
 		}
 
@@ -260,10 +245,7 @@ bool nf_conncount_gc_list(struct net *net,
 		 * closed already -> ditch it
 		 */
 		nf_ct_put(found_ct);
-		if (conn_free(list, conn)) {
-			spin_unlock(&list->list_lock);
-			return true;
-		}
+		conn_free(list, conn);
 		collected++;
 		continue;
 	}
@@ -273,10 +255,8 @@ bool nf_conncount_gc_list(struct net *net,
 			break;
 	}
 
-	if (!list->count) {
-		list->dead = true;
+	if (!list->count)
 		ret = true;
-	}
 	spin_unlock(&list->list_lock);
 
 	return ret;
@@ -291,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h)
 	kmem_cache_free(conncount_rb_cachep, rbconn);
 }
 
+/* caller must hold tree nf_conncount_locks[] lock */
 static void tree_nodes_free(struct rb_root *root,
 			    struct nf_conncount_rb *gc_nodes[],
 			    unsigned int gc_count)
@@ -300,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root,
 	while (gc_count) {
 		rbconn = gc_nodes[--gc_count];
 		spin_lock(&rbconn->list.list_lock);
-		rb_erase(&rbconn->node, root);
-		call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+		if (!rbconn->list.count) {
+			rb_erase(&rbconn->node, root);
+			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+		}
 		spin_unlock(&rbconn->list.list_lock);
 	}
 }
@@ -318,7 +301,6 @@ insert_tree(struct net *net,
 	    struct rb_root *root,
 	    unsigned int hash,
 	    const u32 *key,
-	    u8 keylen,
 	    const struct nf_conntrack_tuple *tuple,
 	    const struct nf_conntrack_zone *zone)
 {
@@ -327,6 +309,7 @@ insert_tree(struct net *net,
 	struct nf_conncount_rb *rbconn;
 	struct nf_conncount_tuple *conn;
 	unsigned int count = 0, gc_count = 0;
+	u8 keylen = data->keylen;
 	bool do_gc = true;
 
 	spin_lock_bh(&nf_conncount_locks[hash]);
@@ -454,7 +437,7 @@ count_tree(struct net *net,
 	if (!tuple)
 		return 0;
 
-	return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+	return insert_tree(net, data, root, hash, key, tuple, zone);
 }
 
 static void tree_gc_worker(struct work_struct *work)