netfilter: conntrack: spinlock per cpu to protect special lists.
Use one spinlock per cpu to protect the dying/unconfirmed/template special lists (these lists are now per cpu, a bit like the untracked ct). Add a @cpu field to struct nf_conn so that we hold the appropriate per-cpu spinlock at removal time.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
commit b7779d06f9
parent b476b72a0f
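The idea, restated: instead of a single global nf_conntrack_lock guarding shared unconfirmed/dying/template lists, every possible cpu gets a ct_pcpu holding its own spinlock and its own copies of those lists, and each conntrack records in ct->cpu which per-cpu list it sits on, so the removal path can take the matching lock. Below is a minimal, self-contained userspace sketch of that pattern, not the kernel code: pthread spinlocks stand in for spinlock_t, a two-pointer intrusive list stands in for hlist_nulls, and the names conn_demo, ct_pcpu_demo, NR_CPUS_DEMO, add_to_unconfirmed and del_from_unconfirmed are invented for the example.

/*
 * Minimal userspace sketch (not kernel code) of the per-cpu list pattern:
 * each "cpu" owns a lock plus its own list, an object records which cpu's
 * list it was added to, and removal uses that recorded index to take the
 * matching lock.  All names here are illustrative only.
 */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS_DEMO 4

struct conn_demo {
	struct conn_demo *next, **pprev;	/* intrusive list linkage */
	unsigned int cpu;			/* which per-cpu list holds us */
};

struct ct_pcpu_demo {
	pthread_spinlock_t lock;		/* protects the list below */
	struct conn_demo *unconfirmed;		/* head of "unconfirmed" list */
};

static struct ct_pcpu_demo pcpu_lists[NR_CPUS_DEMO];

/* Add @ct to the per-cpu unconfirmed list of @cpu and remember the cpu. */
static void add_to_unconfirmed(struct conn_demo *ct, unsigned int cpu)
{
	struct ct_pcpu_demo *pcpu = &pcpu_lists[cpu];

	ct->cpu = cpu;
	pthread_spin_lock(&pcpu->lock);
	ct->next = pcpu->unconfirmed;
	ct->pprev = &pcpu->unconfirmed;
	if (ct->next)
		ct->next->pprev = &ct->next;
	pcpu->unconfirmed = ct;
	pthread_spin_unlock(&pcpu->lock);
}

/* Remove @ct using the cpu recorded at insertion time. */
static void del_from_unconfirmed(struct conn_demo *ct)
{
	struct ct_pcpu_demo *pcpu = &pcpu_lists[ct->cpu];

	pthread_spin_lock(&pcpu->lock);
	*ct->pprev = ct->next;
	if (ct->next)
		ct->next->pprev = ct->pprev;
	pthread_spin_unlock(&pcpu->lock);
}

int main(void)
{
	struct conn_demo a = { 0 }, b = { 0 };
	int i;

	for (i = 0; i < NR_CPUS_DEMO; i++)
		pthread_spin_init(&pcpu_lists[i].lock, PTHREAD_PROCESS_PRIVATE);

	add_to_unconfirmed(&a, 1);
	add_to_unconfirmed(&b, 3);
	del_from_unconfirmed(&a);	/* takes pcpu_lists[1].lock, not a global one */
	del_from_unconfirmed(&b);

	printf("cpu 1 list empty: %d, cpu 3 list empty: %d\n",
	       pcpu_lists[1].unconfirmed == NULL, pcpu_lists[3].unconfirmed == NULL);
	return 0;
}

Build with something like gcc -pthread demo.c; the only point of the sketch is that removal never needs a global lock, because the object remembers which per-cpu bucket's lock protects it.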
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -82,7 +82,8 @@ struct nf_conn {
 	 */
 	struct nf_conntrack ct_general;
 
-	spinlock_t lock;
+	spinlock_t	lock;
+	u16		cpu;
 
 	/* XXX should I move this to the tail ? - Y.K */
 	/* These are my tuples; original and reply */
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -62,6 +62,13 @@ struct nf_ip_net {
 #endif
 };
 
+struct ct_pcpu {
+	spinlock_t		lock;
+	struct hlist_nulls_head unconfirmed;
+	struct hlist_nulls_head dying;
+	struct hlist_nulls_head tmpl;
+};
+
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
@@ -86,9 +93,7 @@ struct netns_ct {
 	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
-	struct hlist_nulls_head	unconfirmed;
-	struct hlist_nulls_head	dying;
-	struct hlist_nulls_head	tmpl;
+	struct ct_pcpu __percpu *pcpu_lists;
 	struct ip_conntrack_stat __percpu *stat;
 	struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
 	struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -192,6 +192,50 @@ clean_from_lists(struct nf_conn *ct)
 	nf_ct_remove_expectations(ct);
 }
 
+/* must be called with local_bh_disable */
+static void nf_ct_add_to_dying_list(struct nf_conn *ct)
+{
+	struct ct_pcpu *pcpu;
+
+	/* add this conntrack to the (per cpu) dying list */
+	ct->cpu = smp_processor_id();
+	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+	spin_lock(&pcpu->lock);
+	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+			     &pcpu->dying);
+	spin_unlock(&pcpu->lock);
+}
+
+/* must be called with local_bh_disable */
+static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
+{
+	struct ct_pcpu *pcpu;
+
+	/* add this conntrack to the (per cpu) unconfirmed list */
+	ct->cpu = smp_processor_id();
+	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+	spin_lock(&pcpu->lock);
+	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+			     &pcpu->unconfirmed);
+	spin_unlock(&pcpu->lock);
+}
+
+/* must be called with local_bh_disable */
+static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
+{
+	struct ct_pcpu *pcpu;
+
+	/* We overload first tuple to link into unconfirmed or dying list.*/
+	pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
+
+	spin_lock(&pcpu->lock);
+	BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
+	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+	spin_unlock(&pcpu->lock);
+}
+
 static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
@@ -220,9 +264,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	 * too. */
 	nf_ct_remove_expectations(ct);
 
-	/* We overload first tuple to link into unconfirmed or dying list.*/
-	BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
-	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+	nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
 	NF_CT_STAT_INC(net, delete);
 	spin_unlock_bh(&nf_conntrack_lock);
@@ -244,9 +286,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 	 * Otherwise we can get spurious warnings. */
 	NF_CT_STAT_INC(net, delete_list);
 	clean_from_lists(ct);
-	/* add this conntrack to the dying list */
-	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-			     &net->ct.dying);
+	nf_ct_add_to_dying_list(ct);
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 
@@ -467,15 +507,22 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 /* deletion from this larval template list happens via nf_ct_put() */
 void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
 {
+	struct ct_pcpu *pcpu;
+
 	__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
 	__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
 	nf_conntrack_get(&tmpl->ct_general);
 
-	spin_lock_bh(&nf_conntrack_lock);
+	/* add this conntrack to the (per cpu) tmpl list */
+	local_bh_disable();
+	tmpl->cpu = smp_processor_id();
+	pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
+
+	spin_lock(&pcpu->lock);
 	/* Overload tuple linked list to put us in template list. */
 	hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-				 &net->ct.tmpl);
-	spin_unlock_bh(&nf_conntrack_lock);
+				 &pcpu->tmpl);
+	spin_unlock_bh(&pcpu->lock);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
 
@@ -546,8 +593,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
 		goto out;
 
-	/* Remove from unconfirmed list */
-	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+	nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
 	/* Timer relative to confirmation time, not original
 	   setting time, otherwise we'd get timer wrap in
@@ -879,10 +925,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 
 	/* Now it is inserted into the unconfirmed list, bump refcount */
 	nf_conntrack_get(&ct->ct_general);
-
-	/* Overload tuple linked list to put us in unconfirmed list. */
-	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-			 &net->ct.unconfirmed);
+	nf_ct_add_to_unconfirmed_list(ct);
 
 	spin_unlock_bh(&nf_conntrack_lock);
 
@@ -1254,6 +1297,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	struct hlist_nulls_node *n;
+	int cpu;
 
 	spin_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < net->ct.htable_size; (*bucket)++) {
@@ -1265,12 +1309,19 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 				goto found;
 		}
 	}
-	hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
-		ct = nf_ct_tuplehash_to_ctrack(h);
-		if (iter(ct, data))
-			set_bit(IPS_DYING_BIT, &ct->status);
-	}
 	spin_unlock_bh(&nf_conntrack_lock);
+
+	for_each_possible_cpu(cpu) {
+		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+		spin_lock_bh(&pcpu->lock);
+		hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (iter(ct, data))
+				set_bit(IPS_DYING_BIT, &ct->status);
+		}
+		spin_unlock_bh(&pcpu->lock);
+	}
 	return NULL;
 found:
 	atomic_inc(&ct->ct_general.use);
@@ -1323,14 +1374,19 @@ static void nf_ct_release_dying_list(struct net *net)
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	struct hlist_nulls_node *n;
+	int cpu;
 
-	spin_lock_bh(&nf_conntrack_lock);
-	hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
-		ct = nf_ct_tuplehash_to_ctrack(h);
-		/* never fails to remove them, no listeners at this point */
-		nf_ct_kill(ct);
+	for_each_possible_cpu(cpu) {
+		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+		spin_lock_bh(&pcpu->lock);
+		hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			/* never fails to remove them, no listeners at this point */
+			nf_ct_kill(ct);
+		}
+		spin_unlock_bh(&pcpu->lock);
 	}
-	spin_unlock_bh(&nf_conntrack_lock);
 }
 
 static int untrack_refs(void)
@@ -1417,6 +1473,7 @@ i_see_dead_people:
 		kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 		kfree(net->ct.slabname);
 		free_percpu(net->ct.stat);
+		free_percpu(net->ct.pcpu_lists);
 	}
 }
 
@@ -1629,37 +1686,43 @@ void nf_conntrack_init_end(void)
 
 int nf_conntrack_init_net(struct net *net)
 {
-	int ret;
+	int ret = -ENOMEM;
+	int cpu;
 
 	atomic_set(&net->ct.count, 0);
-	INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
-	INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
-	INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
-	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
-	if (!net->ct.stat) {
-		ret = -ENOMEM;
+
+	net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
+	if (!net->ct.pcpu_lists)
 		goto err_stat;
-	}
+
+	for_each_possible_cpu(cpu) {
+		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+		spin_lock_init(&pcpu->lock);
+		INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
+		INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
+		INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
+	}
+
+	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
+	if (!net->ct.stat)
+		goto err_pcpu_lists;
 
 	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
-	if (!net->ct.slabname) {
-		ret = -ENOMEM;
+	if (!net->ct.slabname)
 		goto err_slabname;
-	}
 
 	net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
 							sizeof(struct nf_conn), 0,
 							SLAB_DESTROY_BY_RCU, NULL);
 	if (!net->ct.nf_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-		ret = -ENOMEM;
 		goto err_cache;
 	}
 
 	net->ct.htable_size = nf_conntrack_htable_size;
 	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
 	if (!net->ct.hash) {
-		ret = -ENOMEM;
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
 		goto err_hash;
 	}
@@ -1701,6 +1764,8 @@ err_cache:
 	kfree(net->ct.slabname);
 err_slabname:
 	free_percpu(net->ct.stat);
+err_pcpu_lists:
+	free_percpu(net->ct.pcpu_lists);
 err_stat:
 	return ret;
 }
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -396,6 +396,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	const struct hlist_node *next;
 	const struct hlist_nulls_node *nn;
 	unsigned int i;
+	int cpu;
 
 	/* Get rid of expectations */
 	for (i = 0; i < nf_ct_expect_hsize; i++) {
@@ -414,8 +415,14 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
-		unhelp(h, me);
+	for_each_possible_cpu(cpu) {
+		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+		spin_lock_bh(&pcpu->lock);
+		hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
+			unhelp(h, me);
+		spin_unlock_bh(&pcpu->lock);
+	}
 	for (i = 0; i < net->ct.htable_size; i++) {
 		hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 			unhelp(h, me);
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1137,50 +1137,65 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
 }
 
 static int
-ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb,
-		    struct hlist_nulls_head *list)
+ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
 {
-	struct nf_conn *ct, *last;
+	struct nf_conn *ct, *last = NULL;
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
 	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 	u_int8_t l3proto = nfmsg->nfgen_family;
 	int res;
+	int cpu;
+	struct hlist_nulls_head *list;
+	struct net *net = sock_net(skb->sk);
 
 	if (cb->args[2])
 		return 0;
 
-	spin_lock_bh(&nf_conntrack_lock);
-	last = (struct nf_conn *)cb->args[1];
-restart:
-	hlist_nulls_for_each_entry(h, n, list, hnnode) {
-		ct = nf_ct_tuplehash_to_ctrack(h);
-		if (l3proto && nf_ct_l3num(ct) != l3proto)
-			continue;
-		if (cb->args[1]) {
-			if (ct != last)
-				continue;
-			cb->args[1] = 0;
-		}
-		rcu_read_lock();
-		res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
-					  cb->nlh->nlmsg_seq,
-					  NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
-					  ct);
-		rcu_read_unlock();
-		if (res < 0) {
-			nf_conntrack_get(&ct->ct_general);
-			cb->args[1] = (unsigned long)ct;
-			goto out;
-		}
-	}
-	if (cb->args[1]) {
-		cb->args[1] = 0;
-		goto restart;
-	} else
-		cb->args[2] = 1;
+	if (cb->args[0] == nr_cpu_ids)
+		return 0;
+
+	for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+		struct ct_pcpu *pcpu;
+
+		if (!cpu_possible(cpu))
+			continue;
+
+		pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+		spin_lock_bh(&pcpu->lock);
+		last = (struct nf_conn *)cb->args[1];
+		list = dying ? &pcpu->dying : &pcpu->unconfirmed;
+restart:
+		hlist_nulls_for_each_entry(h, n, list, hnnode) {
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (l3proto && nf_ct_l3num(ct) != l3proto)
+				continue;
+			if (cb->args[1]) {
+				if (ct != last)
+					continue;
+				cb->args[1] = 0;
+			}
+			rcu_read_lock();
+			res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+						  cb->nlh->nlmsg_seq,
+						  NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+						  ct);
+			rcu_read_unlock();
+			if (res < 0) {
+				nf_conntrack_get(&ct->ct_general);
+				cb->args[1] = (unsigned long)ct;
+				spin_unlock_bh(&pcpu->lock);
+				goto out;
+			}
+		}
+		if (cb->args[1]) {
+			cb->args[1] = 0;
+			goto restart;
+		} else
+			cb->args[2] = 1;
+		spin_unlock_bh(&pcpu->lock);
+	}
 out:
-	spin_unlock_bh(&nf_conntrack_lock);
 	if (last)
 		nf_ct_put(last);
 
@@ -1190,9 +1205,7 @@ out:
 static int
 ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = sock_net(skb->sk);
-
-	return ctnetlink_dump_list(skb, cb, &net->ct.dying);
+	return ctnetlink_dump_list(skb, cb, true);
 }
 
 static int
@@ -1214,9 +1227,7 @@ ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
 static int
 ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = sock_net(skb->sk);
-
-	return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed);
+	return ctnetlink_dump_list(skb, cb, false);
 }
 
 static int