netfilter: x_tables: don't block BH while reading counters
Using "iptables -L" with a lot of rules causes too high a BH latency. Jesper mentioned ~6 ms and was worried about frame drops. Switch to a per_cpu seqlock scheme, so that taking a snapshot of the counters doesn't need to block BH (for this cpu, but also for other cpus). This adds two increments of the seqlock sequence per ipt_do_table() call; it's a reasonable cost for allowing "iptables -L" to not block BH processing. Reported-by: Jesper Dangaard Brouer <hawk@comx.dk> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Patrick McHardy <kaber@trash.net> Acked-by: Stephen Hemminger <shemminger@vyatta.com> Acked-by: Jesper Dangaard Brouer <hawk@comx.dk> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
parent
45b9f509b7
commit
83723d6071
|
@ -472,7 +472,7 @@ extern void xt_free_table_info(struct xt_table_info *info);
|
|||
* necessary for reading the counters.
|
||||
*/
|
||||
struct xt_info_lock {
|
||||
spinlock_t lock;
|
||||
seqlock_t lock;
|
||||
unsigned char readers;
|
||||
};
|
||||
DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
|
||||
|
@ -497,7 +497,7 @@ static inline void xt_info_rdlock_bh(void)
|
|||
local_bh_disable();
|
||||
lock = &__get_cpu_var(xt_info_locks);
|
||||
if (likely(!lock->readers++))
|
||||
spin_lock(&lock->lock);
|
||||
write_seqlock(&lock->lock);
|
||||
}
|
||||
|
||||
static inline void xt_info_rdunlock_bh(void)
|
||||
|
@ -505,7 +505,7 @@ static inline void xt_info_rdunlock_bh(void)
|
|||
struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
|
||||
|
||||
if (likely(!--lock->readers))
|
||||
spin_unlock(&lock->lock);
|
||||
write_sequnlock(&lock->lock);
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
|
@ -516,12 +516,12 @@ static inline void xt_info_rdunlock_bh(void)
|
|||
*/
|
||||
static inline void xt_info_wrlock(unsigned int cpu)
|
||||
{
|
||||
spin_lock(&per_cpu(xt_info_locks, cpu).lock);
|
||||
write_seqlock(&per_cpu(xt_info_locks, cpu).lock);
|
||||
}
|
||||
|
||||
static inline void xt_info_wrunlock(unsigned int cpu)
|
||||
{
|
||||
spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
|
||||
write_sequnlock(&per_cpu(xt_info_locks, cpu).lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t,
|
|||
struct arpt_entry *iter;
|
||||
unsigned int cpu;
|
||||
unsigned int i;
|
||||
unsigned int curcpu = get_cpu();
|
||||
|
||||
/* Instead of clearing (by a previous call to memset())
|
||||
* the counters and using adds, we set the counters
|
||||
* with data used by 'current' CPU
|
||||
*
|
||||
* Bottom half has to be disabled to prevent deadlock
|
||||
* if new softirq were to run and call ipt_do_table
|
||||
*/
|
||||
local_bh_disable();
|
||||
i = 0;
|
||||
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
|
||||
SET_COUNTER(counters[i], iter->counters.bcnt,
|
||||
iter->counters.pcnt);
|
||||
++i;
|
||||
}
|
||||
local_bh_enable();
|
||||
/* Processing counters from other cpus, we can let bottom half enabled,
|
||||
* (preemption is disabled)
|
||||
*/
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu == curcpu)
|
||||
continue;
|
||||
seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
|
||||
|
||||
i = 0;
|
||||
local_bh_disable();
|
||||
xt_info_wrlock(cpu);
|
||||
xt_entry_foreach(iter, t->entries[cpu], t->size) {
|
||||
ADD_COUNTER(counters[i], iter->counters.bcnt,
|
||||
iter->counters.pcnt);
|
||||
u64 bcnt, pcnt;
|
||||
unsigned int start;
|
||||
|
||||
do {
|
||||
start = read_seqbegin(lock);
|
||||
bcnt = iter->counters.bcnt;
|
||||
pcnt = iter->counters.pcnt;
|
||||
} while (read_seqretry(lock, start));
|
||||
|
||||
ADD_COUNTER(counters[i], bcnt, pcnt);
|
||||
++i;
|
||||
}
|
||||
xt_info_wrunlock(cpu);
|
||||
local_bh_enable();
|
||||
}
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
static struct xt_counters *alloc_counters(const struct xt_table *table)
|
||||
|
@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
|
|||
* about).
|
||||
*/
|
||||
countersize = sizeof(struct xt_counters) * private->number;
|
||||
counters = vmalloc(countersize);
|
||||
counters = vzalloc(countersize);
|
||||
|
||||
if (counters == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name,
|
|||
struct arpt_entry *iter;
|
||||
|
||||
ret = 0;
|
||||
counters = vmalloc(num_counters * sizeof(struct xt_counters));
|
||||
counters = vzalloc(num_counters * sizeof(struct xt_counters));
|
||||
if (!counters) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
|
|
@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
|
|||
struct ipt_entry *iter;
|
||||
unsigned int cpu;
|
||||
unsigned int i;
|
||||
unsigned int curcpu = get_cpu();
|
||||
|
||||
/* Instead of clearing (by a previous call to memset())
|
||||
* the counters and using adds, we set the counters
|
||||
* with data used by 'current' CPU.
|
||||
*
|
||||
* Bottom half has to be disabled to prevent deadlock
|
||||
* if new softirq were to run and call ipt_do_table
|
||||
*/
|
||||
local_bh_disable();
|
||||
i = 0;
|
||||
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
|
||||
SET_COUNTER(counters[i], iter->counters.bcnt,
|
||||
iter->counters.pcnt);
|
||||
++i;
|
||||
}
|
||||
local_bh_enable();
|
||||
/* Processing counters from other cpus, we can let bottom half enabled,
|
||||
* (preemption is disabled)
|
||||
*/
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu == curcpu)
|
||||
continue;
|
||||
seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
|
||||
|
||||
i = 0;
|
||||
local_bh_disable();
|
||||
xt_info_wrlock(cpu);
|
||||
xt_entry_foreach(iter, t->entries[cpu], t->size) {
|
||||
ADD_COUNTER(counters[i], iter->counters.bcnt,
|
||||
iter->counters.pcnt);
|
||||
u64 bcnt, pcnt;
|
||||
unsigned int start;
|
||||
|
||||
do {
|
||||
start = read_seqbegin(lock);
|
||||
bcnt = iter->counters.bcnt;
|
||||
pcnt = iter->counters.pcnt;
|
||||
} while (read_seqretry(lock, start));
|
||||
|
||||
ADD_COUNTER(counters[i], bcnt, pcnt);
|
||||
++i; /* macro does multi eval of i */
|
||||
}
|
||||
xt_info_wrunlock(cpu);
|
||||
local_bh_enable();
|
||||
}
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
static struct xt_counters *alloc_counters(const struct xt_table *table)
|
||||
|
@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
|
|||
(other than comefrom, which userspace doesn't care
|
||||
about). */
|
||||
countersize = sizeof(struct xt_counters) * private->number;
|
||||
counters = vmalloc(countersize);
|
||||
counters = vzalloc(countersize);
|
||||
|
||||
if (counters == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
|
|||
struct ipt_entry *iter;
|
||||
|
||||
ret = 0;
|
||||
counters = vmalloc(num_counters * sizeof(struct xt_counters));
|
||||
counters = vzalloc(num_counters * sizeof(struct xt_counters));
|
||||
if (!counters) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
|
|
@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
|
|||
struct ip6t_entry *iter;
|
||||
unsigned int cpu;
|
||||
unsigned int i;
|
||||
unsigned int curcpu = get_cpu();
|
||||
|
||||
/* Instead of clearing (by a previous call to memset())
|
||||
* the counters and using adds, we set the counters
|
||||
* with data used by 'current' CPU
|
||||
*
|
||||
* Bottom half has to be disabled to prevent deadlock
|
||||
* if new softirq were to run and call ipt_do_table
|
||||
*/
|
||||
local_bh_disable();
|
||||
i = 0;
|
||||
xt_entry_foreach(iter, t->entries[curcpu], t->size) {
|
||||
SET_COUNTER(counters[i], iter->counters.bcnt,
|
||||
iter->counters.pcnt);
|
||||
++i;
|
||||
}
|
||||
local_bh_enable();
|
||||
/* Processing counters from other cpus, we can let bottom half enabled,
|
||||
* (preemption is disabled)
|
||||
*/
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu == curcpu)
|
||||
continue;
|
||||
seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
|
||||
|
||||
i = 0;
|
||||
local_bh_disable();
|
||||
xt_info_wrlock(cpu);
|
||||
xt_entry_foreach(iter, t->entries[cpu], t->size) {
|
||||
ADD_COUNTER(counters[i], iter->counters.bcnt,
|
||||
iter->counters.pcnt);
|
||||
u64 bcnt, pcnt;
|
||||
unsigned int start;
|
||||
|
||||
do {
|
||||
start = read_seqbegin(lock);
|
||||
bcnt = iter->counters.bcnt;
|
||||
pcnt = iter->counters.pcnt;
|
||||
} while (read_seqretry(lock, start));
|
||||
|
||||
ADD_COUNTER(counters[i], bcnt, pcnt);
|
||||
++i;
|
||||
}
|
||||
xt_info_wrunlock(cpu);
|
||||
local_bh_enable();
|
||||
}
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
static struct xt_counters *alloc_counters(const struct xt_table *table)
|
||||
|
@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
|
|||
(other than comefrom, which userspace doesn't care
|
||||
about). */
|
||||
countersize = sizeof(struct xt_counters) * private->number;
|
||||
counters = vmalloc(countersize);
|
||||
counters = vzalloc(countersize);
|
||||
|
||||
if (counters == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
|
|||
struct ip6t_entry *iter;
|
||||
|
||||
ret = 0;
|
||||
counters = vmalloc(num_counters * sizeof(struct xt_counters));
|
||||
counters = vzalloc(num_counters * sizeof(struct xt_counters));
|
||||
if (!counters) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
|
|
@ -1325,7 +1325,8 @@ static int __init xt_init(void)
|
|||
|
||||
for_each_possible_cpu(i) {
|
||||
struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
|
||||
spin_lock_init(&lock->lock);
|
||||
|
||||
seqlock_init(&lock->lock);
|
||||
lock->readers = 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue