udp: optimize bind(0) if many ports are in use
commit 9088c56095
(udp: Improve port randomization) introduced a regression for the UDP bind() syscall
to a null port (getting a random port) when a lot of ports are already in use.
This is because we do about 28000 scans of very long chains (220 sockets per chain),
with many spin_lock_bh()/spin_unlock_bh() calls.
Fix this using a bitmap (64 bytes for the current value of UDP_HTABLE_SIZE)
so that we scan chains at most once.
Instead of 250 ms per bind() call, we get a time of 2.9 ms after the patch.
Based on a report from Vitaly Mayatskikh
Reported-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Tested-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
8527bec548
commit
98322f22ec
|
@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
|
||||||
atomic_t udp_memory_allocated;
|
atomic_t udp_memory_allocated;
|
||||||
EXPORT_SYMBOL(udp_memory_allocated);
|
EXPORT_SYMBOL(udp_memory_allocated);
|
||||||
|
|
||||||
|
#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
|
||||||
|
|
||||||
static int udp_lib_lport_inuse(struct net *net, __u16 num,
|
static int udp_lib_lport_inuse(struct net *net, __u16 num,
|
||||||
const struct udp_hslot *hslot,
|
const struct udp_hslot *hslot,
|
||||||
|
unsigned long *bitmap,
|
||||||
struct sock *sk,
|
struct sock *sk,
|
||||||
int (*saddr_comp)(const struct sock *sk1,
|
int (*saddr_comp)(const struct sock *sk1,
|
||||||
const struct sock *sk2))
|
const struct sock *sk2))
|
||||||
|
@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
|
||||||
sk_nulls_for_each(sk2, node, &hslot->head)
|
sk_nulls_for_each(sk2, node, &hslot->head)
|
||||||
if (net_eq(sock_net(sk2), net) &&
|
if (net_eq(sock_net(sk2), net) &&
|
||||||
sk2 != sk &&
|
sk2 != sk &&
|
||||||
sk2->sk_hash == num &&
|
(bitmap || sk2->sk_hash == num) &&
|
||||||
(!sk2->sk_reuse || !sk->sk_reuse) &&
|
(!sk2->sk_reuse || !sk->sk_reuse) &&
|
||||||
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
|
(!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
|
||||||
|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
|
|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
|
||||||
(*saddr_comp)(sk, sk2))
|
(*saddr_comp)(sk, sk2)) {
|
||||||
return 1;
|
if (bitmap)
|
||||||
|
__set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
|
||||||
|
bitmap);
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
|
||||||
if (!snum) {
|
if (!snum) {
|
||||||
int low, high, remaining;
|
int low, high, remaining;
|
||||||
unsigned rand;
|
unsigned rand;
|
||||||
unsigned short first;
|
unsigned short first, last;
|
||||||
|
DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
|
||||||
|
|
||||||
inet_get_local_port_range(&low, &high);
|
inet_get_local_port_range(&low, &high);
|
||||||
remaining = (high - low) + 1;
|
remaining = (high - low) + 1;
|
||||||
|
|
||||||
rand = net_random();
|
rand = net_random();
|
||||||
snum = first = rand % remaining + low;
|
first = (((u64)rand * remaining) >> 32) + low;
|
||||||
rand |= 1;
|
/*
|
||||||
for (;;) {
|
* force rand to be an odd multiple of UDP_HTABLE_SIZE
|
||||||
hslot = &udptable->hash[udp_hashfn(net, snum)];
|
*/
|
||||||
|
rand = (rand | 1) * UDP_HTABLE_SIZE;
|
||||||
|
for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
|
||||||
|
hslot = &udptable->hash[udp_hashfn(net, first)];
|
||||||
|
bitmap_zero(bitmap, PORTS_PER_CHAIN);
|
||||||
spin_lock_bh(&hslot->lock);
|
spin_lock_bh(&hslot->lock);
|
||||||
if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
|
udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
|
||||||
break;
|
saddr_comp);
|
||||||
spin_unlock_bh(&hslot->lock);
|
|
||||||
|
snum = first;
|
||||||
|
/*
|
||||||
|
* Iterate on all possible values of snum for this hash.
|
||||||
|
* Using steps of an odd multiple of UDP_HTABLE_SIZE
|
||||||
|
* give us randomization and full range coverage.
|
||||||
|
*/
|
||||||
do {
|
do {
|
||||||
snum = snum + rand;
|
if (low <= snum && snum <= high &&
|
||||||
} while (snum < low || snum > high);
|
!test_bit(snum / UDP_HTABLE_SIZE, bitmap))
|
||||||
if (snum == first)
|
goto found;
|
||||||
goto fail;
|
snum += rand;
|
||||||
|
} while (snum != first);
|
||||||
|
spin_unlock_bh(&hslot->lock);
|
||||||
}
|
}
|
||||||
|
goto fail;
|
||||||
} else {
|
} else {
|
||||||
hslot = &udptable->hash[udp_hashfn(net, snum)];
|
hslot = &udptable->hash[udp_hashfn(net, snum)];
|
||||||
spin_lock_bh(&hslot->lock);
|
spin_lock_bh(&hslot->lock);
|
||||||
if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
|
if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
|
||||||
goto fail_unlock;
|
goto fail_unlock;
|
||||||
}
|
}
|
||||||
|
found:
|
||||||
inet_sk(sk)->num = snum;
|
inet_sk(sk)->num = snum;
|
||||||
sk->sk_hash = snum;
|
sk->sk_hash = snum;
|
||||||
if (sk_unhashed(sk)) {
|
if (sk_unhashed(sk)) {
|
||||||
|
|
Loading…
Reference in New Issue