net: move inet_dport/inet_num in sock_common

commit 68835aba4d (net: optimize INET input path further)
moved some fields used for tcp/udp sockets lookup in the first cache
line of struct sock_common.

This patch moves inet_dport/inet_num as well, filling a 32-bit hole
on 64-bit arches and reducing the number of cache line misses in lookups.

Also change INET_MATCH()/INET_TW_MATCH() to perform the ports match
before the addresses match, as this check is more discriminating.

Remove the hash check from MATCH() macros because we don't need to
revalidate the hash value after taking a refcount on the socket, and
use likely/unlikely compiler hints, as the sk_hash/hash check
makes the following conditional tests 100% predicted by the CPU.

Introduce skc_addrpair/skc_portpair pair values to better
document the alignment requirements of the port/addr pairs
used in the various MATCH() macros, and remove some casts.

The namespace check can also be done last.

This slightly improves TCP/UDP lookup times.

IP/TCP early demux needs inet->rx_dst_ifindex and
TCP needs inet->min_ttl, so let's group them together in the same cache line.

With help from Ben Hutchings & Joe Perches.

The idea for this patch came after Ling Ma's proposal to move skc_hash
to the beginning of struct sock_common, and it should allow him
to submit a final version of his patch. My tests show an improvement
doing so.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Joe Perches <joe@perches.com>
Cc: Ling Ma <ling.ma.program@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2012-11-30 09:49:27 +00:00 committed by David S. Miller
parent b02a80674e
commit ce43b03e88
7 changed files with 114 additions and 67 deletions

View File

@ -364,20 +364,22 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
#define inet_v6_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0
#endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* IS_ENABLED(CONFIG_IPV6) */
#define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\ #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ ((inet_sk(__sk)->inet_portpair == (__ports)) && \
((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \
((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) ((__sk)->sk_bound_dev_if == (__dif))) && \
net_eq(sock_net(__sk), (__net)))
#define INET6_TW_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif) \ #define INET6_TW_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ ((inet_twsk(__sk)->tw_portpair == (__ports)) && \
(*((__portpair *)&(inet_twsk(__sk)->tw_dport)) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \
((__sk)->sk_family == PF_INET6) && \ ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr)) && \
(ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))) && \ ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr)) && \
(ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr))) && \ (!(__sk)->sk_bound_dev_if || \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) ((__sk)->sk_bound_dev_if == (__dif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* _IPV6_H */ #endif /* _IPV6_H */

View File

@ -299,30 +299,34 @@ typedef __u64 __bitwise __addrpair;
(((__force __u64)(__be32)(__daddr)) << 32) | \ (((__force __u64)(__be32)(__daddr)) << 32) | \
((__force __u64)(__be32)(__saddr))); ((__force __u64)(__be32)(__saddr)));
#endif /* __BIG_ENDIAN */ #endif /* __BIG_ENDIAN */
#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ ((inet_sk(__sk)->inet_portpair == (__ports)) && \
((*((__addrpair *)&(inet_sk(__sk)->inet_daddr))) == (__cookie)) && \ (inet_sk(__sk)->inet_addrpair == (__cookie)) && \
((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \ (!(__sk)->sk_bound_dev_if || \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) ((__sk)->sk_bound_dev_if == (__dif))) && \
#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ net_eq(sock_net(__sk), (__net)))
(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ #define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\
((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((inet_twsk(__sk)->tw_portpair == (__ports)) && \
((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (inet_twsk(__sk)->tw_addrpair == (__cookie)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) (!(__sk)->sk_bound_dev_if || \
((__sk)->sk_bound_dev_if == (__dif))) && \
net_eq(sock_net(__sk), (__net)))
#else /* 32-bit arch */ #else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ ((inet_sk(__sk)->inet_portpair == (__ports)) && \
(inet_sk(__sk)->inet_daddr == (__saddr)) && \ (inet_sk(__sk)->inet_daddr == (__saddr)) && \
(inet_sk(__sk)->inet_rcv_saddr == (__daddr)) && \ (inet_sk(__sk)->inet_rcv_saddr == (__daddr)) && \
((*((__portpair *)&(inet_sk(__sk)->inet_dport))) == (__ports)) && \ (!(__sk)->sk_bound_dev_if || \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) ((__sk)->sk_bound_dev_if == (__dif))) && \
#define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ net_eq(sock_net(__sk), (__net)))
(((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ #define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
(inet_twsk(__sk)->tw_daddr == (__saddr)) && \ ((inet_twsk(__sk)->tw_portpair == (__ports)) && \
(inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \
((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) (!(__sk)->sk_bound_dev_if || \
((__sk)->sk_bound_dev_if == (__dif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */ #endif /* 64-bit arch */
/* /*

View File

@ -144,9 +144,11 @@ struct inet_sock {
/* Socket demultiplex comparisons on incoming packets. */ /* Socket demultiplex comparisons on incoming packets. */
#define inet_daddr sk.__sk_common.skc_daddr #define inet_daddr sk.__sk_common.skc_daddr
#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
#define inet_addrpair sk.__sk_common.skc_addrpair
#define inet_dport sk.__sk_common.skc_dport
#define inet_num sk.__sk_common.skc_num
#define inet_portpair sk.__sk_common.skc_portpair
__be16 inet_dport;
__u16 inet_num;
__be32 inet_saddr; __be32 inet_saddr;
__s16 uc_ttl; __s16 uc_ttl;
__u16 cmsg_flags; __u16 cmsg_flags;
@ -154,6 +156,7 @@ struct inet_sock {
__u16 inet_id; __u16 inet_id;
struct ip_options_rcu __rcu *inet_opt; struct ip_options_rcu __rcu *inet_opt;
int rx_dst_ifindex;
__u8 tos; __u8 tos;
__u8 min_ttl; __u8 min_ttl;
__u8 mc_ttl; __u8 mc_ttl;
@ -170,7 +173,6 @@ struct inet_sock {
int uc_index; int uc_index;
int mc_index; int mc_index;
__be32 mc_addr; __be32 mc_addr;
int rx_dst_ifindex;
struct ip_mc_socklist __rcu *mc_list; struct ip_mc_socklist __rcu *mc_list;
struct inet_cork_full cork; struct inet_cork_full cork;
}; };

View File

@ -112,6 +112,11 @@ struct inet_timewait_sock {
#define tw_net __tw_common.skc_net #define tw_net __tw_common.skc_net
#define tw_daddr __tw_common.skc_daddr #define tw_daddr __tw_common.skc_daddr
#define tw_rcv_saddr __tw_common.skc_rcv_saddr #define tw_rcv_saddr __tw_common.skc_rcv_saddr
#define tw_addrpair __tw_common.skc_addrpair
#define tw_dport __tw_common.skc_dport
#define tw_num __tw_common.skc_num
#define tw_portpair __tw_common.skc_portpair
int tw_timeout; int tw_timeout;
volatile unsigned char tw_substate; volatile unsigned char tw_substate;
unsigned char tw_rcv_wscale; unsigned char tw_rcv_wscale;
@ -119,8 +124,6 @@ struct inet_timewait_sock {
/* Socket demultiplex comparisons on incoming packets. */ /* Socket demultiplex comparisons on incoming packets. */
/* these three are in inet_sock */ /* these three are in inet_sock */
__be16 tw_sport; __be16 tw_sport;
__be16 tw_dport;
__u16 tw_num;
kmemcheck_bitfield_begin(flags); kmemcheck_bitfield_begin(flags);
/* And these are ours. */ /* And these are ours. */
unsigned int tw_ipv6only : 1, unsigned int tw_ipv6only : 1,

View File

@ -132,6 +132,8 @@ struct net;
* @skc_rcv_saddr: Bound local IPv4 addr * @skc_rcv_saddr: Bound local IPv4 addr
* @skc_hash: hash value used with various protocol lookup tables * @skc_hash: hash value used with various protocol lookup tables
* @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_u16hashes: two u16 hash values used by UDP lookup tables
* @skc_dport: placeholder for inet_dport/tw_dport
* @skc_num: placeholder for inet_num/tw_num
* @skc_family: network address family * @skc_family: network address family
* @skc_state: Connection state * @skc_state: Connection state
* @skc_reuse: %SO_REUSEADDR setting * @skc_reuse: %SO_REUSEADDR setting
@ -149,16 +151,29 @@ struct net;
* for struct sock and struct inet_timewait_sock. * for struct sock and struct inet_timewait_sock.
*/ */
struct sock_common { struct sock_common {
/* skc_daddr and skc_rcv_saddr must be grouped : /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned
* cf INET_MATCH() and INET_TW_MATCH() * address on 64bit arches : cf INET_MATCH() and INET_TW_MATCH()
*/ */
__be32 skc_daddr; union {
__be32 skc_rcv_saddr; unsigned long skc_addrpair;
struct {
__be32 skc_daddr;
__be32 skc_rcv_saddr;
};
};
union { union {
unsigned int skc_hash; unsigned int skc_hash;
__u16 skc_u16hashes[2]; __u16 skc_u16hashes[2];
}; };
/* skc_dport && skc_num must be grouped as well */
union {
u32 skc_portpair;
struct {
__be16 skc_dport;
__u16 skc_num;
};
};
unsigned short skc_family; unsigned short skc_family;
volatile unsigned char skc_state; volatile unsigned char skc_state;
unsigned char skc_reuse; unsigned char skc_reuse;

View File

@ -237,12 +237,14 @@ struct sock *__inet_lookup_established(struct net *net,
rcu_read_lock(); rcu_read_lock();
begin: begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) { sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (INET_MATCH(sk, net, hash, acookie, if (sk->sk_hash != hash)
saddr, daddr, ports, dif)) { continue;
if (likely(INET_MATCH(sk, net, acookie,
saddr, daddr, ports, dif))) {
if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
goto begintw; goto begintw;
if (unlikely(!INET_MATCH(sk, net, hash, acookie, if (unlikely(!INET_MATCH(sk, net, acookie,
saddr, daddr, ports, dif))) { saddr, daddr, ports, dif))) {
sock_put(sk); sock_put(sk);
goto begin; goto begin;
} }
@ -260,14 +262,18 @@ begin:
begintw: begintw:
/* Must check for a TIME_WAIT'er before going to listener hash. */ /* Must check for a TIME_WAIT'er before going to listener hash. */
sk_nulls_for_each_rcu(sk, node, &head->twchain) { sk_nulls_for_each_rcu(sk, node, &head->twchain) {
if (INET_TW_MATCH(sk, net, hash, acookie, if (sk->sk_hash != hash)
saddr, daddr, ports, dif)) { continue;
if (likely(INET_TW_MATCH(sk, net, acookie,
saddr, daddr, ports,
dif))) {
if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
sk = NULL; sk = NULL;
goto out; goto out;
} }
if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, if (unlikely(!INET_TW_MATCH(sk, net, acookie,
saddr, daddr, ports, dif))) { saddr, daddr, ports,
dif))) {
sock_put(sk); sock_put(sk);
goto begintw; goto begintw;
} }
@ -314,10 +320,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
/* Check TIME-WAIT sockets first. */ /* Check TIME-WAIT sockets first. */
sk_nulls_for_each(sk2, node, &head->twchain) { sk_nulls_for_each(sk2, node, &head->twchain) {
tw = inet_twsk(sk2); if (sk2->sk_hash != hash)
continue;
if (INET_TW_MATCH(sk2, net, hash, acookie, if (likely(INET_TW_MATCH(sk2, net, acookie,
saddr, daddr, ports, dif)) { saddr, daddr, ports, dif))) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp)) if (twsk_unique(sk, sk2, twp))
goto unique; goto unique;
else else
@ -328,8 +336,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
/* And established part... */ /* And established part... */
sk_nulls_for_each(sk2, node, &head->chain) { sk_nulls_for_each(sk2, node, &head->chain) {
if (INET_MATCH(sk2, net, hash, acookie, if (sk2->sk_hash != hash)
saddr, daddr, ports, dif)) continue;
if (likely(INET_MATCH(sk2, net, acookie,
saddr, daddr, ports, dif)))
goto not_unique; goto not_unique;
} }

View File

@ -87,11 +87,13 @@ struct sock *__inet6_lookup_established(struct net *net,
rcu_read_lock(); rcu_read_lock();
begin: begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) { sk_nulls_for_each_rcu(sk, node, &head->chain) {
/* For IPV6 do the cheaper port and family tests first. */ if (sk->sk_hash != hash)
if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { continue;
if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
goto begintw; goto begintw;
if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
ports, dif))) {
sock_put(sk); sock_put(sk);
goto begin; goto begin;
} }
@ -104,12 +106,16 @@ begin:
begintw: begintw:
/* Must check for a TIME_WAIT'er before going to listener hash. */ /* Must check for a TIME_WAIT'er before going to listener hash. */
sk_nulls_for_each_rcu(sk, node, &head->twchain) { sk_nulls_for_each_rcu(sk, node, &head->twchain) {
if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { if (sk->sk_hash != hash)
continue;
if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
ports, dif))) {
if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
sk = NULL; sk = NULL;
goto out; goto out;
} }
if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
ports, dif))) {
sock_put(sk); sock_put(sk);
goto begintw; goto begintw;
} }
@ -236,9 +242,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
/* Check TIME-WAIT sockets first. */ /* Check TIME-WAIT sockets first. */
sk_nulls_for_each(sk2, node, &head->twchain) { sk_nulls_for_each(sk2, node, &head->twchain) {
tw = inet_twsk(sk2); if (sk2->sk_hash != hash)
continue;
if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
ports, dif))) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp)) if (twsk_unique(sk, sk2, twp))
goto unique; goto unique;
else else
@ -249,7 +258,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
/* And established part... */ /* And established part... */
sk_nulls_for_each(sk2, node, &head->chain) { sk_nulls_for_each(sk2, node, &head->chain) {
if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) if (sk2->sk_hash != hash)
continue;
if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
goto not_unique; goto not_unique;
} }