2005-08-12 20:19:38 +08:00
|
|
|
/*
|
|
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
|
|
* operating system. INET is implemented using the BSD Socket
|
|
|
|
* interface as the means of communication with the user level.
|
|
|
|
*
|
|
|
|
* Authors: Lotsa people, from code originally in tcp
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _INET6_HASHTABLES_H
|
|
|
|
#define _INET6_HASHTABLES_H
|
|
|
|
|
2005-08-12 20:26:18 +08:00
|
|
|
|
|
|
|
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
|
|
|
|
#include <linux/in6.h>
|
|
|
|
#include <linux/ipv6.h>
|
2005-08-12 20:19:38 +08:00
|
|
|
#include <linux/types.h>
|
2007-03-24 02:40:27 +08:00
|
|
|
#include <linux/jhash.h>
|
|
|
|
|
|
|
|
#include <net/inet_sock.h>
|
2005-08-12 20:19:38 +08:00
|
|
|
|
2005-08-12 20:26:18 +08:00
|
|
|
#include <net/ipv6.h>
|
|
|
|
|
2005-08-12 20:19:38 +08:00
|
|
|
struct inet_hashinfo;
|
|
|
|
|
2005-08-12 20:26:18 +08:00
|
|
|
/* I have no idea if this is a good hash for v6 or not. -DaveM */
|
[INET]: speedup inet (tcp/dccp) lookups
Arnaldo and I agreed it could be applied now, because I have other
pending patches depending on this one (Thank you Arnaldo)
(The other important patch moves skc_refcnt in a separate cache line,
so that the SMP/NUMA performance doesnt suffer from cache line ping pongs)
1) First some performance data :
--------------------------------
tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established()
The most time critical code is :
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
The sk_for_each() does use prefetch() hints but only the begining of
"struct sock" is prefetched.
As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far
away from the begining of "struct sock", it has to bring into CPU
cache cold cache line. Each iteration has to use at least 2 cache
lines.
This can be problematic if some chains are very long.
2) The goal
-----------
The idea I had is to change things so that INET_MATCH() may return
FALSE in 99% of cases only using the data already in the CPU cache,
using one cache line per iteration.
3) Description of the patch
---------------------------
Adds a new 'unsigned int skc_hash' field in 'struct sock_common',
filling a 32 bits hole on 64 bits platform.
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse;
int skc_bound_dev_if;
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ unsigned int skc_hash;
struct proto *skc_prot;
};
Store in this 32 bits field the full hash, not masked by (ehash_size -
1) Using this full hash as the first comparison done in INET_MATCH
permits us immediatly skip the element without touching a second cache
line in case of a miss.
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to
sk_hash and tw_hash) already contains the slot number if we mask with
(ehash_size - 1)
File include/net/inet_hashtables.h
64 bits platforms :
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash))
((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
32bits platforms:
#define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
(inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- Adds a prefetch(head->chain.first) in
__inet_lookup_established()/__tcp_v4_check_established() and
__inet6_lookup_established()/__tcp_v6_check_established() and
__dccp_v4_check_established() to bring into cache the first element of the
list, before the {read|write}_lock(&head->lock);
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-10-04 05:13:38 +08:00
|
|
|
static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
|
2006-11-08 16:20:00 +08:00
|
|
|
const struct in6_addr *faddr, const __be16 fport)
|
2005-08-12 20:26:18 +08:00
|
|
|
{
|
2007-03-24 02:40:27 +08:00
|
|
|
u32 ports = (lport ^ (__force u16)fport);
|
2005-08-12 20:26:18 +08:00
|
|
|
|
2007-03-24 02:40:27 +08:00
|
|
|
return jhash_3words((__force u32)laddr->s6_addr32[3],
|
|
|
|
(__force u32)faddr->s6_addr32[3],
|
|
|
|
ports, inet_ehash_secret);
|
2005-08-12 20:26:18 +08:00
|
|
|
}
|
|
|
|
|
[INET]: speedup inet (tcp/dccp) lookups
Arnaldo and I agreed it could be applied now, because I have other
pending patches depending on this one (Thank you Arnaldo)
(The other important patch moves skc_refcnt in a separate cache line,
so that the SMP/NUMA performance doesnt suffer from cache line ping pongs)
1) First some performance data :
--------------------------------
tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established()
The most time critical code is :
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
The sk_for_each() does use prefetch() hints but only the begining of
"struct sock" is prefetched.
As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far
away from the begining of "struct sock", it has to bring into CPU
cache cold cache line. Each iteration has to use at least 2 cache
lines.
This can be problematic if some chains are very long.
2) The goal
-----------
The idea I had is to change things so that INET_MATCH() may return
FALSE in 99% of cases only using the data already in the CPU cache,
using one cache line per iteration.
3) Description of the patch
---------------------------
Adds a new 'unsigned int skc_hash' field in 'struct sock_common',
filling a 32 bits hole on 64 bits platform.
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse;
int skc_bound_dev_if;
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ unsigned int skc_hash;
struct proto *skc_prot;
};
Store in this 32 bits field the full hash, not masked by (ehash_size -
1) Using this full hash as the first comparison done in INET_MATCH
permits us immediatly skip the element without touching a second cache
line in case of a miss.
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to
sk_hash and tw_hash) already contains the slot number if we mask with
(ehash_size - 1)
File include/net/inet_hashtables.h
64 bits platforms :
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash))
((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
32bits platforms:
#define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
(inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- Adds a prefetch(head->chain.first) in
__inet_lookup_established()/__tcp_v4_check_established() and
__inet6_lookup_established()/__tcp_v6_check_established() and
__dccp_v4_check_established() to bring into cache the first element of the
list, before the {read|write}_lock(&head->lock);
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-10-04 05:13:38 +08:00
|
|
|
static inline int inet6_sk_ehashfn(const struct sock *sk)
|
2005-08-12 20:26:18 +08:00
|
|
|
{
|
|
|
|
const struct inet_sock *inet = inet_sk(sk);
|
|
|
|
const struct ipv6_pinfo *np = inet6_sk(sk);
|
|
|
|
const struct in6_addr *laddr = &np->rcv_saddr;
|
|
|
|
const struct in6_addr *faddr = &np->daddr;
|
|
|
|
const __u16 lport = inet->num;
|
2006-11-08 16:20:00 +08:00
|
|
|
const __be16 fport = inet->dport;
|
[INET]: speedup inet (tcp/dccp) lookups
Arnaldo and I agreed it could be applied now, because I have other
pending patches depending on this one (Thank you Arnaldo)
(The other important patch moves skc_refcnt in a separate cache line,
so that the SMP/NUMA performance doesnt suffer from cache line ping pongs)
1) First some performance data :
--------------------------------
tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established()
The most time critical code is :
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
The sk_for_each() does use prefetch() hints but only the begining of
"struct sock" is prefetched.
As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far
away from the begining of "struct sock", it has to bring into CPU
cache cold cache line. Each iteration has to use at least 2 cache
lines.
This can be problematic if some chains are very long.
2) The goal
-----------
The idea I had is to change things so that INET_MATCH() may return
FALSE in 99% of cases only using the data already in the CPU cache,
using one cache line per iteration.
3) Description of the patch
---------------------------
Adds a new 'unsigned int skc_hash' field in 'struct sock_common',
filling a 32 bits hole on 64 bits platform.
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse;
int skc_bound_dev_if;
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ unsigned int skc_hash;
struct proto *skc_prot;
};
Store in this 32 bits field the full hash, not masked by (ehash_size -
1) Using this full hash as the first comparison done in INET_MATCH
permits us immediatly skip the element without touching a second cache
line in case of a miss.
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to
sk_hash and tw_hash) already contains the slot number if we mask with
(ehash_size - 1)
File include/net/inet_hashtables.h
64 bits platforms :
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash))
((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
32bits platforms:
#define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
(inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- Adds a prefetch(head->chain.first) in
__inet_lookup_established()/__tcp_v4_check_established() and
__inet6_lookup_established()/__tcp_v6_check_established() and
__dccp_v4_check_established() to bring into cache the first element of the
list, before the {read|write}_lock(&head->lock);
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2005-10-04 05:13:38 +08:00
|
|
|
return inet6_ehashfn(laddr, lport, faddr, fport);
|
2005-08-12 20:26:18 +08:00
|
|
|
}
|
|
|
|
|
[SOCK] proto: Add hashinfo member to struct proto
This way we can remove TCP and DCCP specific versions of
sk->sk_prot->get_port: both v4 and v6 use inet_csk_get_port
sk->sk_prot->hash: inet_hash is directly used, only v6 need
a specific version to deal with mapped sockets
sk->sk_prot->unhash: both v4 and v6 use inet_hash directly
struct inet_connection_sock_af_ops also gets a new member, bind_conflict, so
that inet_csk_get_port can find the per family routine.
Now only the lookup routines receive as a parameter a struct inet_hashtable.
With this we further reuse code, reducing the difference among INET transport
protocols.
Eventually work has to be done on UDP and SCTP to make them share this
infrastructure and get as a bonus inet_diag interfaces so that iproute can be
used with these protocols.
net-2.6/net/ipv4/inet_hashtables.c:
struct proto | +8
struct inet_connection_sock_af_ops | +8
2 structs changed
__inet_hash_nolisten | +18
__inet_hash | -210
inet_put_port | +8
inet_bind_bucket_create | +1
__inet_hash_connect | -8
5 functions changed, 27 bytes added, 218 bytes removed, diff: -191
net-2.6/net/core/sock.c:
proto_seq_show | +3
1 function changed, 3 bytes added, diff: +3
net-2.6/net/ipv4/inet_connection_sock.c:
inet_csk_get_port | +15
1 function changed, 15 bytes added, diff: +15
net-2.6/net/ipv4/tcp.c:
tcp_set_state | -7
1 function changed, 7 bytes removed, diff: -7
net-2.6/net/ipv4/tcp_ipv4.c:
tcp_v4_get_port | -31
tcp_v4_hash | -48
tcp_v4_destroy_sock | -7
tcp_v4_syn_recv_sock | -2
tcp_unhash | -179
5 functions changed, 267 bytes removed, diff: -267
net-2.6/net/ipv6/inet6_hashtables.c:
__inet6_hash | +8
1 function changed, 8 bytes added, diff: +8
net-2.6/net/ipv4/inet_hashtables.c:
inet_unhash | +190
inet_hash | +242
2 functions changed, 432 bytes added, diff: +432
vmlinux:
16 functions changed, 485 bytes added, 492 bytes removed, diff: -7
/home/acme/git/net-2.6/net/ipv6/tcp_ipv6.c:
tcp_v6_get_port | -31
tcp_v6_hash | -7
tcp_v6_syn_recv_sock | -9
3 functions changed, 47 bytes removed, diff: -47
/home/acme/git/net-2.6/net/dccp/proto.c:
dccp_destroy_sock | -7
dccp_unhash | -179
dccp_hash | -49
dccp_set_state | -7
dccp_done | +1
5 functions changed, 1 bytes added, 242 bytes removed, diff: -241
/home/acme/git/net-2.6/net/dccp/ipv4.c:
dccp_v4_get_port | -31
dccp_v4_request_recv_sock | -2
2 functions changed, 33 bytes removed, diff: -33
/home/acme/git/net-2.6/net/dccp/ipv6.c:
dccp_v6_get_port | -31
dccp_v6_hash | -7
dccp_v6_request_recv_sock | +5
3 functions changed, 5 bytes added, 38 bytes removed, diff: -33
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-03 20:06:04 +08:00
|
|
|
extern void __inet6_hash(struct sock *sk);
|
2005-12-14 15:15:01 +08:00
|
|
|
|
2005-08-12 20:26:18 +08:00
|
|
|
/*
|
|
|
|
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
|
|
|
|
* we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
|
|
|
|
*
|
|
|
|
* The sockhash lock must be held as a reader here.
|
|
|
|
*/
|
2008-01-31 21:07:21 +08:00
|
|
|
extern struct sock *__inet6_lookup_established(struct net *net,
|
|
|
|
struct inet_hashinfo *hashinfo,
|
2005-08-12 20:26:18 +08:00
|
|
|
const struct in6_addr *saddr,
|
2006-11-08 16:20:00 +08:00
|
|
|
const __be16 sport,
|
2005-08-12 20:26:18 +08:00
|
|
|
const struct in6_addr *daddr,
|
|
|
|
const u16 hnum,
|
2006-04-10 13:48:59 +08:00
|
|
|
const int dif);
|
2005-08-12 20:26:18 +08:00
|
|
|
|
2008-01-31 21:07:21 +08:00
|
|
|
extern struct sock *inet6_lookup_listener(struct net *net,
|
|
|
|
struct inet_hashinfo *hashinfo,
|
2005-08-12 20:26:18 +08:00
|
|
|
const struct in6_addr *daddr,
|
|
|
|
const unsigned short hnum,
|
|
|
|
const int dif);
|
|
|
|
|
2008-01-31 21:07:21 +08:00
|
|
|
static inline struct sock *__inet6_lookup(struct net *net,
|
|
|
|
struct inet_hashinfo *hashinfo,
|
2005-08-12 20:26:18 +08:00
|
|
|
const struct in6_addr *saddr,
|
2006-11-08 16:20:00 +08:00
|
|
|
const __be16 sport,
|
2005-08-12 20:26:18 +08:00
|
|
|
const struct in6_addr *daddr,
|
|
|
|
const u16 hnum,
|
|
|
|
const int dif)
|
|
|
|
{
|
2008-01-31 21:07:21 +08:00
|
|
|
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
|
|
|
|
sport, daddr, hnum, dif);
|
2005-08-12 20:26:18 +08:00
|
|
|
if (sk)
|
|
|
|
return sk;
|
|
|
|
|
2008-01-31 21:07:21 +08:00
|
|
|
return inet6_lookup_listener(net, hashinfo, daddr, hnum, dif);
|
2005-08-12 20:26:18 +08:00
|
|
|
}
|
|
|
|
|
2008-01-31 21:07:21 +08:00
|
|
|
extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
|
2006-11-08 16:20:00 +08:00
|
|
|
const struct in6_addr *saddr, const __be16 sport,
|
|
|
|
const struct in6_addr *daddr, const __be16 dport,
|
2005-08-12 20:19:38 +08:00
|
|
|
const int dif);
|
2005-08-12 20:26:18 +08:00
|
|
|
#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
|
2005-08-12 20:19:38 +08:00
|
|
|
#endif /* _INET6_HASHTABLES_H */
|