IPVS: netns, connection hash got net as param.

The connection hash table is now namespace aware,
i.e. the net pointer >> 8 is XORed into the hash,
and this is the first param to be compared.
The net struct is 0xa40 in size (a little bit smaller on 32-bit archs)
and cache-line aligned, so a ptr >> 5 might be a more clever solution?

All lookups where net is compared use net_eq(), which returns 1 when netns
is disabled, and the compiler seems to do something clever in that case.

ip_vs_conn_fill_param() has *net as its first param now.

Three new inlines were added to keep the conn struct smaller
when name space support is disabled.
- ip_vs_conn_net()
- ip_vs_conn_net_set()
- ip_vs_conn_net_eq()

*v3
  moved net compare to the end in "fast path"

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
This commit is contained in:
Hans Schillstrom 2011-01-03 14:44:57 +01:00 committed by Simon Horman
parent b17fc9963f
commit 6e67e586e7
11 changed files with 153 additions and 83 deletions

View File

@ -477,6 +477,7 @@ extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
unsigned short proto); unsigned short proto);
struct ip_vs_conn_param { struct ip_vs_conn_param {
struct net *net;
const union nf_inet_addr *caddr; const union nf_inet_addr *caddr;
const union nf_inet_addr *vaddr; const union nf_inet_addr *vaddr;
__be16 cport; __be16 cport;
@ -494,17 +495,19 @@ struct ip_vs_conn_param {
*/ */
struct ip_vs_conn { struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */ struct list_head c_list; /* hashed list heads */
#ifdef CONFIG_NET_NS
struct net *net; /* Name space */
#endif
/* Protocol, addresses and port numbers */ /* Protocol, addresses and port numbers */
u16 af; /* address family */ u16 af; /* address family */
union nf_inet_addr caddr; /* client address */ __be16 cport;
union nf_inet_addr vaddr; /* virtual address */ __be16 vport;
union nf_inet_addr daddr; /* destination address */ __be16 dport;
volatile __u32 flags; /* status flags */ __u32 fwmark; /* Fire wall mark from skb */
__u32 fwmark; /* Fire wall mark from skb */ union nf_inet_addr caddr; /* client address */
__be16 cport; union nf_inet_addr vaddr; /* virtual address */
__be16 vport; union nf_inet_addr daddr; /* destination address */
__be16 dport; volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */ __u16 protocol; /* Which protocol (TCP/UDP) */
/* counter and timer */ /* counter and timer */
@ -547,6 +550,33 @@ struct ip_vs_conn {
__u8 pe_data_len; __u8 pe_data_len;
}; };
/*
* To save some memory in conn table when name space is disabled.
*/
/*
 * Return the network namespace a connection belongs to.
 *
 * With CONFIG_NET_NS the namespace stored in the connection is returned;
 * without it the connection carries no net pointer and &init_net is
 * returned instead.
 */
static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
{
#ifndef CONFIG_NET_NS
	return &init_net;
#else
	return cp->net;
#endif
}
/*
 * Record the network namespace of a connection.
 *
 * Only stores the pointer when CONFIG_NET_NS is enabled; otherwise the
 * connection has no net field and this is a no-op.
 */
static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
{
#if defined(CONFIG_NET_NS)
	cp->net = net;
#endif
}
/*
 * Test whether a connection belongs to the given network namespace.
 *
 * Returns non-zero when cp's stored namespace is the same pointer as net.
 * Without CONFIG_NET_NS nothing is compared and 1 is always returned.
 */
static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
				    struct net *net)
{
#ifndef CONFIG_NET_NS
	return 1;
#else
	return cp->net == net;
#endif
}
/* /*
* Extended internal versions of struct ip_vs_service_user and * Extended internal versions of struct ip_vs_service_user and
@ -796,13 +826,14 @@ enum {
IP_VS_DIR_LAST, IP_VS_DIR_LAST,
}; };
static inline void ip_vs_conn_fill_param(int af, int protocol, static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
const union nf_inet_addr *caddr, const union nf_inet_addr *caddr,
__be16 cport, __be16 cport,
const union nf_inet_addr *vaddr, const union nf_inet_addr *vaddr,
__be16 vport, __be16 vport,
struct ip_vs_conn_param *p) struct ip_vs_conn_param *p)
{ {
p->net = net;
p->af = af; p->af = af;
p->protocol = protocol; p->protocol = protocol;
p->caddr = caddr; p->caddr = caddr;

View File

@ -66,6 +66,8 @@ struct netns_ipvs {
struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
seqcount_t *ustats_seq; /* u64 read retry */ seqcount_t *ustats_seq; /* u64 read retry */
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
/* ip_vs_lblc */ /* ip_vs_lblc */
int sysctl_lblc_expiration; int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header; struct ctl_table_header *lblc_ctl_header;

View File

@ -64,9 +64,6 @@ static struct list_head *ip_vs_conn_tab __read_mostly;
/* SLAB cache for IPVS connections */ /* SLAB cache for IPVS connections */
static struct kmem_cache *ip_vs_conn_cachep __read_mostly; static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
/* counter for current IPVS connections */
static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
/* counter for no client port connections */ /* counter for no client port connections */
static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
@ -76,7 +73,7 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
/* /*
* Fine locking granularity for big connection hash table * Fine locking granularity for big connection hash table
*/ */
#define CT_LOCKARRAY_BITS 4 #define CT_LOCKARRAY_BITS 5
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
/* /*
* Returns hash value for IPVS connection entry * Returns hash value for IPVS connection entry
*/ */
static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
const union nf_inet_addr *addr, const union nf_inet_addr *addr,
__be16 port) __be16 port)
{ {
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) if (af == AF_INET6)
return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
(__force u32)port, proto, ip_vs_conn_rnd) (__force u32)port, proto, ip_vs_conn_rnd) ^
& ip_vs_conn_tab_mask; ((size_t)net>>8)) & ip_vs_conn_tab_mask;
#endif #endif
return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
ip_vs_conn_rnd) ip_vs_conn_rnd) ^
& ip_vs_conn_tab_mask; ((size_t)net>>8)) & ip_vs_conn_tab_mask;
} }
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@ -166,15 +163,15 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
port = p->vport; port = p->vport;
} }
return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
} }
static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
{ {
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
NULL, 0, &p); &cp->caddr, cp->cport, NULL, 0, &p);
if (cp->pe) { if (cp->pe) {
p.pe = cp->pe; p.pe = cp->pe;
@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
} }
/* /*
* Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
* returns bool success. * returns bool success.
*/ */
static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af && if (cp->af == p->af &&
p->cport == cp->cport && p->vport == cp->vport &&
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
p->cport == cp->cport && p->vport == cp->vport &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
p->protocol == cp->protocol) { p->protocol == cp->protocol &&
ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */ /* HIT */
atomic_inc(&cp->refcnt); atomic_inc(&cp->refcnt);
ct_read_unlock(hash); ct_read_unlock(hash);
@ -313,17 +311,18 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn_param *p) struct ip_vs_conn_param *p)
{ {
__be16 _ports[2], *pptr; __be16 _ports[2], *pptr;
struct net *net = skb_net(skb);
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL) if (pptr == NULL)
return 1; return 1;
if (likely(!inverse)) if (likely(!inverse))
ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
&iph->daddr, pptr[1], p); pptr[0], &iph->daddr, pptr[1], p);
else else
ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
&iph->saddr, pptr[0], p); pptr[1], &iph->saddr, pptr[0], p);
return 0; return 0;
} }
@ -352,6 +351,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
ct_read_lock(hash); ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (!ip_vs_conn_net_eq(cp, p->net))
continue;
if (p->pe_data && p->pe->ct_match) { if (p->pe_data && p->pe->ct_match) {
if (p->pe == cp->pe && p->pe->ct_match(p, cp)) if (p->pe == cp->pe && p->pe->ct_match(p, cp))
goto out; goto out;
@ -403,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af && if (cp->af == p->af &&
p->vport == cp->cport && p->cport == cp->dport &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
p->vport == cp->cport && p->cport == cp->dport && p->protocol == cp->protocol &&
p->protocol == cp->protocol) { ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */ /* HIT */
atomic_inc(&cp->refcnt); atomic_inc(&cp->refcnt);
ret = cp; ret = cp;
@ -609,8 +611,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest; struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) { if ((cp) && (!cp->dest)) {
dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport, dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
&cp->vaddr, cp->vport, cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark); cp->protocol, cp->fwmark);
ip_vs_bind_dest(cp, dest); ip_vs_bind_dest(cp, dest);
return dest; return dest;
@ -728,6 +730,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
static void ip_vs_conn_expire(unsigned long data) static void ip_vs_conn_expire(unsigned long data)
{ {
struct ip_vs_conn *cp = (struct ip_vs_conn *)data; struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
cp->timeout = 60*HZ; cp->timeout = 60*HZ;
@ -770,7 +773,7 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_unbind_dest(cp); ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT) if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt); atomic_dec(&ip_vs_conn_no_cport_cnt);
atomic_dec(&ip_vs_conn_count); atomic_dec(&ipvs->conn_count);
kmem_cache_free(ip_vs_conn_cachep, cp); kmem_cache_free(ip_vs_conn_cachep, cp);
return; return;
@ -804,7 +807,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_dest *dest, __u32 fwmark) struct ip_vs_dest *dest, __u32 fwmark)
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol); struct netns_ipvs *ipvs = net_ipvs(p->net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
p->protocol);
cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) { if (cp == NULL) {
@ -814,6 +819,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
INIT_LIST_HEAD(&cp->c_list); INIT_LIST_HEAD(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af; cp->af = p->af;
cp->protocol = p->protocol; cp->protocol = p->protocol;
ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@ -844,7 +850,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
atomic_set(&cp->n_control, 0); atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0); atomic_set(&cp->in_pkts, 0);
atomic_inc(&ip_vs_conn_count); atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT) if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt); atomic_inc(&ip_vs_conn_no_cport_cnt);
@ -886,17 +892,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
* /proc/net/ip_vs_conn entries * /proc/net/ip_vs_conn entries
*/ */
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
/*
 * Iterator state that the ip_vs_conn and ip_vs_conn_sync seq_file readers
 * keep in seq->private; both open routines pass its size to seq_open_net().
 */
struct ip_vs_iter_state {
/*
 * NOTE(review): presumably has to stay the first member, since
 * seq_open_net()/seq_file_net() look like they treat seq->private as a
 * struct seq_net_private - confirm before reordering.
 */
struct seq_net_private p;
/*
 * Bucket of ip_vs_conn_tab whose read lock the iterator currently holds,
 * or NULL when none is held; the stop callback uses it to find the lock
 * to release.
 */
struct list_head *l;
};
static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
{ {
int idx; int idx;
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_iter_state *iter = seq->private;
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
ct_read_lock_bh(idx); ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
if (pos-- == 0) { if (pos-- == 0) {
seq->private = &ip_vs_conn_tab[idx]; iter->l = &ip_vs_conn_tab[idx];
return cp; return cp;
} }
} }
@ -908,14 +919,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
{ {
seq->private = NULL; struct ip_vs_iter_state *iter = seq->private;
iter->l = NULL;
return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
} }
static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{ {
struct ip_vs_conn *cp = v; struct ip_vs_conn *cp = v;
struct list_head *e, *l = seq->private; struct ip_vs_iter_state *iter = seq->private;
struct list_head *e, *l = iter->l;
int idx; int idx;
++*pos; ++*pos;
@ -932,18 +946,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
while (++idx < ip_vs_conn_tab_size) { while (++idx < ip_vs_conn_tab_size) {
ct_read_lock_bh(idx); ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
seq->private = &ip_vs_conn_tab[idx]; iter->l = &ip_vs_conn_tab[idx];
return cp; return cp;
} }
ct_read_unlock_bh(idx); ct_read_unlock_bh(idx);
} }
seq->private = NULL; iter->l = NULL;
return NULL; return NULL;
} }
static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
{ {
struct list_head *l = seq->private; struct ip_vs_iter_state *iter = seq->private;
struct list_head *l = iter->l;
if (l) if (l)
ct_read_unlock_bh(l - ip_vs_conn_tab); ct_read_unlock_bh(l - ip_vs_conn_tab);
@ -957,9 +972,12 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
else { else {
const struct ip_vs_conn *cp = v; const struct ip_vs_conn *cp = v;
struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0; size_t len = 0;
if (!ip_vs_conn_net_eq(cp, net))
return 0;
if (cp->pe_data) { if (cp->pe_data) {
pe_data[0] = ' '; pe_data[0] = ' ';
len = strlen(cp->pe->name); len = strlen(cp->pe->name);
@ -1004,7 +1022,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
static int ip_vs_conn_open(struct inode *inode, struct file *file) static int ip_vs_conn_open(struct inode *inode, struct file *file)
{ {
return seq_open(file, &ip_vs_conn_seq_ops); return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
sizeof(struct ip_vs_iter_state));
} }
static const struct file_operations ip_vs_conn_fops = { static const struct file_operations ip_vs_conn_fops = {
@ -1031,6 +1050,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
else { else {
const struct ip_vs_conn *cp = v; const struct ip_vs_conn *cp = v;
struct net *net = seq_file_net(seq);
if (!ip_vs_conn_net_eq(cp, net))
return 0;
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6) if (cp->af == AF_INET6)
@ -1067,7 +1090,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
{ {
return seq_open(file, &ip_vs_conn_sync_seq_ops); return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
sizeof(struct ip_vs_iter_state));
} }
static const struct file_operations ip_vs_conn_sync_fops = { static const struct file_operations ip_vs_conn_sync_fops = {
@ -1168,10 +1192,11 @@ void ip_vs_random_dropentry(void)
/* /*
* Flush all the connection entries in the ip_vs_conn_tab * Flush all the connection entries in the ip_vs_conn_tab
*/ */
static void ip_vs_conn_flush(void) static void ip_vs_conn_flush(struct net *net)
{ {
int idx; int idx;
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
flush_again: flush_again:
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
@ -1181,7 +1206,8 @@ static void ip_vs_conn_flush(void)
ct_write_lock_bh(idx); ct_write_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
if (!ip_vs_conn_net_eq(cp, net))
continue;
IP_VS_DBG(4, "del connection\n"); IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp); ip_vs_conn_expire_now(cp);
if (cp->control) { if (cp->control) {
@ -1194,7 +1220,7 @@ static void ip_vs_conn_flush(void)
/* the counter may be not NULL, because maybe some conn entries /* the counter may be not NULL, because maybe some conn entries
are run by slow timer handler or unhashed but still referred */ are run by slow timer handler or unhashed but still referred */
if (atomic_read(&ip_vs_conn_count) != 0) { if (atomic_read(&ipvs->conn_count) != 0) {
schedule(); schedule();
goto flush_again; goto flush_again;
} }
@ -1204,8 +1230,11 @@ static void ip_vs_conn_flush(void)
*/ */
int __net_init __ip_vs_conn_init(struct net *net) int __net_init __ip_vs_conn_init(struct net *net)
{ {
struct netns_ipvs *ipvs = net_ipvs(net);
if (!net_eq(net, &init_net)) /* netns not enabled yet */ if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM; return -EPERM;
atomic_set(&ipvs->conn_count, 0);
proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
@ -1217,6 +1246,8 @@ static void __net_exit __ip_vs_conn_cleanup(struct net *net)
if (!net_eq(net, &init_net)) /* netns not enabled yet */ if (!net_eq(net, &init_net)) /* netns not enabled yet */
return; return;
/* flush all the connection entries first */
ip_vs_conn_flush(net);
proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, "ip_vs_conn");
proc_net_remove(net, "ip_vs_conn_sync"); proc_net_remove(net, "ip_vs_conn_sync");
} }
@ -1277,9 +1308,6 @@ int __init ip_vs_conn_init(void)
void ip_vs_conn_cleanup(void) void ip_vs_conn_cleanup(void)
{ {
unregister_pernet_subsys(&ipvs_conn_ops); unregister_pernet_subsys(&ipvs_conn_ops);
/* flush all the connection entries first */
ip_vs_conn_flush();
/* Release the empty cache */ /* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep); kmem_cache_destroy(ip_vs_conn_cachep);
vfree(ip_vs_conn_tab); vfree(ip_vs_conn_tab);

View File

@ -205,7 +205,8 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
const union nf_inet_addr *vaddr, __be16 vport, const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p) struct ip_vs_conn_param *p)
{ {
ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
vport, p);
p->pe = svc->pe; p->pe = svc->pe;
if (p->pe && p->pe->fill_param) if (p->pe && p->pe->fill_param)
return p->pe->fill_param(p, skb); return p->pe->fill_param(p, skb);
@ -348,8 +349,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/* /*
* Create a new connection according to the template * Create a new connection according to the template
*/ */
ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port, ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
&iph.daddr, dst_port, &param); src_port, &iph.daddr, dst_port, &param);
cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) { if (cp == NULL) {
@ -464,8 +465,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
*/ */
{ {
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
pptr[0], &iph.daddr, pptr[1], &p); ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
&iph.saddr, pptr[0], &iph.daddr, pptr[1],
&p);
cp = ip_vs_conn_new(&p, &dest->addr, cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1], dest->port ? dest->port : pptr[1],
flags, dest, skb->mark); flags, dest, skb->mark);
@ -532,7 +535,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{ {
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
ip_vs_conn_fill_param(svc->af, iph.protocol, ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
&iph.saddr, pptr[0], &iph.saddr, pptr[0],
&iph.daddr, pptr[1], &p); &iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0, cp = ip_vs_conn_new(&p, &daddr, 0,

View File

@ -198,13 +198,15 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/ */
{ {
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
ip_vs_conn_fill_param(AF_INET, iph->protocol, ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
&from, port, &cp->caddr, 0, &p); iph->protocol, &from, port,
&cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p); n_cp = ip_vs_conn_out_get(&p);
} }
if (!n_cp) { if (!n_cp) {
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, ip_vs_conn_fill_param(ip_vs_conn_net(cp),
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p); 0, &cp->vaddr, port, &p);
n_cp = ip_vs_conn_new(&p, &from, port, n_cp = ip_vs_conn_new(&p, &from, port,
IP_VS_CONN_F_NO_CPORT | IP_VS_CONN_F_NO_CPORT |
@ -361,9 +363,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
{ {
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
&cp->vaddr, htons(ntohs(cp->vport)-1), iph->protocol, &to, port, &cp->vaddr,
&p); htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p); n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) { if (!n_cp) {
n_cp = ip_vs_conn_new(&p, &cp->daddr, n_cp = ip_vs_conn_new(&p, &cp->daddr,

View File

@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
struct nf_conntrack_tuple *orig, new_reply; struct nf_conntrack_tuple *orig, new_reply;
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
struct net *net = nf_ct_net(ct);
if (exp->tuple.src.l3num != PF_INET) if (exp->tuple.src.l3num != PF_INET)
return; return;
@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
/* RS->CLIENT */ /* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port, &orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p); &orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p); cp = ip_vs_conn_out_get(&p);
@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n", " for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
&tuple);
if (h) { if (h) {
ct = nf_ct_tuplehash_to_ctrack(h); ct = nf_ct_tuplehash_to_ctrack(h);
/* Show what happens instead of calling nf_ct_kill() */ /* Show what happens instead of calling nf_ct_kill() */

View File

@ -41,15 +41,16 @@ struct isakmp_hdr {
#define PORT_ISAKMP 500 #define PORT_ISAKMP 500
static void static void
ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, ah_esp_conn_fill_param_proto(struct net *net, int af,
int inverse, struct ip_vs_conn_param *p) const struct ip_vs_iphdr *iph, int inverse,
struct ip_vs_conn_param *p)
{ {
if (likely(!inverse)) if (likely(!inverse))
ip_vs_conn_fill_param(af, IPPROTO_UDP, ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p); &iph->daddr, htons(PORT_ISAKMP), p);
else else
ip_vs_conn_fill_param(af, IPPROTO_UDP, ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p); &iph->saddr, htons(PORT_ISAKMP), p);
} }
@ -61,8 +62,9 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
struct net *net = skb_net(skb);
ah_esp_conn_fill_param_proto(af, iph, inverse, &p); ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_in_get(&p); cp = ip_vs_conn_in_get(&p);
if (!cp) { if (!cp) {
/* /*
@ -89,8 +91,9 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
{ {
struct ip_vs_conn *cp; struct ip_vs_conn *cp;
struct ip_vs_conn_param p; struct ip_vs_conn_param p;
struct net *net = skb_net(skb);
ah_esp_conn_fill_param_proto(af, iph, inverse, &p); ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_out_get(&p); cp = ip_vs_conn_out_get(&p);
if (!cp) { if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "

View File

@ -1055,7 +1055,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int sctp_app_conn_bind(struct ip_vs_conn *cp) static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{ {
struct netns_ipvs *ipvs = net_ipvs(&init_net); struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash; int hash;
struct ip_vs_app *inc; struct ip_vs_app *inc;
int result = 0; int result = 0;

View File

@ -620,7 +620,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int static int
tcp_app_conn_bind(struct ip_vs_conn *cp) tcp_app_conn_bind(struct ip_vs_conn *cp)
{ {
struct netns_ipvs *ipvs = net_ipvs(&init_net); struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash; int hash;
struct ip_vs_app *inc; struct ip_vs_app *inc;
int result = 0; int result = 0;

View File

@ -396,7 +396,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int udp_app_conn_bind(struct ip_vs_conn *cp) static int udp_app_conn_bind(struct ip_vs_conn *cp)
{ {
struct netns_ipvs *ipvs = net_ipvs(&init_net); struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash; int hash;
struct ip_vs_app *inc; struct ip_vs_app *inc;
int result = 0; int result = 0;

View File

@ -660,21 +660,21 @@ control:
* fill_param used by version 1 * fill_param used by version 1
*/ */
static inline int static inline int
ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
struct ip_vs_conn_param *p, struct ip_vs_conn_param *p,
__u8 *pe_data, unsigned int pe_data_len, __u8 *pe_data, unsigned int pe_data_len,
__u8 *pe_name, unsigned int pe_name_len) __u8 *pe_name, unsigned int pe_name_len)
{ {
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) if (af == AF_INET6)
ip_vs_conn_fill_param(af, sc->v6.protocol, ip_vs_conn_fill_param(net, af, sc->v6.protocol,
(const union nf_inet_addr *)&sc->v6.caddr, (const union nf_inet_addr *)&sc->v6.caddr,
sc->v6.cport, sc->v6.cport,
(const union nf_inet_addr *)&sc->v6.vaddr, (const union nf_inet_addr *)&sc->v6.vaddr,
sc->v6.vport, p); sc->v6.vport, p);
else else
#endif #endif
ip_vs_conn_fill_param(af, sc->v4.protocol, ip_vs_conn_fill_param(net, af, sc->v4.protocol,
(const union nf_inet_addr *)&sc->v4.caddr, (const union nf_inet_addr *)&sc->v4.caddr,
sc->v4.cport, sc->v4.cport,
(const union nf_inet_addr *)&sc->v4.vaddr, (const union nf_inet_addr *)&sc->v4.vaddr,
@ -881,7 +881,7 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer,
} }
} }
ip_vs_conn_fill_param(AF_INET, s->protocol, ip_vs_conn_fill_param(net, AF_INET, s->protocol,
(const union nf_inet_addr *)&s->caddr, (const union nf_inet_addr *)&s->caddr,
s->cport, s->cport,
(const union nf_inet_addr *)&s->vaddr, (const union nf_inet_addr *)&s->vaddr,
@ -1043,9 +1043,8 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
state = 0; state = 0;
} }
} }
if (ip_vs_conn_fill_param_sync(af, s, &param, if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
pe_data, pe_data_len, pe_data_len, pe_name, pe_name_len)) {
pe_name, pe_name_len)) {
retc = 50; retc = 50;
goto out; goto out;
} }