ipvs: do not disable bh for long time

We used a global BH disable in LOCAL_OUT hook.
Add _bh suffix to all places that need it and remove
the disabling from LOCAL_OUT and sync code.

Functions like ip_defrag need protection from
BH, so add it. As for nf_nat_mangle_tcp_packet, it needs
RCU lock.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
This commit is contained in:
Julian Anastasov 2013-03-22 11:46:54 +02:00 committed by Pablo Neira Ayuso
parent ceec4c3816
commit ac69269a45
12 changed files with 64 additions and 87 deletions

View File

@ -352,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
unsigned int flag, __u32 seq, int diff)
{
/* spinlock is to keep updating cp->flags atomic */
spin_lock(&cp->lock);
spin_lock_bh(&cp->lock);
if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
vseq->previous_delta = vseq->delta;
vseq->delta += diff;
vseq->init_seq = seq;
cp->flags |= flag;
}
spin_unlock(&cp->lock);
spin_unlock_bh(&cp->lock);
}
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,

View File

@ -86,14 +86,14 @@ struct ip_vs_aligned_lock
static struct ip_vs_aligned_lock
__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
static inline void ct_write_lock(unsigned int key)
static inline void ct_write_lock_bh(unsigned int key)
{
spin_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
static inline void ct_write_unlock(unsigned int key)
static inline void ct_write_unlock_bh(unsigned int key)
{
spin_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
@ -167,7 +167,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
/* Hash by protocol, client address and port */
hash = ip_vs_conn_hashkey_conn(cp);
ct_write_lock(hash);
ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
@ -182,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
}
spin_unlock(&cp->lock);
ct_write_unlock(hash);
ct_write_unlock_bh(hash);
return ret;
}
@ -200,7 +200,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
/* unhash it and decrease its reference counter */
hash = ip_vs_conn_hashkey_conn(cp);
ct_write_lock(hash);
ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_HASHED) {
@ -212,7 +212,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
ret = 0;
spin_unlock(&cp->lock);
ct_write_unlock(hash);
ct_write_unlock_bh(hash);
return ret;
}
@ -227,7 +227,7 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
hash = ip_vs_conn_hashkey_conn(cp);
ct_write_lock(hash);
ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_HASHED) {
@ -242,7 +242,7 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
ret = atomic_read(&cp->refcnt) ? false : true;
spin_unlock(&cp->lock);
ct_write_unlock(hash);
ct_write_unlock_bh(hash);
return ret;
}
@ -462,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
{
if (ip_vs_conn_unhash(cp)) {
spin_lock(&cp->lock);
spin_lock_bh(&cp->lock);
if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
atomic_dec(&ip_vs_conn_no_cport_cnt);
cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
cp->cport = cport;
}
spin_unlock(&cp->lock);
spin_unlock_bh(&cp->lock);
/* hash on new dport */
ip_vs_conn_hash(cp);
@ -622,9 +622,9 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
if (dest) {
struct ip_vs_proto_data *pd;
spin_lock(&cp->lock);
spin_lock_bh(&cp->lock);
if (cp->dest) {
spin_unlock(&cp->lock);
spin_unlock_bh(&cp->lock);
rcu_read_unlock();
return;
}
@ -635,7 +635,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
ip_vs_unbind_app(cp);
ip_vs_bind_dest(cp, dest);
spin_unlock(&cp->lock);
spin_unlock_bh(&cp->lock);
/* Update its packet transmitter */
cp->packet_xmit = NULL;

View File

@ -638,8 +638,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
int err = ip_defrag(skb, user);
int err;
local_bh_disable();
err = ip_defrag(skb, user);
local_bh_enable();
if (!err)
ip_send_check(ip_hdr(skb));
@ -1217,13 +1220,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_out(hooknum, skb, AF_INET);
local_bh_enable();
return verdict;
return ip_vs_out(hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@ -1250,13 +1247,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_out(hooknum, skb, AF_INET6);
local_bh_enable();
return verdict;
return ip_vs_out(hooknum, skb, AF_INET6);
}
#endif
@ -1714,13 +1705,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_in(hooknum, skb, AF_INET);
local_bh_enable();
return verdict;
return ip_vs_in(hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@ -1779,13 +1764,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int verdict;
/* Disable BH in LOCAL_OUT until all places are fixed */
local_bh_disable();
verdict = ip_vs_in(hooknum, skb, AF_INET6);
local_bh_enable();
return verdict;
return ip_vs_in(hooknum, skb, AF_INET6);
}
#endif

View File

@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
rcu_read_lock();
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start-data, end-start,
buf, buf_len);
rcu_read_unlock();
if (ret) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);

View File

@ -527,10 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock(&svc->sched_lock);
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
ip_vs_lblc_new(tbl, &iph.daddr, dest);
spin_unlock(&svc->sched_lock);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",

View File

@ -678,7 +678,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
if (atomic_read(&en->set.size) > 1 &&
time_after(jiffies, en->set.lastmod +
sysctl_lblcr_expiration(svc))) {
spin_lock(&svc->sched_lock);
spin_lock_bh(&svc->sched_lock);
if (atomic_read(&en->set.size) > 1) {
struct ip_vs_dest *m;
@ -686,7 +686,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
if (m)
ip_vs_dest_set_erase(&en->set, m);
}
spin_unlock(&svc->sched_lock);
spin_unlock_bh(&svc->sched_lock);
}
/* If the destination is not overloaded, use it */
@ -701,10 +701,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
/* Update our cache entry */
spin_lock(&svc->sched_lock);
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
ip_vs_dest_set_insert(&en->set, dest, true);
spin_unlock(&svc->sched_lock);
spin_unlock_bh(&svc->sched_lock);
goto out;
}
@ -716,10 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock(&svc->sched_lock);
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
ip_vs_lblcr_new(tbl, &iph.daddr, dest);
spin_unlock(&svc->sched_lock);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",

View File

@ -994,9 +994,9 @@ static void
sctp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
spin_lock(&cp->lock);
spin_lock_bh(&cp->lock);
set_sctp_state(pd, cp, direction, skb);
spin_unlock(&cp->lock);
spin_unlock_bh(&cp->lock);
}
static inline __u16 sctp_app_hashkey(__be16 port)

View File

@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
if (th == NULL)
return;
spin_lock(&cp->lock);
spin_lock_bh(&cp->lock);
set_tcp_state(pd, cp, direction, th);
spin_unlock(&cp->lock);
spin_unlock_bh(&cp->lock);
}
static inline __u16 tcp_app_hashkey(__be16 port)
@ -655,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
spin_lock(&cp->lock);
spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
: tcp_timeouts[IP_VS_TCP_S_LISTEN]);
spin_unlock(&cp->lock);
spin_unlock_bh(&cp->lock);
}
/* ---------------------------------------------

View File

@ -63,7 +63,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
spin_lock(&svc->sched_lock);
spin_lock_bh(&svc->sched_lock);
p = (struct list_head *) svc->sched_data;
last = dest = list_entry(p, struct ip_vs_dest, n_list);
@ -85,13 +85,13 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
} while (pass < 2 && p != &svc->destinations);
stop:
spin_unlock(&svc->sched_lock);
spin_unlock_bh(&svc->sched_lock);
ip_vs_scheduler_err(svc, "no destination available");
return NULL;
out:
svc->sched_data = &dest->n_list;
spin_unlock(&svc->sched_lock);
spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),

View File

@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
return;
spin_lock(&ipvs->sync_buff_lock);
spin_lock_bh(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
spin_unlock(&ipvs->sync_buff_lock);
spin_unlock_bh(&ipvs->sync_buff_lock);
return;
}
@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
if (!buff) {
buff = ip_vs_sync_buff_create_v0(ipvs);
if (!buff) {
spin_unlock(&ipvs->sync_buff_lock);
spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
}
spin_unlock(&ipvs->sync_buff_lock);
spin_unlock_bh(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
cp = cp->control;
@ -641,9 +641,9 @@ sloop:
pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
}
spin_lock(&ipvs->sync_buff_lock);
spin_lock_bh(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
spin_unlock(&ipvs->sync_buff_lock);
spin_unlock_bh(&ipvs->sync_buff_lock);
return;
}
@ -683,7 +683,7 @@ sloop:
if (!buff) {
buff = ip_vs_sync_buff_create(ipvs);
if (!buff) {
spin_unlock(&ipvs->sync_buff_lock);
spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@ -750,7 +750,7 @@ sloop:
}
}
spin_unlock(&ipvs->sync_buff_lock);
spin_unlock_bh(&ipvs->sync_buff_lock);
control:
/* synchronize its controller if it has */
@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
kfree(param->pe_data);
dest = cp->dest;
spin_lock(&cp->lock);
spin_lock_bh(&cp->lock);
if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
!(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
if (flags & IP_VS_CONN_F_INACTIVE) {
@ -857,7 +857,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
cp->flags = flags;
spin_unlock(&cp->lock);
spin_unlock_bh(&cp->lock);
if (!dest)
ip_vs_try_bind_dest(cp);
} else {
@ -1689,11 +1689,7 @@ static int sync_thread_backup(void *data)
break;
}
/* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
ip_vs_process_message(tinfo->net, tinfo->buf, len);
local_bh_enable();
}
}

View File

@ -170,7 +170,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
spin_lock(&svc->sched_lock);
spin_lock_bh(&svc->sched_lock);
dest = mark->cl;
/* No available dests? */
if (mark->mw == 0)
@ -222,7 +222,7 @@ found:
mark->cl = dest;
out:
spin_unlock(&svc->sched_lock);
spin_unlock_bh(&svc->sched_lock);
return dest;
err_noavail:

View File

@ -177,22 +177,22 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
rt = (struct rtable *) dest_dst->dst_cache;
else {
dest_dst = ip_vs_dest_dst_alloc();
spin_lock(&dest->dst_lock);
spin_lock_bh(&dest->dst_lock);
if (!dest_dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
spin_unlock(&dest->dst_lock);
spin_unlock_bh(&dest->dst_lock);
goto err_unreach;
}
rt = do_output_route4(net, dest->addr.ip, rt_mode,
&dest_dst->dst_saddr.ip);
if (!rt) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
spin_unlock(&dest->dst_lock);
spin_unlock_bh(&dest->dst_lock);
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
__ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
spin_unlock(&dest->dst_lock);
spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
&dest->addr.ip, &dest_dst->dst_saddr.ip,
atomic_read(&rt->dst.__refcnt));
@ -358,10 +358,10 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
u32 cookie;
dest_dst = ip_vs_dest_dst_alloc();
spin_lock(&dest->dst_lock);
spin_lock_bh(&dest->dst_lock);
if (!dest_dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
spin_unlock(&dest->dst_lock);
spin_unlock_bh(&dest->dst_lock);
goto err_unreach;
}
dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
@ -369,14 +369,14 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
do_xfrm);
if (!dst) {
__ip_vs_dst_set(dest, NULL, NULL, 0);
spin_unlock(&dest->dst_lock);
spin_unlock_bh(&dest->dst_lock);
ip_vs_dest_dst_free(dest_dst);
goto err_unreach;
}
rt = (struct rt6_info *) dst;
cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
spin_unlock(&dest->dst_lock);
spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
&dest->addr.in6, &dest_dst->dst_saddr.in6,
atomic_read(&rt->dst.__refcnt));