ipvs: queue delayed work to expire no destination connections if expire_nodest_conn=1
[upstream commit 35dfb01314]
When expire_nodest_conn=1 and a destination is deleted, IPVS does not
expire the existing connections until the next matching incoming packet.
If there are many connection entries from a single client to a single
destination, many packets may get dropped before all the connections are
expired (more likely with lots of UDP traffic). An optimization can be
made where upon deletion of a destination, IPVS queues up delayed work
to immediately expire any connections with a deleted destination. This
ensures any reused source ports from a client (within the IPVS timeouts)
are scheduled to new real servers instead of silently dropped.
Signed-off-by: Andrew Sy Kim <kim.andrewsy@gmail.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
parent 0fe440f03b
commit f2f46e7af4
|
@ -14,6 +14,7 @@
|
||||||
#include <linux/spinlock.h> /* for struct rwlock_t */
|
#include <linux/spinlock.h> /* for struct rwlock_t */
|
||||||
#include <linux/atomic.h> /* for struct atomic_t */
|
#include <linux/atomic.h> /* for struct atomic_t */
|
||||||
#include <linux/refcount.h> /* for struct refcount_t */
|
#include <linux/refcount.h> /* for struct refcount_t */
|
||||||
|
#include <linux/workqueue.h>
|
||||||
|
|
||||||
#include <linux/compiler.h>
|
#include <linux/compiler.h>
|
||||||
#include <linux/timer.h>
|
#include <linux/timer.h>
|
||||||
|
@ -887,6 +888,8 @@ struct netns_ipvs {
|
||||||
atomic_t conn_out_counter;
|
atomic_t conn_out_counter;
|
||||||
|
|
||||||
#ifdef CONFIG_SYSCTL
|
#ifdef CONFIG_SYSCTL
|
||||||
|
/* delayed work for expiring no dest connections */
|
||||||
|
struct delayed_work expire_nodest_conn_work;
|
||||||
/* 1/rate drop and drop-entry variables */
|
/* 1/rate drop and drop-entry variables */
|
||||||
struct delayed_work defense_work; /* Work handler */
|
struct delayed_work defense_work; /* Work handler */
|
||||||
int drop_rate;
|
int drop_rate;
|
||||||
|
@ -1054,6 +1057,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
|
||||||
return ipvs->sysctl_conn_reuse_mode;
|
return ipvs->sysctl_conn_reuse_mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
|
||||||
|
{
|
||||||
|
return ipvs->sysctl_expire_nodest_conn;
|
||||||
|
}
|
||||||
|
|
||||||
static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
|
static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
|
||||||
{
|
{
|
||||||
return ipvs->sysctl_schedule_icmp;
|
return ipvs->sysctl_schedule_icmp;
|
||||||
|
@ -1146,6 +1154,11 @@ static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
|
static inline int sysctl_schedule_icmp(struct netns_ipvs *ipvs)
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1515,6 +1528,22 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
|
||||||
static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
|
static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_SYSCTL
|
||||||
|
/* Enqueue delayed work for expiring no dest connections
|
||||||
|
* Only queued when sysctl_expire_nodest_conn(ipvs) is non-zero
|
||||||
|
*/
|
||||||
|
static inline void ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs)
|
||||||
|
{
|
||||||
|
if (sysctl_expire_nodest_conn(ipvs))
|
||||||
|
queue_delayed_work(system_long_wq,
|
||||||
|
&ipvs->expire_nodest_conn_work, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs);
|
||||||
|
#else
|
||||||
|
static inline void ip_vs_enqueue_expire_nodest_conns(struct netns_ipvs *ipvs) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
#define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \
|
#define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \
|
||||||
IP_VS_CONN_F_FWD_MASK)
|
IP_VS_CONN_F_FWD_MASK)
|
||||||
|
|
||||||
|
|
|
@ -823,6 +823,17 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
|
||||||
kmem_cache_free(ip_vs_conn_cachep, cp);
|
kmem_cache_free(ip_vs_conn_cachep, cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Try to delete connection while not holding reference */
|
||||||
|
static void ip_vs_conn_del(struct ip_vs_conn *cp)
|
||||||
|
{
|
||||||
|
if (del_timer(&cp->timer)) {
|
||||||
|
/* Drop cp->control chain too */
|
||||||
|
if (cp->control)
|
||||||
|
cp->timeout = 0;
|
||||||
|
ip_vs_conn_expire(&cp->timer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void ip_vs_conn_expire(struct timer_list *t)
|
static void ip_vs_conn_expire(struct timer_list *t)
|
||||||
{
|
{
|
||||||
struct ip_vs_conn *cp = from_timer(cp, t, timer);
|
struct ip_vs_conn *cp = from_timer(cp, t, timer);
|
||||||
|
@ -1382,6 +1393,45 @@ flush_again:
|
||||||
goto flush_again;
|
goto flush_again;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_SYSCTL
|
||||||
|
void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
|
||||||
|
{
|
||||||
|
int idx;
|
||||||
|
struct ip_vs_conn *cp, *cp_c;
|
||||||
|
struct ip_vs_dest *dest;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
|
||||||
|
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
|
||||||
|
if (cp->ipvs != ipvs)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
dest = cp->dest;
|
||||||
|
if (!dest || (dest->flags & IP_VS_DEST_F_AVAILABLE))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (atomic_read(&cp->n_control))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
cp_c = cp->control;
|
||||||
|
IP_VS_DBG(4, "del connection\n");
|
||||||
|
ip_vs_conn_del(cp);
|
||||||
|
if (cp_c && !atomic_read(&cp_c->n_control)) {
|
||||||
|
IP_VS_DBG(4, "del controlling connection\n");
|
||||||
|
ip_vs_conn_del(cp_c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cond_resched_rcu();
|
||||||
|
|
||||||
|
/* netns clean up started, abort delayed work */
|
||||||
|
if (!ipvs->enable)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* per netns init and exit
|
* per netns init and exit
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -694,16 +694,10 @@ static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs)
|
||||||
return ipvs->sysctl_nat_icmp_send;
|
return ipvs->sysctl_nat_icmp_send;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
|
|
||||||
{
|
|
||||||
return ipvs->sysctl_expire_nodest_conn;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; }
|
static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; }
|
||||||
static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; }
|
static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; }
|
||||||
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -2126,6 +2120,26 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check the server status */
|
||||||
|
if (cp && cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
|
||||||
|
/* the destination server is not available */
|
||||||
|
if (sysctl_expire_nodest_conn(ipvs)) {
|
||||||
|
bool old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
|
||||||
|
|
||||||
|
if (!old_ct)
|
||||||
|
cp->flags &= ~IP_VS_CONN_F_NFCT;
|
||||||
|
|
||||||
|
ip_vs_conn_expire_now(cp);
|
||||||
|
__ip_vs_conn_put(cp);
|
||||||
|
if (old_ct)
|
||||||
|
return NF_DROP;
|
||||||
|
cp = NULL;
|
||||||
|
} else {
|
||||||
|
__ip_vs_conn_put(cp);
|
||||||
|
return NF_DROP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely(!cp)) {
|
if (unlikely(!cp)) {
|
||||||
int v;
|
int v;
|
||||||
|
|
||||||
|
@ -2135,27 +2149,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
|
||||||
|
|
||||||
IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
|
IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
|
||||||
|
|
||||||
/* Check the server status */
|
|
||||||
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
|
|
||||||
/* the destination server is not available */
|
|
||||||
|
|
||||||
__u32 flags = cp->flags;
|
|
||||||
|
|
||||||
/* when timer already started, silently drop the packet.*/
|
|
||||||
if (timer_pending(&cp->timer))
|
|
||||||
__ip_vs_conn_put(cp);
|
|
||||||
else
|
|
||||||
ip_vs_conn_put(cp);
|
|
||||||
|
|
||||||
if (sysctl_expire_nodest_conn(ipvs) &&
|
|
||||||
!(flags & IP_VS_CONN_F_ONE_PACKET)) {
|
|
||||||
/* try to expire the connection immediately */
|
|
||||||
ip_vs_conn_expire_now(cp);
|
|
||||||
}
|
|
||||||
|
|
||||||
return NF_DROP;
|
|
||||||
}
|
|
||||||
|
|
||||||
ip_vs_in_stats(cp, skb);
|
ip_vs_in_stats(cp, skb);
|
||||||
ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
|
ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
|
||||||
if (cp->packet_xmit)
|
if (cp->packet_xmit)
|
||||||
|
|
|
@ -210,6 +210,17 @@ static void update_defense_level(struct netns_ipvs *ipvs)
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Handler for delayed work for expiring no
|
||||||
|
* destination connections
|
||||||
|
*/
|
||||||
|
static void expire_nodest_conn_handler(struct work_struct *work)
|
||||||
|
{
|
||||||
|
struct netns_ipvs *ipvs;
|
||||||
|
|
||||||
|
ipvs = container_of(work, struct netns_ipvs,
|
||||||
|
expire_nodest_conn_work.work);
|
||||||
|
ip_vs_expire_nodest_conn_flush(ipvs);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Timer for checking the defense
|
* Timer for checking the defense
|
||||||
|
@ -1163,6 +1174,12 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
|
||||||
list_add(&dest->t_list, &ipvs->dest_trash);
|
list_add(&dest->t_list, &ipvs->dest_trash);
|
||||||
dest->idle_start = 0;
|
dest->idle_start = 0;
|
||||||
spin_unlock_bh(&ipvs->dest_trash_lock);
|
spin_unlock_bh(&ipvs->dest_trash_lock);
|
||||||
|
|
||||||
|
/* Queue up delayed work to expire all no destination connections.
|
||||||
|
* No-op when CONFIG_SYSCTL is disabled.
|
||||||
|
*/
|
||||||
|
if (!cleanup)
|
||||||
|
ip_vs_enqueue_expire_nodest_conns(ipvs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -4081,6 +4098,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
|
||||||
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
|
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
|
||||||
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
|
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
|
||||||
|
|
||||||
|
/* Init delayed work for expiring no dest conn */
|
||||||
|
INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work,
|
||||||
|
expire_nodest_conn_handler);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4088,6 +4109,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
|
||||||
{
|
{
|
||||||
struct net *net = ipvs->net;
|
struct net *net = ipvs->net;
|
||||||
|
|
||||||
|
cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work);
|
||||||
cancel_delayed_work_sync(&ipvs->defense_work);
|
cancel_delayed_work_sync(&ipvs->defense_work);
|
||||||
cancel_work_sync(&ipvs->defense_work.work);
|
cancel_work_sync(&ipvs->defense_work.work);
|
||||||
unregister_net_sysctl_table(ipvs->sysctl_hdr);
|
unregister_net_sysctl_table(ipvs->sysctl_hdr);
|
||||||
|
|
Loading…
Reference in New Issue