ipv4: Flush per-ns routing cache more sanely.

Flush only those routing cache entries that match the
network namespace in which the purge event occurred.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
This commit is contained in:
David S. Miller 2010-12-19 21:11:20 -08:00
parent 782615aea8
commit 6561a3b12d
3 changed files with 30 additions and 42 deletions

View File

@ -114,7 +114,7 @@ extern int ip_rt_init(void);
extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
__be32 src, struct net_device *dev); __be32 src, struct net_device *dev);
extern void rt_cache_flush(struct net *net, int how); extern void rt_cache_flush(struct net *net, int how);
extern void rt_cache_flush_batch(void); extern void rt_cache_flush_batch(struct net *net);
extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp); extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);

View File

@ -987,7 +987,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
rt_cache_flush(dev_net(dev), 0); rt_cache_flush(dev_net(dev), 0);
break; break;
case NETDEV_UNREGISTER_BATCH: case NETDEV_UNREGISTER_BATCH:
rt_cache_flush_batch(); /* The batch unregister is only called on the first
* device in the list of devices being unregistered.
* Therefore we should not pass dev_net(dev) in here.
*/
rt_cache_flush_batch(NULL);
break; break;
} }
return NOTIFY_DONE; return NOTIFY_DONE;

View File

@ -717,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
* Can be called by a softirq or a process. * Can be called by a softirq or a process.
 * In the latter case, we want to be rescheduled if necessary * In the latter case, we want to be rescheduled if necessary
*/ */
static void rt_do_flush(int process_context) static void rt_do_flush(struct net *net, int process_context)
{ {
unsigned int i; unsigned int i;
struct rtable *rth, *next; struct rtable *rth, *next;
struct rtable * tail;
for (i = 0; i <= rt_hash_mask; i++) { for (i = 0; i <= rt_hash_mask; i++) {
struct rtable __rcu **pprev;
struct rtable *list;
if (process_context && need_resched()) if (process_context && need_resched())
cond_resched(); cond_resched();
rth = rcu_dereference_raw(rt_hash_table[i].chain); rth = rcu_dereference_raw(rt_hash_table[i].chain);
@ -731,50 +733,32 @@ static void rt_do_flush(int process_context)
continue; continue;
spin_lock_bh(rt_hash_lock_addr(i)); spin_lock_bh(rt_hash_lock_addr(i));
#ifdef CONFIG_NET_NS
{
struct rtable __rcu **prev;
struct rtable *p;
rth = rcu_dereference_protected(rt_hash_table[i].chain, list = NULL;
pprev = &rt_hash_table[i].chain;
rth = rcu_dereference_protected(*pprev,
lockdep_is_held(rt_hash_lock_addr(i))); lockdep_is_held(rt_hash_lock_addr(i)));
/* defer releasing the head of the list after spin_unlock */ while (rth) {
for (tail = rth; tail; next = rcu_dereference_protected(rth->dst.rt_next,
tail = rcu_dereference_protected(tail->dst.rt_next, lockdep_is_held(rt_hash_lock_addr(i)));
lockdep_is_held(rt_hash_lock_addr(i))))
if (!rt_is_expired(tail))
break;
if (rth != tail)
rt_hash_table[i].chain = tail;
/* call rt_free on entries after the tail requiring flush */ if (!net ||
prev = &rt_hash_table[i].chain; net_eq(dev_net(rth->dst.dev), net)) {
for (p = rcu_dereference_protected(*prev, rcu_assign_pointer(*pprev, next);
lockdep_is_held(rt_hash_lock_addr(i))); rcu_assign_pointer(rth->dst.rt_next, list);
p != NULL; list = rth;
p = next) {
next = rcu_dereference_protected(p->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i)));
if (!rt_is_expired(p)) {
prev = &p->dst.rt_next;
} else { } else {
*prev = next; pprev = &rth->dst.rt_next;
rt_free(p);
} }
rth = next;
} }
}
#else
rth = rcu_dereference_protected(rt_hash_table[i].chain,
lockdep_is_held(rt_hash_lock_addr(i)));
rcu_assign_pointer(rt_hash_table[i].chain, NULL);
tail = NULL;
#endif
spin_unlock_bh(rt_hash_lock_addr(i)); spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth != tail; rth = next) { for (; list; list = next) {
next = rcu_dereference_protected(rth->dst.rt_next, 1); next = rcu_dereference_protected(list->dst.rt_next, 1);
rt_free(rth); rt_free(list);
} }
} }
} }
@ -922,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay)
{ {
rt_cache_invalidate(net); rt_cache_invalidate(net);
if (delay >= 0) if (delay >= 0)
rt_do_flush(!in_softirq()); rt_do_flush(net, !in_softirq());
} }
/* Flush previous cache invalidated entries from the cache */ /* Flush previous cache invalidated entries from the cache */
void rt_cache_flush_batch(void) void rt_cache_flush_batch(struct net *net)
{ {
rt_do_flush(!in_softirq()); rt_do_flush(net, !in_softirq());
} }
static void rt_emergency_hash_rebuild(struct net *net) static void rt_emergency_hash_rebuild(struct net *net)