From 6561a3b12d62ed5317e6ac32182d87a03f62c8dc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 19 Dec 2010 21:11:20 -0800 Subject: [PATCH] ipv4: Flush per-ns routing cache more sanely. Flush the routing cache only of entries that match the network namespace in which the purge event occurred. Signed-off-by: David S. Miller Acked-by: Eric Dumazet --- include/net/route.h | 2 +- net/ipv4/fib_frontend.c | 6 +++- net/ipv4/route.c | 64 ++++++++++++++++------------------------- 3 files changed, 30 insertions(+), 42 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 27002362944a..93e10c453f6b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -114,7 +114,7 @@ extern int ip_rt_init(void); extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, __be32 src, struct net_device *dev); extern void rt_cache_flush(struct net *net, int how); -extern void rt_cache_flush_batch(void); +extern void rt_cache_flush_batch(struct net *net); extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp); extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d3a1112b9d9c..9f8bb68911e4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -987,7 +987,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(dev_net(dev), 0); break; case NETDEV_UNREGISTER_BATCH: - rt_cache_flush_batch(); + /* The batch unregister is only called on the first + * device in the list of devices being unregistered. + * Therefore we should not pass dev_net(dev) in here. + */ + rt_cache_flush_batch(NULL); break; } return NOTIFY_DONE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ae520963540f..d8b4f4d0d66e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -717,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth) * Can be called by a softirq or a process. * In the later case, we want to be reschedule if necessary */ -static void rt_do_flush(int process_context) +static void rt_do_flush(struct net *net, int process_context) { unsigned int i; struct rtable *rth, *next; - struct rtable * tail; for (i = 0; i <= rt_hash_mask; i++) { + struct rtable __rcu **pprev; + struct rtable *list; + if (process_context && need_resched()) cond_resched(); rth = rcu_dereference_raw(rt_hash_table[i].chain); @@ -731,50 +733,32 @@ static void rt_do_flush(int process_context) continue; spin_lock_bh(rt_hash_lock_addr(i)); -#ifdef CONFIG_NET_NS - { - struct rtable __rcu **prev; - struct rtable *p; - rth = rcu_dereference_protected(rt_hash_table[i].chain, + list = NULL; + pprev = &rt_hash_table[i].chain; + rth = rcu_dereference_protected(*pprev, lockdep_is_held(rt_hash_lock_addr(i))); - /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; - tail = rcu_dereference_protected(tail->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i)))) - if (!rt_is_expired(tail)) - break; - if (rth != tail) - rt_hash_table[i].chain = tail; + while (rth) { + next = rcu_dereference_protected(rth->dst.rt_next, + lockdep_is_held(rt_hash_lock_addr(i))); - /* call rt_free on entries after the tail requiring flush */ - prev = &rt_hash_table[i].chain; - for (p = rcu_dereference_protected(*prev, - lockdep_is_held(rt_hash_lock_addr(i))); - p != NULL; - p = next) { - next = rcu_dereference_protected(p->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i))); - if (!rt_is_expired(p)) { - prev = &p->dst.rt_next; + if (!net || + net_eq(dev_net(rth->dst.dev), net)) { + rcu_assign_pointer(*pprev, next); + rcu_assign_pointer(rth->dst.rt_next, list); + list = rth; } else { - *prev = next; - rt_free(p); + pprev = &rth->dst.rt_next; } + rth = next; } - } -#else - rth = rcu_dereference_protected(rt_hash_table[i].chain, - lockdep_is_held(rt_hash_lock_addr(i))); - rcu_assign_pointer(rt_hash_table[i].chain, NULL); - tail = NULL; -#endif + spin_unlock_bh(rt_hash_lock_addr(i)); - for (; rth != tail; rth = next) { - next = rcu_dereference_protected(rth->dst.rt_next, 1); - rt_free(rth); + for (; list; list = next) { + next = rcu_dereference_protected(list->dst.rt_next, 1); + rt_free(list); } } } @@ -922,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay) { rt_cache_invalidate(net); if (delay >= 0) - rt_do_flush(!in_softirq()); + rt_do_flush(net, !in_softirq()); } /* Flush previous cache invalidated entries from the cache */ -void rt_cache_flush_batch(void) +void rt_cache_flush_batch(struct net *net) { - rt_do_flush(!in_softirq()); + rt_do_flush(net, !in_softirq()); } static void rt_emergency_hash_rebuild(struct net *net)