From b3832117b4b61374fac08692f1b1a620088342dd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:08 -0800 Subject: [PATCH 1/7] fib_trie: Use index & (~0ul << n->bits) instead of index >> n->bits In doing performance testing and analysis of the changes I recently found that by shifting the index I had created an unnecessary dependency. I have updated the code so that we instead shift a mask by bits and then just test against that as that should save us about 2 CPU cycles since we can generate the mask while the key and pos are being processed. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 281e5e00025f..dea2f80e08c3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -961,12 +961,12 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) * prefix plus zeros for the bits in the cindex. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if !(index >> bits) - * we know the value is cindex - * else + * if (index & (~0ul << bits)) * we have a mismatch in skip bits and failed + * else + * we know the value is cindex */ - if (index >> n->bits) + if (index & (~0ul << n->bits)) return NULL; /* we have found a leaf. Prefixes have already been compared */ @@ -1301,12 +1301,12 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, * prefix plus zeros for the "bits" in the prefix. The index * is the difference between the key and this value. From * this we can actually derive several pieces of data. - * if !(index >> bits) - * we know the value is child index - * else + * if (index & (~0ul << bits)) * we have a mismatch in skip bits and failed + * else + * we know the value is cindex */ - if (index >> n->bits) + if (index & (~0ul << n->bits)) break; /* we have found a leaf. Prefixes have already been compared */ From 69fa57b1e42c171599d53486839c3d58f7ed8eec Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:14 -0800 Subject: [PATCH 2/7] fib_trie: Fix RCU bug and merge similar bits of inflate/halve This patch addresses two issues. The first issue is the fact that I believe I had the RCU freeing sequence slightly out of order. As a result we could get into an issue if a caller went into a child of a child of the new node, then backtraced into the to be freed parent, and then attempted to access a child of a child that may have been consumed in a resize of one of the new nodes children. To resolve this I have moved the resize after we have freed the oldtnode. The only side effect of this is that we will now be calling resize on more nodes in the case of inflate due to the fact that we don't have a good way to test to see if a full_tnode on the new node was there before or after the allocation. This should have minimal impact however since the node should already be correctly size so it is just the cost of calling should_inflate that we will be taking on the node which is only a couple of cycles. The second issue is the fact that inflate and halve were essentially doing the same thing after the new node was added to the trie replacing the old one. As such it wasn't really necessary to keep the code in both functions so I have split it out into two other functions, called replace and update_children. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 157 ++++++++++++++++++++------------------------ 1 file changed, 73 insertions(+), 84 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index dea2f80e08c3..7e9031739f06 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -396,8 +396,30 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) rcu_assign_pointer(tn->child[i], n); } -static void put_child_root(struct tnode *tp, struct trie *t, - t_key key, struct tnode *n) +static void update_children(struct tnode *tn) +{ + unsigned long i; + + /* update all of the child parent pointers */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); + + if (!inode) + continue; + + /* Either update the children of a tnode that + * already belongs to us or update the child + * to point to ourselves. + */ + if (node_parent(inode) == tn) + update_children(inode); + else + node_set_parent(inode, tn); + } +} + +static inline void put_child_root(struct tnode *tp, struct trie *t, + t_key key, struct tnode *n) { if (tp) put_child(tp, get_index(key, tp), n); @@ -434,10 +456,35 @@ static void tnode_free(struct tnode *tn) } } +static void replace(struct trie *t, struct tnode *oldtnode, struct tnode *tn) +{ + struct tnode *tp = node_parent(oldtnode); + unsigned long i; + + /* setup the parent pointer out of and back into this node */ + NODE_INIT_PARENT(tn, tp); + put_child_root(tp, t, tn->key, tn); + + /* update all of the child parent pointers */ + update_children(tn); + + /* all pointers should be clean so we are done */ + tnode_free(oldtnode); + + /* resize children now that oldtnode is freed */ + for (i = tnode_child_length(tn); i;) { + struct tnode *inode = tnode_get_child(tn, --i); + + /* resize child node */ + if (tnode_full(tn, inode)) + resize(t, inode); + } +} + static int inflate(struct trie *t, struct tnode *oldtnode) { - struct tnode *inode, *node0, *node1, *tn, *tp; - unsigned long i, j, k; + struct tnode *tn; + unsigned long i; t_key m; pr_debug("In inflate\n"); @@ -446,13 +493,18 @@ static int inflate(struct trie *t, struct tnode *oldtnode) if (!tn) return -ENOMEM; + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); + /* Assemble all of the pointers in our cluster, in this case that * represents all of the pointers out of our allocated nodes that * point to existing tnodes and the links between our allocated * nodes. */ for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) { - inode = tnode_get_child(oldtnode, --i); + struct tnode *inode = tnode_get_child(oldtnode, --i); + struct tnode *node0, *node1; + unsigned long j, k; /* An empty child */ if (inode == NULL) @@ -464,6 +516,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) continue; } + /* drop the node in the old tnode free list */ + tnode_free_append(oldtnode, inode); + /* An internal node with two children */ if (inode->bits == 1) { put_child(tn, 2 * i + 1, tnode_get_child(inode, 1)); @@ -488,9 +543,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1); if (!node1) goto nomem; - tnode_free_append(tn, node1); + node0 = tnode_new(inode->key, inode->pos, inode->bits - 1); - node0 = tnode_new(inode->key & ~m, inode->pos, inode->bits - 1); + tnode_free_append(tn, node1); if (!node0) goto nomem; tnode_free_append(tn, node0); @@ -512,53 +567,9 @@ static int inflate(struct trie *t, struct tnode *oldtnode) put_child(tn, 2 * i, node0); } - /* setup the parent pointer into and out of this node */ - tp = node_parent(oldtnode); - NODE_INIT_PARENT(tn, tp); - put_child_root(tp, t, tn->key, tn); + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); - /* prepare oldtnode to be freed */ - tnode_free_init(oldtnode); - - /* update all child nodes parent pointers to route to us */ - for (i = tnode_child_length(oldtnode); i;) { - inode = tnode_get_child(oldtnode, --i); - - /* A leaf or an internal node with skipped bits */ - if (!tnode_full(oldtnode, inode)) { - node_set_parent(inode, tn); - continue; - } - - /* drop the node in the old tnode free list */ - tnode_free_append(oldtnode, inode); - - /* fetch new nodes */ - node1 = tnode_get_child(tn, 2 * i + 1); - node0 = tnode_get_child(tn, 2 * i); - - /* bits == 1 then node0 and node1 represent inode's children */ - if (inode->bits == 1) { - node_set_parent(node1, tn); - node_set_parent(node0, tn); - continue; - } - - /* update parent pointers in child node's children */ - for (k = tnode_child_length(inode), j = k / 2; j;) { - node_set_parent(tnode_get_child(inode, --k), node1); - node_set_parent(tnode_get_child(inode, --j), node0); - node_set_parent(tnode_get_child(inode, --k), node1); - node_set_parent(tnode_get_child(inode, --j), node0); - } - - /* resize child nodes */ - resize(t, node1); - resize(t, node0); - } - - /* we completed without error, prepare to free old node */ - tnode_free(oldtnode); return 0; nomem: /* all pointers should be clean so we are done */ @@ -568,7 +579,7 @@ nomem: static int halve(struct trie *t, struct tnode *oldtnode) { - struct tnode *tn, *tp, *inode, *node0, *node1; + struct tnode *tn; unsigned long i; pr_debug("In halve\n"); @@ -577,14 +588,18 @@ static int halve(struct trie *t, struct tnode *oldtnode) if (!tn) return -ENOMEM; + /* prepare oldtnode to be freed */ + tnode_free_init(oldtnode); + /* Assemble all of the pointers in our cluster, in this case that * represents all of the pointers out of our allocated nodes that * point to existing tnodes and the links between our allocated * nodes. */ for (i = tnode_child_length(oldtnode); i;) { - node1 = tnode_get_child(oldtnode, --i); - node0 = tnode_get_child(oldtnode, --i); + struct tnode *node1 = tnode_get_child(oldtnode, --i); + struct tnode *node0 = tnode_get_child(oldtnode, --i); + struct tnode *inode; /* At least one of the children is empty */ if (!node1 || !node0) { @@ -609,34 +624,8 @@ static int halve(struct trie *t, struct tnode *oldtnode) put_child(tn, i / 2, inode); } - /* setup the parent pointer out of and back into this node */ - tp = node_parent(oldtnode); - NODE_INIT_PARENT(tn, tp); - put_child_root(tp, t, tn->key, tn); - - /* prepare oldtnode to be freed */ - tnode_free_init(oldtnode); - - /* update all of the child parent pointers */ - for (i = tnode_child_length(tn); i;) { - inode = tnode_get_child(tn, --i); - - /* only new tnodes will be considered "full" nodes */ - if (!tnode_full(tn, inode)) { - node_set_parent(inode, tn); - continue; - } - - /* Two nonempty children */ - node_set_parent(tnode_get_child(inode, 1), inode); - node_set_parent(tnode_get_child(inode, 0), inode); - - /* resize child node */ - resize(t, inode); - } - - /* all pointers should be clean so we are done */ - tnode_free(oldtnode); + /* setup the parent pointers into and out of this node */ + replace(t, oldtnode, tn); return 0; } From a80e89d4c650a7c3ab74f0b2d133cc2ce9738994 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:20 -0800 Subject: [PATCH 3/7] fib_trie: Fall back to slen update on inflate/halve failure This change corrects an issue where if inflate or halve fails we were exiting the resize function without at least updating the slen for the node. To correct this I have moved the update of max_size into the while loop so that it is only decremented on a successful call to either inflate or halve. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 7e9031739f06..80892f565030 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -752,7 +752,7 @@ static void resize(struct trie *t, struct tnode *tn) { struct tnode *tp = node_parent(tn), *n = NULL; struct tnode __rcu **cptr; - int max_work; + int max_work = MAX_WORK; pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n", tn, inflate_threshold, halve_threshold); @@ -775,8 +775,7 @@ static void resize(struct trie *t, struct tnode *tn) /* Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. */ - max_work = MAX_WORK; - while (should_inflate(tp, tn) && max_work--) { + while (should_inflate(tp, tn) && max_work) { if (inflate(t, tn)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->resize_node_skipped); @@ -784,6 +783,7 @@ static void resize(struct trie *t, struct tnode *tn) break; } + max_work--; tn = rtnl_dereference(*cptr); } @@ -794,8 +794,7 @@ static void resize(struct trie *t, struct tnode *tn) /* Halve as long as the number of empty children in this * node is above threshold. */ - max_work = MAX_WORK; - while (should_halve(tp, tn) && max_work--) { + while (should_halve(tp, tn) && max_work) { if (halve(t, tn)) { #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(t->stats->resize_node_skipped); @@ -803,6 +802,7 @@ static void resize(struct trie *t, struct tnode *tn) break; } + max_work--; tn = rtnl_dereference(*cptr); } From 95f60ea3e99aef5967d1566979f4ade7be386e34 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:26 -0800 Subject: [PATCH 4/7] fib_trie: Add collapse() and should_collapse() to resize This patch really does two things. First it pulls the logic for determining if we should collapse one node out of the tree and the actual code doing the collapse into a separate pair of functions. This helps to make the changes to these areas more readable. Second it encodes the upper 32b of the empty_children value onto the full_children value in the case of bits == KEYLENGTH. By doing this we are able to handle the case of a 32b node where empty_children would appear to be 0 when it was actually 1ul << 32. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 100 ++++++++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 35 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 80892f565030..f874e1811eab 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -83,7 +83,8 @@ #define MAX_STAT_DEPTH 32 -#define KEYLENGTH (8*sizeof(t_key)) +#define KEYLENGTH (8*sizeof(t_key)) +#define KEY_MAX ((t_key)~0) typedef unsigned int t_key; @@ -102,8 +103,8 @@ struct tnode { union { /* The fields in this struct are valid if bits > 0 (TNODE) */ struct { - unsigned int full_children; /* KEYLENGTH bits needed */ - unsigned int empty_children; /* KEYLENGTH bits needed */ + t_key empty_children; /* KEYLENGTH bits needed */ + t_key full_children; /* KEYLENGTH bits needed */ struct tnode __rcu *child[0]; }; /* This list pointer if valid if bits == 0 (LEAF) */ @@ -302,6 +303,16 @@ static struct tnode *tnode_alloc(size_t size) return vzalloc(size); } +static inline void empty_child_inc(struct tnode *n) +{ + ++n->empty_children ? : ++n->full_children; +} + +static inline void empty_child_dec(struct tnode *n) +{ + n->empty_children-- ? : n->full_children--; +} + static struct tnode *leaf_new(t_key key) { struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); @@ -335,7 +346,7 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = offsetof(struct tnode, child[1 << bits]); + size_t sz = offsetof(struct tnode, child[1ul << bits]); struct tnode *tn = tnode_alloc(sz); unsigned int shift = pos + bits; @@ -348,8 +359,10 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) tn->pos = pos; tn->bits = bits; tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0; - tn->full_children = 0; - tn->empty_children = 1<full_children = 1; + else + tn->empty_children = 1ul << bits; } pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), @@ -375,11 +388,11 @@ static void put_child(struct tnode *tn, unsigned long i, struct tnode *n) BUG_ON(i >= tnode_child_length(tn)); - /* update emptyChildren */ + /* update emptyChildren, overflow into fullChildren */ if (n == NULL && chi != NULL) - tn->empty_children++; - else if (n != NULL && chi == NULL) - tn->empty_children--; + empty_child_inc(tn); + if (n != NULL && chi == NULL) + empty_child_dec(tn); /* update fullChildren */ wasfull = tnode_full(tn, chi); @@ -630,6 +643,24 @@ static int halve(struct trie *t, struct tnode *oldtnode) return 0; } +static void collapse(struct trie *t, struct tnode *oldtnode) +{ + struct tnode *n, *tp; + unsigned long i; + + /* scan the tnode looking for that one child that might still exist */ + for (n = NULL, i = tnode_child_length(oldtnode); !n && i;) + n = tnode_get_child(oldtnode, --i); + + /* compress one level */ + tp = node_parent(oldtnode); + put_child_root(tp, t, oldtnode->key, n); + node_set_parent(n, tp); + + /* drop dead node */ + node_free(oldtnode); +} + static unsigned char update_suffix(struct tnode *tn) { unsigned char slen = tn->pos; @@ -729,10 +760,12 @@ static bool should_inflate(const struct tnode *tp, const struct tnode *tn) /* Keep root node larger */ threshold *= tp ? inflate_threshold : inflate_threshold_root; - used += tn->full_children; used -= tn->empty_children; + used += tn->full_children; - return tn->pos && ((50 * used) >= threshold); + /* if bits == KEYLENGTH then pos = 0, and will fail below */ + + return (used > 1) && tn->pos && ((50 * used) >= threshold); } static bool should_halve(const struct tnode *tp, const struct tnode *tn) @@ -744,13 +777,29 @@ static bool should_halve(const struct tnode *tp, const struct tnode *tn) threshold *= tp ? halve_threshold : halve_threshold_root; used -= tn->empty_children; - return (tn->bits > 1) && ((100 * used) < threshold); + /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */ + + return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold); +} + +static bool should_collapse(const struct tnode *tn) +{ + unsigned long used = tnode_child_length(tn); + + used -= tn->empty_children; + + /* account for bits == KEYLENGTH case */ + if ((tn->bits == KEYLENGTH) && tn->full_children) + used -= KEY_MAX; + + /* One child or none, time to drop us from the trie */ + return used < 2; } #define MAX_WORK 10 static void resize(struct trie *t, struct tnode *tn) { - struct tnode *tp = node_parent(tn), *n = NULL; + struct tnode *tp = node_parent(tn); struct tnode __rcu **cptr; int max_work = MAX_WORK; @@ -764,14 +813,6 @@ static void resize(struct trie *t, struct tnode *tn) cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie; BUG_ON(tn != rtnl_dereference(*cptr)); - /* No children */ - if (tn->empty_children > (tnode_child_length(tn) - 1)) - goto no_children; - - /* One child */ - if (tn->empty_children == (tnode_child_length(tn) - 1)) - goto one_child; - /* Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. */ @@ -807,19 +848,8 @@ static void resize(struct trie *t, struct tnode *tn) } /* Only one child remains */ - if (tn->empty_children == (tnode_child_length(tn) - 1)) { - unsigned long i; -one_child: - for (i = tnode_child_length(tn); !n && i;) - n = tnode_get_child(tn, --i); -no_children: - /* compress one level */ - put_child_root(tp, t, tn->key, n); - node_set_parent(n, tp); - - /* drop dead node */ - tnode_free_init(tn); - tnode_free(tn); + if (should_collapse(tn)) { + collapse(t, tn); return; } From 30cfe7c9c88d73440560d7e381bab12f5463a6cd Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:33 -0800 Subject: [PATCH 5/7] fib_trie: Use empty_children instead of counting empty nodes in stats collection It doesn't make much sense to count the pointers ourselves when empty_children already has a count for the number of NULL pointers stored in the tnode. As such save ourselves the cycles and just use empty_children. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f874e1811eab..90654bb64e21 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1954,16 +1954,10 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) hlist_for_each_entry_rcu(li, &n->list, hlist) ++s->prefixes; } else { - unsigned long i; - s->tnodes++; if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; - - for (i = tnode_child_length(n); i--;) { - if (!rcu_access_pointer(n->child[i])) - s->nullpointers++; - } + s->nullpointers += n->empty_children; } } rcu_read_unlock(); From 02525368f48c197bce6e4251ff7bde92fa6f026e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:39 -0800 Subject: [PATCH 6/7] fib_trie: Move fib_find_alias to file where it is used The function fib_find_alias is only accessed by functions in fib_trie.c as such it makes sense to relocate it and cast it as static so that the compiler can take advantage of optimizations it can do to it as a local function. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_lookup.h | 1 - net/ipv4/fib_semantics.c | 18 ------------------ net/ipv4/fib_trie.c | 20 ++++++++++++++++++++ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 1e4f6600b31d..825981b1049a 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -32,7 +32,6 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, unsigned int); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 265cb72b7c1b..1e2090ea663e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -411,24 +411,6 @@ errout: rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); } -/* Return the first fib alias matching TOS with - * priority less than or equal to PRIO. - */ -struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) -{ - if (fah) { - struct fib_alias *fa; - list_for_each_entry(fa, fah, fa_list) { - if (fa->fa_tos > tos) - continue; - if (fa->fa_info->fib_priority >= prio || - fa->fa_tos < tos) - return fa; - } - } - return NULL; -} - static int fib_detect_death(struct fib_info *fi, int order, struct fib_info **last_resort, int *last_idx, int dflt) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 90654bb64e21..7f342265968e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -998,6 +998,26 @@ static struct tnode *fib_find_node(struct trie *t, u32 key) return n; } +/* Return the first fib alias matching TOS with + * priority less than or equal to PRIO. + */ +static struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) +{ + struct fib_alias *fa; + + if (!fah) + return NULL; + + list_for_each_entry(fa, fah, fa_list) { + if (fa->fa_tos > tos) + continue; + if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos) + return fa; + } + + return NULL; +} + static void trie_rebalance(struct trie *t, struct tnode *tn) { struct tnode *tp; From 64c6272351a0eca55574f487b103770163d1dbce Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 22 Jan 2015 15:51:45 -0800 Subject: [PATCH 7/7] fib_trie: Various clean-ups for handling slen While doing further work on the fib_trie I noted a few items. First I was using calls that were far more complicated than they needed to be for determining when to push/pull the suffix length. I have updated the code to reflect the simplier logic. The second issue is that I realised we weren't necessarily handling the case of a leaf_info struct surviving a flush. I have updated the logic so that now we will call pull_suffix in the event of having a leaf info value left in the leaf after flushing it. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 49 +++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 7f342265968e..3daf0224ff2e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -917,27 +917,20 @@ static void leaf_push_suffix(struct tnode *l) static void remove_leaf_info(struct tnode *l, struct leaf_info *old) { - struct hlist_node *prev; - - /* record the location of the pointer to this object */ - prev = rtnl_dereference(hlist_pprev_rcu(&old->hlist)); + /* record the location of the previous list_info entry */ + struct hlist_node **pprev = old->hlist.pprev; + struct leaf_info *li = hlist_entry(pprev, typeof(*li), hlist.next); /* remove the leaf info from the list */ hlist_del_rcu(&old->hlist); - /* if we emptied the list this leaf will be freed and we can sort - * out parent suffix lengths as a part of trie_rebalance - */ - if (hlist_empty(&l->list)) + /* only access li if it is pointing at the last valid hlist_node */ + if (hlist_empty(&l->list) || (*pprev)) return; - /* if we removed the tail then we need to update slen */ - if (!rcu_access_pointer(hlist_next_rcu(prev))) { - struct leaf_info *li = hlist_entry(prev, typeof(*li), hlist); - - l->slen = KEYLENGTH - li->plen; - leaf_pull_suffix(l); - } + /* update the trie with the latest suffix length */ + l->slen = KEYLENGTH - li->plen; + leaf_pull_suffix(l); } static void insert_leaf_info(struct tnode *l, struct leaf_info *new) @@ -961,7 +954,7 @@ static void insert_leaf_info(struct tnode *l, struct leaf_info *new) } /* if we added to the tail node then we need to update slen */ - if (!rcu_access_pointer(hlist_next_rcu(&new->hlist))) { + if (l->slen < (KEYLENGTH - new->plen)) { l->slen = KEYLENGTH - new->plen; leaf_push_suffix(l); } @@ -1613,6 +1606,7 @@ static int trie_flush_leaf(struct tnode *l) struct hlist_head *lih = &l->list; struct hlist_node *tmp; struct leaf_info *li = NULL; + unsigned char plen = KEYLENGTH; hlist_for_each_entry_safe(li, tmp, lih, hlist) { found += trie_flush_list(&li->falh); @@ -1620,8 +1614,14 @@ static int trie_flush_leaf(struct tnode *l) if (list_empty(&li->falh)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); + continue; } + + plen = li->plen; } + + l->slen = KEYLENGTH - plen; + return found; } @@ -1700,13 +1700,22 @@ int fib_table_flush(struct fib_table *tb) for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) { found += trie_flush_leaf(l); - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } + ll = l; } - if (ll && hlist_empty(&ll->list)) - trie_leaf_remove(t, ll); + if (ll) { + if (hlist_empty(&ll->list)) + trie_leaf_remove(t, ll); + else + leaf_pull_suffix(ll); + } pr_debug("trie_flush found=%d\n", found); return found;