From 97a6ec4ac021f7fbec05c15a3aa0c4aaf0461af5 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Mon, 4 Dec 2017 10:31:41 -0800
Subject: [PATCH 1/5] rhashtable: Change rhashtable_walk_start to return void

Most callers of rhashtable_walk_start don't care about a resize event
which is indicated by a return value of -EAGAIN. So calls to
rhashtable_walk_start are wrapped with code to ignore -EAGAIN. Something
like this is common:

       ret = rhashtable_walk_start(rhiter);
       if (ret && ret != -EAGAIN)
               goto out;

Since zero and -EAGAIN are the only possible return values from the
function this check is pointless. The condition never evaluates to true.

This patch changes rhashtable_walk_start to return void. This simplifies
code for the callers that ignore -EAGAIN. For the few cases where the
caller cares about the resize event, particularly where the table can be
walked in multiple parts for netlink or seq file dump, the function
rhashtable_walk_start_check has been added that returns -EAGAIN on a
resize event.

Signed-off-by: Tom Herbert
Acked-by: Herbert Xu
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c  |  6 +---
 .../ethernet/chelsio/cxgb4/cxgb4_tc_flower.c  |  7 ++--
 fs/gfs2/glock.c                               |  7 ++--
 include/linux/rhashtable.h                    |  8 ++++-
 include/net/sctp/sctp.h                       |  2 +-
 lib/rhashtable.c                              | 10 ++++--
 lib/test_rhashtable.c                         |  6 +---
 net/ipv6/ila/ila_xlat.c                       |  4 +--
 net/ipv6/seg6.c                               |  4 +--
 net/mac80211/mesh_pathtbl.c                   | 34 ++++++-------------
 net/netfilter/nft_set_hash.c                  | 10 ++----
 net/netlink/af_netlink.c                      |  5 +--
 net/netlink/diag.c                            |  8 ++---
 net/sctp/proc.c                               |  6 +---
 net/sctp/socket.c                             | 19 +++--------
 net/tipc/socket.c                             |  6 ++--
 16 files changed, 48 insertions(+), 94 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 9807214da206..2ae5ed151369 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -1412,11 +1412,7 @@ bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
 	void *flow_node;
 	int rc, i;
 
-	rc = rhashtable_walk_start(iter);
-	if (rc && rc != -EAGAIN) {
-		i = 0;
-		goto done;
-	}
+	rhashtable_walk_start(iter);
 
 	rc = 0;
 	for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index a12b894f135d..9b9f3f99b39d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -763,9 +763,7 @@ static void ch_flower_stats_handler(struct work_struct *work)
 
 	rhashtable_walk_enter(&adap->flower_tbl, &iter);
 	do {
-		flower_entry = ERR_PTR(rhashtable_walk_start(&iter));
-		if (IS_ERR(flower_entry))
-			goto walk_stop;
+		rhashtable_walk_start(&iter);
 
 		while ((flower_entry = rhashtable_walk_next(&iter)) &&
 		       !IS_ERR(flower_entry)) {
@@ -784,8 +782,9 @@ static void ch_flower_stats_handler(struct work_struct *work)
 				spin_unlock(&flower_entry->lock);
 			}
 		}
-walk_stop:
+
 		rhashtable_walk_stop(&iter);
+
 	} while (flower_entry == ERR_PTR(-EAGAIN));
 	rhashtable_walk_exit(&iter);
 	mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 11066d8647d2..90af87ff29ba 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1549,16 +1549,13 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 
 	rhashtable_walk_enter(&gl_hash_table, &iter);
 
 	do {
-		gl = ERR_PTR(rhashtable_walk_start(&iter));
-		if (IS_ERR(gl))
-			goto walk_stop;
+		rhashtable_walk_start(&iter);
 
 		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
 			if (gl->gl_name.ln_sbd == sdp &&
 			    lockref_get_not_dead(&gl->gl_lockref))
 				examiner(gl);
 
-walk_stop:
 		rhashtable_walk_stop(&iter);
 	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
@@ -1947,7 +1944,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 	loff_t n = *pos;
 
 	rhashtable_walk_enter(&gl_hash_table, &gi->hti);
-	if (rhashtable_walk_start(&gi->hti) != 0)
+	if (rhashtable_walk_start_check(&gi->hti) != 0)
 		return NULL;
 
 	do {
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 361c08e35dbc..13ccc483738d 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -378,7 +378,13 @@ void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 void rhashtable_walk_enter(struct rhashtable *ht,
 			   struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
-int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
+int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU);
+
+static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
+{
+	(void)rhashtable_walk_start_check(iter);
+}
+
 void *rhashtable_walk_next(struct rhashtable_iter *iter);
 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
 
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 906a9c0efa71..6f79415f6634 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -116,7 +116,7 @@ extern struct percpu_counter sctp_sockets_allocated;
 int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
 struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
 
-int sctp_transport_walk_start(struct rhashtable_iter *iter);
+void sctp_transport_walk_start(struct rhashtable_iter *iter);
 void sctp_transport_walk_stop(struct rhashtable_iter *iter);
 struct sctp_transport *sctp_transport_get_next(struct net *net,
 			struct rhashtable_iter *iter);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index ddd7dde87c3c..1935e86ed477 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -732,7 +732,7 @@ void rhashtable_walk_exit(struct rhashtable_iter *iter)
 EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
 
 /**
- * rhashtable_walk_start - Start a hash table walk
+ * rhashtable_walk_start_check - Start a hash table walk
 * @iter:	Hash table iterator
 *
 * Start a hash table walk at the current iterator position.  Note that we take
@@ -744,8 +744,12 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
 * Returns -EAGAIN if resize event occured.  Note that the iterator
 * will rewind back to the beginning and you may use it immediately
 * by calling rhashtable_walk_next.
+ *
+ * rhashtable_walk_start is defined as an inline variant that returns
+ * void. This is preferred in cases where the caller would ignore
+ * resize events and always continue.
 */
-int rhashtable_walk_start(struct rhashtable_iter *iter)
+int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 	__acquires(RCU)
 {
 	struct rhashtable *ht = iter->ht;
@@ -764,7 +768,7 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rhashtable_walk_start);
+EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
 
 /**
  * rhashtable_walk_next - Return the next object and advance the iterator
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 8e83cbdc049c..76d3667fdea2 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -162,11 +162,7 @@ static void test_bucket_stats(struct rhashtable *ht, unsigned int entries)
 		return;
 	}
 
-	err = rhashtable_walk_start(&hti);
-	if (err && err != -EAGAIN) {
-		pr_warn("Test failed: iterator failed: %d\n", err);
-		return;
-	}
+	rhashtable_walk_start(&hti);
 
 	while ((pos = rhashtable_walk_next(&hti))) {
 		if (PTR_ERR(pos) == -EAGAIN) {
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 6eb5e68f112a..44c39c5f0638 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -512,9 +512,7 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	struct ila_map *ila;
 	int ret;
 
-	ret = rhashtable_walk_start(rhiter);
-	if (ret && ret != -EAGAIN)
-		goto done;
+	rhashtable_walk_start(rhiter);
 
 	for (;;) {
 		ila = rhashtable_walk_next(rhiter);
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index c81407770956..7f5621d09571 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -306,9 +306,7 @@ static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
 	struct seg6_hmac_info *hinfo;
 	int ret;
 
-	ret = rhashtable_walk_start(iter);
-	if (ret && ret != -EAGAIN)
-		goto done;
+	rhashtable_walk_start(iter);
 
 	for (;;) {
 		hinfo = rhashtable_walk_next(iter);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 86c8dfef56a4..a5125624a76d 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -257,9 +257,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
 	if (ret)
 		return NULL;
 
-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto err;
+	rhashtable_walk_start(&iter);
 
 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -269,7 +267,6 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
 		if (i++ == idx)
 			break;
 	}
-err:
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 
@@ -513,9 +510,7 @@ void mesh_plink_broken(struct sta_info *sta)
 	if (ret)
 		return;
 
-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);
 
 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -535,7 +530,6 @@ void mesh_plink_broken(struct sta_info *sta)
 				WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast);
 		}
 	}
-out:
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
@@ -584,9 +578,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
 	if (ret)
 		return;
 
-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);
 
 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -597,7 +589,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
 		if (rcu_access_pointer(mpath->next_hop) == sta)
 			__mesh_path_del(tbl, mpath);
 	}
-out:
+
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
@@ -614,9 +606,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
 	if (ret)
 		return;
 
-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);
 
 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -627,7 +617,7 @@ static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
 		if (ether_addr_equal(mpath->mpp, proxy))
 			__mesh_path_del(tbl, mpath);
 	}
-out:
+
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
@@ -642,9 +632,7 @@ static void table_flush_by_iface(struct mesh_table *tbl)
 	if (ret)
 		return;
 
-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);
 
 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -653,7 +641,7 @@ static void table_flush_by_iface(struct mesh_table *tbl)
 			break;
 		__mesh_path_del(tbl, mpath);
 	}
-out:
+
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
@@ -873,9 +861,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
 	if (ret)
 		return;
 
-	ret = rhashtable_walk_start(&iter);
-	if (ret && ret != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&iter);
 
 	while ((mpath = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(mpath) && PTR_ERR(mpath) == -EAGAIN)
@@ -887,7 +873,7 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
 		    time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
 			__mesh_path_del(tbl, mpath);
 	}
-out:
+
 	rhashtable_walk_stop(&iter);
 	rhashtable_walk_exit(&iter);
 }
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index f8166c1d5430..3f1624ee056f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -251,11 +251,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 	if (err)
 		return;
 
-	err = rhashtable_walk_start(&hti);
-	if (err && err != -EAGAIN) {
-		iter->err = err;
-		goto out;
-	}
+	rhashtable_walk_start(&hti);
 
 	while ((he = rhashtable_walk_next(&hti))) {
 		if (IS_ERR(he)) {
@@ -306,9 +302,7 @@ static void nft_rhash_gc(struct work_struct *work)
 	if (err)
 		goto schedule;
 
-	err = rhashtable_walk_start(&hti);
-	if (err && err != -EAGAIN)
-		goto out;
+	rhashtable_walk_start(&hti);
 
 	while ((he = rhashtable_walk_next(&hti))) {
 		if (IS_ERR(he)) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b9e0ee4e22f5..ab325d4d6fef 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2478,8 +2478,9 @@ static int netlink_walk_start(struct nl_seq_iter *iter)
 		return err;
 	}
 
-	err = rhashtable_walk_start(&iter->hti);
-	return err == -EAGAIN ? 0 : err;
+	rhashtable_walk_start(&iter->hti);
+
+	return 0;
 }
 
 static void netlink_walk_stop(struct nl_seq_iter *iter)
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 8faa20b4d457..7dda33b9b784 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -115,11 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 	if (!s_num)
 		rhashtable_walk_enter(&tbl->hash, hti);
 
-	ret = rhashtable_walk_start(hti);
-	if (ret == -EAGAIN)
-		ret = 0;
-	if (ret)
-		goto stop;
+	rhashtable_walk_start(hti);
 
 	while ((nlsk = rhashtable_walk_next(hti))) {
 		if (IS_ERR(nlsk)) {
@@ -146,8 +142,8 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		}
 	}
 
-stop:
 	rhashtable_walk_stop(hti);
+
 	if (ret)
 		goto done;
 
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 26b4be6b4172..4545bc2aff84 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -288,12 +288,8 @@ struct sctp_ht_iter {
 static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct sctp_ht_iter *iter = seq->private;
-	int err = sctp_transport_walk_start(&iter->hti);
 
-	if (err) {
-		iter->start_fail = 1;
-		return ERR_PTR(err);
-	}
+	sctp_transport_walk_start(&iter->hti);
 
 	iter->start_fail = 0;
 	return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index eb17a911aa29..3e55daa37e66 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4676,20 +4676,11 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
 EXPORT_SYMBOL_GPL(sctp_get_sctp_info);
 
 /* use callback to avoid exporting the core structure */
-int sctp_transport_walk_start(struct rhashtable_iter *iter)
+void sctp_transport_walk_start(struct rhashtable_iter *iter)
 {
-	int err;
-
 	rhltable_walk_enter(&sctp_transport_hashtable, iter);
 
-	err = rhashtable_walk_start(iter);
-	if (err && err != -EAGAIN) {
-		rhashtable_walk_stop(iter);
-		rhashtable_walk_exit(iter);
-		return err;
-	}
-
-	return 0;
+	rhashtable_walk_start(iter);
 }
 
 void sctp_transport_walk_stop(struct rhashtable_iter *iter)
@@ -4780,12 +4771,10 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
 			    struct net *net, int *pos, void *p) {
 	struct rhashtable_iter hti;
 	struct sctp_transport *tsp;
-	int ret;
+	int ret = 0;
 
 again:
-	ret = sctp_transport_walk_start(&hti);
-	if (ret)
-		return ret;
+	sctp_transport_walk_start(&hti);
 
 	tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
 	for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d18c0caa92b..22c4fd8a9dfe 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2640,9 +2640,7 @@ void tipc_sk_reinit(struct net *net)
 	rhashtable_walk_enter(&tn->sk_rht, &iter);
 
 	do {
-		tsk = ERR_PTR(rhashtable_walk_start(&iter));
-		if (IS_ERR(tsk))
-			goto walk_stop;
+		rhashtable_walk_start(&iter);
 
 		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
 			spin_lock_bh(&tsk->sk.sk_lock.slock);
@@ -2651,7 +2649,7 @@ void tipc_sk_reinit(struct net *net)
 			msg_set_orignode(msg, tn->own_addr);
 			spin_unlock_bh(&tsk->sk.sk_lock.slock);
 		}
-walk_stop:
+
 		rhashtable_walk_stop(&iter);
 	} while (tsk == ERR_PTR(-EAGAIN));
 }
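To illustrate the API after this change, here is a minimal sketch of a
walker that ignores resize events, and of the checked variant for
multi-part dumps. Only the rhashtable_* calls are from this series; the
table, element type, and process() helper are hypothetical:

	struct rhashtable_iter iter;
	struct my_obj *obj;

	rhashtable_walk_enter(&my_ht, &iter);
	rhashtable_walk_start(&iter);	/* returns void; -EAGAIN is ignored */

	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* resize: iterator rewound, keep going */
			break;
		}
		process(obj);	/* obj is only stable under the walk's RCU read lock */
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);

A caller that must detect the rewind (for example a netlink dump that
emits the table in chunks) uses the checked variant instead:

	if (rhashtable_walk_start_check(&iter) == -EAGAIN) {
		/* table was resized; the walk restarts from the beginning */
	}
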
From 2db54b475ae918d274bfc276416c384ba95e9f94 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Mon, 4 Dec 2017 10:31:42 -0800
Subject: [PATCH 2/5] rhashtable: Add rhashtable_walk_peek

This function is like rhashtable_walk_next except that it only returns
the current element in the iter and does not advance the iter.

This patch also creates __rhashtable_walk_find_next. It finds the next
element in the table when the entry cached in iter is NULL or at the end
of a slot. __rhashtable_walk_find_next is called from
rhashtable_walk_next and rhashtable_walk_peek.

end_of_table is a new field in the iter structure. It indicates that the
end of the table was reached (walker.tbl being NULL is not a sufficient
condition for end of table).

Signed-off-by: Tom Herbert
Acked-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h |   2 +
 lib/rhashtable.c           | 103 +++++++++++++++++++++++++++++++------
 2 files changed, 89 insertions(+), 16 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 13ccc483738d..542b1b265ac4 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -207,6 +207,7 @@ struct rhashtable_iter {
 	struct rhashtable_walker walker;
 	unsigned int slot;
 	unsigned int skip;
+	bool end_of_table;
 };
 
 static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
@@ -386,6 +387,7 @@ static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
 }
 
 void *rhashtable_walk_next(struct rhashtable_iter *iter);
+void *rhashtable_walk_peek(struct rhashtable_iter *iter);
 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
 
 void rhashtable_free_and_destroy(struct rhashtable *ht,
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 1935e86ed477..6fc52d82efe6 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -707,6 +707,7 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
 	iter->p = NULL;
 	iter->slot = 0;
 	iter->skip = 0;
+	iter->end_of_table = 0;
 
 	spin_lock(&ht->lock);
 	iter->walker.tbl =
@@ -761,7 +762,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 	list_del(&iter->walker.list);
 	spin_unlock(&ht->lock);
 
-	if (!iter->walker.tbl) {
+	if (!iter->walker.tbl && !iter->end_of_table) {
 		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}
@@ -771,18 +772,16 @@
 EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
 
 /**
- * rhashtable_walk_next - Return the next object and advance the iterator
+ * __rhashtable_walk_find_next - Find the next element in a table (or the first
+ * one in case of a new walk).
+ *
 * @iter:	Hash table iterator
 *
- * Note that you must call rhashtable_walk_stop when you are finished
- * with the walk.
+ * Returns the found object or NULL when the end of the table is reached.
 *
- * Returns the next object or NULL when the end of the table is reached.
- *
- * Returns -EAGAIN if resize event occured.  Note that the iterator
- * will rewind back to the beginning and you may continue to use it.
+ * Returns -EAGAIN if resize event occurred.
 */
-void *rhashtable_walk_next(struct rhashtable_iter *iter)
+static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
 {
 	struct bucket_table *tbl = iter->walker.tbl;
 	struct rhlist_head *list = iter->list;
@@ -790,13 +789,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
 	struct rhash_head *p = iter->p;
 	bool rhlist = ht->rhlist;
 
-	if (p) {
-		if (!rhlist || !(list = rcu_dereference(list->next))) {
-			p = rcu_dereference(p->next);
-			list = container_of(p, struct rhlist_head, rhead);
-		}
-		goto next;
-	}
+	if (!tbl)
+		return NULL;
 
 	for (; iter->slot < tbl->size; iter->slot++) {
 		int skip = iter->skip;
@@ -840,12 +834,89 @@ next:
 		iter->slot = 0;
 		iter->skip = 0;
 		return ERR_PTR(-EAGAIN);
+	} else {
+		iter->end_of_table = true;
 	}
 
 	return NULL;
 }
+
+/**
+ * rhashtable_walk_next - Return the next object and advance the iterator
+ * @iter:	Hash table iterator
+ *
+ * Note that you must call rhashtable_walk_stop when you are finished
+ * with the walk.
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred.  Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_next(struct rhashtable_iter *iter)
+{
+	struct rhlist_head *list = iter->list;
+	struct rhashtable *ht = iter->ht;
+	struct rhash_head *p = iter->p;
+	bool rhlist = ht->rhlist;
+
+	if (p) {
+		if (!rhlist || !(list = rcu_dereference(list->next))) {
+			p = rcu_dereference(p->next);
+			list = container_of(p, struct rhlist_head, rhead);
+		}
+		if (!rht_is_a_nulls(p)) {
+			iter->skip++;
+			iter->p = p;
+			iter->list = list;
+			return rht_obj(ht, rhlist ? &list->rhead : p);
+		}
+
+		/* At the end of this slot, switch to next one and then find
+		 * next entry from that point.
+		 */
+		iter->skip = 0;
+		iter->slot++;
+	}
+
+	return __rhashtable_walk_find_next(iter);
+}
 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
 
+/**
+ * rhashtable_walk_peek - Return the next object but don't advance the iterator
+ * @iter:	Hash table iterator
+ *
+ * Returns the next object or NULL when the end of the table is reached.
+ *
+ * Returns -EAGAIN if resize event occurred.  Note that the iterator
+ * will rewind back to the beginning and you may continue to use it.
+ */
+void *rhashtable_walk_peek(struct rhashtable_iter *iter)
+{
+	struct rhlist_head *list = iter->list;
+	struct rhashtable *ht = iter->ht;
+	struct rhash_head *p = iter->p;
+
+	if (p)
+		return rht_obj(ht, ht->rhlist ? &list->rhead : p);
+
+	/* No object found in current iter, find next one in the table. */
+
+	if (iter->skip) {
+		/* A nonzero skip value points to the next entry in the table
+		 * beyond that last one that was found. Decrement skip so
+		 * we find the current value. __rhashtable_walk_find_next
+		 * will restore the original value of skip assuming that
+		 * the table hasn't changed.
+		 */
+		iter->skip--;
+	}
+
+	return __rhashtable_walk_find_next(iter);
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
+
 /**
  * rhashtable_walk_stop - Finish a hash table walk
  * @iter:	Hash table iterator
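The intended caller is a resumable dump that may fail partway through:
with rhashtable_walk_peek it can re-fetch the element it could not emit
on the previous pass instead of losing it, since only
rhashtable_walk_next advances the position. A minimal sketch under that
assumption; emit() and the element type are hypothetical:

	struct my_obj *obj;

	rhashtable_walk_start(&iter);

	while ((obj = rhashtable_walk_peek(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue;	/* resize: iterator rewound */
			break;
		}
		if (!emit(obj))
			break;	/* out of space: obj stays current and is
				 * returned again by the next peek
				 */
		rhashtable_walk_next(&iter);	/* consumed: advance past obj */
	}

	rhashtable_walk_stop(&iter);
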
From 2b86093135d014624f668658e1636f22a3027fc2 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Mon, 4 Dec 2017 10:31:43 -0800
Subject: [PATCH 3/5] rhashtable: abstract out function to get hash

Split out most of rht_key_hashfn, which calculates the hash, into its
own function. This way the hash function can be called separately to
get the hash value.

Acked-by: Thomas Graf
Signed-off-by: Tom Herbert
Signed-off-by: David S. Miller
---
 include/linux/rhashtable.h | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 542b1b265ac4..c9df2527e0cd 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -240,34 +240,42 @@ static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
 	return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
 }
 
-static inline unsigned int rht_key_hashfn(
-	struct rhashtable *ht, const struct bucket_table *tbl,
-	const void *key, const struct rhashtable_params params)
+static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
+	const void *key, const struct rhashtable_params params,
+	unsigned int hash_rnd)
 {
 	unsigned int hash;
 
 	/* params must be equal to ht->p if it isn't constant. */
 	if (!__builtin_constant_p(params.key_len))
-		hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
+		hash = ht->p.hashfn(key, ht->key_len, hash_rnd);
 	else if (params.key_len) {
 		unsigned int key_len = params.key_len;
 
 		if (params.hashfn)
-			hash = params.hashfn(key, key_len, tbl->hash_rnd);
+			hash = params.hashfn(key, key_len, hash_rnd);
 		else if (key_len & (sizeof(u32) - 1))
-			hash = jhash(key, key_len, tbl->hash_rnd);
+			hash = jhash(key, key_len, hash_rnd);
 		else
-			hash = jhash2(key, key_len / sizeof(u32),
-				      tbl->hash_rnd);
+			hash = jhash2(key, key_len / sizeof(u32), hash_rnd);
 	} else {
 		unsigned int key_len = ht->p.key_len;
 
 		if (params.hashfn)
-			hash = params.hashfn(key, key_len, tbl->hash_rnd);
+			hash = params.hashfn(key, key_len, hash_rnd);
 		else
-			hash = jhash(key, key_len, tbl->hash_rnd);
+			hash = jhash(key, key_len, hash_rnd);
 	}
 
+	return hash;
+}
+
+static inline unsigned int rht_key_hashfn(
+	struct rhashtable *ht, const struct bucket_table *tbl,
+	const void *key, const struct rhashtable_params params)
+{
+	unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd);
+
 	return rht_bucket_index(tbl, hash);
 }
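With the calculation split out, a caller can obtain the full 32-bit hash
of a key without holding a bucket_table, supplying its own seed;
rht_key_hashfn is now just that value folded to a bucket index. A sketch
(ht, key, params, and the hash_rnd seed are whatever the caller already
has; nothing here is new API beyond rht_key_get_hash itself):

	/* Full hash of the key under a caller-supplied seed. */
	u32 hash = rht_key_get_hash(ht, key, params, hash_rnd);

	/* rht_key_hashfn(ht, tbl, key, params) is now equivalent to: */
	u32 bucket = rht_bucket_index(tbl,
			rht_key_get_hash(ht, key, params, tbl->hash_rnd));
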
From 92f36cca5773cbaa78c46ccf49503964a52da294 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Mon, 4 Dec 2017 10:31:44 -0800
Subject: [PATCH 4/5] spinlock: Add library function to allocate spinlock
 buckets array

Add two new library functions: alloc_bucket_spinlocks and
free_bucket_spinlocks. These are used to allocate and free an array of
spinlocks that are useful as locks for hash buckets. The interface
specifies the maximum number of spinlocks in the array as well as a CPU
multiplier to derive the number of spinlocks to allocate. The number
allocated is rounded up to a power of two to make the array amenable to
hash lookup.

Signed-off-by: Tom Herbert
Signed-off-by: David S. Miller
---
 include/linux/spinlock.h |  6 +++++
 lib/Makefile             |  2 +-
 lib/bucket_locks.c       | 54 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 lib/bucket_locks.c

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index a39186194cd6..10fd28b118ee 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -414,4 +414,10 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
 #define atomic_dec_and_lock(atomic, lock) \
 		__cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
 
+int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
+			   size_t max_size, unsigned int cpu_mult,
+			   gfp_t gfp);
+
+void free_bucket_spinlocks(spinlock_t *locks);
+
 #endif /* __LINUX_SPINLOCK_H */
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..a6c8529dd9b2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -39,7 +39,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
-	 once.o refcount.o usercopy.o errseq.o
+	 once.o refcount.o usercopy.o errseq.o bucket_locks.o
 obj-$(CONFIG_STRING_SELFTEST) += test_string.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
new file mode 100644
index 000000000000..266a97c5708b
--- /dev/null
+++ b/lib/bucket_locks.c
@@ -0,0 +1,54 @@
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/* Allocate an array of spinlocks to be accessed by a hash. Two arguments
+ * indicate the number of elements to allocate in the array. max_size
+ * gives the maximum number of elements to allocate. cpu_mult gives
+ * the number of locks per CPU to allocate. The size is rounded up
+ * to a power of 2 to be suitable as a hash table.
+ */
+
+int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
+			   size_t max_size, unsigned int cpu_mult, gfp_t gfp)
+{
+	spinlock_t *tlocks = NULL;
+	unsigned int i, size;
+#if defined(CONFIG_PROVE_LOCKING)
+	unsigned int nr_pcpus = 2;
+#else
+	unsigned int nr_pcpus = num_possible_cpus();
+#endif
+
+	if (cpu_mult) {
+		nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
+		size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size);
+	} else {
+		size = max_size;
+	}
+
+	if (sizeof(spinlock_t) != 0) {
+		if (gfpflags_allow_blocking(gfp))
+			tlocks = kvmalloc(size * sizeof(spinlock_t), gfp);
+		else
+			tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp);
+		if (!tlocks)
+			return -ENOMEM;
+		for (i = 0; i < size; i++)
+			spin_lock_init(&tlocks[i]);
+	}
+
+	*locks = tlocks;
+	*locks_mask = size - 1;
+
+	return 0;
+}
+EXPORT_SYMBOL(alloc_bucket_spinlocks);
+
+void free_bucket_spinlocks(spinlock_t *locks)
+{
+	kvfree(locks);
+}
+EXPORT_SYMBOL(free_bucket_spinlocks);
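A minimal sketch of a hash table using the new helpers; the my_* names
are hypothetical. cpu_mult is passed as 0 here so that exactly max_size
(a power of two) locks are allocated and locks_mask works as an index
mask, mirroring how rhashtable uses the array:

	struct my_tab {
		spinlock_t	*locks;
		unsigned int	locks_mask;
	};

	static int my_tab_init(struct my_tab *tab, gfp_t gfp)
	{
		/* Up to 128 locks, no per-CPU scaling. */
		return alloc_bucket_spinlocks(&tab->locks, &tab->locks_mask,
					      128, 0, gfp);
	}

	static spinlock_t *my_bucket_lock(struct my_tab *tab, u32 hash)
	{
		/* locks_mask is size - 1, so hashing into the array
		 * reduces to a mask.
		 */
		return &tab->locks[hash & tab->locks_mask];
	}

	static void my_tab_destroy(struct my_tab *tab)
	{
		free_bucket_spinlocks(tab->locks);
	}
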
From 64e0cd0d3540dbbdf6661943025409e6b31d5178 Mon Sep 17 00:00:00 2001
From: Tom Herbert
Date: Mon, 4 Dec 2017 10:31:45 -0800
Subject: [PATCH 5/5] rhashtable: Call library function alloc_bucket_spinlocks

To allocate the array of bucket locks for the hash table we now call
library function alloc_bucket_spinlocks. This function is based on the
old alloc_bucket_locks in rhashtable and should produce the same effect.

Signed-off-by: Tom Herbert
Signed-off-by: David S. Miller
---
 lib/rhashtable.c | 47 ++++++++---------------------------------------
 1 file changed, 8 insertions(+), 39 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 6fc52d82efe6..3825c30aaa36 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -65,42 +65,6 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
 #define ASSERT_RHT_MUTEX(HT)
 #endif
 
-
-static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
-			      gfp_t gfp)
-{
-	unsigned int i, size;
-#if defined(CONFIG_PROVE_LOCKING)
-	unsigned int nr_pcpus = 2;
-#else
-	unsigned int nr_pcpus = num_possible_cpus();
-#endif
-
-	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
-	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
-
-	/* Never allocate more than 0.5 locks per bucket */
-	size = min_t(unsigned int, size, tbl->size >> 1);
-
-	if (tbl->nest)
-		size = min(size, 1U << tbl->nest);
-
-	if (sizeof(spinlock_t) != 0) {
-		if (gfpflags_allow_blocking(gfp))
-			tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp);
-		else
-			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-						   gfp);
-		if (!tbl->locks)
-			return -ENOMEM;
-		for (i = 0; i < size; i++)
-			spin_lock_init(&tbl->locks[i]);
-	}
-	tbl->locks_mask = size - 1;
-
-	return 0;
-}
-
 static void nested_table_free(union nested_table *ntbl, unsigned int size)
 {
 	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
@@ -140,7 +104,7 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	if (tbl->nest)
 		nested_bucket_table_free(tbl);
 
-	kvfree(tbl->locks);
+	free_bucket_spinlocks(tbl->locks);
 	kvfree(tbl);
 }
 
@@ -207,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 					       gfp_t gfp)
 {
 	struct bucket_table *tbl = NULL;
-	size_t size;
+	size_t size, max_locks;
 	int i;
 
 	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
@@ -227,7 +191,12 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 
 	tbl->size = size;
 
-	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
+	max_locks = size >> 1;
+	if (tbl->nest)
+		max_locks = min_t(size_t, max_locks, 1U << tbl->nest);
+
+	if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks,
+				   ht->p.locks_mul, gfp) < 0) {
 		bucket_table_free(tbl);
 		return NULL;
 	}