bpf: Do not grab the bucket spinlock by default on htab batch ops

Grabbing the spinlock for every bucket even if it's empty, was causing
significant perfomance cost when traversing htab maps that have only a
few entries. This patch addresses the issue by checking first the
bucket_cnt, if the bucket has some entries then we go and grab the
spinlock and proceed with the batching.

Tested with a htab of size 50K and different value of populated entries.

Before:
  Benchmark             Time(ns)        CPU(ns)
  ---------------------------------------------
  BM_DumpHashMap/1       2759655        2752033
  BM_DumpHashMap/10      2933722        2930825
  BM_DumpHashMap/200     3171680        3170265
  BM_DumpHashMap/500     3639607        3635511
  BM_DumpHashMap/1000    4369008        4364981
  BM_DumpHashMap/5k     11171919       11134028
  BM_DumpHashMap/20k    69150080       69033496
  BM_DumpHashMap/39k   190501036      190226162

After:
  Benchmark             Time(ns)        CPU(ns)
  ---------------------------------------------
  BM_DumpHashMap/1        202707         200109
  BM_DumpHashMap/10       213441         210569
  BM_DumpHashMap/200      478641         472350
  BM_DumpHashMap/500      980061         967102
  BM_DumpHashMap/1000    1863835        1839575
  BM_DumpHashMap/5k      8961836        8902540
  BM_DumpHashMap/20k    69761497       69322756
  BM_DumpHashMap/39k   187437830      186551111

Fixes: 057996380a ("bpf: Add batch ops to all htab bpf map")
Signed-off-by: Brian Vazquez <brianvv@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200218172552.215077-1-brianvv@google.com
This commit is contained in:
Brian Vazquez 2020-02-18 09:25:52 -08:00 committed by Alexei Starovoitov
parent 113e6b7e15
commit 492e0d0d6f
1 changed files with 22 additions and 2 deletions

View File

@ -1259,7 +1259,8 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
u64 elem_map_flags, map_flags; u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head; struct hlist_nulls_head *head;
struct hlist_nulls_node *n; struct hlist_nulls_node *n;
unsigned long flags; unsigned long flags = 0;
bool locked = false;
struct htab_elem *l; struct htab_elem *l;
struct bucket *b; struct bucket *b;
int ret = 0; int ret = 0;
@ -1319,15 +1320,25 @@ again_nocopy:
dst_val = values; dst_val = values;
b = &htab->buckets[batch]; b = &htab->buckets[batch];
head = &b->head; head = &b->head;
raw_spin_lock_irqsave(&b->lock, flags); /* do not grab the lock unless need it (bucket_cnt > 0). */
if (locked)
raw_spin_lock_irqsave(&b->lock, flags);
bucket_cnt = 0; bucket_cnt = 0;
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
bucket_cnt++; bucket_cnt++;
if (bucket_cnt && !locked) {
locked = true;
goto again_nocopy;
}
if (bucket_cnt > (max_count - total)) { if (bucket_cnt > (max_count - total)) {
if (total == 0) if (total == 0)
ret = -ENOSPC; ret = -ENOSPC;
/* Note that since bucket_cnt > 0 here, it is implicit
* that the locked was grabbed, so release it.
*/
raw_spin_unlock_irqrestore(&b->lock, flags); raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock(); rcu_read_unlock();
this_cpu_dec(bpf_prog_active); this_cpu_dec(bpf_prog_active);
@ -1337,6 +1348,9 @@ again_nocopy:
if (bucket_cnt > bucket_size) { if (bucket_cnt > bucket_size) {
bucket_size = bucket_cnt; bucket_size = bucket_cnt;
/* Note that since bucket_cnt > 0 here, it is implicit
* that the locked was grabbed, so release it.
*/
raw_spin_unlock_irqrestore(&b->lock, flags); raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock(); rcu_read_unlock();
this_cpu_dec(bpf_prog_active); this_cpu_dec(bpf_prog_active);
@ -1346,6 +1360,10 @@ again_nocopy:
goto alloc; goto alloc;
} }
/* Next block is only safe to run if you have grabbed the lock */
if (!locked)
goto next_batch;
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
memcpy(dst_key, l->key, key_size); memcpy(dst_key, l->key, key_size);
@ -1380,6 +1398,8 @@ again_nocopy:
} }
raw_spin_unlock_irqrestore(&b->lock, flags); raw_spin_unlock_irqrestore(&b->lock, flags);
locked = false;
next_batch:
/* If we are not copying data, we can go to next bucket and avoid /* If we are not copying data, we can go to next bucket and avoid
* unlocking the rcu. * unlocking the rcu.
*/ */