bpf: Add lookup_and_delete_elem support to hashtab
Extend the existing bpf_map_lookup_and_delete_elem() functionality to hashtab map types, in addition to stacks and queues. Create a new hashtab bpf_map_ops function that does lookup and deletion of the element under the same bucket lock and add the created map_ops to bpf.h.

Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/4d18480a3e990ffbf14751ddef0325eed3be2966.1620763117.git.denis.salopek@sartura.hr
This commit is contained in:
parent
f9bceaa59c
commit
3e87f192b4
|
@ -70,6 +70,8 @@ struct bpf_map_ops {
|
|||
void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
|
||||
int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr);
|
||||
int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
|
||||
void *value, u64 flags);
|
||||
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
|
||||
const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr);
|
||||
|
|
|
@ -527,6 +527,15 @@ union bpf_iter_link_info {
|
|||
* Look up an element with the given *key* in the map referred to
|
||||
* by the file descriptor *fd*, and if found, delete the element.
|
||||
*
|
||||
* For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
|
||||
* types, the *flags* argument needs to be set to 0, but for other
|
||||
* map types, it may be specified as:
|
||||
*
|
||||
* **BPF_F_LOCK**
|
||||
* Look up and delete the value of a spin-locked map
|
||||
* without returning the lock. This must be specified if
|
||||
* the elements contain a spinlock.
|
||||
*
|
||||
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
|
||||
* implement this command as a "pop" operation, deleting the top
|
||||
* element rather than one corresponding to *key*.
|
||||
|
@ -536,6 +545,10 @@ union bpf_iter_link_info {
|
|||
* This command is only valid for the following map types:
|
||||
* * **BPF_MAP_TYPE_QUEUE**
|
||||
* * **BPF_MAP_TYPE_STACK**
|
||||
* * **BPF_MAP_TYPE_HASH**
|
||||
* * **BPF_MAP_TYPE_PERCPU_HASH**
|
||||
* * **BPF_MAP_TYPE_LRU_HASH**
|
||||
* * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
|
|
|
@ -1401,6 +1401,100 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
|
|||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
 * Shared lookup-and-delete implementation for the hash map flavours.
 *
 * Finds the element matching @key, copies its value into @value and unlinks
 * the element from its bucket, all while holding that bucket's lock.
 *
 * @map:        map to operate on (embedded in a struct bpf_htab)
 * @key:        key to look up; map->key_size bytes are hashed and compared
 * @value:      destination buffer. When @is_percpu is set it receives one
 *              round_up(map->value_size, 8) sized slot per possible CPU,
 *              laid out back to back.
 * @is_lru_map: when true the unlinked element is handed to
 *              bpf_lru_push_free() after the bucket lock is released;
 *              otherwise it is released with free_htab_elem() under the lock
 * @is_percpu:  selects the per-CPU copy path
 * @flags:      only BPF_F_LOCK is inspected here, and only on the
 *              non-per-CPU path, where it selects copy_map_value_locked()
 *              over copy_map_value()
 *
 * Return: 0 when an element was found and removed, -ENOENT when no element
 * matches @key, or the non-zero result of htab_lock_bucket() if the bucket
 * lock could not be taken.
 */
static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					     void *value, bool is_lru_map,
					     bool is_percpu, u64 flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	u32 key_size = map->key_size;
	u32 hash = htab_map_hash(key, key_size, htab->hashrnd);
	struct bucket *b = __select_bucket(htab, hash);
	struct htab_elem *elem;
	unsigned long lock_flags;
	int err;

	err = htab_lock_bucket(htab, b, hash, &lock_flags);
	if (err)
		return err;

	elem = lookup_elem_raw(&b->head, hash, key, key_size);
	if (!elem) {
		err = -ENOENT;
		goto unlock;
	}

	if (is_percpu) {
		u32 slot_size = round_up(map->value_size, 8);
		void __percpu *pptr = htab_elem_get_ptr(elem, key_size);
		void *dst = value;
		int cpu;

		/* One rounded-up slot per possible CPU, packed consecutively. */
		for_each_possible_cpu(cpu) {
			bpf_long_memcpy(dst, per_cpu_ptr(pptr, cpu), slot_size);
			dst += slot_size;
		}
	} else {
		/* The value is stored right after the 8-byte-rounded key. */
		void *src = elem->key + round_up(map->key_size, 8);

		if (flags & BPF_F_LOCK)
			copy_map_value_locked(map, value, src, true);
		else
			copy_map_value(map, value, src);
		check_and_init_map_lock(map, value);
	}

	/* Unlink while the bucket lock is still held. */
	hlist_nulls_del_rcu(&elem->hash_node);
	if (!is_lru_map)
		free_htab_elem(htab, elem);

unlock:
	htab_unlock_bucket(htab, b, hash, lock_flags);

	/* LRU elements go back to the LRU free list only after unlocking. */
	if (elem && is_lru_map)
		bpf_lru_push_free(&htab->lru, &elem->lru_node);

	return err;
}
|
||||
|
||||
/*
 * map_lookup_and_delete_elem callback used by htab_map_ops: forwards to the
 * shared helper with both the LRU and per-CPU paths disabled.
 */
static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					   void *value, u64 flags)
{
	const bool is_lru_map = false;
	const bool is_percpu = false;

	return __htab_map_lookup_and_delete_elem(map, key, value, is_lru_map,
						 is_percpu, flags);
}
|
||||
|
||||
static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
|
||||
void *key, void *value,
|
||||
u64 flags)
|
||||
{
|
||||
return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
|
||||
flags);
|
||||
}
|
||||
|
||||
/*
 * map_lookup_and_delete_elem callback used by htab_lru_map_ops: forwards to
 * the shared helper with LRU handling enabled and the per-CPU copy path
 * disabled.
 */
static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					       void *value, u64 flags)
{
	const bool is_lru_map = true;
	const bool is_percpu = false;

	return __htab_map_lookup_and_delete_elem(map, key, value, is_lru_map,
						 is_percpu, flags);
}
|
||||
|
||||
static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
|
||||
void *key, void *value,
|
||||
u64 flags)
|
||||
{
|
||||
return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
|
||||
flags);
|
||||
}
|
||||
|
||||
static int
|
||||
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
|
||||
const union bpf_attr *attr,
|
||||
|
@ -1934,6 +2028,7 @@ const struct bpf_map_ops htab_map_ops = {
|
|||
.map_free = htab_map_free,
|
||||
.map_get_next_key = htab_map_get_next_key,
|
||||
.map_lookup_elem = htab_map_lookup_elem,
|
||||
.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
|
||||
.map_update_elem = htab_map_update_elem,
|
||||
.map_delete_elem = htab_map_delete_elem,
|
||||
.map_gen_lookup = htab_map_gen_lookup,
|
||||
|
@ -1954,6 +2049,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
|
|||
.map_free = htab_map_free,
|
||||
.map_get_next_key = htab_map_get_next_key,
|
||||
.map_lookup_elem = htab_lru_map_lookup_elem,
|
||||
.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
|
||||
.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
|
||||
.map_update_elem = htab_lru_map_update_elem,
|
||||
.map_delete_elem = htab_lru_map_delete_elem,
|
||||
|
@ -2077,6 +2173,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
|
|||
.map_free = htab_map_free,
|
||||
.map_get_next_key = htab_map_get_next_key,
|
||||
.map_lookup_elem = htab_percpu_map_lookup_elem,
|
||||
.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
|
||||
.map_update_elem = htab_percpu_map_update_elem,
|
||||
.map_delete_elem = htab_map_delete_elem,
|
||||
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
|
||||
|
@ -2096,6 +2193,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
|
|||
.map_free = htab_map_free,
|
||||
.map_get_next_key = htab_map_get_next_key,
|
||||
.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
|
||||
.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
|
||||
.map_update_elem = htab_lru_percpu_map_update_elem,
|
||||
.map_delete_elem = htab_lru_map_delete_elem,
|
||||
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
|
||||
|
|
|
@ -1483,7 +1483,7 @@ free_buf:
|
|||
return err;
|
||||
}
|
||||
|
||||
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
|
||||
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags
|
||||
|
||||
static int map_lookup_and_delete_elem(union bpf_attr *attr)
|
||||
{
|
||||
|
@ -1499,6 +1499,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
|
|||
if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->flags & ~BPF_F_LOCK)
|
||||
return -EINVAL;
|
||||
|
||||
f = fdget(ufd);
|
||||
map = __bpf_map_get(f);
|
||||
if (IS_ERR(map))
|
||||
|
@ -1509,24 +1512,47 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
|
|||
goto err_put;
|
||||
}
|
||||
|
||||
if (attr->flags &&
|
||||
(map->map_type == BPF_MAP_TYPE_QUEUE ||
|
||||
map->map_type == BPF_MAP_TYPE_STACK)) {
|
||||
err = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
if ((attr->flags & BPF_F_LOCK) &&
|
||||
!map_value_has_spin_lock(map)) {
|
||||
err = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
key = __bpf_copy_key(ukey, map->key_size);
|
||||
if (IS_ERR(key)) {
|
||||
err = PTR_ERR(key);
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
value_size = map->value_size;
|
||||
value_size = bpf_map_value_size(map);
|
||||
|
||||
err = -ENOMEM;
|
||||
value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
|
||||
if (!value)
|
||||
goto free_key;
|
||||
|
||||
err = -ENOTSUPP;
|
||||
if (map->map_type == BPF_MAP_TYPE_QUEUE ||
|
||||
map->map_type == BPF_MAP_TYPE_STACK) {
|
||||
err = map->ops->map_pop_elem(map, value);
|
||||
} else {
|
||||
err = -ENOTSUPP;
|
||||
} else if (map->map_type == BPF_MAP_TYPE_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_LRU_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
|
||||
if (!bpf_map_is_dev_bound(map)) {
|
||||
bpf_disable_instrumentation();
|
||||
rcu_read_lock();
|
||||
err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
|
||||
rcu_read_unlock();
|
||||
bpf_enable_instrumentation();
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
|
|
|
@ -527,6 +527,15 @@ union bpf_iter_link_info {
|
|||
* Look up an element with the given *key* in the map referred to
|
||||
* by the file descriptor *fd*, and if found, delete the element.
|
||||
*
|
||||
* For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
|
||||
* types, the *flags* argument needs to be set to 0, but for other
|
||||
* map types, it may be specified as:
|
||||
*
|
||||
* **BPF_F_LOCK**
|
||||
* Look up and delete the value of a spin-locked map
|
||||
* without returning the lock. This must be specified if
|
||||
* the elements contain a spinlock.
|
||||
*
|
||||
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
|
||||
* implement this command as a "pop" operation, deleting the top
|
||||
* element rather than one corresponding to *key*.
|
||||
|
@ -536,6 +545,10 @@ union bpf_iter_link_info {
|
|||
* This command is only valid for the following map types:
|
||||
* * **BPF_MAP_TYPE_QUEUE**
|
||||
* * **BPF_MAP_TYPE_STACK**
|
||||
* * **BPF_MAP_TYPE_HASH**
|
||||
* * **BPF_MAP_TYPE_PERCPU_HASH**
|
||||
* * **BPF_MAP_TYPE_LRU_HASH**
|
||||
* * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
|
||||
*
|
||||
* Return
|
||||
* Returns zero on success. On error, -1 is returned and *errno*
|
||||
|
|
Loading…
Reference in New Issue