libbpf: Refactor global data map initialization

Refactor global data map initialization to use anonymous mmap()-ed memory
instead of malloc()-ed memory. This makes it possible to transparently
re-mmap() an already existing memory address to point to the BPF map's memory
after the bpf_object__load() step (done in a follow-up patch). This
choreographed setup provides a nice and unsurprising way for the user to
pre-initialize read-only (and r/w as well) maps and, after BPF map creation,
keep working with the mmap()-ed contents of the map. All in a way that doesn't
require user code to update any pointers: the illusion of working with memory
contents is preserved before and after the actual BPF map instantiation.

Selftests and the runqslower example demonstrate this feature in follow-up patches.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20191214014341.3442258-10-andriin@fb.com
This commit is contained in:
Andrii Nakryiko 2019-12-13 17:43:33 -08:00 committed by Alexei Starovoitov
parent 01af3bf067
commit eba9c5f498
1 changed file with 59 additions and 38 deletions

View File

@@ -221,16 +221,12 @@ struct bpf_map {
void *priv; void *priv;
bpf_map_clear_priv_t clear_priv; bpf_map_clear_priv_t clear_priv;
enum libbpf_map_type libbpf_type; enum libbpf_map_type libbpf_type;
void *mmaped;
char *pin_path; char *pin_path;
bool pinned; bool pinned;
bool reused; bool reused;
}; };
struct bpf_secdata {
void *rodata;
void *data;
};
static LIST_HEAD(bpf_objects_list); static LIST_HEAD(bpf_objects_list);
struct bpf_object { struct bpf_object {
@@ -243,7 +239,6 @@ struct bpf_object {
struct bpf_map *maps; struct bpf_map *maps;
size_t nr_maps; size_t nr_maps;
size_t maps_cap; size_t maps_cap;
struct bpf_secdata sections;
bool loaded; bool loaded;
bool has_pseudo_calls; bool has_pseudo_calls;
@@ -828,13 +823,24 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return &obj->maps[obj->nr_maps++]; return &obj->maps[obj->nr_maps++];
} }
/* Compute the size of the anonymous mmap() region backing a global data map:
 * the map's value_size rounded up to 8 bytes, multiplied by max_entries, and
 * then rounded up to the system page size (mmap() works in whole pages).
 */
static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
long page_sz = sysconf(_SC_PAGE_SIZE);
size_t map_sz;
/* 8-byte alignment of each value, as used by the kernel for array maps */
map_sz = roundup(map->def.value_size, 8) * map->def.max_entries;
map_sz = roundup(map_sz, page_sz);
return map_sz;
}
static int static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
int sec_idx, Elf_Data *data, void **data_buff) int sec_idx, void *data, size_t data_sz)
{ {
char map_name[BPF_OBJ_NAME_LEN]; char map_name[BPF_OBJ_NAME_LEN];
struct bpf_map_def *def; struct bpf_map_def *def;
struct bpf_map *map; struct bpf_map *map;
int err;
map = bpf_object__add_map(obj); map = bpf_object__add_map(obj);
if (IS_ERR(map)) if (IS_ERR(map))
@@ -854,7 +860,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def = &map->def; def = &map->def;
def->type = BPF_MAP_TYPE_ARRAY; def->type = BPF_MAP_TYPE_ARRAY;
def->key_size = sizeof(int); def->key_size = sizeof(int);
def->value_size = data->d_size; def->value_size = data_sz;
def->max_entries = 1; def->max_entries = 1;
def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
def->map_flags |= BPF_F_MMAPABLE; def->map_flags |= BPF_F_MMAPABLE;
@@ -862,16 +868,20 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
map_name, map->sec_idx, map->sec_offset, def->map_flags); map_name, map->sec_idx, map->sec_offset, def->map_flags);
if (data_buff) { map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
*data_buff = malloc(data->d_size); MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (!*data_buff) { if (map->mmaped == MAP_FAILED) {
zfree(&map->name); err = -errno;
pr_warn("failed to alloc map content buffer\n"); map->mmaped = NULL;
return -ENOMEM; pr_warn("failed to alloc map '%s' content buffer: %d\n",
} map->name, err);
memcpy(*data_buff, data->d_buf, data->d_size); zfree(&map->name);
return err;
} }
if (type != LIBBPF_MAP_BSS)
memcpy(map->mmaped, data, data_sz);
pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
return 0; return 0;
} }
@@ -886,23 +896,24 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
if (obj->efile.data_shndx >= 0) { if (obj->efile.data_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
obj->efile.data_shndx, obj->efile.data_shndx,
obj->efile.data, obj->efile.data->d_buf,
&obj->sections.data); obj->efile.data->d_size);
if (err) if (err)
return err; return err;
} }
if (obj->efile.rodata_shndx >= 0) { if (obj->efile.rodata_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
obj->efile.rodata_shndx, obj->efile.rodata_shndx,
obj->efile.rodata, obj->efile.rodata->d_buf,
&obj->sections.rodata); obj->efile.rodata->d_size);
if (err) if (err)
return err; return err;
} }
if (obj->efile.bss_shndx >= 0) { if (obj->efile.bss_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
obj->efile.bss_shndx, obj->efile.bss_shndx,
obj->efile.bss, NULL); NULL,
obj->efile.bss->d_size);
if (err) if (err)
return err; return err;
} }
@@ -2292,27 +2303,32 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
{ {
char *cp, errmsg[STRERR_BUFSIZE]; char *cp, errmsg[STRERR_BUFSIZE];
int err, zero = 0; int err, zero = 0;
__u8 *data;
/* Nothing to do here since kernel already zero-initializes .bss map. */ /* Nothing to do here since kernel already zero-initializes .bss map. */
if (map->libbpf_type == LIBBPF_MAP_BSS) if (map->libbpf_type == LIBBPF_MAP_BSS)
return 0; return 0;
data = map->libbpf_type == LIBBPF_MAP_DATA ? err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
obj->sections.data : obj->sections.rodata; if (err) {
err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("Error setting initial map(%s) contents: %s\n",
map->name, cp);
return err;
}
err = bpf_map_update_elem(map->fd, &zero, data, 0);
/* Freeze .rodata map as read-only from syscall side. */ /* Freeze .rodata map as read-only from syscall side. */
if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { if (map->libbpf_type == LIBBPF_MAP_RODATA) {
err = bpf_map_freeze(map->fd); err = bpf_map_freeze(map->fd);
if (err) { if (err) {
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("Error freezing map(%s) as read-only: %s\n", pr_warn("Error freezing map(%s) as read-only: %s\n",
map->name, cp); map->name, cp);
err = 0; return err;
} }
} }
return err; return 0;
} }
static int static int
@@ -4683,17 +4699,22 @@ void bpf_object__close(struct bpf_object *obj)
btf_ext__free(obj->btf_ext); btf_ext__free(obj->btf_ext);
for (i = 0; i < obj->nr_maps; i++) { for (i = 0; i < obj->nr_maps; i++) {
zfree(&obj->maps[i].name); struct bpf_map *map = &obj->maps[i];
zfree(&obj->maps[i].pin_path);
if (obj->maps[i].clear_priv) if (map->clear_priv)
obj->maps[i].clear_priv(&obj->maps[i], map->clear_priv(map, map->priv);
obj->maps[i].priv); map->priv = NULL;
obj->maps[i].priv = NULL; map->clear_priv = NULL;
obj->maps[i].clear_priv = NULL;
if (map->mmaped) {
munmap(map->mmaped, bpf_map_mmap_sz(map));
map->mmaped = NULL;
}
zfree(&map->name);
zfree(&map->pin_path);
} }
zfree(&obj->sections.rodata);
zfree(&obj->sections.data);
zfree(&obj->maps); zfree(&obj->maps);
obj->nr_maps = 0; obj->nr_maps = 0;