Merge branch 'bpf-bpffs-bpftool-dump-with-btf'

Yonghong Song says:

====================
Commit a26ca7c982 ("bpf: btf: Add pretty print support to the
basic arraymap") and Commit 699c86d6ec ("bpf: btf: add pretty print
for hash/lru_hash maps") added bpffs pretty print for array, hash and
lru hash maps. The pretty print gives users a structurally formatted
dump for keys/values which is much easier to understand than raw bytes.

This patch set implemented bpffs pretty print support for
percpu arraymap, percpu hashmap and percpu lru hashmap.
For complex key/value types, the pretty print here is even more useful
due to:

  . large volume of data making it even harder to correlate bytes
    to a particular field in a particular cpu.
  . kernel rounds the value size for each cpu to a multiple of 8.
    The user has to be aware of this, otherwise a wrong value may be
    derived for cpu 1/2/...

For example, we may have a bpffs pretty print like below:
   43602: {
        cpu0: {43602,0,-43602,0x3,0xaa52,0x3,{43602|[82,170,0,0,0,0,0,0]},ENUM_TWO}
        cpu1: {43602,0,-43602,0x3,0xaa52,0x3,{43602|[82,170,0,0,0,0,0,0]},ENUM_TWO}
        cpu2: {43602,0,-43602,0x3,0xaa52,0x3,{43602|[82,170,0,0,0,0,0,0]},ENUM_TWO}
        cpu3: {43602,0,-43602,0x3,0xaa52,0x3,{43602|[82,170,0,0,0,0,0,0]},ENUM_TWO}
   }
for a percpu map.

This patch also added percpu formatted print on bpftool. For example,
bpftool may print like below:
    {
        "key": 0,
        "values": [{
                "cpu": 0,
                "value": {
                    "ui32": 0,
                    "ui16": 0,
                }
            },{
                "cpu": 1,
                "value": {
                    "ui32": 1,
                    "ui16": 0,
                }
            },{
                "cpu": 2,
                "value": {
                    "ui32": 2,
                    "ui16": 0,
                }
            },{
                "cpu": 3,
                "value": {
                    "ui32": 3,
                    "ui16": 0,
                }
            }
        ]
    }

Patch #1 implemented bpffs pretty print for percpu arraymap/hash/lru_hash
in kernel. Patch #2 added the test case in tools bpf selftest test_btf.
Patch #3 added percpu map btf based dump.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Daniel Borkmann 2018-08-30 14:03:54 +02:00
commit 56b48c6a60
4 changed files with 229 additions and 36 deletions

View File

@ -358,6 +358,29 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
void __percpu *pptr;
int cpu;
rcu_read_lock();
seq_printf(m, "%u: {\n", *(u32 *)key);
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
seq_printf(m, "\tcpu%d: ", cpu);
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(pptr, cpu), m);
seq_puts(m, "\n");
}
seq_puts(m, "}\n");
rcu_read_unlock();
}
static int array_map_check_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type)
@ -398,6 +421,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_lookup_elem = percpu_array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_seq_show_elem = percpu_array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};

View File

@ -1285,6 +1285,35 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
return ret;
}
static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
struct htab_elem *l;
void __percpu *pptr;
int cpu;
rcu_read_lock();
l = __htab_map_lookup_elem(map, key);
if (!l) {
rcu_read_unlock();
return;
}
btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
seq_puts(m, ": {\n");
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
seq_printf(m, "\tcpu%d: ", cpu);
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(pptr, cpu), m);
seq_puts(m, "\n");
}
seq_puts(m, "}\n");
rcu_read_unlock();
}
const struct bpf_map_ops htab_percpu_map_ops = {
.map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
@ -1293,6 +1322,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_lookup_elem = htab_percpu_map_lookup_elem,
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
};
const struct bpf_map_ops htab_lru_percpu_map_ops = {
@ -1303,6 +1333,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
};
static int fd_htab_map_alloc_check(union bpf_attr *attr)

View File

@ -169,9 +169,28 @@ static int do_dump_btf(const struct btf_dumper *d,
if (ret)
goto err_end_obj;
jsonw_name(d->jw, "value");
if (!map_is_per_cpu(map_info->type)) {
jsonw_name(d->jw, "value");
ret = btf_dumper_type(d, map_info->btf_value_type_id, value);
} else {
unsigned int i, n, step;
ret = btf_dumper_type(d, map_info->btf_value_type_id, value);
jsonw_name(d->jw, "values");
jsonw_start_array(d->jw);
n = get_possible_cpus();
step = round_up(map_info->value_size, 8);
for (i = 0; i < n; i++) {
jsonw_start_object(d->jw);
jsonw_int_field(d->jw, "cpu", i);
jsonw_name(d->jw, "value");
ret = btf_dumper_type(d, map_info->btf_value_type_id,
value + i * step);
jsonw_end_object(d->jw);
if (ret)
break;
}
jsonw_end_array(d->jw);
}
err_end_obj:
/* end of key-value pair */
@ -298,6 +317,16 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
jsonw_end_object(json_wtr);
}
jsonw_end_array(json_wtr);
if (btf) {
struct btf_dumper d = {
.btf = btf,
.jw = json_wtr,
.is_plain_text = false,
};
jsonw_name(json_wtr, "formatted");
do_dump_btf(&d, info, key, value);
}
}
jsonw_end_object(json_wtr);

View File

@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <bpf/bpf.h>
#include <sys/resource.h>
#include <libelf.h>
@ -45,7 +46,6 @@ static int count_result(int err)
return err;
}
#define min(a, b) ((a) < (b) ? (a) : (b))
#define __printf(a, b) __attribute__((format(printf, a, b)))
__printf(1, 2)
@ -130,6 +130,7 @@ struct btf_raw_test {
bool map_create_err;
bool ordered_map;
bool lossless_map;
bool percpu_map;
int hdr_len_delta;
int type_off_delta;
int str_off_delta;
@ -2157,6 +2158,7 @@ static struct btf_pprint_test_meta {
const char *map_name;
bool ordered_map;
bool lossless_map;
bool percpu_map;
} pprint_tests_meta[] = {
{
.descr = "BTF pretty print array",
@ -2164,6 +2166,7 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_array",
.ordered_map = true,
.lossless_map = true,
.percpu_map = false,
},
{
@ -2172,6 +2175,7 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_hash",
.ordered_map = false,
.lossless_map = true,
.percpu_map = false,
},
{
@ -2180,30 +2184,83 @@ static struct btf_pprint_test_meta {
.map_name = "pprint_test_lru_hash",
.ordered_map = false,
.lossless_map = false,
.percpu_map = false,
},
{
.descr = "BTF pretty print percpu array",
.map_type = BPF_MAP_TYPE_PERCPU_ARRAY,
.map_name = "pprint_test_percpu_array",
.ordered_map = true,
.lossless_map = true,
.percpu_map = true,
},
{
.descr = "BTF pretty print percpu hash",
.map_type = BPF_MAP_TYPE_PERCPU_HASH,
.map_name = "pprint_test_percpu_hash",
.ordered_map = false,
.lossless_map = true,
.percpu_map = true,
},
{
.descr = "BTF pretty print lru percpu hash",
.map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
.map_name = "pprint_test_lru_percpu_hash",
.ordered_map = false,
.lossless_map = false,
.percpu_map = true,
},
};
static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i,
int num_cpus, int rounded_value_size)
{
v->ui32 = i;
v->si32 = -i;
v->unused_bits2a = 3;
v->bits28 = i;
v->unused_bits2b = 3;
v->ui64 = i;
v->aenum = i & 0x03;
int cpu;
for (cpu = 0; cpu < num_cpus; cpu++) {
v->ui32 = i + cpu;
v->si32 = -i;
v->unused_bits2a = 3;
v->bits28 = i;
v->unused_bits2b = 3;
v->ui64 = i;
v->aenum = i & 0x03;
v = (void *)v + rounded_value_size;
}
}
/*
 * Compare one line read from the pinned map file against the expected text.
 *
 * @expected_line:     the formatted expected text.
 * @nexpected_line:    the value snprintf() returned when formatting
 *                     expected_line.
 * @expected_line_len: size of the buffer expected_line was formatted into.
 * @line:              the line actually read.
 *
 * Returns 0 when the two strings are equal. Returns -1 when the expected
 * text did not fit into its buffer, or when the strings differ; in the
 * latter case both are printed to stderr.
 */
static int check_line(const char *expected_line, int nexpected_line,
		      int expected_line_len, const char *line)
{
	/*
	 * snprintf() returns the length the output would have had without
	 * truncation, so the buffer overflowed whenever that length is at
	 * least the buffer size, not only when it is exactly equal to it.
	 */
	if (CHECK(nexpected_line >= expected_line_len,
		  "expected_line is too long"))
		return -1;

	if (strcmp(expected_line, line)) {
		fprintf(stderr, "unexpected pprint output\n");
		fprintf(stderr, "expected: %s", expected_line);
		fprintf(stderr, " read: %s", line);
		return -1;
	}

	return 0;
}
static int do_test_pprint(void)
{
const struct btf_raw_test *test = &pprint_test_template;
struct bpf_create_map_attr create_attr = {};
bool ordered_map, lossless_map, percpu_map;
int err, ret, num_cpus, rounded_value_size;
struct pprint_mapv *mapv = NULL;
unsigned int key, nr_read_elems;
bool ordered_map, lossless_map;
int map_fd = -1, btf_fd = -1;
struct pprint_mapv mapv = {};
unsigned int raw_btf_size;
char expected_line[255];
FILE *pin_file = NULL;
@ -2212,7 +2269,6 @@ static int do_test_pprint(void)
char *line = NULL;
uint8_t *raw_btf;
ssize_t nread;
int err, ret;
fprintf(stderr, "%s......", test->descr);
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
@ -2261,9 +2317,18 @@ static int do_test_pprint(void)
if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
goto done;
percpu_map = test->percpu_map;
num_cpus = percpu_map ? bpf_num_possible_cpus() : 1;
rounded_value_size = round_up(sizeof(struct pprint_mapv), 8);
mapv = calloc(num_cpus, rounded_value_size);
if (CHECK(!mapv, "mapv allocation failure")) {
err = -1;
goto done;
}
for (key = 0; key < test->max_entries; key++) {
set_pprint_mapv(&mapv, key);
bpf_map_update_elem(map_fd, &key, &mapv, 0);
set_pprint_mapv(mapv, key, num_cpus, rounded_value_size);
bpf_map_update_elem(map_fd, &key, mapv, 0);
}
pin_file = fopen(pin_path, "r");
@ -2286,33 +2351,74 @@ static int do_test_pprint(void)
ordered_map = test->ordered_map;
lossless_map = test->lossless_map;
do {
struct pprint_mapv *cmapv;
ssize_t nexpected_line;
unsigned int next_key;
int cpu;
next_key = ordered_map ? nr_read_elems : atoi(line);
set_pprint_mapv(&mapv, next_key);
nexpected_line = snprintf(expected_line, sizeof(expected_line),
"%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
next_key,
mapv.ui32, mapv.si32,
mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
mapv.ui64,
mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
pprint_enum_str[mapv.aenum]);
set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size);
cmapv = mapv;
if (CHECK(nexpected_line == sizeof(expected_line),
"expected_line is too long")) {
err = -1;
goto done;
for (cpu = 0; cpu < num_cpus; cpu++) {
if (percpu_map) {
/* for percpu map, the format looks like:
* <key>: {
* cpu0: <value_on_cpu0>
* cpu1: <value_on_cpu1>
* ...
* cpun: <value_on_cpun>
* }
*
* let us verify the line containing the key here.
*/
if (cpu == 0) {
nexpected_line = snprintf(expected_line,
sizeof(expected_line),
"%u: {\n",
next_key);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
if (err == -1)
goto done;
}
/* read value@cpu */
nread = getline(&line, &line_len, pin_file);
if (nread < 0)
break;
}
nexpected_line = snprintf(expected_line, sizeof(expected_line),
"%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
"{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
percpu_map ? "\tcpu" : "",
percpu_map ? cpu : next_key,
cmapv->ui32, cmapv->si32,
cmapv->unused_bits2a,
cmapv->bits28,
cmapv->unused_bits2b,
cmapv->ui64,
cmapv->ui8a[0], cmapv->ui8a[1],
cmapv->ui8a[2], cmapv->ui8a[3],
cmapv->ui8a[4], cmapv->ui8a[5],
cmapv->ui8a[6], cmapv->ui8a[7],
pprint_enum_str[cmapv->aenum]);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
if (err == -1)
goto done;
cmapv = (void *)cmapv + rounded_value_size;
}
if (strcmp(expected_line, line)) {
err = -1;
fprintf(stderr, "unexpected pprint output\n");
fprintf(stderr, "expected: %s", expected_line);
fprintf(stderr, " read: %s", line);
goto done;
if (percpu_map) {
/* skip the last bracket for the percpu map */
nread = getline(&line, &line_len, pin_file);
if (nread < 0)
break;
}
nread = getline(&line, &line_len, pin_file);
@ -2334,6 +2440,8 @@ static int do_test_pprint(void)
err = 0;
done:
if (mapv)
free(mapv);
if (!err)
fprintf(stderr, "OK");
if (*btf_log_buf && (err || args.always_log))
@ -2361,6 +2469,7 @@ static int test_pprint(void)
pprint_test_template.map_name = pprint_tests_meta[i].map_name;
pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
pprint_test_template.percpu_map = pprint_tests_meta[i].percpu_map;
err |= count_result(do_test_pprint());
}