OpenCloudOS-Kernel/tools/bpf/bpftool/skeleton/profiler.bpf.c

// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2020 Facebook
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/*
 * Map of perf event fds, num_cpu * num_metric entries.
 *
 * The programs in this file index it as cpu + metric * num_cpu, i.e. all
 * CPUs of metric 0 first, then all CPUs of metric 1, and so on.
 *
 * NOTE(review): no max_entries is declared here; presumably the loader
 * sizes the map before load -- confirm against the userspace side.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

/*
 * Readings taken at fentry, keyed by metric index.
 *
 * Written by fentry_XXX and read back by fexit_update_maps to compute the
 * per-invocation delta.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_perf_event_value));
} fentry_readings SEC(".maps");

/*
 * Accumulated readings, keyed by metric index.
 *
 * fexit_update_maps adds (fexit reading - fentry reading) for counter,
 * enabled and running into the entry for each metric.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_perf_event_value));
} accum_readings SEC(".maps");

/*
 * Sample counts, one per cpu.
 *
 * Only key 0 is used: fexit_XXX increments it once per invocation in which
 * every perf event read succeeded.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u64));
} counts SEC(".maps");

/*
 * num_cpu is the stride between consecutive metrics in the events map;
 * num_metric bounds every per-metric loop below. Both default to 1.
 * NOTE(review): as const volatile globals these are presumably overwritten
 * by the loader before the programs are loaded -- confirm.
 */
const volatile __u32 num_cpu = 1;
const volatile __u32 num_metric = 1;
/* compile-time cap on metrics: sizes the on-stack arrays and bounds the loops */
#define MAX_NUM_MATRICS 4

/*
 * Entry probe: snapshot every metric's perf event value for the current CPU
 * into fentry_readings.
 *
 * Always returns 0. Bails out early, leaving later metrics unwritten, if a
 * map lookup or a perf event read fails.
 *
 * NOTE(review): "XXX" looks like a placeholder for the real attach target,
 * presumably filled in by the loader -- confirm.
 */
SEC("fentry/XXX")
int BPF_PROG(fentry_XXX)
{
	struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
	/* index of this CPU's event for metric 0 in the events map */
	u32 key = bpf_get_smp_processor_id();
	u32 i;

	/*
	 * look up before reading, to reduce error: resolving all destination
	 * slots first keeps the map lookups out of the window between the
	 * counter reads
	 */
	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
		/* the helper takes a pointer to the key, so copy i to a local */
		u32 flag = i;

		ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag);
		if (!ptrs[i])
			return 0;
	}

	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
		struct bpf_perf_event_value reading;
		int err;

		err = bpf_perf_event_read_value(&events, key, &reading,
						sizeof(reading));
		if (err)
			return 0;
		*(ptrs[i]) = reading;
		/* step to the same CPU's event for the next metric */
		key += num_cpu;
	}

	return 0;
}

/*
 * Fold one metric's fexit reading into accum_readings.
 *
 * @id:    metric index, used as the key into fentry_readings and
 *         accum_readings
 * @after: perf event value read at fexit for that metric
 *
 * The delta (after - fentry reading) of counter, enabled and running is
 * added to the accumulated entry. Nothing is accounted when the fentry
 * entry is missing or its counter is zero, or when the accumulator lookup
 * fails.
 */
static inline void
fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
{
	struct bpf_perf_event_value *start, *total;
	struct bpf_perf_event_value delta;

	start = bpf_map_lookup_elem(&fentry_readings, &id);
	/* a zero counter is treated as "no valid fentry reading": skip it */
	if (!start || !start->counter)
		return;

	delta.counter = after->counter - start->counter;
	delta.enabled = after->enabled - start->enabled;
	delta.running = after->running - start->running;

	total = bpf_map_lookup_elem(&accum_readings, &id);
	if (!total)
		return;

	total->counter += delta.counter;
	total->enabled += delta.enabled;
	total->running += delta.running;
}

/*
 * Exit probe: read every metric's perf event value for the current CPU,
 * bump the sample count, and accumulate the deltas against the readings
 * stored at fentry.
 *
 * Always returns 0. If any perf event read fails the whole sample is
 * dropped: neither the count nor any accumulator is updated.
 *
 * NOTE(review): "XXX" looks like a placeholder for the real attach target,
 * presumably filled in by the loader -- confirm.
 */
SEC("fexit/XXX")
int BPF_PROG(fexit_XXX)
{
	struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
	u32 cpu = bpf_get_smp_processor_id();
	u32 i, zero = 0;
	int err;
	u64 *count;

	/* read all events before updating the maps, to reduce error */
	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
		/* events map index: this CPU's slot within metric i */
		err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
						readings + i, sizeof(*readings));
		if (err)
			return 0;
	}
	/* key 0 is the only entry used in the counts map */
	count = bpf_map_lookup_elem(&counts, &zero);
	if (count) {
		*count += 1;
		for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++)
			fexit_update_maps(i, &readings[i]);
	}
	return 0;
}

/* license string for this object, placed in the "license" section */
char LICENSE[] SEC("license") = "Dual BSD/GPL";
tools/bpftool: Relicense bpftool's BPF profiler prog as dual-license GPL/BSD Relicense it to be compatible with the rest of bpftool files. Suggested-by: Quentin Monnet <quentin@isovalent.com> Signed-off-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20200619222024.519774-1-andriin@fb.com 2020-06-20 06:20:24 +08:00			`// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)`
bpftool: Introduce "prog profile" command With fentry/fexit programs, it is possible to profile BPF program with hardware counters. Introduce bpftool "prog profile", which measures key metrics of a BPF program. bpftool prog profile command creates per-cpu perf events. Then it attaches fentry/fexit programs to the target BPF program. The fentry program saves perf event value to a map. The fexit program reads the perf event again, and calculates the difference, which is the instructions/cycles used by the target program. Example input and output: ./bpftool prog profile id 337 duration 3 cycles instructions llc_misses 4228 run_cnt 3403698 cycles (84.08%) 3525294 instructions # 1.04 insn per cycle (84.05%) 13 llc_misses # 3.69 LLC misses per million isns (83.50%) This command measures cycles and instructions for BPF program with id 337 for 3 seconds. The program has triggered 4228 times. The rest of the output is similar to perf-stat. In this example, the counters were only counting ~84% of the time because of time multiplexing of perf counters. Note that, this approach measures cycles and instructions in very small increments. So the fentry/fexit programs introduce noticeable errors to the measurement results. The fentry/fexit programs are generated with BPF skeletons. Therefore, we build bpftool twice. The first time _bpftool is built without skeletons. Then, _bpftool is used to generate the skeletons. The second time, bpftool is built with skeletons. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Quentin Monnet <quentin@isovalent.com> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20200309173218.2739965-2-songliubraving@fb.com 2020-03-10 01:32:15 +08:00			`// Copyright (c) 2020 Facebook`
tools/bpftool: Generalize BPF skeleton support and generate vmlinux.h Adapt Makefile to support BPF skeleton generation beyond single profiler.bpf.c case. Also add vmlinux.h generation and switch profiler.bpf.c to use it. clang-bpf-global-var feature is extended and renamed to clang-bpf-co-re to check for support of preserve_access_index attribute, which, together with BTF for global variables, is the minimum requirement for modern BPF programs. Signed-off-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Reviewed-by: Quentin Monnet <quentin@isovalent.com> Link: https://lore.kernel.org/bpf/20200619231703.738941-7-andriin@fb.com 2020-06-20 07:17:00 +08:00			`#include <vmlinux.h>`
bpftool: Introduce "prog profile" command With fentry/fexit programs, it is possible to profile BPF program with hardware counters. Introduce bpftool "prog profile", which measures key metrics of a BPF program. bpftool prog profile command creates per-cpu perf events. Then it attaches fentry/fexit programs to the target BPF program. The fentry program saves perf event value to a map. The fexit program reads the perf event again, and calculates the difference, which is the instructions/cycles used by the target program. Example input and output: ./bpftool prog profile id 337 duration 3 cycles instructions llc_misses 4228 run_cnt 3403698 cycles (84.08%) 3525294 instructions # 1.04 insn per cycle (84.05%) 13 llc_misses # 3.69 LLC misses per million isns (83.50%) This command measures cycles and instructions for BPF program with id 337 for 3 seconds. The program has triggered 4228 times. The rest of the output is similar to perf-stat. In this example, the counters were only counting ~84% of the time because of time multiplexing of perf counters. Note that, this approach measures cycles and instructions in very small increments. So the fentry/fexit programs introduce noticeable errors to the measurement results. The fentry/fexit programs are generated with BPF skeletons. Therefore, we build bpftool twice. The first time _bpftool is built without skeletons. Then, _bpftool is used to generate the skeletons. The second time, bpftool is built with skeletons. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Quentin Monnet <quentin@isovalent.com> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20200309173218.2739965-2-songliubraving@fb.com 2020-03-10 01:32:15 +08:00			`#include <bpf/bpf_helpers.h>`
			`#include <bpf/bpf_tracing.h>`

			`/* map of perf event fds, num_cpu * num_metric entries */`
			`struct {`
			`__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);`
			`__uint(key_size, sizeof(u32));`
			`__uint(value_size, sizeof(int));`
			`} events SEC(".maps");`

			`/* readings at fentry */`
			`struct {`
			`__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);`
			`__uint(key_size, sizeof(u32));`
			`__uint(value_size, sizeof(struct bpf_perf_event_value));`
			`} fentry_readings SEC(".maps");`

			`/* accumulated readings */`
			`struct {`
			`__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);`
			`__uint(key_size, sizeof(u32));`
			`__uint(value_size, sizeof(struct bpf_perf_event_value));`
			`} accum_readings SEC(".maps");`

			`/* sample counts, one per cpu */`
			`struct {`
			`__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);`
			`__uint(key_size, sizeof(u32));`
			`__uint(value_size, sizeof(u64));`
			`} counts SEC(".maps");`

			`const volatile __u32 num_cpu = 1;`
			`const volatile __u32 num_metric = 1;`
			`#define MAX_NUM_MATRICS 4`

			`SEC("fentry/XXX")`
			`int BPF_PROG(fentry_XXX)`
			`{`
			`struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];`
			`u32 key = bpf_get_smp_processor_id();`
			`u32 i;`

			`/* look up before reading, to reduce error */`
			`for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {`
			`u32 flag = i;`

			`ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag);`
			`if (!ptrs[i])`
			`return 0;`
			`}`

			`for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {`
			`struct bpf_perf_event_value reading;`
			`int err;`

			`err = bpf_perf_event_read_value(&events, key, &reading,`
			`sizeof(reading));`
			`if (err)`
			`return 0;`
			`*(ptrs[i]) = reading;`
			`key += num_cpu;`
			`}`

			`return 0;`
			`}`

			`static inline void`
			`fexit_update_maps(u32 id, struct bpf_perf_event_value *after)`
			`{`
tools, bpftool: Remove two unused variables. Avoid an unused variable warning. Signed-off-by: Ian Rogers <irogers@google.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Tobias Klauser <tklauser@distanz.ch> Acked-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20201027233646.3434896-2-irogers@google.com 2020-10-28 07:36:46 +08:00			`struct bpf_perf_event_value *before, diff;`
bpftool: Introduce "prog profile" command With fentry/fexit programs, it is possible to profile BPF program with hardware counters. Introduce bpftool "prog profile", which measures key metrics of a BPF program. bpftool prog profile command creates per-cpu perf events. Then it attaches fentry/fexit programs to the target BPF program. The fentry program saves perf event value to a map. The fexit program reads the perf event again, and calculates the difference, which is the instructions/cycles used by the target program. Example input and output: ./bpftool prog profile id 337 duration 3 cycles instructions llc_misses 4228 run_cnt 3403698 cycles (84.08%) 3525294 instructions # 1.04 insn per cycle (84.05%) 13 llc_misses # 3.69 LLC misses per million isns (83.50%) This command measures cycles and instructions for BPF program with id 337 for 3 seconds. The program has triggered 4228 times. The rest of the output is similar to perf-stat. In this example, the counters were only counting ~84% of the time because of time multiplexing of perf counters. Note that, this approach measures cycles and instructions in very small increments. So the fentry/fexit programs introduce noticeable errors to the measurement results. The fentry/fexit programs are generated with BPF skeletons. Therefore, we build bpftool twice. The first time _bpftool is built without skeletons. Then, _bpftool is used to generate the skeletons. The second time, bpftool is built with skeletons. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Quentin Monnet <quentin@isovalent.com> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20200309173218.2739965-2-songliubraving@fb.com 2020-03-10 01:32:15 +08:00
			`before = bpf_map_lookup_elem(&fentry_readings, &id);`
			`/* only account samples with a valid fentry_reading */`
			`if (before && before->counter) {`
			`struct bpf_perf_event_value *accum;`

			`diff.counter = after->counter - before->counter;`
			`diff.enabled = after->enabled - before->enabled;`
			`diff.running = after->running - before->running;`

			`accum = bpf_map_lookup_elem(&accum_readings, &id);`
			`if (accum) {`
			`accum->counter += diff.counter;`
			`accum->enabled += diff.enabled;`
			`accum->running += diff.running;`
			`}`
			`}`
			`}`

			`SEC("fexit/XXX")`
			`int BPF_PROG(fexit_XXX)`
			`{`
			`struct bpf_perf_event_value readings[MAX_NUM_MATRICS];`
			`u32 cpu = bpf_get_smp_processor_id();`
tools, bpftool: Remove two unused variables. Avoid an unused variable warning. Signed-off-by: Ian Rogers <irogers@google.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Tobias Klauser <tklauser@distanz.ch> Acked-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20201027233646.3434896-2-irogers@google.com 2020-10-28 07:36:46 +08:00			`u32 i, zero = 0;`
bpftool: Introduce "prog profile" command With fentry/fexit programs, it is possible to profile BPF program with hardware counters. Introduce bpftool "prog profile", which measures key metrics of a BPF program. bpftool prog profile command creates per-cpu perf events. Then it attaches fentry/fexit programs to the target BPF program. The fentry program saves perf event value to a map. The fexit program reads the perf event again, and calculates the difference, which is the instructions/cycles used by the target program. Example input and output: ./bpftool prog profile id 337 duration 3 cycles instructions llc_misses 4228 run_cnt 3403698 cycles (84.08%) 3525294 instructions # 1.04 insn per cycle (84.05%) 13 llc_misses # 3.69 LLC misses per million isns (83.50%) This command measures cycles and instructions for BPF program with id 337 for 3 seconds. The program has triggered 4228 times. The rest of the output is similar to perf-stat. In this example, the counters were only counting ~84% of the time because of time multiplexing of perf counters. Note that, this approach measures cycles and instructions in very small increments. So the fentry/fexit programs introduce noticeable errors to the measurement results. The fentry/fexit programs are generated with BPF skeletons. Therefore, we build bpftool twice. The first time _bpftool is built without skeletons. Then, _bpftool is used to generate the skeletons. The second time, bpftool is built with skeletons. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Quentin Monnet <quentin@isovalent.com> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20200309173218.2739965-2-songliubraving@fb.com 2020-03-10 01:32:15 +08:00			`int err;`
			`u64 *count;`

			`/* read all events before updating the maps, to reduce error */`
			`for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {`
			`err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,`
			`readings + i, sizeof(*readings));`
			`if (err)`
			`return 0;`
			`}`
			`count = bpf_map_lookup_elem(&counts, &zero);`
			`if (count) {`
			`*count += 1;`
			`for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++)`
			`fexit_update_maps(i, &readings[i]);`
			`}`
			`return 0;`
			`}`

tools/bpftool: Relicense bpftool's BPF profiler prog as dual-license GPL/BSD Relicense it to be compatible with the rest of bpftool files. Suggested-by: Quentin Monnet <quentin@isovalent.com> Signed-off-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20200619222024.519774-1-andriin@fb.com 2020-06-20 06:20:24 +08:00			`char LICENSE[] SEC("license") = "Dual BSD/GPL";`