linux-sg2042/samples/bpf/xdp_monitor_user.c

716 lines
17 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0
* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__=
"XDP monitor tool, based on tracepoints\n"
;
static const char *__doc_err_only__=
" NOTICE: Only tracking XDP redirect errors\n"
" Enable TX success stats via '--stats'\n"
" (which comes with a per packet processing overhead)\n"
;
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <locale.h>
#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
#include <bpf/bpf.h>
#include "bpf_load.h"
#include "bpf_util.h"
static int verbose = 1;
static bool debug = false;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"debug", no_argument, NULL, 'D' },
{"stats", no_argument, NULL, 'S' },
{"sec", required_argument, NULL, 's' },
{0, 0, NULL, 0 }
};
/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
#define EXIT_FAIL_MEM 5
static void usage(char *argv[])
{
int i;
printf("\nDOCUMENTATION:\n%s\n", __doc__);
printf("\n");
printf(" Usage: %s (options-see-below)\n",
argv[0]);
printf(" Listing options:\n");
for (i = 0; long_options[i].name != 0; i++) {
printf(" --%-15s", long_options[i].name);
if (long_options[i].flag != NULL)
printf(" flag (internal value:%d)",
*long_options[i].flag);
else
printf("short-option: -%c",
long_options[i].val);
printf("\n");
}
printf("\n");
}
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
static __u64 gettime(void)
{
struct timespec t;
int res;
res = clock_gettime(CLOCK_MONOTONIC, &t);
if (res < 0) {
fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
exit(EXIT_FAILURE);
}
return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
}
enum {
REDIR_SUCCESS = 0,
REDIR_ERROR = 1,
};
#define REDIR_RES_MAX 2
static const char *redir_names[REDIR_RES_MAX] = {
[REDIR_SUCCESS] = "Success",
[REDIR_ERROR] = "Error",
};
static const char *err2str(int err)
{
if (err < REDIR_RES_MAX)
return redir_names[err];
return NULL;
}
/* enum xdp_action */
#define XDP_UNKNOWN XDP_REDIRECT + 1
#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
static const char *xdp_action_names[XDP_ACTION_MAX] = {
[XDP_ABORTED] = "XDP_ABORTED",
[XDP_DROP] = "XDP_DROP",
[XDP_PASS] = "XDP_PASS",
[XDP_TX] = "XDP_TX",
[XDP_REDIRECT] = "XDP_REDIRECT",
[XDP_UNKNOWN] = "XDP_UNKNOWN",
};
static const char *action2str(int action)
{
if (action < XDP_ACTION_MAX)
return xdp_action_names[action];
return NULL;
}
/* Common stats data record shared with _kern.c */
struct datarec {
__u64 processed;
__u64 dropped;
__u64 info;
__u64 err;
};
#define MAX_CPUS 64
/* Userspace structs for collection of stats from maps */
struct record {
__u64 timestamp;
struct datarec total;
struct datarec *cpu;
};
struct u64rec {
__u64 processed;
};
struct record_u64 {
/* record for _kern side __u64 values */
__u64 timestamp;
struct u64rec total;
struct u64rec *cpu;
};
struct stats_record {
struct record_u64 xdp_redirect[REDIR_RES_MAX];
struct record_u64 xdp_exception[XDP_ACTION_MAX];
struct record xdp_cpumap_kthread;
struct record xdp_cpumap_enqueue[MAX_CPUS];
struct record xdp_devmap_xmit;
};
static bool map_collect_record(int fd, __u32 key, struct record *rec)
{
/* For percpu maps, userspace gets a value per possible CPU */
unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec values[nr_cpus];
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_info = 0;
__u64 sum_err = 0;
int i;
if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
fprintf(stderr,
"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
return false;
}
/* Get time as close as possible to reading map contents */
rec->timestamp = gettime();
/* Record and sum values from each CPU */
for (i = 0; i < nr_cpus; i++) {
rec->cpu[i].processed = values[i].processed;
sum_processed += values[i].processed;
rec->cpu[i].dropped = values[i].dropped;
sum_dropped += values[i].dropped;
rec->cpu[i].info = values[i].info;
sum_info += values[i].info;
rec->cpu[i].err = values[i].err;
sum_err += values[i].err;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.info = sum_info;
rec->total.err = sum_err;
return true;
}
static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
{
/* For percpu maps, userspace gets a value per possible CPU */
unsigned int nr_cpus = bpf_num_possible_cpus();
struct u64rec values[nr_cpus];
__u64 sum_total = 0;
int i;
if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
fprintf(stderr,
"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
return false;
}
/* Get time as close as possible to reading map contents */
rec->timestamp = gettime();
/* Record and sum values from each CPU */
for (i = 0; i < nr_cpus; i++) {
rec->cpu[i].processed = values[i].processed;
sum_total += values[i].processed;
}
rec->total.processed = sum_total;
return true;
}
static double calc_period(struct record *r, struct record *p)
{
double period_ = 0;
__u64 period = 0;
period = r->timestamp - p->timestamp;
if (period > 0)
period_ = ((double) period / NANOSEC_PER_SEC);
return period_;
}
static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
{
double period_ = 0;
__u64 period = 0;
period = r->timestamp - p->timestamp;
if (period > 0)
period_ = ((double) period / NANOSEC_PER_SEC);
return period_;
}
static double calc_pps(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->processed - p->processed;
pps = packets / period;
}
return pps;
}
static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->processed - p->processed;
pps = packets / period;
}
return pps;
}
static double calc_drop(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->dropped - p->dropped;
pps = packets / period;
}
return pps;
}
static double calc_info(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->info - p->info;
pps = packets / period;
}
return pps;
}
static double calc_err(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->err - p->err;
pps = packets / period;
}
return pps;
}
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
bool err_only)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
int rec_i = 0, i, to_cpu;
double t = 0, pps = 0;
/* Header */
printf("%-15s %-7s %-12s %-12s %-9s\n",
"XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
/* tracepoint: xdp:xdp_redirect_* */
if (err_only)
rec_i = REDIR_ERROR;
for (; rec_i < REDIR_RES_MAX; rec_i++) {
struct record_u64 *rec, *prev;
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
rec = &stats_rec->xdp_redirect[rec_i];
prev = &stats_prev->xdp_redirect[rec_i];
t = calc_period_u64(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct u64rec *r = &rec->cpu[i];
struct u64rec *p = &prev->cpu[i];
pps = calc_pps_u64(r, p, t);
if (pps > 0)
printf(fmt1, "XDP_REDIRECT", i,
rec_i ? 0.0: pps, rec_i ? pps : 0.0,
err2str(rec_i));
}
pps = calc_pps_u64(&rec->total, &prev->total, t);
printf(fmt2, "XDP_REDIRECT", "total",
rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
}
/* tracepoint: xdp:xdp_exception */
for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
struct record_u64 *rec, *prev;
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
rec = &stats_rec->xdp_exception[rec_i];
prev = &stats_prev->xdp_exception[rec_i];
t = calc_period_u64(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct u64rec *r = &rec->cpu[i];
struct u64rec *p = &prev->cpu[i];
pps = calc_pps_u64(r, p, t);
if (pps > 0)
printf(fmt1, "Exception", i,
0.0, pps, action2str(rec_i));
}
pps = calc_pps_u64(&rec->total, &prev->total, t);
if (pps > 0)
printf(fmt2, "Exception", "total",
0.0, pps, action2str(rec_i));
}
/* cpumap enqueue stats */
for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
struct record *rec, *prev;
char *info_str = "";
double drop, info;
rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
t = calc_period(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct datarec *r = &rec->cpu[i];
struct datarec *p = &prev->cpu[i];
pps = calc_pps(r, p, t);
drop = calc_drop(r, p, t);
info = calc_info(r, p, t);
if (info > 0) {
info_str = "bulk-average";
info = pps / info; /* calc average bulk size */
}
if (pps > 0)
printf(fmt1, "cpumap-enqueue",
i, to_cpu, pps, drop, info, info_str);
}
pps = calc_pps(&rec->total, &prev->total, t);
if (pps > 0) {
drop = calc_drop(&rec->total, &prev->total, t);
info = calc_info(&rec->total, &prev->total, t);
if (info > 0) {
info_str = "bulk-average";
info = pps / info; /* calc average bulk size */
}
printf(fmt2, "cpumap-enqueue",
"sum", to_cpu, pps, drop, info, info_str);
}
}
/* cpumap kthread stats */
{
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
struct record *rec, *prev;
double drop, info;
char *i_str = "";
rec = &stats_rec->xdp_cpumap_kthread;
prev = &stats_prev->xdp_cpumap_kthread;
t = calc_period(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct datarec *r = &rec->cpu[i];
struct datarec *p = &prev->cpu[i];
pps = calc_pps(r, p, t);
drop = calc_drop(r, p, t);
info = calc_info(r, p, t);
if (info > 0)
i_str = "sched";
if (pps > 0 || drop > 0)
printf(fmt1, "cpumap-kthread",
i, pps, drop, info, i_str);
}
pps = calc_pps(&rec->total, &prev->total, t);
drop = calc_drop(&rec->total, &prev->total, t);
info = calc_info(&rec->total, &prev->total, t);
if (info > 0)
i_str = "sched-sum";
printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
}
/* devmap ndo_xdp_xmit stats */
{
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
struct record *rec, *prev;
double drop, info, err;
char *i_str = "";
char *err_str = "";
rec = &stats_rec->xdp_devmap_xmit;
prev = &stats_prev->xdp_devmap_xmit;
t = calc_period(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct datarec *r = &rec->cpu[i];
struct datarec *p = &prev->cpu[i];
pps = calc_pps(r, p, t);
drop = calc_drop(r, p, t);
info = calc_info(r, p, t);
err = calc_err(r, p, t);
if (info > 0) {
i_str = "bulk-average";
info = (pps+drop) / info; /* calc avg bulk */
}
if (err > 0)
err_str = "drv-err";
if (pps > 0 || drop > 0)
printf(fmt1, "devmap-xmit",
i, pps, drop, info, i_str, err_str);
}
pps = calc_pps(&rec->total, &prev->total, t);
drop = calc_drop(&rec->total, &prev->total, t);
info = calc_info(&rec->total, &prev->total, t);
err = calc_err(&rec->total, &prev->total, t);
if (info > 0) {
i_str = "bulk-average";
info = (pps+drop) / info; /* calc avg bulk */
}
if (err > 0)
err_str = "drv-err";
printf(fmt2, "devmap-xmit", "total", pps, drop,
info, i_str, err_str);
}
printf("\n");
}
static bool stats_collect(struct stats_record *rec)
{
int fd;
int i;
/* TODO: Detect if someone unloaded the perf event_fd's, as
* this can happen by someone running perf-record -e
*/
fd = map_data[0].fd; /* map0: redirect_err_cnt */
for (i = 0; i < REDIR_RES_MAX; i++)
map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
fd = map_data[1].fd; /* map1: exception_cnt */
for (i = 0; i < XDP_ACTION_MAX; i++) {
map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
}
fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
for (i = 0; i < MAX_CPUS; i++)
map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
return true;
}
static void *alloc_rec_per_cpu(int record_size)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
void *array;
size_t size;
size = record_size * nr_cpus;
array = malloc(size);
memset(array, 0, size);
if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
exit(EXIT_FAIL_MEM);
}
return array;
}
static struct stats_record *alloc_stats_record(void)
{
struct stats_record *rec;
int rec_sz;
int i;
/* Alloc main stats_record structure */
rec = malloc(sizeof(*rec));
memset(rec, 0, sizeof(*rec));
if (!rec) {
fprintf(stderr, "Mem alloc error\n");
exit(EXIT_FAIL_MEM);
}
/* Alloc stats stored per CPU for each record */
rec_sz = sizeof(struct u64rec);
for (i = 0; i < REDIR_RES_MAX; i++)
rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
for (i = 0; i < XDP_ACTION_MAX; i++)
rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
rec_sz = sizeof(struct datarec);
rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
for (i = 0; i < MAX_CPUS; i++)
rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
return rec;
}
static void free_stats_record(struct stats_record *r)
{
int i;
for (i = 0; i < REDIR_RES_MAX; i++)
free(r->xdp_redirect[i].cpu);
for (i = 0; i < XDP_ACTION_MAX; i++)
free(r->xdp_exception[i].cpu);
free(r->xdp_cpumap_kthread.cpu);
free(r->xdp_devmap_xmit.cpu);
for (i = 0; i < MAX_CPUS; i++)
free(r->xdp_cpumap_enqueue[i].cpu);
free(r);
}
/* Pointer swap trick */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
struct stats_record *tmp;
tmp = *a;
*a = *b;
*b = tmp;
}
static void stats_poll(int interval, bool err_only)
{
struct stats_record *rec, *prev;
rec = alloc_stats_record();
prev = alloc_stats_record();
stats_collect(rec);
if (err_only)
printf("\n%s\n", __doc_err_only__);
/* Trick to pretty printf with thousands separators use %' */
setlocale(LC_NUMERIC, "en_US");
/* Header */
if (verbose)
printf("\n%s", __doc__);
/* TODO Need more advanced stats on error types */
if (verbose) {
printf(" - Stats map0: %s\n", map_data[0].name);
printf(" - Stats map1: %s\n", map_data[1].name);
printf("\n");
}
fflush(stdout);
while (1) {
swap(&prev, &rec);
stats_collect(rec);
stats_print(rec, prev, err_only);
fflush(stdout);
sleep(interval);
}
free_stats_record(rec);
free_stats_record(prev);
}
static void print_bpf_prog_info(void)
{
int i;
/* Prog info */
printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
for (i = 0; i < prog_cnt; i++) {
printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
}
/* Maps info */
printf("Loaded BPF prog have %d map(s)\n", map_data_count);
for (i = 0; i < map_data_count; i++) {
char *name = map_data[i].name;
int fd = map_data[i].fd;
printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
}
/* Event info */
printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
for (i = 0; i < prog_cnt; i++) {
if (event_fd[i] != -1)
printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
}
}
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int longindex = 0, opt;
int ret = EXIT_SUCCESS;
char bpf_obj_file[256];
/* Default settings: */
bool errors_only = true;
int interval = 2;
snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
/* Parse commands line args */
while ((opt = getopt_long(argc, argv, "hDSs:",
long_options, &longindex)) != -1) {
switch (opt) {
case 'D':
debug = true;
break;
case 'S':
errors_only = false;
break;
case 's':
interval = atoi(optarg);
break;
case 'h':
default:
usage(argv);
return EXIT_FAILURE;
}
}
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return EXIT_FAILURE;
}
if (load_bpf_file(bpf_obj_file)) {
printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
return EXIT_FAILURE;
}
if (!prog_fd[0]) {
printf("ERROR - load_bpf_file: %s\n", strerror(errno));
return EXIT_FAILURE;
}
if (debug) {
print_bpf_prog_info();
}
/* Unload/stop tracepoint event by closing fd's */
if (errors_only) {
/* The prog_fd[i] and event_fd[i] depend on the
* order the functions was defined in _kern.c
*/
close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
close(prog_fd[2]); /* func: trace_xdp_redirect */
close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
close(prog_fd[3]); /* func: trace_xdp_redirect_map */
}
stats_poll(interval, errors_only);
return ret;
}