From 64fe8c061de7d7ffdfdc104a57d48d645ae0ac4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?=
Date: Fri, 1 Nov 2019 12:03:44 +0100
Subject: [PATCH 1/3] xsk: Store struct xdp_sock as a flexible array member of
 the XSKMAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prior to this commit, the XDP socket instances were stored in a
separately allocated array of the XSKMAP. Now, we store the sockets as
a flexible array member, in a similar fashion to the arraymap. Doing
so, we do less pointer chasing in the lookup.

Signed-off-by: Björn Töpel
Signed-off-by: Daniel Borkmann
Acked-by: Jonathan Lemon
Link: https://lore.kernel.org/bpf/20191101110346.15004-2-bjorn.topel@gmail.com
---
 kernel/bpf/xskmap.c | 55 +++++++++++++++++++--------------------------
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 82a1ffe15dfa..edcbd863650e 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -11,9 +11,9 @@
 
 struct xsk_map {
 	struct bpf_map map;
-	struct xdp_sock **xsk_map;
 	struct list_head __percpu *flush_list;
 	spinlock_t lock; /* Synchronize map updates */
+	struct xdp_sock *xsk_map[];
 };
 
 int xsk_map_inc(struct xsk_map *map)
@@ -80,9 +80,10 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
 
 static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 {
+	struct bpf_map_memory mem;
+	int cpu, err, numa_node;
 	struct xsk_map *m;
-	int cpu, err;
-	u64 cost;
+	u64 cost, size;
 
 	if (!capable(CAP_NET_ADMIN))
 		return ERR_PTR(-EPERM);
@@ -92,44 +93,35 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
 		return ERR_PTR(-EINVAL);
 
-	m = kzalloc(sizeof(*m), GFP_USER);
-	if (!m)
+	numa_node = bpf_map_attr_numa_node(attr);
+	size = struct_size(m, xsk_map, attr->max_entries);
+	cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
+
+	err = bpf_map_charge_init(&mem, cost);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	m = bpf_map_area_alloc(size, numa_node);
+	if (!m) {
+		bpf_map_charge_finish(&mem);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	bpf_map_init_from_attr(&m->map, attr);
+	bpf_map_charge_move(&m->map.memory, &mem);
 	spin_lock_init(&m->lock);
 
-	cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
-	cost += sizeof(struct list_head) * num_possible_cpus();
-
-	/* Notice returns -EPERM on if map size is larger than memlock limit */
-	err = bpf_map_charge_init(&m->map.memory, cost);
-	if (err)
-		goto free_m;
-
-	err = -ENOMEM;
-
 	m->flush_list = alloc_percpu(struct list_head);
-	if (!m->flush_list)
-		goto free_charge;
+	if (!m->flush_list) {
+		bpf_map_charge_finish(&m->map.memory);
+		bpf_map_area_free(m);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	for_each_possible_cpu(cpu)
 		INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
 
-	m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
-					sizeof(struct xdp_sock *),
-					m->map.numa_node);
-	if (!m->xsk_map)
-		goto free_percpu;
 	return &m->map;
-
-free_percpu:
-	free_percpu(m->flush_list);
-free_charge:
-	bpf_map_charge_finish(&m->map.memory);
-free_m:
-	kfree(m);
-	return ERR_PTR(err);
 }
 
 static void xsk_map_free(struct bpf_map *map)
@@ -139,8 +131,7 @@ static void xsk_map_free(struct bpf_map *map)
 	bpf_clear_redirect_map(map);
 	synchronize_net();
 	free_percpu(m->flush_list);
-	bpf_map_area_free(m->xsk_map);
-	kfree(m);
+	bpf_map_area_free(m);
 }
 
 static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
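Since the commit message above is brief about the layout change, here is a
small user-space sketch of the same idea. The names demo_map, demo_map_alloc()
and demo_map_lookup() are invented, and plain calloc() stands in for the
kernel's struct_size()/bpf_map_area_alloc() pair; the point is only that the
header and the entry slots now live in one allocation, so a lookup costs one
pointer dereference instead of two.

/* Standalone illustration (not kernel code) of storing entries as a
 * flexible array member instead of a separately allocated array. */
#include <stdlib.h>

struct entry;                        /* stand-in for struct xdp_sock */

struct demo_map {
	unsigned int max_entries;
	struct entry *entries[];     /* flexible array member, like xsk_map[] */
};

static struct demo_map *demo_map_alloc(unsigned int max_entries)
{
	/* One allocation covers the header plus all entry slots,
	 * mirroring struct_size(m, xsk_map, attr->max_entries). */
	struct demo_map *m = calloc(1, sizeof(*m) +
				    (size_t)max_entries * sizeof(m->entries[0]));

	if (m)
		m->max_entries = max_entries;
	return m;
}

static struct entry *demo_map_lookup(const struct demo_map *m, unsigned int key)
{
	if (key >= m->max_entries)
		return NULL;
	return m->entries[key];      /* no second array pointer to chase */
}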
From e65650f291ee72fc578d6346080fc4699204ef2c Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Fri, 1 Nov 2019 12:03:45 +0100
Subject: [PATCH 2/3] bpf: Implement map_gen_lookup() callback for XSKMAP

Inline xsk_map_lookup_elem() by implementing the map_gen_lookup()
callback. This results in the BPF instructions being emitted in place
of the bpf_map_lookup_elem() helper call, and in better performance of
BPF programs.

Signed-off-by: Maciej Fijalkowski
Signed-off-by: Daniel Borkmann
Acked-by: Jonathan Lemon
Link: https://lore.kernel.org/bpf/20191101110346.15004-3-bjorn.topel@gmail.com
---
 kernel/bpf/xskmap.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index edcbd863650e..554939f78b83 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -163,6 +163,22 @@ struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
 	return xs;
 }
 
+static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
+	struct bpf_insn *insn = insn_buf;
+
+	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+	*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
+	*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
+	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
+	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+	*insn++ = BPF_MOV64_IMM(ret, 0);
+	return insn - insn_buf;
+}
+
 int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
 		       struct xdp_sock *xs)
 {
@@ -303,6 +319,7 @@ const struct bpf_map_ops xsk_map_ops = {
 	.map_free = xsk_map_free,
 	.map_get_next_key = xsk_map_get_next_key,
 	.map_lookup_elem = xsk_map_lookup_elem,
+	.map_gen_lookup = xsk_map_gen_lookup,
 	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
 	.map_update_elem = xsk_map_update_elem,
 	.map_delete_elem = xsk_map_delete_elem,
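For readers who prefer C to the BPF_* macro encoding, the eight instructions
emitted by xsk_map_gen_lookup() above behave roughly like the function below.
This is only an illustrative rendering, not code from the tree: it relies on
the struct xsk_map layout introduced in patch 1, and in the real inlined
sequence map->max_entries is patched in as an immediate by the verifier.

/* Illustrative C rendering of the inlined XSKMAP lookup: load the
 * 32-bit key, bounds-check it, then index the flexible xsk_map[]
 * array at the end of struct xsk_map. */
static struct xdp_sock *xskmap_lookup_rendered(struct xsk_map *m,
					       const u32 *key)
{
	u32 index = *key;			/* BPF_LDX_MEM(BPF_W, ...)   */

	if (index >= m->map.max_entries)	/* BPF_JMP_IMM(BPF_JGE, ...) */
		return NULL;			/* BPF_MOV64_IMM(ret, 0)     */

	/* LSH + ADD + LDX: &m->xsk_map[0] + index * sizeof(pointer) */
	return m->xsk_map[index];
}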
From d817991cc7486ab83f6c7188b0bc80eebee872f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?=
Date: Fri, 1 Nov 2019 12:03:46 +0100
Subject: [PATCH 3/3] xsk: Restructure/inline XSKMAP lookup/redirect/flush
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In this commit the XSKMAP entry lookup function used by the XDP
redirect code is moved from the xskmap.c file to the xdp_sock.h header,
so the lookup can be inlined from, e.g., the bpf_xdp_redirect_map()
function.

Further, the __xsk_map_redirect() and __xsk_map_flush() functions are
moved to xsk.c, which lets the compiler inline the xsk_rcv() and
xsk_flush() functions.

Finally, all the XDP socket functions were moved from linux/bpf.h to
net/xdp_sock.h, where most of the other XDP socket functions already
reside.

This yields a ~2% performance boost for the xdpsock "rx_drop"
scenario.

Signed-off-by: Björn Töpel
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20191101110346.15004-4-bjorn.topel@gmail.com
---
 include/linux/bpf.h    | 25 ---------------------
 include/net/xdp_sock.h | 51 ++++++++++++++++++++++++++++++++----------
 kernel/bpf/xskmap.c    | 48 ---------------------------------------
 net/xdp/xsk.c          | 33 ++++++++++++++++++++++++++--
 4 files changed, 70 insertions(+), 87 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 80158cff44bd..7c7f518811a6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1009,31 +1009,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
 }
 #endif
 
-#if defined(CONFIG_XDP_SOCKETS)
-struct xdp_sock;
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-		       struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
-#else
-struct xdp_sock;
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
-{
-	return NULL;
-}
-
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-				     struct xdp_sock *xs)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void __xsk_map_flush(struct bpf_map *map)
-{
-}
-#endif
-
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 void bpf_sk_reuseport_detach(struct sock *sk);
 int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index c9398ce7960f..e3780e4b74e1 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -69,7 +69,14 @@ struct xdp_umem {
 /* Nodes are linked in the struct xdp_sock map_list field, and used to
  * track which maps a certain socket reside in.
  */
-struct xsk_map;
+
+struct xsk_map {
+	struct bpf_map map;
+	struct list_head __percpu *flush_list;
+	spinlock_t lock; /* Synchronize map updates */
+	struct xdp_sock *xsk_map[];
+};
+
 struct xsk_map_node {
 	struct list_head node;
 	struct xsk_map *map;
@@ -109,8 +116,6 @@ struct xdp_sock {
 struct xdp_buff;
 #ifdef CONFIG_XDP_SOCKETS
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-void xsk_flush(struct xdp_sock *xs);
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
 /* Used from netdev driver */
 bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
@@ -134,6 +139,22 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
 			     struct xdp_sock **map_entry);
 int xsk_map_inc(struct xsk_map *map);
 void xsk_map_put(struct xsk_map *map);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+		       struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+						     u32 key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *xs;
+
+	if (key >= map->max_entries)
+		return NULL;
+
+	xs = READ_ONCE(m->xsk_map[key]);
+	return xs;
+}
 
 static inline u64 xsk_umem_extract_addr(u64 addr)
 {
@@ -224,15 +245,6 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return -ENOTSUPP;
 }
 
-static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
-{
-	return -ENOTSUPP;
-}
-
-static inline void xsk_flush(struct xdp_sock *xs)
-{
-}
-
 static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 {
 	return false;
@@ -357,6 +369,21 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
 	return 0;
 }
 
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+				     struct xdp_sock *xs)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+						     u32 key)
+{
+	return NULL;
+}
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 554939f78b83..da16c30868f3 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -9,13 +9,6 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
-struct xsk_map {
-	struct bpf_map map;
-	struct list_head __percpu *flush_list;
-	spinlock_t lock; /* Synchronize map updates */
-	struct xdp_sock *xsk_map[];
-};
-
 int xsk_map_inc(struct xsk_map *map)
 {
 	struct bpf_map *m = &map->map;
@@ -151,18 +144,6 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *xs;
-
-	if (key >= map->max_entries)
-		return NULL;
-
-	xs = READ_ONCE(m->xsk_map[key]);
-	return xs;
-}
-
 static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
@@ -179,35 +160,6 @@ static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	return insn - insn_buf;
 }
 
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-		       struct xdp_sock *xs)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
-	int err;
-
-	err = xsk_rcv(xs, xdp);
-	if (err)
-		return err;
-
-	if (!xs->flush_node.prev)
-		list_add(&xs->flush_node, flush_list);
-
-	return 0;
-}
-
-void __xsk_map_flush(struct bpf_map *map)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
-	struct xdp_sock *xs, *tmp;
-
-	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
-		xsk_flush(xs);
-		__list_del_clearprev(&xs->flush_node);
-	}
-}
-
 static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9044073fbf22..6040bc2b0088 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -196,7 +196,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
 	return false;
 }
 
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 len;
 
@@ -212,7 +212,7 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
 }
 
-void xsk_flush(struct xdp_sock *xs)
+static void xsk_flush(struct xdp_sock *xs)
 {
 	xskq_produce_flush_desc(xs->rx);
 	xs->sk.sk_data_ready(&xs->sk);
@@ -264,6 +264,35 @@ out_unlock:
 	return err;
 }
 
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+		       struct xdp_sock *xs)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	int err;
+
+	err = xsk_rcv(xs, xdp);
+	if (err)
+		return err;
+
+	if (!xs->flush_node.prev)
+		list_add(&xs->flush_node, flush_list);
+
+	return 0;
+}
+
+void __xsk_map_flush(struct bpf_map *map)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	struct xdp_sock *xs, *tmp;
+
+	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
+		xsk_flush(xs);
+		__list_del_clearprev(&xs->flush_node);
+	}
+}
+
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
 {
 	xskq_produce_flush_addr_n(umem->cq, nb_entries);
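As a closing illustration of the redirect/flush split that patch 3 moves into
xsk.c: __xsk_map_redirect() runs once per redirected packet and only queues
the destination socket on the per-CPU flush list, while __xsk_map_flush()
runs once per receive batch and wakes each queued socket exactly once. The
skeleton below is invented purely to show that split; real drivers reach
these helpers indirectly, through xdp_do_redirect() and xdp_do_flush_map(),
rather than calling them as done here.

/* Invented skeleton (not driver code): per-packet redirect followed by
 * one per-batch flush over the per-CPU flush list. */
static void example_rx_batch(struct bpf_map *xskmap, struct xdp_buff **bufs,
			     struct xdp_sock **dests, int n)
{
	int i;

	for (i = 0; i < n; i++)
		__xsk_map_redirect(xskmap, bufs[i], dests[i]);	/* queue on flush list */

	__xsk_map_flush(xskmap);	/* wake every queued socket once */
}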