2019-05-27 14:55:01 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2015-01-15 16:52:39 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/rtnetlink.h>
|
|
|
|
#include <linux/filter.h>
|
2015-03-20 22:11:12 +08:00
|
|
|
#include <linux/bpf.h>
|
|
|
|
|
2015-01-15 16:52:39 +08:00
|
|
|
#include <net/netlink.h>
|
2020-03-30 06:53:38 +08:00
|
|
|
#include <net/sock.h>
|
2015-01-15 16:52:39 +08:00
|
|
|
#include <net/pkt_sched.h>
|
2019-03-20 22:00:00 +08:00
|
|
|
#include <net/pkt_cls.h>
|
2015-01-15 16:52:39 +08:00
|
|
|
|
|
|
|
#include <linux/tc_act/tc_bpf.h>
|
|
|
|
#include <net/tc_act/tc_bpf.h>
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
#define ACT_BPF_NAME_LEN 256
|
|
|
|
|
|
|
|
struct tcf_bpf_cfg {
|
|
|
|
struct bpf_prog *filter;
|
|
|
|
struct sock_filter *bpf_ops;
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
const char *bpf_name;
|
2015-03-20 22:11:12 +08:00
|
|
|
u16 bpf_num_ops;
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
bool is_ebpf;
|
2015-03-20 22:11:12 +08:00
|
|
|
};
|
2015-01-15 16:52:39 +08:00
|
|
|
|
netns: make struct pernet_operations::id unsigned int
Make struct pernet_operations::id unsigned.
There are 2 reasons to do so:
1)
This field is really an index into an zero based array and
thus is unsigned entity. Using negative value is out-of-bound
access by definition.
2)
On x86_64 unsigned 32-bit data which are mixed with pointers
via array indexing or offsets added or subtracted to pointers
are preffered to signed 32-bit data.
"int" being used as an array index needs to be sign-extended
to 64-bit before being used.
void f(long *p, int i)
{
g(p[i]);
}
roughly translates to
movsx rsi, esi
mov rdi, [rsi+...]
call g
MOVSX is 3 byte instruction which isn't necessary if the variable is
unsigned because x86_64 is zero extending by default.
Now, there is net_generic() function which, you guessed it right, uses
"int" as an array index:
static inline void *net_generic(const struct net *net, int id)
{
...
ptr = ng->ptr[id - 1];
...
}
And this function is used a lot, so those sign extensions add up.
Patch snipes ~1730 bytes on allyesconfig kernel (without all junk
messing with code generation):
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
Unfortunately some functions actually grow bigger.
This is a semmingly random artefact of code generation with register
allocator being used differently. gcc decides that some variable
needs to live in new r8+ registers and every access now requires REX
prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be
used which is longer than [r8]
However, overall balance is in negative direction:
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
function old new delta
nfsd4_lock 3886 3959 +73
tipc_link_build_proto_msg 1096 1140 +44
mac80211_hwsim_new_radio 2776 2808 +32
tipc_mon_rcv 1032 1058 +26
svcauth_gss_legacy_init 1413 1429 +16
tipc_bcbase_select_primary 379 392 +13
nfsd4_exchange_id 1247 1260 +13
nfsd4_setclientid_confirm 782 793 +11
...
put_client_renew_locked 494 480 -14
ip_set_sockfn_get 730 716 -14
geneve_sock_add 829 813 -16
nfsd4_sequence_done 721 703 -18
nlmclnt_lookup_host 708 686 -22
nfsd4_lockt 1085 1063 -22
nfs_get_client 1077 1050 -27
tcf_bpf_init 1106 1076 -30
nfsd4_encode_fattr 5997 5930 -67
Total: Before=154856051, After=154854321, chg -0.00%
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-17 09:58:21 +08:00
|
|
|
static unsigned int bpf_net_id;
|
2016-07-26 07:09:41 +08:00
|
|
|
static struct tc_action_ops act_bpf_ops;
|
2016-02-23 07:57:53 +08:00
|
|
|
|
2018-08-12 21:34:50 +08:00
|
|
|
static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
|
|
|
|
struct tcf_result *res)
|
2015-01-15 16:52:39 +08:00
|
|
|
{
|
2016-09-13 05:38:43 +08:00
|
|
|
bool at_ingress = skb_at_tc_ingress(skb);
|
2016-07-26 07:09:41 +08:00
|
|
|
struct tcf_bpf *prog = to_bpf(act);
|
2015-08-26 11:06:35 +08:00
|
|
|
struct bpf_prog *filter;
|
act_bpf: allow non-default TC_ACT opcodes as BPF exec outcome
Revisiting commit d23b8ad8ab23 ("tc: add BPF based action") with regards
to eBPF support, I was thinking that it might be better to improve
return semantics from a BPF program invoked through BPF_PROG_RUN().
Currently, in case filter_res is 0, we overwrite the default action
opcode with TC_ACT_SHOT. A default action opcode configured through tc's
m_bpf can be: TC_ACT_RECLASSIFY, TC_ACT_PIPE, TC_ACT_SHOT, TC_ACT_UNSPEC,
TC_ACT_OK.
In cls_bpf, we have the possibility to overwrite the default class
associated with the classifier in case filter_res is _not_ 0xffffffff
(-1).
That allows us to fold multiple [e]BPF programs into a single one, where
they would otherwise need to be defined as a separate classifier with
its own classid, needlessly redoing parsing work, etc.
Similarly, we could do better in act_bpf: Since above TC_ACT* opcodes
are exported to UAPI anyway, we reuse them for return-code-to-tc-opcode
mapping, where we would allow above possibilities. Thus, like in cls_bpf,
a filter_res of 0xffffffff (-1) means that the configured _default_ action
is used. Any unkown return code from the BPF program would fail in
tcf_bpf() with TC_ACT_UNSPEC.
Should we one day want to make use of TC_ACT_STOLEN or TC_ACT_QUEUED,
which both have the same semantics, we have the option to either use
that as a default action (filter_res of 0xffffffff) or non-default BPF
return code.
All that will allow us to transparently use tcf_bpf() for both BPF
flavours.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-18 03:25:57 +08:00
|
|
|
int action, filter_res;
|
2015-01-15 16:52:39 +08:00
|
|
|
|
2015-08-26 11:06:35 +08:00
|
|
|
tcf_lastuse_update(&prog->tcf_tm);
|
2021-10-16 16:49:09 +08:00
|
|
|
bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
|
2015-01-15 16:52:39 +08:00
|
|
|
|
2015-08-26 11:06:35 +08:00
|
|
|
filter = rcu_dereference(prog->filter);
|
2015-06-05 01:11:53 +08:00
|
|
|
if (at_ingress) {
|
|
|
|
__skb_push(skb, skb->mac_len);
|
2017-09-25 08:25:50 +08:00
|
|
|
bpf_compute_data_pointers(skb);
|
2021-08-15 15:05:54 +08:00
|
|
|
filter_res = bpf_prog_run(filter, skb);
|
2015-06-05 01:11:53 +08:00
|
|
|
__skb_pull(skb, skb->mac_len);
|
|
|
|
} else {
|
2017-09-25 08:25:50 +08:00
|
|
|
bpf_compute_data_pointers(skb);
|
2021-08-15 15:05:54 +08:00
|
|
|
filter_res = bpf_prog_run(filter, skb);
|
2015-06-05 01:11:53 +08:00
|
|
|
}
|
bpf: Keep the (rcv) timestamp behavior for the existing tc-bpf@ingress
The current tc-bpf@ingress reads and writes the __sk_buff->tstamp
as a (rcv) timestamp which currently could either be 0 (not available)
or ktime_get_real(). This patch is to backward compatible with the
(rcv) timestamp expectation at ingress. If the skb->tstamp has
the delivery_time, the bpf insn rewrite will read 0 for tc-bpf
running at ingress as it is not available. When writing at ingress,
it will also clear the skb->mono_delivery_time bit.
/* BPF_READ: a = __sk_buff->tstamp */
if (!skb->tc_at_ingress || !skb->mono_delivery_time)
a = skb->tstamp;
else
a = 0
/* BPF_WRITE: __sk_buff->tstamp = a */
if (skb->tc_at_ingress)
skb->mono_delivery_time = 0;
skb->tstamp = a;
[ A note on the BPF_CGROUP_INET_INGRESS which can also access
skb->tstamp. At that point, the skb is delivered locally
and skb_clear_delivery_time() has already been done,
so the skb->tstamp will only have the (rcv) timestamp. ]
If the tc-bpf@egress writes 0 to skb->tstamp, the skb->mono_delivery_time
has to be cleared also. It could be done together during
convert_ctx_access(). However, the latter patch will also expose
the skb->mono_delivery_time bit as __sk_buff->delivery_time_type.
Changing the delivery_time_type in the background may surprise
the user, e.g. the 2nd read on __sk_buff->delivery_time_type
may need a READ_ONCE() to avoid compiler optimization. Thus,
in expecting the needs in the latter patch, this patch does a
check on !skb->tstamp after running the tc-bpf and clears the
skb->mono_delivery_time bit if needed. The earlier discussion
on v4 [0].
The bpf insn rewrite requires the skb's mono_delivery_time bit and
tc_at_ingress bit. They are moved up in sk_buff so that bpf rewrite
can be done at a fixed offset. tc_skip_classify is moved together with
tc_at_ingress. To get one bit for mono_delivery_time, csum_not_inet is
moved down and this bit is currently used by sctp.
[0]: https://lore.kernel.org/bpf/20220217015043.khqwqklx45c4m4se@kafai-mbp.dhcp.thefacebook.com/
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2022-03-03 03:56:28 +08:00
|
|
|
if (unlikely(!skb->tstamp && skb->mono_delivery_time))
|
|
|
|
skb->mono_delivery_time = 0;
|
2020-03-30 06:53:38 +08:00
|
|
|
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
|
|
|
|
skb_orphan(skb);
|
act_bpf: allow non-default TC_ACT opcodes as BPF exec outcome
Revisiting commit d23b8ad8ab23 ("tc: add BPF based action") with regards
to eBPF support, I was thinking that it might be better to improve
return semantics from a BPF program invoked through BPF_PROG_RUN().
Currently, in case filter_res is 0, we overwrite the default action
opcode with TC_ACT_SHOT. A default action opcode configured through tc's
m_bpf can be: TC_ACT_RECLASSIFY, TC_ACT_PIPE, TC_ACT_SHOT, TC_ACT_UNSPEC,
TC_ACT_OK.
In cls_bpf, we have the possibility to overwrite the default class
associated with the classifier in case filter_res is _not_ 0xffffffff
(-1).
That allows us to fold multiple [e]BPF programs into a single one, where
they would otherwise need to be defined as a separate classifier with
its own classid, needlessly redoing parsing work, etc.
Similarly, we could do better in act_bpf: Since above TC_ACT* opcodes
are exported to UAPI anyway, we reuse them for return-code-to-tc-opcode
mapping, where we would allow above possibilities. Thus, like in cls_bpf,
a filter_res of 0xffffffff (-1) means that the configured _default_ action
is used. Any unkown return code from the BPF program would fail in
tcf_bpf() with TC_ACT_UNSPEC.
Should we one day want to make use of TC_ACT_STOLEN or TC_ACT_QUEUED,
which both have the same semantics, we have the option to either use
that as a default action (filter_res of 0xffffffff) or non-default BPF
return code.
All that will allow us to transparently use tcf_bpf() for both BPF
flavours.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-18 03:25:57 +08:00
|
|
|
|
|
|
|
/* A BPF program may overwrite the default action opcode.
|
|
|
|
* Similarly as in cls_bpf, if filter_res == -1 we use the
|
|
|
|
* default action specified from tc.
|
|
|
|
*
|
|
|
|
* In case a different well-known TC_ACT opcode has been
|
|
|
|
* returned, it will overwrite the default one.
|
|
|
|
*
|
2020-11-09 15:02:17 +08:00
|
|
|
* For everything else that is unknown, TC_ACT_UNSPEC is
|
act_bpf: allow non-default TC_ACT opcodes as BPF exec outcome
Revisiting commit d23b8ad8ab23 ("tc: add BPF based action") with regards
to eBPF support, I was thinking that it might be better to improve
return semantics from a BPF program invoked through BPF_PROG_RUN().
Currently, in case filter_res is 0, we overwrite the default action
opcode with TC_ACT_SHOT. A default action opcode configured through tc's
m_bpf can be: TC_ACT_RECLASSIFY, TC_ACT_PIPE, TC_ACT_SHOT, TC_ACT_UNSPEC,
TC_ACT_OK.
In cls_bpf, we have the possibility to overwrite the default class
associated with the classifier in case filter_res is _not_ 0xffffffff
(-1).
That allows us to fold multiple [e]BPF programs into a single one, where
they would otherwise need to be defined as a separate classifier with
its own classid, needlessly redoing parsing work, etc.
Similarly, we could do better in act_bpf: Since above TC_ACT* opcodes
are exported to UAPI anyway, we reuse them for return-code-to-tc-opcode
mapping, where we would allow above possibilities. Thus, like in cls_bpf,
a filter_res of 0xffffffff (-1) means that the configured _default_ action
is used. Any unkown return code from the BPF program would fail in
tcf_bpf() with TC_ACT_UNSPEC.
Should we one day want to make use of TC_ACT_STOLEN or TC_ACT_QUEUED,
which both have the same semantics, we have the option to either use
that as a default action (filter_res of 0xffffffff) or non-default BPF
return code.
All that will allow us to transparently use tcf_bpf() for both BPF
flavours.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-18 03:25:57 +08:00
|
|
|
* returned.
|
|
|
|
*/
|
|
|
|
switch (filter_res) {
|
|
|
|
case TC_ACT_PIPE:
|
|
|
|
case TC_ACT_RECLASSIFY:
|
|
|
|
case TC_ACT_OK:
|
2015-09-16 14:05:43 +08:00
|
|
|
case TC_ACT_REDIRECT:
|
act_bpf: allow non-default TC_ACT opcodes as BPF exec outcome
Revisiting commit d23b8ad8ab23 ("tc: add BPF based action") with regards
to eBPF support, I was thinking that it might be better to improve
return semantics from a BPF program invoked through BPF_PROG_RUN().
Currently, in case filter_res is 0, we overwrite the default action
opcode with TC_ACT_SHOT. A default action opcode configured through tc's
m_bpf can be: TC_ACT_RECLASSIFY, TC_ACT_PIPE, TC_ACT_SHOT, TC_ACT_UNSPEC,
TC_ACT_OK.
In cls_bpf, we have the possibility to overwrite the default class
associated with the classifier in case filter_res is _not_ 0xffffffff
(-1).
That allows us to fold multiple [e]BPF programs into a single one, where
they would otherwise need to be defined as a separate classifier with
its own classid, needlessly redoing parsing work, etc.
Similarly, we could do better in act_bpf: Since above TC_ACT* opcodes
are exported to UAPI anyway, we reuse them for return-code-to-tc-opcode
mapping, where we would allow above possibilities. Thus, like in cls_bpf,
a filter_res of 0xffffffff (-1) means that the configured _default_ action
is used. Any unkown return code from the BPF program would fail in
tcf_bpf() with TC_ACT_UNSPEC.
Should we one day want to make use of TC_ACT_STOLEN or TC_ACT_QUEUED,
which both have the same semantics, we have the option to either use
that as a default action (filter_res of 0xffffffff) or non-default BPF
return code.
All that will allow us to transparently use tcf_bpf() for both BPF
flavours.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-18 03:25:57 +08:00
|
|
|
action = filter_res;
|
|
|
|
break;
|
|
|
|
case TC_ACT_SHOT:
|
|
|
|
action = filter_res;
|
2015-08-26 11:06:35 +08:00
|
|
|
qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
|
act_bpf: allow non-default TC_ACT opcodes as BPF exec outcome
Revisiting commit d23b8ad8ab23 ("tc: add BPF based action") with regards
to eBPF support, I was thinking that it might be better to improve
return semantics from a BPF program invoked through BPF_PROG_RUN().
Currently, in case filter_res is 0, we overwrite the default action
opcode with TC_ACT_SHOT. A default action opcode configured through tc's
m_bpf can be: TC_ACT_RECLASSIFY, TC_ACT_PIPE, TC_ACT_SHOT, TC_ACT_UNSPEC,
TC_ACT_OK.
In cls_bpf, we have the possibility to overwrite the default class
associated with the classifier in case filter_res is _not_ 0xffffffff
(-1).
That allows us to fold multiple [e]BPF programs into a single one, where
they would otherwise need to be defined as a separate classifier with
its own classid, needlessly redoing parsing work, etc.
Similarly, we could do better in act_bpf: Since above TC_ACT* opcodes
are exported to UAPI anyway, we reuse them for return-code-to-tc-opcode
mapping, where we would allow above possibilities. Thus, like in cls_bpf,
a filter_res of 0xffffffff (-1) means that the configured _default_ action
is used. Any unkown return code from the BPF program would fail in
tcf_bpf() with TC_ACT_UNSPEC.
Should we one day want to make use of TC_ACT_STOLEN or TC_ACT_QUEUED,
which both have the same semantics, we have the option to either use
that as a default action (filter_res of 0xffffffff) or non-default BPF
return code.
All that will allow us to transparently use tcf_bpf() for both BPF
flavours.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-18 03:25:57 +08:00
|
|
|
break;
|
|
|
|
case TC_ACT_UNSPEC:
|
2015-03-20 22:11:12 +08:00
|
|
|
action = prog->tcf_action;
|
act_bpf: allow non-default TC_ACT opcodes as BPF exec outcome
Revisiting commit d23b8ad8ab23 ("tc: add BPF based action") with regards
to eBPF support, I was thinking that it might be better to improve
return semantics from a BPF program invoked through BPF_PROG_RUN().
Currently, in case filter_res is 0, we overwrite the default action
opcode with TC_ACT_SHOT. A default action opcode configured through tc's
m_bpf can be: TC_ACT_RECLASSIFY, TC_ACT_PIPE, TC_ACT_SHOT, TC_ACT_UNSPEC,
TC_ACT_OK.
In cls_bpf, we have the possibility to overwrite the default class
associated with the classifier in case filter_res is _not_ 0xffffffff
(-1).
That allows us to fold multiple [e]BPF programs into a single one, where
they would otherwise need to be defined as a separate classifier with
its own classid, needlessly redoing parsing work, etc.
Similarly, we could do better in act_bpf: Since above TC_ACT* opcodes
are exported to UAPI anyway, we reuse them for return-code-to-tc-opcode
mapping, where we would allow above possibilities. Thus, like in cls_bpf,
a filter_res of 0xffffffff (-1) means that the configured _default_ action
is used. Any unkown return code from the BPF program would fail in
tcf_bpf() with TC_ACT_UNSPEC.
Should we one day want to make use of TC_ACT_STOLEN or TC_ACT_QUEUED,
which both have the same semantics, we have the option to either use
that as a default action (filter_res of 0xffffffff) or non-default BPF
return code.
All that will allow us to transparently use tcf_bpf() for both BPF
flavours.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-03-18 03:25:57 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
action = TC_ACT_UNSPEC;
|
|
|
|
break;
|
2015-01-15 16:52:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return action;
|
|
|
|
}
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog)
|
|
|
|
{
|
|
|
|
return !prog->bpf_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
|
|
|
|
struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
struct nlattr *nla;
|
|
|
|
|
|
|
|
if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops *
|
|
|
|
sizeof(struct sock_filter));
|
|
|
|
if (nla == NULL)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
|
|
|
|
struct sk_buff *skb)
|
|
|
|
{
|
2016-12-05 06:19:41 +08:00
|
|
|
struct nlattr *nla;
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
if (prog->bpf_name &&
|
|
|
|
nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2017-06-22 02:16:11 +08:00
|
|
|
if (nla_put_u32(skb, TCA_ACT_BPF_ID, prog->filter->aux->id))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2017-01-14 06:38:15 +08:00
|
|
|
nla = nla_reserve(skb, TCA_ACT_BPF_TAG, sizeof(prog->filter->tag));
|
2016-12-05 06:19:41 +08:00
|
|
|
if (nla == NULL)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2017-01-14 06:38:15 +08:00
|
|
|
memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));
|
2016-12-05 06:19:41 +08:00
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
|
2015-01-15 16:52:39 +08:00
|
|
|
int bind, int ref)
|
|
|
|
{
|
|
|
|
unsigned char *tp = skb_tail_pointer(skb);
|
2016-07-26 07:09:41 +08:00
|
|
|
struct tcf_bpf *prog = to_bpf(act);
|
2015-01-15 16:52:39 +08:00
|
|
|
struct tc_act_bpf opt = {
|
2015-03-20 22:11:12 +08:00
|
|
|
.index = prog->tcf_index,
|
2018-07-05 22:24:24 +08:00
|
|
|
.refcnt = refcount_read(&prog->tcf_refcnt) - ref,
|
|
|
|
.bindcnt = atomic_read(&prog->tcf_bindcnt) - bind,
|
2015-01-15 16:52:39 +08:00
|
|
|
};
|
2015-03-20 22:11:12 +08:00
|
|
|
struct tcf_t tm;
|
|
|
|
int ret;
|
2015-01-15 16:52:39 +08:00
|
|
|
|
2018-08-15 02:46:16 +08:00
|
|
|
spin_lock_bh(&prog->tcf_lock);
|
2018-08-11 01:51:41 +08:00
|
|
|
opt.action = prog->tcf_action;
|
2015-01-15 16:52:39 +08:00
|
|
|
if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
|
|
|
|
goto nla_put_failure;
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
if (tcf_bpf_is_ebpf(prog))
|
|
|
|
ret = tcf_bpf_dump_ebpf_info(prog, skb);
|
|
|
|
else
|
|
|
|
ret = tcf_bpf_dump_bpf_info(prog, skb);
|
|
|
|
if (ret)
|
2015-01-15 16:52:39 +08:00
|
|
|
goto nla_put_failure;
|
|
|
|
|
2016-06-06 18:32:55 +08:00
|
|
|
tcf_tm_dump(&tm, &prog->tcf_tm);
|
2016-04-26 16:06:18 +08:00
|
|
|
if (nla_put_64bit(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm,
|
|
|
|
TCA_ACT_BPF_PAD))
|
2015-01-15 16:52:39 +08:00
|
|
|
goto nla_put_failure;
|
2015-03-20 22:11:12 +08:00
|
|
|
|
2018-08-15 02:46:16 +08:00
|
|
|
spin_unlock_bh(&prog->tcf_lock);
|
2015-01-15 16:52:39 +08:00
|
|
|
return skb->len;
|
|
|
|
|
|
|
|
nla_put_failure:
|
2018-08-15 02:46:16 +08:00
|
|
|
spin_unlock_bh(&prog->tcf_lock);
|
2015-01-15 16:52:39 +08:00
|
|
|
nlmsg_trim(skb, tp);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
|
|
|
|
[TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) },
|
2015-03-20 22:11:12 +08:00
|
|
|
[TCA_ACT_BPF_FD] = { .type = NLA_U32 },
|
2016-06-05 22:41:32 +08:00
|
|
|
[TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING,
|
|
|
|
.len = ACT_BPF_NAME_LEN },
|
2015-01-15 16:52:39 +08:00
|
|
|
[TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 },
|
|
|
|
[TCA_ACT_BPF_OPS] = { .type = NLA_BINARY,
|
|
|
|
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
|
|
|
|
};
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
|
2015-01-15 16:52:39 +08:00
|
|
|
{
|
|
|
|
struct sock_filter *bpf_ops;
|
2015-03-20 22:11:12 +08:00
|
|
|
struct sock_fprog_kern fprog_tmp;
|
2015-01-15 16:52:39 +08:00
|
|
|
struct bpf_prog *fp;
|
2015-03-20 22:11:12 +08:00
|
|
|
u16 bpf_size, bpf_num_ops;
|
2015-01-15 16:52:39 +08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
|
|
|
|
if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
bpf_size = bpf_num_ops * sizeof(*bpf_ops);
|
2015-01-22 17:58:19 +08:00
|
|
|
if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2018-07-28 18:38:06 +08:00
|
|
|
bpf_ops = kmemdup(nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size, GFP_KERNEL);
|
2015-03-20 22:11:12 +08:00
|
|
|
if (bpf_ops == NULL)
|
2015-01-15 16:52:39 +08:00
|
|
|
return -ENOMEM;
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
fprog_tmp.len = bpf_num_ops;
|
|
|
|
fprog_tmp.filter = bpf_ops;
|
2015-01-15 16:52:39 +08:00
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
ret = bpf_prog_create(&fp, &fprog_tmp);
|
|
|
|
if (ret < 0) {
|
|
|
|
kfree(bpf_ops);
|
|
|
|
return ret;
|
|
|
|
}
|
2015-01-15 16:52:39 +08:00
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
cfg->bpf_ops = bpf_ops;
|
|
|
|
cfg->bpf_num_ops = bpf_num_ops;
|
|
|
|
cfg->filter = fp;
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
cfg->is_ebpf = false;
|
2015-03-20 22:11:12 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
|
|
|
|
{
|
|
|
|
struct bpf_prog *fp;
|
|
|
|
char *name = NULL;
|
|
|
|
u32 bpf_fd;
|
|
|
|
|
|
|
|
bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
|
|
|
|
|
2016-06-30 23:24:44 +08:00
|
|
|
fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT);
|
2015-03-20 22:11:12 +08:00
|
|
|
if (IS_ERR(fp))
|
|
|
|
return PTR_ERR(fp);
|
|
|
|
|
|
|
|
if (tb[TCA_ACT_BPF_NAME]) {
|
2016-10-26 16:53:16 +08:00
|
|
|
name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL);
|
2015-03-20 22:11:12 +08:00
|
|
|
if (!name) {
|
|
|
|
bpf_prog_put(fp);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cfg->bpf_name = name;
|
|
|
|
cfg->filter = fp;
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
cfg->is_ebpf = true;
|
2015-03-20 22:11:12 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
|
|
|
|
{
|
2018-04-06 07:19:37 +08:00
|
|
|
struct bpf_prog *filter = cfg->filter;
|
|
|
|
|
|
|
|
if (filter) {
|
|
|
|
if (cfg->is_ebpf)
|
|
|
|
bpf_prog_put(filter);
|
|
|
|
else
|
|
|
|
bpf_prog_destroy(filter);
|
|
|
|
}
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
|
|
|
|
kfree(cfg->bpf_ops);
|
|
|
|
kfree(cfg->bpf_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
|
|
|
|
struct tcf_bpf_cfg *cfg)
|
|
|
|
{
|
|
|
|
cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
|
2015-08-26 11:06:35 +08:00
|
|
|
/* updates to prog->filter are prevented, since it's called either
|
2018-08-11 01:51:41 +08:00
|
|
|
* with tcf lock or during final cleanup in rcu callback
|
2015-08-26 11:06:35 +08:00
|
|
|
*/
|
|
|
|
cfg->filter = rcu_dereference_protected(prog->filter, 1);
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
|
|
|
|
cfg->bpf_ops = prog->bpf_ops;
|
|
|
|
cfg->bpf_name = prog->bpf_name;
|
|
|
|
}
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
|
2016-07-26 07:09:41 +08:00
|
|
|
struct nlattr *est, struct tc_action **act,
|
2019-10-30 22:09:05 +08:00
|
|
|
struct tcf_proto *tp, u32 flags,
|
|
|
|
struct netlink_ext_ack *extack)
|
2015-03-20 22:11:12 +08:00
|
|
|
{
|
2016-02-23 07:57:53 +08:00
|
|
|
struct tc_action_net *tn = net_generic(net, bpf_net_id);
|
2021-07-30 07:12:14 +08:00
|
|
|
bool bind = flags & TCA_ACT_FLAGS_BIND;
|
2015-03-20 22:11:12 +08:00
|
|
|
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
|
2019-03-20 22:00:00 +08:00
|
|
|
struct tcf_chain *goto_ch = NULL;
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
struct tcf_bpf_cfg cfg, old;
|
2015-03-20 22:11:12 +08:00
|
|
|
struct tc_act_bpf *parm;
|
|
|
|
struct tcf_bpf *prog;
|
|
|
|
bool is_bpf, is_ebpf;
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
int ret, res = 0;
|
2019-08-01 21:02:51 +08:00
|
|
|
u32 index;
|
2015-03-20 22:11:12 +08:00
|
|
|
|
|
|
|
if (!nla)
|
|
|
|
return -EINVAL;
|
|
|
|
|
netlink: make validation more configurable for future strictness
We currently have two levels of strict validation:
1) liberal (default)
- undefined (type >= max) & NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
- garbage at end of message accepted
2) strict (opt-in)
- NLA_UNSPEC attributes accepted
- attribute length >= expected accepted
Split out parsing strictness into four different options:
* TRAILING - check that there's no trailing data after parsing
attributes (in message or nested)
* MAXTYPE - reject attrs > max known type
* UNSPEC - reject attributes with NLA_UNSPEC policy entries
* STRICT_ATTRS - strictly validate attribute size
The default for future things should be *everything*.
The current *_strict() is a combination of TRAILING and MAXTYPE,
and is renamed to _deprecated_strict().
The current regular parsing has none of this, and is renamed to
*_parse_deprecated().
Additionally it allows us to selectively set one of the new flags
even on old policies. Notably, the UNSPEC flag could be useful in
this case, since it can be arranged (by filling in the policy) to
not be an incompatible userspace ABI change, but would then going
forward prevent forgetting attribute entries. Similar can apply
to the POLICY flag.
We end up with the following renames:
* nla_parse -> nla_parse_deprecated
* nla_parse_strict -> nla_parse_deprecated_strict
* nlmsg_parse -> nlmsg_parse_deprecated
* nlmsg_parse_strict -> nlmsg_parse_deprecated_strict
* nla_parse_nested -> nla_parse_nested_deprecated
* nla_validate_nested -> nla_validate_nested_deprecated
Using spatch, of course:
@@
expression TB, MAX, HEAD, LEN, POL, EXT;
@@
-nla_parse(TB, MAX, HEAD, LEN, POL, EXT)
+nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression NLH, HDRLEN, TB, MAX, POL, EXT;
@@
-nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
+nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT)
@@
expression TB, MAX, NLA, POL, EXT;
@@
-nla_parse_nested(TB, MAX, NLA, POL, EXT)
+nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT)
@@
expression START, MAX, POL, EXT;
@@
-nla_validate_nested(START, MAX, POL, EXT)
+nla_validate_nested_deprecated(START, MAX, POL, EXT)
@@
expression NLH, HDRLEN, MAX, POL, EXT;
@@
-nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT)
+nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT)
For this patch, don't actually add the strict, non-renamed versions
yet so that it breaks compile if I get it wrong.
Also, while at it, make nla_validate and nla_parse go down to a
common __nla_validate_parse() function to avoid code duplication.
Ultimately, this allows us to have very strict validation for every
new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the
next patch, while existing things will continue to work as is.
In effect then, this adds fully strict validation for any new command.
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
|
|
|
ret = nla_parse_nested_deprecated(tb, TCA_ACT_BPF_MAX, nla,
|
|
|
|
act_bpf_policy, NULL);
|
2015-03-20 22:11:12 +08:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
if (!tb[TCA_ACT_BPF_PARMS])
|
2015-03-20 22:11:12 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
|
2019-08-01 21:02:51 +08:00
|
|
|
index = parm->index;
|
|
|
|
ret = tcf_idr_check_alloc(tn, &index, act, bind);
|
2018-07-05 22:24:32 +08:00
|
|
|
if (!ret) {
|
2019-08-01 21:02:51 +08:00
|
|
|
ret = tcf_idr_create(tn, index, est, act,
|
2021-12-18 02:16:17 +08:00
|
|
|
&act_bpf_ops, bind, true, flags);
|
2018-07-05 22:24:32 +08:00
|
|
|
if (ret < 0) {
|
2019-08-01 21:02:51 +08:00
|
|
|
tcf_idr_cleanup(tn, index);
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
return ret;
|
2018-07-05 22:24:32 +08:00
|
|
|
}
|
2015-01-15 16:52:39 +08:00
|
|
|
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
res = ACT_P_CREATED;
|
2018-07-05 22:24:32 +08:00
|
|
|
} else if (ret > 0) {
|
2015-03-20 22:11:12 +08:00
|
|
|
/* Don't override defaults. */
|
2015-01-15 16:52:39 +08:00
|
|
|
if (bind)
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
return 0;
|
2015-03-20 22:11:12 +08:00
|
|
|
|
2021-07-30 07:12:14 +08:00
|
|
|
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
|
2018-07-05 22:24:30 +08:00
|
|
|
tcf_idr_release(*act, bind);
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
return -EEXIST;
|
2018-07-05 22:24:30 +08:00
|
|
|
}
|
2018-07-05 22:24:32 +08:00
|
|
|
} else {
|
|
|
|
return ret;
|
2015-01-15 16:52:39 +08:00
|
|
|
}
|
|
|
|
|
2019-03-20 22:00:00 +08:00
|
|
|
ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
|
|
|
|
if (ret < 0)
|
|
|
|
goto release_idr;
|
|
|
|
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
|
|
|
|
is_ebpf = tb[TCA_ACT_BPF_FD];
|
|
|
|
|
|
|
|
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
|
|
|
|
ret = -EINVAL;
|
2019-03-20 22:00:00 +08:00
|
|
|
goto put_chain;
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
memset(&cfg, 0, sizeof(cfg));
|
|
|
|
|
|
|
|
ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
|
|
|
|
tcf_bpf_init_from_efd(tb, &cfg);
|
|
|
|
if (ret < 0)
|
2019-03-20 22:00:00 +08:00
|
|
|
goto put_chain;
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
|
2016-07-26 07:09:41 +08:00
|
|
|
prog = to_bpf(*act);
|
2015-03-20 22:11:12 +08:00
|
|
|
|
2018-08-15 02:46:16 +08:00
|
|
|
spin_lock_bh(&prog->tcf_lock);
|
2015-08-26 11:06:32 +08:00
|
|
|
if (res != ACT_P_CREATED)
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
tcf_bpf_prog_fill_cfg(prog, &old);
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
prog->bpf_ops = cfg.bpf_ops;
|
|
|
|
prog->bpf_name = cfg.bpf_name;
|
|
|
|
|
|
|
|
if (cfg.bpf_num_ops)
|
|
|
|
prog->bpf_num_ops = cfg.bpf_num_ops;
|
|
|
|
|
2019-03-20 22:00:00 +08:00
|
|
|
goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch);
|
2015-08-26 11:06:35 +08:00
|
|
|
rcu_assign_pointer(prog->filter, cfg.filter);
|
2018-08-15 02:46:16 +08:00
|
|
|
spin_unlock_bh(&prog->tcf_lock);
|
2015-01-15 16:52:39 +08:00
|
|
|
|
2019-03-20 22:00:00 +08:00
|
|
|
if (goto_ch)
|
|
|
|
tcf_chain_put_by_act(goto_ch);
|
|
|
|
|
2020-09-23 11:56:23 +08:00
|
|
|
if (res != ACT_P_CREATED) {
|
2015-08-26 11:06:35 +08:00
|
|
|
/* make sure the program being replaced is no longer executing */
|
|
|
|
synchronize_rcu();
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
tcf_bpf_cfg_cleanup(&old);
|
2015-08-26 11:06:35 +08:00
|
|
|
}
|
2015-03-20 22:11:12 +08:00
|
|
|
|
act_bpf: properly support late binding of bpf action to a classifier
Since the introduction of the BPF action in d23b8ad8ab23 ("tc: add BPF
based action"), late binding was not working as expected. I.e. setting
the action part for a classifier only via 'bpf index <num>', where <num>
is the index of an existing action, is being rejected by the kernel due
to other missing parameters.
It doesn't make sense to require these parameters such as BPF opcodes
etc, as they are not going to be used anyway: in this case, they're just
allocated/parsed and then freed again w/o doing anything meaningful.
Instead, parse and verify the remaining parameters *after* the test on
tcf_hash_check(), when we really know that we're dealing with creation
of a new action or replacement of an existing one and where late binding
is thus irrelevant.
After patch, test case is now working:
FOO="1,6 0 0 4294967295,"
tc actions add action bpf bytecode "$FOO"
tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action bpf index 1
tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
tc filter show dev foo
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 flowid 1:1 bytecode '1,6 0 0 4294967295'
action order 1: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 1 ref 2 bind 1
Late binding of a BPF action can be useful for preloading maps (e.g. before
they hit traffic) in case of eBPF programs, or to share a single eBPF action
with multiple classifiers.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-03 22:21:57 +08:00
|
|
|
return res;
|
2015-01-15 16:52:39 +08:00
|
|
|
|
2019-03-20 22:00:00 +08:00
|
|
|
put_chain:
|
|
|
|
if (goto_ch)
|
|
|
|
tcf_chain_put_by_act(goto_ch);
|
|
|
|
|
|
|
|
release_idr:
|
|
|
|
tcf_idr_release(*act, bind);
|
2015-01-15 16:52:39 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-12-06 04:53:07 +08:00
|
|
|
static void tcf_bpf_cleanup(struct tc_action *act)
|
2015-01-15 16:52:39 +08:00
|
|
|
{
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
struct tcf_bpf_cfg tmp;
|
2015-07-15 03:15:19 +08:00
|
|
|
|
2016-07-26 07:09:41 +08:00
|
|
|
tcf_bpf_prog_fill_cfg(to_bpf(act), &tmp);
|
act_bpf: fix memory leaks when replacing bpf programs
We currently trigger multiple memory leaks when replacing bpf
actions, besides others:
comm "tc", pid 1909, jiffies 4294851310 (age 1602.796s)
hex dump (first 32 bytes):
01 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 ................
18 b0 98 6d 00 88 ff ff 00 00 00 00 00 00 00 00 ...m............
backtrace:
[<ffffffff817e623e>] kmemleak_alloc+0x4e/0xb0
[<ffffffff8120a22d>] __vmalloc_node_range+0x1bd/0x2c0
[<ffffffff8120a37a>] __vmalloc+0x4a/0x50
[<ffffffff811a8d0a>] bpf_prog_alloc+0x3a/0xa0
[<ffffffff816c0684>] bpf_prog_create+0x44/0xa0
[<ffffffffa09ba4eb>] tcf_bpf_init+0x28b/0x3c0 [act_bpf]
[<ffffffff816d7001>] tcf_action_init_1+0x191/0x1b0
[<ffffffff816d70a2>] tcf_action_init+0x82/0xf0
[<ffffffff816d4d12>] tcf_exts_validate+0xb2/0xc0
[<ffffffffa09b5838>] cls_bpf_modify_existing+0x98/0x340 [cls_bpf]
[<ffffffffa09b5cd6>] cls_bpf_change+0x1a6/0x274 [cls_bpf]
[<ffffffff816d56e5>] tc_ctl_tfilter+0x335/0x910
[<ffffffff816b9145>] rtnetlink_rcv_msg+0x95/0x240
[<ffffffff816df34f>] netlink_rcv_skb+0xaf/0xc0
[<ffffffff816b909e>] rtnetlink_rcv+0x2e/0x40
[<ffffffff816deaaf>] netlink_unicast+0xef/0x1b0
Issue is that the old content from tcf_bpf is allocated and needs
to be released when we replace it. We seem to do that since the
beginning of act_bpf on the filter and insns, later on the name as
well.
Example test case, after patch:
# FOO="1,6 0 0 4294967295,"
# BAR="1,6 0 0 4294967294,"
# tc actions add action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$BAR" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe
index 2 ref 1 bind 0
# tc actions replace action bpf bytecode "$FOO" index 2
# tc actions show action bpf
action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe
index 2 ref 1 bind 0
# tc actions del action bpf index 2
[...]
# echo "scan" > /sys/kernel/debug/kmemleak
# cat /sys/kernel/debug/kmemleak | grep "comm \"tc\"" | wc -l
0
Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 00:40:56 +08:00
|
|
|
tcf_bpf_cfg_cleanup(&tmp);
|
2015-01-15 16:52:39 +08:00
|
|
|
}
|
|
|
|
|
2016-02-23 07:57:53 +08:00
|
|
|
static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb, int type,
|
2018-02-15 23:54:58 +08:00
|
|
|
const struct tc_action_ops *ops,
|
|
|
|
struct netlink_ext_ack *extack)
|
2016-02-23 07:57:53 +08:00
|
|
|
{
|
|
|
|
struct tc_action_net *tn = net_generic(net, bpf_net_id);
|
|
|
|
|
2018-02-15 23:54:59 +08:00
|
|
|
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
|
2016-02-23 07:57:53 +08:00
|
|
|
}
|
|
|
|
|
2018-08-30 01:15:35 +08:00
|
|
|
static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
|
2016-02-23 07:57:53 +08:00
|
|
|
{
|
|
|
|
struct tc_action_net *tn = net_generic(net, bpf_net_id);
|
|
|
|
|
2017-08-30 14:31:59 +08:00
|
|
|
return tcf_idr_search(tn, a, index);
|
2016-02-23 07:57:53 +08:00
|
|
|
}
|
|
|
|
|
2015-03-20 22:11:12 +08:00
|
|
|
static struct tc_action_ops act_bpf_ops __read_mostly = {
|
|
|
|
.kind = "bpf",
|
2019-02-10 20:25:00 +08:00
|
|
|
.id = TCA_ID_BPF,
|
2015-03-20 22:11:12 +08:00
|
|
|
.owner = THIS_MODULE,
|
2018-08-12 21:34:50 +08:00
|
|
|
.act = tcf_bpf_act,
|
2015-03-20 22:11:12 +08:00
|
|
|
.dump = tcf_bpf_dump,
|
|
|
|
.cleanup = tcf_bpf_cleanup,
|
|
|
|
.init = tcf_bpf_init,
|
2016-02-23 07:57:53 +08:00
|
|
|
.walk = tcf_bpf_walker,
|
|
|
|
.lookup = tcf_bpf_search,
|
2016-07-26 07:09:41 +08:00
|
|
|
.size = sizeof(struct tcf_bpf),
|
2016-02-23 07:57:53 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static __net_init int bpf_init_net(struct net *net)
|
|
|
|
{
|
|
|
|
struct tc_action_net *tn = net_generic(net, bpf_net_id);
|
|
|
|
|
2019-08-26 01:01:32 +08:00
|
|
|
return tc_action_net_init(net, tn, &act_bpf_ops);
|
2016-02-23 07:57:53 +08:00
|
|
|
}
|
|
|
|
|
2017-12-12 07:35:03 +08:00
|
|
|
static void __net_exit bpf_exit_net(struct list_head *net_list)
|
2016-02-23 07:57:53 +08:00
|
|
|
{
|
2017-12-12 07:35:03 +08:00
|
|
|
tc_action_net_exit(net_list, bpf_net_id);
|
2016-02-23 07:57:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct pernet_operations bpf_net_ops = {
|
|
|
|
.init = bpf_init_net,
|
2017-12-12 07:35:03 +08:00
|
|
|
.exit_batch = bpf_exit_net,
|
2016-02-23 07:57:53 +08:00
|
|
|
.id = &bpf_net_id,
|
|
|
|
.size = sizeof(struct tc_action_net),
|
2015-01-15 16:52:39 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static int __init bpf_init_module(void)
|
|
|
|
{
|
2016-02-23 07:57:53 +08:00
|
|
|
return tcf_register_action(&act_bpf_ops, &bpf_net_ops);
|
2015-01-15 16:52:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void __exit bpf_cleanup_module(void)
|
|
|
|
{
|
2016-02-23 07:57:53 +08:00
|
|
|
tcf_unregister_action(&act_bpf_ops, &bpf_net_ops);
|
2015-01-15 16:52:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
module_init(bpf_init_module);
|
|
|
|
module_exit(bpf_cleanup_module);
|
|
|
|
|
|
|
|
MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
|
|
|
|
MODULE_DESCRIPTION("TC BPF based action");
|
|
|
|
MODULE_LICENSE("GPL v2");
|