bpf: Restrict bpf_trace_printk()'s %s usage and add %pks, %pus specifier
Usage of plain %s conversion specifier in bpf_trace_printk() suffers from the very same issue as bpf_probe_read{,str}() helpers, that is, it is broken on archs with overlapping address ranges. While the helpers have been addressed through work in6ae08ae3de
("bpf: Add probe_read_{user, kernel} and probe_read_{user, kernel}_str helpers"), we need an option for bpf_trace_printk() as well to fix it. Similarly as with the helpers, force users to make an explicit choice by adding %pks and %pus specifier to bpf_trace_printk() which will then pick the corresponding strncpy_from_unsafe*() variant to perform the access under KERNEL_DS or USER_DS. The %pk* (kernel specifier) and %pu* (user specifier) can later also be extended for other objects aside strings that are probed and printed under tracing, and reused out of other facilities like bpf_seq_printf() or BTF based type printing. Existing behavior of %s for current users is still kept working for archs where it is not broken and therefore gated through CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE. For archs not having this property we fall-back to pick probing under KERNEL_DS as a sensible default. Fixes:8d3b7dce86
("bpf: add support for %s specifier to bpf_trace_printk()") Reported-by: Linus Torvalds <torvalds@linux-foundation.org> Reported-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Link: https://lore.kernel.org/bpf/20200515101118.6508-4-daniel@iogearbox.net
This commit is contained in:
parent
47cc0ed574
commit
b2a5212fb6
|
@ -112,6 +112,20 @@ used when printing stack backtraces. The specifier takes into
|
|||
consideration the effect of compiler optimisations which may occur
|
||||
when tail-calls are used and marked with the noreturn GCC attribute.
|
||||
|
||||
Probed Pointers from BPF / tracing
|
||||
----------------------------------
|
||||
|
||||
::
|
||||
|
||||
%pks kernel string
|
||||
%pus user string
|
||||
|
||||
The ``k`` and ``u`` specifiers are used for printing prior probed memory from
|
||||
either kernel memory (k) or user memory (u). The subsequent ``s`` specifier
|
||||
results in printing a string. For direct use in regular vsnprintf() the (k)
|
||||
and (u) annotation is ignored, however, when used out of BPF's bpf_trace_printk(),
|
||||
for example, it reads the memory it is pointing to without faulting.
|
||||
|
||||
Kernel Pointers
|
||||
---------------
|
||||
|
||||
|
|
|
@ -323,17 +323,15 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
|
|||
|
||||
/*
|
||||
* Only limited trace_printk() conversion specifiers allowed:
|
||||
* %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
|
||||
* %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pks %pus %s
|
||||
*/
|
||||
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
|
||||
u64, arg2, u64, arg3)
|
||||
{
|
||||
int i, mod[3] = {}, fmt_cnt = 0;
|
||||
char buf[64], fmt_ptype;
|
||||
void *unsafe_ptr = NULL;
|
||||
bool str_seen = false;
|
||||
int mod[3] = {};
|
||||
int fmt_cnt = 0;
|
||||
u64 unsafe_addr;
|
||||
char buf[64];
|
||||
int i;
|
||||
|
||||
/*
|
||||
* bpf_check()->check_func_arg()->check_stack_boundary()
|
||||
|
@ -359,40 +357,71 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
|
|||
if (fmt[i] == 'l') {
|
||||
mod[fmt_cnt]++;
|
||||
i++;
|
||||
} else if (fmt[i] == 'p' || fmt[i] == 's') {
|
||||
} else if (fmt[i] == 'p') {
|
||||
mod[fmt_cnt]++;
|
||||
if ((fmt[i + 1] == 'k' ||
|
||||
fmt[i + 1] == 'u') &&
|
||||
fmt[i + 2] == 's') {
|
||||
fmt_ptype = fmt[i + 1];
|
||||
i += 2;
|
||||
goto fmt_str;
|
||||
}
|
||||
|
||||
/* disallow any further format extensions */
|
||||
if (fmt[i + 1] != 0 &&
|
||||
!isspace(fmt[i + 1]) &&
|
||||
!ispunct(fmt[i + 1]))
|
||||
return -EINVAL;
|
||||
fmt_cnt++;
|
||||
if (fmt[i] == 's') {
|
||||
if (str_seen)
|
||||
/* allow only one '%s' per fmt string */
|
||||
return -EINVAL;
|
||||
str_seen = true;
|
||||
|
||||
switch (fmt_cnt) {
|
||||
case 1:
|
||||
unsafe_addr = arg1;
|
||||
arg1 = (long) buf;
|
||||
break;
|
||||
case 2:
|
||||
unsafe_addr = arg2;
|
||||
arg2 = (long) buf;
|
||||
break;
|
||||
case 3:
|
||||
unsafe_addr = arg3;
|
||||
arg3 = (long) buf;
|
||||
break;
|
||||
}
|
||||
buf[0] = 0;
|
||||
strncpy_from_unsafe(buf,
|
||||
(void *) (long) unsafe_addr,
|
||||
sizeof(buf));
|
||||
goto fmt_next;
|
||||
} else if (fmt[i] == 's') {
|
||||
mod[fmt_cnt]++;
|
||||
fmt_ptype = fmt[i];
|
||||
fmt_str:
|
||||
if (str_seen)
|
||||
/* allow only one '%s' per fmt string */
|
||||
return -EINVAL;
|
||||
str_seen = true;
|
||||
|
||||
if (fmt[i + 1] != 0 &&
|
||||
!isspace(fmt[i + 1]) &&
|
||||
!ispunct(fmt[i + 1]))
|
||||
return -EINVAL;
|
||||
|
||||
switch (fmt_cnt) {
|
||||
case 0:
|
||||
unsafe_ptr = (void *)(long)arg1;
|
||||
arg1 = (long)buf;
|
||||
break;
|
||||
case 1:
|
||||
unsafe_ptr = (void *)(long)arg2;
|
||||
arg2 = (long)buf;
|
||||
break;
|
||||
case 2:
|
||||
unsafe_ptr = (void *)(long)arg3;
|
||||
arg3 = (long)buf;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
|
||||
buf[0] = 0;
|
||||
switch (fmt_ptype) {
|
||||
case 's':
|
||||
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
|
||||
strncpy_from_unsafe(buf, unsafe_ptr,
|
||||
sizeof(buf));
|
||||
break;
|
||||
#endif
|
||||
case 'k':
|
||||
strncpy_from_unsafe_strict(buf, unsafe_ptr,
|
||||
sizeof(buf));
|
||||
break;
|
||||
case 'u':
|
||||
strncpy_from_unsafe_user(buf,
|
||||
(__force void __user *)unsafe_ptr,
|
||||
sizeof(buf));
|
||||
break;
|
||||
}
|
||||
goto fmt_next;
|
||||
}
|
||||
|
||||
if (fmt[i] == 'l') {
|
||||
|
@ -403,6 +432,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
|
|||
if (fmt[i] != 'i' && fmt[i] != 'd' &&
|
||||
fmt[i] != 'u' && fmt[i] != 'x')
|
||||
return -EINVAL;
|
||||
fmt_next:
|
||||
fmt_cnt++;
|
||||
}
|
||||
|
||||
|
|
|
@ -2168,6 +2168,10 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode,
|
|||
* f full name
|
||||
* P node name, including a possible unit address
|
||||
* - 'x' For printing the address. Equivalent to "%lx".
|
||||
* - '[ku]s' For a BPF/tracing related format specifier, e.g. used out of
|
||||
* bpf_trace_printk() where [ku] prefix specifies either kernel (k)
|
||||
* or user (u) memory to probe, and:
|
||||
* s a string, equivalent to "%s" on direct vsnprintf() use
|
||||
*
|
||||
* ** When making changes please also update:
|
||||
* Documentation/core-api/printk-formats.rst
|
||||
|
@ -2251,6 +2255,14 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
|
|||
if (!IS_ERR(ptr))
|
||||
break;
|
||||
return err_ptr(buf, end, ptr, spec);
|
||||
case 'u':
|
||||
case 'k':
|
||||
switch (fmt[1]) {
|
||||
case 's':
|
||||
return string(buf, end, ptr, spec);
|
||||
default:
|
||||
return error_string(buf, end, "(einval)", spec);
|
||||
}
|
||||
}
|
||||
|
||||
/* default is to _not_ leak addresses, hash before printing */
|
||||
|
|
Loading…
Reference in New Issue