tracing: Add a way to filter function addresses to function names

There have been several times where an event recorded a function address in
its field and I needed to filter on that address for a specific function
name. It required looking up the function in kallsyms, finding its size,
and doing a compare of "field >= function_start && field < function_end".

But these addresses change from boot to boot, which makes this unreliable in scripts.
Also, it is useful to have this at boot up, where the addresses will not
be known. For example, on the boot command line:

  trace_trigger="initcall_finish.traceoff if func.function == acpi_init"

To implement this, add a ".function" postfix to the field name, that will
check that the field is of size long, and the only operations allowed
(so far) are "==" and "!=".

Link: https://lkml.kernel.org/r/20221219183213.916833763@goodmis.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Cc: Zheng Yejian <zhengyejian1@huawei.com>
Reviewed-by: Ross Zwisler <zwisler@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
This commit is contained in:
Steven Rostedt (Google) 2022-12-19 13:31:07 -05:00
parent ae3edea88e
commit e6745a4da9
3 changed files with 105 additions and 2 deletions

View File

@ -207,6 +207,18 @@ field name::
As the kernel will have to know how to retrieve the memory that the pointer As the kernel will have to know how to retrieve the memory that the pointer
is at from user space. is at from user space.
You can convert any long type to a function address and search by function name::
call_site.function == security_prepare_creds
The above will filter when the field "call_site" falls on an address within
"security_prepare_creds". That is, it will compare the value of "call_site" and
the filter will return true if it is greater than or equal to the start of
the function "security_prepare_creds" and less than the end of that function.
The ".function" postfix can only be attached to values of size long, and can only
be compared with "==" or "!=".
5.2 Setting filters 5.2 Setting filters
------------------- -------------------

View File

@ -2822,7 +2822,7 @@ static __init int setup_trace_triggers(char *str)
if (!trigger) if (!trigger)
break; break;
bootup_triggers[i].event = strsep(&trigger, "."); bootup_triggers[i].event = strsep(&trigger, ".");
bootup_triggers[i].trigger = strsep(&trigger, "."); bootup_triggers[i].trigger = trigger;
if (!bootup_triggers[i].trigger) if (!bootup_triggers[i].trigger)
break; break;
} }

View File

@ -64,6 +64,7 @@ enum filter_pred_fn {
FILTER_PRED_FN_PCHAR_USER, FILTER_PRED_FN_PCHAR_USER,
FILTER_PRED_FN_PCHAR, FILTER_PRED_FN_PCHAR,
FILTER_PRED_FN_CPU, FILTER_PRED_FN_CPU,
FILTER_PRED_FN_FUNCTION,
FILTER_PRED_FN_, FILTER_PRED_FN_,
FILTER_PRED_TEST_VISITED, FILTER_PRED_TEST_VISITED,
}; };
@ -71,6 +72,7 @@ enum filter_pred_fn {
struct filter_pred { struct filter_pred {
enum filter_pred_fn fn_num; enum filter_pred_fn fn_num;
u64 val; u64 val;
u64 val2;
struct regex regex; struct regex regex;
unsigned short *ops; unsigned short *ops;
struct ftrace_event_field *field; struct ftrace_event_field *field;
@ -103,6 +105,7 @@ struct filter_pred {
C(INVALID_FILTER, "Meaningless filter expression"), \ C(INVALID_FILTER, "Meaningless filter expression"), \
C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \
C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \
C(NO_FUNCTION, "Function not found"), \
C(ERRNO, "Error"), \ C(ERRNO, "Error"), \
C(NO_FILTER, "No filter found") C(NO_FILTER, "No filter found")
@ -876,6 +879,17 @@ static int filter_pred_comm(struct filter_pred *pred, void *event)
return cmp ^ pred->not; return cmp ^ pred->not;
} }
/*
 * Filter predicate for functions.
 *
 * Reads an unsigned long from the event record at pred->offset and tests
 * whether it lies in the half-open range [pred->val, pred->val2).
 * For OP_EQ the result of that test is returned; for any other op the
 * negated result is returned.
 */
static int filter_pred_function(struct filter_pred *pred, void *event)
{
	unsigned long ip = *(unsigned long *)(event + pred->offset);
	unsigned long lo = (unsigned long)pred->val;
	unsigned long hi = (unsigned long)pred->val2;
	int in_range = (ip >= lo) && (ip < hi);

	if (pred->op == OP_EQ)
		return in_range;

	return !in_range;
}
/* /*
* regex_match_foo - Basic regex callbacks * regex_match_foo - Basic regex callbacks
* *
@ -1335,6 +1349,8 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
return filter_pred_pchar(pred, event); return filter_pred_pchar(pred, event);
case FILTER_PRED_FN_CPU: case FILTER_PRED_FN_CPU:
return filter_pred_cpu(pred, event); return filter_pred_cpu(pred, event);
case FILTER_PRED_FN_FUNCTION:
return filter_pred_function(pred, event);
case FILTER_PRED_TEST_VISITED: case FILTER_PRED_TEST_VISITED:
return test_pred_visited_fn(pred, event); return test_pred_visited_fn(pred, event);
default: default:
@ -1350,8 +1366,13 @@ static int parse_pred(const char *str, void *data,
struct trace_event_call *call = data; struct trace_event_call *call = data;
struct ftrace_event_field *field; struct ftrace_event_field *field;
struct filter_pred *pred = NULL; struct filter_pred *pred = NULL;
unsigned long offset;
unsigned long size;
unsigned long ip;
char num_buf[24]; /* Big enough to hold an address */ char num_buf[24]; /* Big enough to hold an address */
char *field_name; char *field_name;
char *name;
bool function = false;
bool ustring = false; bool ustring = false;
char q; char q;
u64 val; u64 val;
@ -1393,6 +1414,12 @@ static int parse_pred(const char *str, void *data,
i += len; i += len;
} }
/* See if the field is a kernel function name */
if ((len = str_has_prefix(str + i, ".function"))) {
function = true;
i += len;
}
while (isspace(str[i])) while (isspace(str[i]))
i++; i++;
@ -1423,7 +1450,71 @@ static int parse_pred(const char *str, void *data,
pred->offset = field->offset; pred->offset = field->offset;
pred->op = op; pred->op = op;
if (ftrace_event_is_function(call)) { if (function) {
/* The field must be the same size as long */
if (field->size != sizeof(long)) {
parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
goto err_free;
}
/* Function only works with '==' or '!=' and an unquoted string */
switch (op) {
case OP_NE:
case OP_EQ:
break;
default:
parse_error(pe, FILT_ERR_INVALID_OP, pos + i);
goto err_free;
}
if (isdigit(str[i])) {
/* We allow 0xDEADBEEF */
while (isalnum(str[i]))
i++;
len = i - s;
/* 0xfeedfacedeadbeef is 18 chars max */
if (len >= sizeof(num_buf)) {
parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
goto err_free;
}
strncpy(num_buf, str + s, len);
num_buf[len] = 0;
ret = kstrtoul(num_buf, 0, &ip);
if (ret) {
parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i);
goto err_free;
}
} else {
s = i;
for (; str[i] && !isspace(str[i]); i++)
;
len = i - s;
name = kmemdup_nul(str + s, len, GFP_KERNEL);
if (!name)
goto err_mem;
ip = kallsyms_lookup_name(name);
kfree(name);
if (!ip) {
parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i);
goto err_free;
}
}
/* Now find the function start and end address */
if (!kallsyms_lookup_size_offset(ip, &size, &offset)) {
parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i);
goto err_free;
}
pred->fn_num = FILTER_PRED_FN_FUNCTION;
pred->val = ip - offset;
pred->val2 = pred->val + size;
} else if (ftrace_event_is_function(call)) {
/* /*
* Perf does things different with function events. * Perf does things different with function events.
* It only allows an "ip" field, and expects a string. * It only allows an "ip" field, and expects a string.