While rewriting the function graph tracer, I discovered a design flaw that

was introduced by a patch that tried to fix one bug, but by doing so created
 another bug. As both bugs corrupt the output (but they do not crash the
 kernel), I decided to fix the design such that it could have both bugs
 fixed. The original fix, fixed time reporting of the function graph tracer
 when doing a max_depth of one. This was code that can test how much the
 kernel interferes with userspace. But in doing so, it could corrupt the time
 keeping of the function profiler.
 
 The issue is that the curr_ret_stack variable was being used for two
 different meanings. One was to keep track of the stack pointer on the
 ret_stack (shadow stack used by the function graph tracer), and the other
 use case was the graph call depth.  Although, the two may be closely
 related, where they got updated was the issue that lead to the two different
 bugs that required the two use cases to be updated differently.
 
 The big issue with this fix is that it requires changing each architecture.
 The good news is, I was able to remove a lot of code that was duplicated
 within the architectures and place it into a single location. Then I could
 make the fix in one place.
 
 I pushed this code into linux-next to let it settle over a week, and before
 doing so, I cross compiled all the affected architectures to make sure that
 they built fine.
 
 In the mean time, I also pulled in a patch that fixes the sched_switch
 previous tasks state output, that was not actually correct.
 -----BEGIN PGP SIGNATURE-----
 
 iIoEABYIADIWIQRRSw7ePDh/lE+zeZMp5XQQmuv6qgUCW/4NPhQccm9zdGVkdEBn
 b29kbWlzLm9yZwAKCRAp5XQQmuv6qnWAAQCyUIRLgYImr81eTl52lxNRsULk+aiI
 U29kRFWWU0c40AEA1X9sDF0MgOItbRGfZtnHTZEousXRDaDf4Fge2kF7Egg=
 =liQ0
 -----END PGP SIGNATURE-----

Merge tag 'trace-v4.20-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull tracing fixes from Steven Rostedt:
 "While rewriting the function graph tracer, I discovered a design flaw
  that was introduced by a patch that tried to fix one bug, but by doing
  so created another bug.

  As both bugs corrupt the output (but they do not crash the kernel), I
  decided to fix the design such that it could have both bugs fixed. The
  original fix, fixed time reporting of the function graph tracer when
  doing a max_depth of one. This was code that can test how much the
  kernel interferes with userspace. But in doing so, it could corrupt
  the time keeping of the function profiler.

  The issue is that the curr_ret_stack variable was being used for two
  different meanings. One was to keep track of the stack pointer on the
  ret_stack (shadow stack used by the function graph tracer), and the
  other use case was the graph call depth. Although, the two may be
  closely related, where they got updated was the issue that lead to the
  two different bugs that required the two use cases to be updated
  differently.

  The big issue with this fix is that it requires changing each
  architecture. The good news is, I was able to remove a lot of code
  that was duplicated within the architectures and place it into a
  single location. Then I could make the fix in one place.

  I pushed this code into linux-next to let it settle over a week, and
  before doing so, I cross compiled all the affected architectures to
  make sure that they built fine.

  In the mean time, I also pulled in a patch that fixes the sched_switch
  previous tasks state output, that was not actually correct"

* tag 'trace-v4.20-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  sched, trace: Fix prev_state output in sched_switch tracepoint
  function_graph: Have profiler use curr_ret_stack and not depth
  function_graph: Reverse the order of pushing the ret_stack and the callback
  function_graph: Move return callback before update of curr_ret_stack
  function_graph: Use new curr_ret_depth to manage depth instead of curr_ret_stack
  function_graph: Make ftrace_push_return_trace() static
  sparc/function_graph: Simplify with function_graph_enter()
  sh/function_graph: Simplify with function_graph_enter()
  s390/function_graph: Simplify with function_graph_enter()
  riscv/function_graph: Simplify with function_graph_enter()
  powerpc/function_graph: Simplify with function_graph_enter()
  parisc: function_graph: Simplify with function_graph_enter()
  nds32: function_graph: Simplify with function_graph_enter()
  MIPS: function_graph: Simplify with function_graph_enter()
  microblaze: function_graph: Simplify with function_graph_enter()
  arm64: function_graph: Simplify with function_graph_enter()
  ARM: function_graph: Simplify with function_graph_enter()
  x86/function_graph: Simplify with function_graph_enter()
  function_graph: Create function_graph_enter() to consolidate architecture code
This commit is contained in:
Linus Torvalds 2018-11-30 09:32:34 -08:00
commit 0f1f692375
17 changed files with 78 additions and 175 deletions

View File

@ -183,9 +183,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long) &return_to_handler;
struct ftrace_graph_ent trace;
unsigned long old;
int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
@ -193,21 +191,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
old = *parent;
*parent = return_hooker;
trace.func = self_addr;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
if (function_graph_enter(old, self_addr, frame_pointer, NULL))
*parent = old;
return;
}
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
frame_pointer, NULL);
if (err == -EBUSY) {
*parent = old;
return;
}
}
#ifdef CONFIG_DYNAMIC_FTRACE

View File

@ -216,8 +216,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
unsigned long old;
struct ftrace_graph_ent trace;
int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
@ -229,18 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*/
old = *parent;
trace.func = self_addr;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace))
return;
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
frame_pointer, NULL);
if (err == -EBUSY)
return;
else
if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
*parent = return_hooker;
}

View File

@ -22,8 +22,7 @@
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
unsigned long old;
int faulted, err;
struct ftrace_graph_ent trace;
int faulted;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
@ -63,18 +62,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
if (err == -EBUSY) {
if (function_graph_enter(old, self_addr, 0, NULL))
*parent = old;
return;
}
trace.func = self_addr;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
current->curr_ret_stack--;
*parent = old;
}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

View File

@ -322,7 +322,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
unsigned long fp)
{
unsigned long old_parent_ra;
struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
int faulted, insns;
@ -369,12 +368,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
if (unlikely(faulted))
goto out;
if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
NULL) == -EBUSY) {
*parent_ra_addr = old_parent_ra;
return;
}
/*
* Get the recorded ip of the current mcount calling site in the
* __mcount_loc section, which will be used to filter the function
@ -382,13 +375,10 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
*/
insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
self_ra -= (MCOUNT_INSN_SIZE * insns);
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
current->curr_ret_stack--;
if (function_graph_enter(old_parent_ra, self_ra, fp, NULL))
*parent_ra_addr = old_parent_ra;
}
return;
out:
ftrace_graph_stop();

View File

@ -211,28 +211,14 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
struct ftrace_graph_ent trace;
unsigned long old;
int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
old = *parent;
trace.func = self_addr;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace))
return;
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
frame_pointer, NULL);
if (err == -EBUSY)
return;
if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
*parent = return_hooker;
}

View File

@ -30,7 +30,6 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
unsigned long self_addr)
{
unsigned long old;
struct ftrace_graph_ent trace;
extern int parisc_return_to_handler;
if (unlikely(ftrace_graph_is_dead()))
@ -41,17 +40,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
old = *parent;
trace.func = self_addr;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace))
return;
if (ftrace_push_return_trace(old, self_addr, &trace.depth,
0, NULL) == -EBUSY)
return;
if (!function_graph_enter(old, self_addr, 0, NULL))
/* activate parisc_return_to_handler() as return point */
*parent = (unsigned long) &parisc_return_to_handler;
}

View File

@ -950,7 +950,6 @@ int ftrace_disable_ftrace_graph_caller(void)
*/
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
{
struct ftrace_graph_ent trace;
unsigned long return_hooker;
if (unlikely(ftrace_graph_is_dead()))
@ -961,17 +960,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
return_hooker = ppc_function_entry(return_to_handler);
trace.func = ip;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace))
goto out;
if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
NULL) == -EBUSY)
goto out;
if (!function_graph_enter(parent, ip, 0, NULL))
parent = return_hooker;
out:
return parent;

View File

@ -132,7 +132,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
unsigned long old;
struct ftrace_graph_ent trace;
int err;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
@ -144,16 +143,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*/
old = *parent;
trace.func = self_addr;
trace.depth = current->curr_ret_stack + 1;
if (!ftrace_graph_entry(&trace))
return;
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
frame_pointer, parent);
if (err == -EBUSY)
return;
if (function_graph_enter(old, self_addr, frame_pointer, parent))
*parent = return_hooker;
}

View File

@ -203,21 +203,12 @@ device_initcall(ftrace_plt_init);
*/
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
{
struct ftrace_graph_ent trace;
if (unlikely(ftrace_graph_is_dead()))
goto out;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
ip -= MCOUNT_INSN_SIZE;
trace.func = ip;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to. */
if (!ftrace_graph_entry(&trace))
goto out;
if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
NULL) == -EBUSY)
goto out;
if (!function_graph_enter(parent, ip, 0, NULL))
parent = (unsigned long) return_to_handler;
out:
return parent;

View File

@ -321,8 +321,7 @@ int ftrace_disable_ftrace_graph_caller(void)
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
unsigned long old;
int faulted, err;
struct ftrace_graph_ent trace;
int faulted;
unsigned long return_hooker = (unsigned long)&return_to_handler;
if (unlikely(ftrace_graph_is_dead()))
@ -365,18 +364,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
if (err == -EBUSY) {
if (function_graph_enter(old, self_addr, 0, NULL))
__raw_writel(old, parent);
return;
}
trace.func = self_addr;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
current->curr_ret_stack--;
__raw_writel(old, parent);
}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

View File

@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long) &return_to_handler;
struct ftrace_graph_ent trace;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return parent + 8UL;
trace.func = self_addr;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace))
return parent + 8UL;
if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
frame_pointer, NULL) == -EBUSY)
if (function_graph_enter(parent, self_addr, frame_pointer, NULL))
return parent + 8UL;
return return_hooker;

View File

@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
{
unsigned long old;
int faulted;
struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
return;
}
trace.func = self_addr;
trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
if (function_graph_enter(old, self_addr, frame_pointer, parent))
*parent = old;
return;
}
if (ftrace_push_return_trace(old, self_addr, &trace.depth,
frame_pointer, parent) == -EBUSY) {
*parent = old;
return;
}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

View File

@ -777,7 +777,7 @@ struct ftrace_ret_stack {
extern void return_to_handler(void);
extern int
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
function_graph_enter(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp);
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,

View File

@ -1116,6 +1116,7 @@ struct task_struct {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Index of current stored address in ret_stack: */
int curr_ret_stack;
int curr_ret_depth;
/* Stack of return addresses for return function tracing: */
struct ftrace_ret_stack *ret_stack;

View File

@ -107,6 +107,8 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
unsigned int state;
#ifdef CONFIG_SCHED_DEBUG
BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
@ -118,7 +120,15 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
if (preempt)
return TASK_REPORT_MAX;
return 1 << task_state_index(p);
/*
* task_state_index() uses fls() and returns a value from 0-8 range.
* Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
* it for left shift operation to get the correct task->state
* mapping.
*/
state = task_state_index(p);
return state ? (1 << (state - 1)) : state;
}
#endif /* CREATE_TRACE_POINTS */

View File

@ -817,7 +817,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
int index = trace->depth;
int index = current->curr_ret_stack;
function_profile_call(trace->func, 0, NULL, NULL);
@ -852,7 +852,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
if (!fgraph_graph_time) {
int index;
index = trace->depth;
index = current->curr_ret_stack;
/* Append this call time to the parent time to subtract */
if (index)
@ -6814,6 +6814,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->curr_ret_stack = -1;
t->curr_ret_depth = -1;
/* Make sure the tasks see the -1 first: */
smp_wmb();
t->ret_stack = ret_stack_list[start++];
@ -7038,6 +7039,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
{
t->curr_ret_stack = -1;
t->curr_ret_depth = -1;
/*
* The idle task has no parent, it either has its own
* stack or no stack at all.
@ -7068,6 +7070,7 @@ void ftrace_graph_init_task(struct task_struct *t)
/* Make sure we do not use the parent ret_stack */
t->ret_stack = NULL;
t->curr_ret_stack = -1;
t->curr_ret_depth = -1;
if (ftrace_graph_active) {
struct ftrace_ret_stack *ret_stack;

View File

@ -118,8 +118,8 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
struct trace_seq *s, u32 flags);
/* Add a function return address to the trace stack on thread info.*/
int
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp)
{
unsigned long long calltime;
@ -177,9 +177,31 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
current->ret_stack[index].retp = retp;
#endif
*depth = current->curr_ret_stack;
return 0;
}
int function_graph_enter(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp)
{
struct ftrace_graph_ent trace;
trace.func = func;
trace.depth = ++current->curr_ret_depth;
if (ftrace_push_return_trace(ret, func,
frame_pointer, retp))
goto out;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace))
goto out_ret;
return 0;
out_ret:
current->curr_ret_stack--;
out:
current->curr_ret_depth--;
return -EBUSY;
}
/* Retrieve a function return address to the trace stack on thread info.*/
@ -241,7 +263,13 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
trace->func = current->ret_stack[index].func;
trace->calltime = current->ret_stack[index].calltime;
trace->overrun = atomic_read(&current->trace_overrun);
trace->depth = index;
trace->depth = current->curr_ret_depth--;
/*
* We still want to trace interrupts coming in if
* max_depth is set to 1. Make sure the decrement is
* seen before ftrace_graph_return.
*/
barrier();
}
/*
@ -255,6 +283,12 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
ftrace_pop_return_trace(&trace, &ret, frame_pointer);
trace.rettime = trace_clock_local();
ftrace_graph_return(&trace);
/*
* The ftrace_graph_return() may still access the current
* ret_stack structure, we need to make sure the update of
* curr_ret_stack is after that.
*/
barrier();
current->curr_ret_stack--;
/*
@ -267,13 +301,6 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
return ret;
}
/*
* The trace should run after decrementing the ret counter
* in case an interrupt were to come in. We don't want to
* lose the interrupt if max_depth is set.
*/
ftrace_graph_return(&trace);
if (unlikely(!ret)) {
ftrace_graph_stop();
WARN_ON(1);