ftrace: Optimize the function tracer list loop
There are lots of places that perform: op = rcu_dereference_raw(ftrace_control_list); while (op != &ftrace_list_end) { Add a helper macro to do this, and also optimize for a single entity. That is, gcc will optimize a loop for either no iterations or more than one iteration. But usually only a single callback is registered to the function tracer, thus the optimized case should be a single pass. To do this we now do: op = rcu_dereference_raw(list); do { [...] } while (likely(op = rcu_dereference_raw((op)->next)) && unlikely((op) != &ftrace_list_end)); An op is always registered (ftrace_list_end when no callbacks are registered), thus when a single callback is registered, the linked list looks like: top => callback => ftrace_list_end => NULL. The likely(op = op->next) still must be performed due to the race of removing the callback, where the first op assignment could equal ftrace_list_end. In that case, the op->next would be NULL. But this is unlikely (only happens in a race condition when removing the callback). But it is very likely that the next op would be ftrace_list_end, unless more than one callback has been registered. This tells gcc what the most common case is and gives the fast path the fewest branches. Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
This commit is contained in:
parent
9640388b63
commit
0a016409e4
|
@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
|
||||||
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
|
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Traverse the ftrace_global_list, invoking all entries. The reason that we
|
||||||
|
* can use rcu_dereference_raw() is that elements removed from this list
|
||||||
|
* are simply leaked, so there is no need to interact with a grace-period
|
||||||
|
* mechanism. The rcu_dereference_raw() calls are needed to handle
|
||||||
|
* concurrent insertions into the ftrace_global_list.
|
||||||
|
*
|
||||||
|
* Silly Alpha and silly pointer-speculation compiler optimizations!
|
||||||
|
*/
|
||||||
|
#define do_for_each_ftrace_op(op, list) \
|
||||||
|
op = rcu_dereference_raw(list); \
|
||||||
|
do
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Optimized for just a single item in the list (as that is the normal case).
|
||||||
|
*/
|
||||||
|
#define while_for_each_ftrace_op(op) \
|
||||||
|
while (likely(op = rcu_dereference_raw((op)->next)) && \
|
||||||
|
unlikely((op) != &ftrace_list_end))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ftrace_nr_registered_ops - return number of ops registered
|
* ftrace_nr_registered_ops - return number of ops registered
|
||||||
*
|
*
|
||||||
|
@ -132,15 +152,6 @@ int ftrace_nr_registered_ops(void)
|
||||||
return cnt;
|
return cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Traverse the ftrace_global_list, invoking all entries. The reason that we
|
|
||||||
* can use rcu_dereference_raw() is that elements removed from this list
|
|
||||||
* are simply leaked, so there is no need to interact with a grace-period
|
|
||||||
* mechanism. The rcu_dereference_raw() calls are needed to handle
|
|
||||||
* concurrent insertions into the ftrace_global_list.
|
|
||||||
*
|
|
||||||
* Silly Alpha and silly pointer-speculation compiler optimizations!
|
|
||||||
*/
|
|
||||||
static void
|
static void
|
||||||
ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
|
ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
|
||||||
struct ftrace_ops *op, struct pt_regs *regs)
|
struct ftrace_ops *op, struct pt_regs *regs)
|
||||||
|
@ -149,11 +160,9 @@ ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
trace_recursion_set(TRACE_GLOBAL_BIT);
|
trace_recursion_set(TRACE_GLOBAL_BIT);
|
||||||
op = rcu_dereference_raw(ftrace_global_list); /*see above*/
|
do_for_each_ftrace_op(op, ftrace_global_list) {
|
||||||
while (op != &ftrace_list_end) {
|
|
||||||
op->func(ip, parent_ip, op, regs);
|
op->func(ip, parent_ip, op, regs);
|
||||||
op = rcu_dereference_raw(op->next); /*see above*/
|
} while_for_each_ftrace_op(op);
|
||||||
};
|
|
||||||
trace_recursion_clear(TRACE_GLOBAL_BIT);
|
trace_recursion_clear(TRACE_GLOBAL_BIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4104,14 +4113,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
|
||||||
*/
|
*/
|
||||||
preempt_disable_notrace();
|
preempt_disable_notrace();
|
||||||
trace_recursion_set(TRACE_CONTROL_BIT);
|
trace_recursion_set(TRACE_CONTROL_BIT);
|
||||||
op = rcu_dereference_raw(ftrace_control_list);
|
do_for_each_ftrace_op(op, ftrace_control_list) {
|
||||||
while (op != &ftrace_list_end) {
|
|
||||||
if (!ftrace_function_local_disabled(op) &&
|
if (!ftrace_function_local_disabled(op) &&
|
||||||
ftrace_ops_test(op, ip))
|
ftrace_ops_test(op, ip))
|
||||||
op->func(ip, parent_ip, op, regs);
|
op->func(ip, parent_ip, op, regs);
|
||||||
|
} while_for_each_ftrace_op(op);
|
||||||
op = rcu_dereference_raw(op->next);
|
|
||||||
};
|
|
||||||
trace_recursion_clear(TRACE_CONTROL_BIT);
|
trace_recursion_clear(TRACE_CONTROL_BIT);
|
||||||
preempt_enable_notrace();
|
preempt_enable_notrace();
|
||||||
}
|
}
|
||||||
|
@ -4139,12 +4145,10 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
|
||||||
* they must be freed after a synchronize_sched().
|
* they must be freed after a synchronize_sched().
|
||||||
*/
|
*/
|
||||||
preempt_disable_notrace();
|
preempt_disable_notrace();
|
||||||
op = rcu_dereference_raw(ftrace_ops_list);
|
do_for_each_ftrace_op(op, ftrace_ops_list) {
|
||||||
while (op != &ftrace_list_end) {
|
|
||||||
if (ftrace_ops_test(op, ip))
|
if (ftrace_ops_test(op, ip))
|
||||||
op->func(ip, parent_ip, op, regs);
|
op->func(ip, parent_ip, op, regs);
|
||||||
op = rcu_dereference_raw(op->next);
|
} while_for_each_ftrace_op(op);
|
||||||
};
|
|
||||||
preempt_enable_notrace();
|
preempt_enable_notrace();
|
||||||
trace_recursion_clear(TRACE_INTERNAL_BIT);
|
trace_recursion_clear(TRACE_INTERNAL_BIT);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue