2009-07-13 10:33:21 +08:00
|
|
|
#undef TRACE_SYSTEM
|
|
|
|
#define TRACE_SYSTEM sched
|
|
|
|
|
2009-04-10 20:54:16 +08:00
|
|
|
#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
|
tracing, sched: LTTng instrumentation - scheduler
Instrument the scheduler activity (sched_switch, migration, wakeups,
wait for a task, signal delivery) and process/thread
creation/destruction (fork, exit, kthread stop). Note that kthread
creation is not instrumented in this patch because it is architecture
dependent. This allows connecting tracers such as ftrace, which detect
scheduling latencies and good/bad scheduler decisions. Tools like LTTng can
export this scheduler information along with instrumentation of the rest
of the kernel activity to perform post-mortem analysis on the scheduler
activity.
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in core
scheduler code) was added. See the "Tracepoints" patch header for
performance result detail.
Changelog :
- Change instrumentation location and parameter to match ftrace
instrumentation, previously done with kernel markers.
[ mingo@elte.hu: conflict resolutions ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-19 00:16:17 +08:00
|
|
|
#define _TRACE_SCHED_H
|
|
|
|
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/tracepoint.h>
|
|
|
|
|
2009-04-10 20:54:16 +08:00
|
|
|
/*
|
|
|
|
* Tracepoint for calling kthread_stop, performed to end a kthread:
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_kthread_stop,
|
|
|
|
|
|
|
|
TP_PROTO(struct task_struct *t),
|
|
|
|
|
|
|
|
TP_ARGS(t),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = t->pid;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task %s:%d", __entry->comm, __entry->pid)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for the return value of the kthread stopping:
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_kthread_stop_ret,
|
|
|
|
|
|
|
|
TP_PROTO(int ret),
|
|
|
|
|
|
|
|
TP_ARGS(ret),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__field( int, ret )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
__entry->ret = ret;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("ret %d", __entry->ret)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for waiting on task to unschedule:
|
|
|
|
*
|
|
|
|
* (NOTE: the 'rq' argument is not used by generic trace events,
|
|
|
|
* but used by the latency tracer plugin. )
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_wait_task,
|
|
|
|
|
|
|
|
TP_PROTO(struct rq *rq, struct task_struct *p),
|
|
|
|
|
|
|
|
TP_ARGS(rq, p),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( int, prio )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = p->pid;
|
|
|
|
__entry->prio = p->prio;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task %s:%d [%d]",
|
|
|
|
__entry->comm, __entry->pid, __entry->prio)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for waking up a task:
|
|
|
|
*
|
|
|
|
* (NOTE: the 'rq' argument is not used by generic trace events,
|
|
|
|
* but used by the latency tracer plugin. )
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_wakeup,
|
|
|
|
|
|
|
|
TP_PROTO(struct rq *rq, struct task_struct *p, int success),
|
|
|
|
|
|
|
|
TP_ARGS(rq, p, success),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( int, prio )
|
|
|
|
__field( int, success )
|
2009-08-07 02:59:32 +08:00
|
|
|
__field( int, cpu )
|
2009-04-10 20:54:16 +08:00
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = p->pid;
|
|
|
|
__entry->prio = p->prio;
|
|
|
|
__entry->success = success;
|
2009-08-07 02:59:32 +08:00
|
|
|
__entry->cpu = task_cpu(p);
|
2009-04-10 20:54:16 +08:00
|
|
|
),
|
|
|
|
|
2009-08-07 02:59:32 +08:00
|
|
|
TP_printk("task %s:%d [%d] success=%d [%03d]",
|
2009-04-10 20:54:16 +08:00
|
|
|
__entry->comm, __entry->pid, __entry->prio,
|
2009-08-07 02:59:32 +08:00
|
|
|
__entry->success, __entry->cpu)
|
2009-04-10 20:54:16 +08:00
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for waking up a new task:
|
|
|
|
*
|
|
|
|
* (NOTE: the 'rq' argument is not used by generic trace events,
|
|
|
|
* but used by the latency tracer plugin. )
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_wakeup_new,
|
|
|
|
|
|
|
|
TP_PROTO(struct rq *rq, struct task_struct *p, int success),
|
|
|
|
|
|
|
|
TP_ARGS(rq, p, success),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( int, prio )
|
|
|
|
__field( int, success )
|
2009-08-07 02:59:32 +08:00
|
|
|
__field( int, cpu )
|
2009-04-10 20:54:16 +08:00
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = p->pid;
|
|
|
|
__entry->prio = p->prio;
|
|
|
|
__entry->success = success;
|
2009-08-07 02:59:32 +08:00
|
|
|
__entry->cpu = task_cpu(p);
|
2009-04-10 20:54:16 +08:00
|
|
|
),
|
|
|
|
|
2009-08-07 02:59:32 +08:00
|
|
|
TP_printk("task %s:%d [%d] success=%d [%03d]",
|
2009-04-10 20:54:16 +08:00
|
|
|
__entry->comm, __entry->pid, __entry->prio,
|
2009-08-07 02:59:32 +08:00
|
|
|
__entry->success, __entry->cpu)
|
2009-04-10 20:54:16 +08:00
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for task switches, performed by the scheduler:
|
|
|
|
*
|
|
|
|
* (NOTE: the 'rq' argument is not used by generic trace events,
|
|
|
|
* but used by the latency tracer plugin. )
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_switch,
|
|
|
|
|
|
|
|
TP_PROTO(struct rq *rq, struct task_struct *prev,
|
|
|
|
struct task_struct *next),
|
|
|
|
|
|
|
|
TP_ARGS(rq, prev, next),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, prev_comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, prev_pid )
|
|
|
|
__field( int, prev_prio )
|
2009-05-15 22:51:13 +08:00
|
|
|
__field( long, prev_state )
|
2009-04-10 20:54:16 +08:00
|
|
|
__array( char, next_comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, next_pid )
|
|
|
|
__field( int, next_prio )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
|
|
|
|
__entry->prev_pid = prev->pid;
|
|
|
|
__entry->prev_prio = prev->prio;
|
2009-05-15 22:51:13 +08:00
|
|
|
__entry->prev_state = prev->state;
|
2009-04-10 20:54:16 +08:00
|
|
|
memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
|
|
|
|
__entry->next_pid = next->pid;
|
|
|
|
__entry->next_prio = next->prio;
|
|
|
|
),
|
|
|
|
|
2009-05-15 22:51:13 +08:00
|
|
|
TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
|
2009-04-10 20:54:16 +08:00
|
|
|
__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
|
2009-05-15 22:51:13 +08:00
|
|
|
__entry->prev_state ?
|
|
|
|
__print_flags(__entry->prev_state, "|",
|
|
|
|
{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
|
|
|
|
{ 16, "Z" }, { 32, "X" }, { 64, "x" },
|
|
|
|
{ 128, "W" }) : "R",
|
2009-04-10 20:54:16 +08:00
|
|
|
__entry->next_comm, __entry->next_pid, __entry->next_prio)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for a task being migrated:
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_migrate_task,
|
|
|
|
|
2009-05-05 16:49:59 +08:00
|
|
|
TP_PROTO(struct task_struct *p, int dest_cpu),
|
2009-04-10 20:54:16 +08:00
|
|
|
|
2009-05-05 16:49:59 +08:00
|
|
|
TP_ARGS(p, dest_cpu),
|
2009-04-10 20:54:16 +08:00
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( int, prio )
|
|
|
|
__field( int, orig_cpu )
|
|
|
|
__field( int, dest_cpu )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = p->pid;
|
|
|
|
__entry->prio = p->prio;
|
2009-05-05 16:49:59 +08:00
|
|
|
__entry->orig_cpu = task_cpu(p);
|
2009-04-10 20:54:16 +08:00
|
|
|
__entry->dest_cpu = dest_cpu;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task %s:%d [%d] from: %d to: %d",
|
|
|
|
__entry->comm, __entry->pid, __entry->prio,
|
|
|
|
__entry->orig_cpu, __entry->dest_cpu)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for freeing a task:
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_process_free,
|
|
|
|
|
|
|
|
TP_PROTO(struct task_struct *p),
|
|
|
|
|
|
|
|
TP_ARGS(p),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( int, prio )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = p->pid;
|
|
|
|
__entry->prio = p->prio;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task %s:%d [%d]",
|
|
|
|
__entry->comm, __entry->pid, __entry->prio)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for a task exiting:
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_process_exit,
|
|
|
|
|
|
|
|
TP_PROTO(struct task_struct *p),
|
|
|
|
|
|
|
|
TP_ARGS(p),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( int, prio )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = p->pid;
|
|
|
|
__entry->prio = p->prio;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task %s:%d [%d]",
|
|
|
|
__entry->comm, __entry->pid, __entry->prio)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for a waiting task:
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_process_wait,
|
|
|
|
|
|
|
|
TP_PROTO(struct pid *pid),
|
|
|
|
|
|
|
|
TP_ARGS(pid),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( int, prio )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = pid_nr(pid);
|
|
|
|
__entry->prio = current->prio;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task %s:%d [%d]",
|
|
|
|
__entry->comm, __entry->pid, __entry->prio)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for do_fork:
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_process_fork,
|
|
|
|
|
|
|
|
TP_PROTO(struct task_struct *parent, struct task_struct *child),
|
|
|
|
|
|
|
|
TP_ARGS(parent, child),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, parent_comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, parent_pid )
|
|
|
|
__array( char, child_comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, child_pid )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
|
|
|
|
__entry->parent_pid = parent->pid;
|
|
|
|
memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
|
|
|
|
__entry->child_pid = child->pid;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("parent %s:%d child %s:%d",
|
|
|
|
__entry->parent_comm, __entry->parent_pid,
|
|
|
|
__entry->child_comm, __entry->child_pid)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for sending a signal:
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_signal_send,
|
|
|
|
|
|
|
|
TP_PROTO(int sig, struct task_struct *p),
|
|
|
|
|
|
|
|
TP_ARGS(sig, p),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__field( int, sig )
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = p->pid;
|
|
|
|
__entry->sig = sig;
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("sig: %d task %s:%d",
|
|
|
|
__entry->sig, __entry->comm, __entry->pid)
|
|
|
|
);
|
|
|
|
|
2009-07-24 02:13:26 +08:00
|
|
|
/*
 * XXX: the sched_stat tracepoints below only apply to SCHED_OTHER/BATCH/IDLE;
 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for accounting wait time (time the task is runnable
|
|
|
|
* but not actually running due to scheduler contention).
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_stat_wait,
|
|
|
|
|
|
|
|
TP_PROTO(struct task_struct *tsk, u64 delay),
|
|
|
|
|
|
|
|
TP_ARGS(tsk, delay),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( u64, delay )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = tsk->pid;
|
|
|
|
__entry->delay = delay;
|
|
|
|
)
|
|
|
|
TP_perf_assign(
|
|
|
|
__perf_count(delay);
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task: %s:%d wait: %Lu [ns]",
|
|
|
|
__entry->comm, __entry->pid,
|
|
|
|
(unsigned long long)__entry->delay)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for accounting sleep time (time the task is not runnable,
|
|
|
|
* including iowait, see below).
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_stat_sleep,
|
|
|
|
|
|
|
|
TP_PROTO(struct task_struct *tsk, u64 delay),
|
|
|
|
|
|
|
|
TP_ARGS(tsk, delay),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( u64, delay )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = tsk->pid;
|
|
|
|
__entry->delay = delay;
|
|
|
|
)
|
|
|
|
TP_perf_assign(
|
|
|
|
__perf_count(delay);
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task: %s:%d sleep: %Lu [ns]",
|
|
|
|
__entry->comm, __entry->pid,
|
|
|
|
(unsigned long long)__entry->delay)
|
|
|
|
);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracepoint for accounting iowait time (time the task is not runnable
|
|
|
|
* due to waiting on IO to complete).
|
|
|
|
*/
|
|
|
|
TRACE_EVENT(sched_stat_iowait,
|
|
|
|
|
|
|
|
TP_PROTO(struct task_struct *tsk, u64 delay),
|
|
|
|
|
|
|
|
TP_ARGS(tsk, delay),
|
|
|
|
|
|
|
|
TP_STRUCT__entry(
|
|
|
|
__array( char, comm, TASK_COMM_LEN )
|
|
|
|
__field( pid_t, pid )
|
|
|
|
__field( u64, delay )
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_fast_assign(
|
|
|
|
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
|
|
|
|
__entry->pid = tsk->pid;
|
|
|
|
__entry->delay = delay;
|
|
|
|
)
|
|
|
|
TP_perf_assign(
|
|
|
|
__perf_count(delay);
|
|
|
|
),
|
|
|
|
|
|
|
|
TP_printk("task: %s:%d iowait: %Lu [ns]",
|
|
|
|
__entry->comm, __entry->pid,
|
|
|
|
(unsigned long long)__entry->delay)
|
|
|
|
);
|
|
|
|
|
2009-04-10 20:54:16 +08:00
|
|
|
#endif /* _TRACE_SCHED_H */
|
2009-04-10 21:36:00 +08:00
|
|
|
|
|
|
|
/* This part must be outside protection */
|
|
|
|
#include <trace/define_trace.h>
|