2009-04-09 02:40:59 +08:00
|
|
|
#include <trace/syscall.h>
|
2009-08-25 05:43:14 +08:00
|
|
|
#include <trace/events/syscalls.h>
|
2009-03-07 12:52:59 +08:00
|
|
|
#include <linux/kernel.h>
|
2009-08-11 04:52:47 +08:00
|
|
|
#include <linux/ftrace.h>
|
2009-08-11 04:53:02 +08:00
|
|
|
#include <linux/perf_counter.h>
|
2009-03-07 12:52:59 +08:00
|
|
|
#include <asm/syscall.h>
|
|
|
|
|
|
|
|
#include "trace_output.h"
|
|
|
|
#include "trace.h"
|
|
|
|
|
2009-03-16 05:10:37 +08:00
|
|
|
/*
 * Taken by the reg/unreg helpers in this file around every update of the
 * refcounts and enabled-syscall bitmaps.
 */
static DEFINE_MUTEX(syscall_trace_lock);

/* One bit per syscall number; set while that syscall's event is enabled. */
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

/* Count of enabled enter/exit syscall events. */
static int sys_refcount_enter;
static int sys_refcount_exit;
|
2009-03-07 12:52:59 +08:00
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
enum print_line_t
|
|
|
|
print_syscall_enter(struct trace_iterator *iter, int flags)
|
|
|
|
{
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
struct trace_entry *ent = iter->ent;
|
|
|
|
struct syscall_trace_enter *trace;
|
|
|
|
struct syscall_metadata *entry;
|
|
|
|
int i, ret, syscall;
|
|
|
|
|
2009-08-11 04:52:53 +08:00
|
|
|
trace = (typeof(trace))ent;
|
2009-03-13 22:42:11 +08:00
|
|
|
syscall = trace->nr;
|
|
|
|
entry = syscall_nr_to_meta(syscall);
|
2009-08-11 04:52:53 +08:00
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
if (!entry)
|
|
|
|
goto end;
|
|
|
|
|
2009-08-11 04:52:53 +08:00
|
|
|
if (entry->enter_id != ent->type) {
|
|
|
|
WARN_ON_ONCE(1);
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
ret = trace_seq_printf(s, "%s(", entry->name);
|
|
|
|
if (!ret)
|
|
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
|
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
|
|
/* parameter types */
|
2009-08-17 16:55:18 +08:00
|
|
|
if (trace_flags & TRACE_ITER_VERBOSE) {
|
2009-03-13 22:42:11 +08:00
|
|
|
ret = trace_seq_printf(s, "%s ", entry->types[i]);
|
|
|
|
if (!ret)
|
|
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
}
|
|
|
|
/* parameter values */
|
2009-08-20 16:13:35 +08:00
|
|
|
ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
|
2009-03-13 22:42:11 +08:00
|
|
|
trace->args[i],
|
2009-08-20 16:13:35 +08:00
|
|
|
i == entry->nb_args - 1 ? "" : ", ");
|
2009-03-13 22:42:11 +08:00
|
|
|
if (!ret)
|
|
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
}
|
|
|
|
|
2009-08-20 16:13:35 +08:00
|
|
|
ret = trace_seq_putc(s, ')');
|
|
|
|
if (!ret)
|
|
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
end:
|
2009-08-20 16:13:35 +08:00
|
|
|
ret = trace_seq_putc(s, '\n');
|
|
|
|
if (!ret)
|
|
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
return TRACE_TYPE_HANDLED;
|
|
|
|
}
|
|
|
|
|
|
|
|
enum print_line_t
|
|
|
|
print_syscall_exit(struct trace_iterator *iter, int flags)
|
|
|
|
{
|
|
|
|
struct trace_seq *s = &iter->seq;
|
|
|
|
struct trace_entry *ent = iter->ent;
|
|
|
|
struct syscall_trace_exit *trace;
|
|
|
|
int syscall;
|
|
|
|
struct syscall_metadata *entry;
|
|
|
|
int ret;
|
|
|
|
|
2009-08-11 04:52:53 +08:00
|
|
|
trace = (typeof(trace))ent;
|
2009-03-13 22:42:11 +08:00
|
|
|
syscall = trace->nr;
|
|
|
|
entry = syscall_nr_to_meta(syscall);
|
2009-08-11 04:52:53 +08:00
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
if (!entry) {
|
|
|
|
trace_seq_printf(s, "\n");
|
|
|
|
return TRACE_TYPE_HANDLED;
|
|
|
|
}
|
|
|
|
|
2009-08-11 04:52:53 +08:00
|
|
|
if (entry->exit_id != ent->type) {
|
|
|
|
WARN_ON_ONCE(1);
|
|
|
|
return TRACE_TYPE_UNHANDLED;
|
|
|
|
}
|
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
|
|
|
|
trace->ret);
|
|
|
|
if (!ret)
|
|
|
|
return TRACE_TYPE_PARTIAL_LINE;
|
|
|
|
|
|
|
|
return TRACE_TYPE_HANDLED;
|
|
|
|
}
|
|
|
|
|
2009-08-19 15:52:25 +08:00
|
|
|
/*
 * Only referenced from the "sizes differ" arm of SYSCALL_FIELD().
 * NOTE(review): no definition is visible in this file; presumably a size
 * mismatch is meant to show up as a build/link failure - confirm.
 */
extern char *__bad_type_size(void);

/*
 * Expands to four comma-separated arguments describing field @name of the
 * local variable `trace`: the type as a string, the field name as a string,
 * its offset within typeof(trace), and its size.  The first argument falls
 * back to __bad_type_size() when sizeof(@type) disagrees with the size of
 * the actual member.
 *
 * The caller must have a variable named `trace` in scope.
 */
#define SYSCALL_FIELD(type, name)					\
	sizeof(type) == sizeof(trace.name) ?				\
		#type : __bad_type_size(),				\
	#name, offsetof(typeof(trace), name), sizeof(trace.name)
|
|
|
|
|
2009-08-19 15:53:05 +08:00
|
|
|
int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
|
2009-08-12 01:03:54 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int nr;
|
2009-08-19 15:52:25 +08:00
|
|
|
int ret;
|
2009-08-12 01:03:54 +08:00
|
|
|
struct syscall_metadata *entry;
|
2009-08-19 15:52:25 +08:00
|
|
|
struct syscall_trace_enter trace;
|
|
|
|
int offset = offsetof(struct syscall_trace_enter, args);
|
2009-08-12 01:03:54 +08:00
|
|
|
|
2009-08-19 15:52:25 +08:00
|
|
|
nr = syscall_name_to_nr(call->data);
|
2009-08-12 01:03:54 +08:00
|
|
|
entry = syscall_nr_to_meta(nr);
|
|
|
|
|
|
|
|
if (!entry)
|
2009-08-19 15:52:25 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
|
|
|
|
SYSCALL_FIELD(int, nr));
|
|
|
|
if (!ret)
|
|
|
|
return 0;
|
2009-08-12 01:03:54 +08:00
|
|
|
|
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
|
|
|
ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
|
|
|
|
entry->args[i]);
|
|
|
|
if (!ret)
|
|
|
|
return 0;
|
2009-08-19 15:52:25 +08:00
|
|
|
ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
|
2009-08-12 01:03:54 +08:00
|
|
|
sizeof(unsigned long));
|
|
|
|
if (!ret)
|
|
|
|
return 0;
|
|
|
|
offset += sizeof(unsigned long);
|
|
|
|
}
|
|
|
|
|
2009-08-20 16:13:35 +08:00
|
|
|
trace_seq_puts(s, "\nprint fmt: \"");
|
2009-08-12 01:03:54 +08:00
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
2009-08-19 15:52:25 +08:00
|
|
|
ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
|
2009-08-12 01:03:54 +08:00
|
|
|
sizeof(unsigned long),
|
2009-08-20 16:13:35 +08:00
|
|
|
i == entry->nb_args - 1 ? "" : ", ");
|
2009-08-12 01:03:54 +08:00
|
|
|
if (!ret)
|
|
|
|
return 0;
|
|
|
|
}
|
2009-08-20 16:13:35 +08:00
|
|
|
trace_seq_putc(s, '"');
|
2009-08-12 01:03:54 +08:00
|
|
|
|
|
|
|
for (i = 0; i < entry->nb_args; i++) {
|
2009-08-20 16:13:35 +08:00
|
|
|
ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
|
|
|
|
entry->args[i]);
|
2009-08-12 01:03:54 +08:00
|
|
|
if (!ret)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-08-20 16:13:35 +08:00
|
|
|
return trace_seq_putc(s, '\n');
|
2009-08-12 01:03:54 +08:00
|
|
|
}
|
|
|
|
|
2009-08-19 15:53:05 +08:00
|
|
|
int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct syscall_trace_exit trace;
|
|
|
|
|
|
|
|
ret = trace_seq_printf(s,
|
|
|
|
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
|
|
|
|
"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
|
|
|
|
SYSCALL_FIELD(int, nr),
|
|
|
|
SYSCALL_FIELD(unsigned long, ret));
|
|
|
|
if (!ret)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
|
|
|
|
}
|
|
|
|
|
2009-08-19 15:54:51 +08:00
|
|
|
int syscall_enter_define_fields(struct ftrace_event_call *call)
|
|
|
|
{
|
|
|
|
struct syscall_trace_enter trace;
|
|
|
|
struct syscall_metadata *meta;
|
|
|
|
int ret;
|
|
|
|
int nr;
|
|
|
|
int i;
|
|
|
|
int offset = offsetof(typeof(trace), args);
|
|
|
|
|
|
|
|
nr = syscall_name_to_nr(call->data);
|
|
|
|
meta = syscall_nr_to_meta(nr);
|
|
|
|
|
|
|
|
if (!meta)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ret = trace_define_common_fields(call);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
for (i = 0; i < meta->nb_args; i++) {
|
|
|
|
ret = trace_define_field(call, meta->types[i],
|
|
|
|
meta->args[i], offset,
|
2009-08-07 10:33:22 +08:00
|
|
|
sizeof(unsigned long), 0,
|
|
|
|
FILTER_OTHER);
|
2009-08-19 15:54:51 +08:00
|
|
|
offset += sizeof(unsigned long);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int syscall_exit_define_fields(struct ftrace_event_call *call)
|
|
|
|
{
|
|
|
|
struct syscall_trace_exit trace;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = trace_define_common_fields(call);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2009-08-07 10:33:22 +08:00
|
|
|
ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0,
|
|
|
|
FILTER_OTHER);
|
2009-08-19 15:54:51 +08:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2009-08-11 04:52:47 +08:00
|
|
|
void ftrace_syscall_enter(struct pt_regs *regs, long id)
|
2009-03-07 12:52:59 +08:00
|
|
|
{
|
2009-03-13 22:42:11 +08:00
|
|
|
struct syscall_trace_enter *entry;
|
|
|
|
struct syscall_metadata *sys_data;
|
|
|
|
struct ring_buffer_event *event;
|
2009-09-03 02:17:06 +08:00
|
|
|
struct ring_buffer *buffer;
|
2009-03-13 22:42:11 +08:00
|
|
|
int size;
|
2009-03-07 12:52:59 +08:00
|
|
|
int syscall_nr;
|
|
|
|
|
|
|
|
syscall_nr = syscall_get_nr(current, regs);
|
tracing: Check invalid syscall nr while tracing syscalls
Most arch syscall_get_nr() implementations returns -1 if the syscall
number is not valid. Accessing the bit field without a check might
result in a kernel oops (at least I saw it on s390 for ftrace selftest).
Before this change, this problem did not occur, because the invalid
syscall number (-1) caused syscall_nr_to_meta() to return NULL.
There are at least two scenarios where syscall_get_nr() can return -1:
1. For example, ptrace stores an invalid syscall number, and thus,
tracing code resets it.
(see do_syscall_trace_enter in arch/s390/kernel/ptrace.c)
2. The syscall_regfunc() (kernel/tracepoint.c) sets the
TIF_SYSCALL_FTRACE (now: TIF_SYSCALL_TRACEPOINT) flag for all threads
which include kernel threads.
However, the ftrace selftest triggers a kernel oops when testing
syscall trace points:
- The kernel thread is started as ususal (do_fork()),
- tracing code sets TIF_SYSCALL_FTRACE,
- the ret_from_fork() function is triggered and starts
ftrace_syscall_exit() with an invalid syscall number.
To avoid these scenarios, I suggest to check the syscall_nr.
For instance, the ftrace selftest fails for s390 (with config option
CONFIG_FTRACE_SYSCALLS set) and produces the following kernel oops.
Unable to handle kernel pointer dereference at virtual kernel address 2000000000
Oops: 0038 [#1] PREEMPT SMP
Modules linked in:
CPU: 0 Not tainted 2.6.31-rc6-next-20090819-dirty #18
Process kthreadd (pid: 818, task: 000000003ea207e8, ksp: 000000003e813eb8)
Krnl PSW : 0704100180000000 00000000000ea54c (ftrace_syscall_exit+0x58/0xdc)
R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:1 PM:0 EA:3
Krnl GPRS: 0000000000000000 00000000000e0000 ffffffffffffffff 20000000008c2650
0000000000000007 0000000000000000 0000000000000000 0000000000000000
0000000000000000 0000000000000000 ffffffffffffffff 000000003e813d78
000000003e813f58 0000000000505ba8 000000003e813e18 000000003e813d78
Krnl Code: 00000000000ea540: e330d0000008 ag %r3,0(%r13)
00000000000ea546: a7480007 lhi %r4,7
00000000000ea54a: 1442 nr %r4,%r2
>00000000000ea54c: e31030000090 llgc %r1,0(%r3)
00000000000ea552: 5410d008 n %r1,8(%r13)
00000000000ea556: 8a104000 sra %r1,0(%r4)
00000000000ea55a: 5410d00c n %r1,12(%r13)
00000000000ea55e: 1211 ltr %r1,%r1
Call Trace:
([<0000000000000000>] 0x0)
[<000000000001fa22>] do_syscall_trace_exit+0x132/0x18c
[<000000000002d0c4>] sysc_return+0x0/0x8
[<000000000001c738>] kernel_thread_starter+0x0/0xc
Last Breaking-Event-Address:
[<00000000000ea51e>] ftrace_syscall_exit+0x2a/0xdc
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Jiaying Zhang <jiayingz@google.com>
Cc: Martin Bligh <mbligh@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
LKML-Reference: <20090825125027.GE4639@cetus.boeblingen.de.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
2009-08-25 20:50:27 +08:00
|
|
|
if (syscall_nr < 0)
|
|
|
|
return;
|
2009-08-11 04:52:47 +08:00
|
|
|
if (!test_bit(syscall_nr, enabled_enter_syscalls))
|
|
|
|
return;
|
2009-03-07 12:52:59 +08:00
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
|
|
if (!sys_data)
|
|
|
|
return;
|
|
|
|
|
|
|
|
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
|
|
|
|
|
2009-09-03 02:17:06 +08:00
|
|
|
event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id,
|
|
|
|
size, 0, 0);
|
2009-03-13 22:42:11 +08:00
|
|
|
if (!event)
|
|
|
|
return;
|
|
|
|
|
|
|
|
entry = ring_buffer_event_data(event);
|
|
|
|
entry->nr = syscall_nr;
|
|
|
|
syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
|
|
|
|
|
2009-09-03 02:17:06 +08:00
|
|
|
if (!filter_current_check_discard(buffer, sys_data->enter_event,
|
|
|
|
entry, event))
|
|
|
|
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
|
2009-03-07 12:52:59 +08:00
|
|
|
}
|
|
|
|
|
2009-08-11 04:52:47 +08:00
|
|
|
void ftrace_syscall_exit(struct pt_regs *regs, long ret)
|
2009-03-07 12:52:59 +08:00
|
|
|
{
|
2009-03-13 22:42:11 +08:00
|
|
|
struct syscall_trace_exit *entry;
|
|
|
|
struct syscall_metadata *sys_data;
|
|
|
|
struct ring_buffer_event *event;
|
2009-09-03 02:17:06 +08:00
|
|
|
struct ring_buffer *buffer;
|
2009-03-07 12:52:59 +08:00
|
|
|
int syscall_nr;
|
|
|
|
|
|
|
|
syscall_nr = syscall_get_nr(current, regs);
|
tracing: Check invalid syscall nr while tracing syscalls
Most arch syscall_get_nr() implementations returns -1 if the syscall
number is not valid. Accessing the bit field without a check might
result in a kernel oops (at least I saw it on s390 for ftrace selftest).
Before this change, this problem did not occur, because the invalid
syscall number (-1) caused syscall_nr_to_meta() to return NULL.
There are at least two scenarios where syscall_get_nr() can return -1:
1. For example, ptrace stores an invalid syscall number, and thus,
tracing code resets it.
(see do_syscall_trace_enter in arch/s390/kernel/ptrace.c)
2. The syscall_regfunc() (kernel/tracepoint.c) sets the
TIF_SYSCALL_FTRACE (now: TIF_SYSCALL_TRACEPOINT) flag for all threads
which include kernel threads.
However, the ftrace selftest triggers a kernel oops when testing
syscall trace points:
- The kernel thread is started as ususal (do_fork()),
- tracing code sets TIF_SYSCALL_FTRACE,
- the ret_from_fork() function is triggered and starts
ftrace_syscall_exit() with an invalid syscall number.
To avoid these scenarios, I suggest to check the syscall_nr.
For instance, the ftrace selftest fails for s390 (with config option
CONFIG_FTRACE_SYSCALLS set) and produces the following kernel oops.
Unable to handle kernel pointer dereference at virtual kernel address 2000000000
Oops: 0038 [#1] PREEMPT SMP
Modules linked in:
CPU: 0 Not tainted 2.6.31-rc6-next-20090819-dirty #18
Process kthreadd (pid: 818, task: 000000003ea207e8, ksp: 000000003e813eb8)
Krnl PSW : 0704100180000000 00000000000ea54c (ftrace_syscall_exit+0x58/0xdc)
R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:1 PM:0 EA:3
Krnl GPRS: 0000000000000000 00000000000e0000 ffffffffffffffff 20000000008c2650
0000000000000007 0000000000000000 0000000000000000 0000000000000000
0000000000000000 0000000000000000 ffffffffffffffff 000000003e813d78
000000003e813f58 0000000000505ba8 000000003e813e18 000000003e813d78
Krnl Code: 00000000000ea540: e330d0000008 ag %r3,0(%r13)
00000000000ea546: a7480007 lhi %r4,7
00000000000ea54a: 1442 nr %r4,%r2
>00000000000ea54c: e31030000090 llgc %r1,0(%r3)
00000000000ea552: 5410d008 n %r1,8(%r13)
00000000000ea556: 8a104000 sra %r1,0(%r4)
00000000000ea55a: 5410d00c n %r1,12(%r13)
00000000000ea55e: 1211 ltr %r1,%r1
Call Trace:
([<0000000000000000>] 0x0)
[<000000000001fa22>] do_syscall_trace_exit+0x132/0x18c
[<000000000002d0c4>] sysc_return+0x0/0x8
[<000000000001c738>] kernel_thread_starter+0x0/0xc
Last Breaking-Event-Address:
[<00000000000ea51e>] ftrace_syscall_exit+0x2a/0xdc
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Jiaying Zhang <jiayingz@google.com>
Cc: Martin Bligh <mbligh@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
LKML-Reference: <20090825125027.GE4639@cetus.boeblingen.de.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
2009-08-25 20:50:27 +08:00
|
|
|
if (syscall_nr < 0)
|
|
|
|
return;
|
2009-08-11 04:52:47 +08:00
|
|
|
if (!test_bit(syscall_nr, enabled_exit_syscalls))
|
|
|
|
return;
|
2009-03-07 12:52:59 +08:00
|
|
|
|
2009-03-13 22:42:11 +08:00
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
|
|
if (!sys_data)
|
|
|
|
return;
|
|
|
|
|
2009-09-03 02:17:06 +08:00
|
|
|
event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id,
|
2009-03-13 22:42:11 +08:00
|
|
|
sizeof(*entry), 0, 0);
|
|
|
|
if (!event)
|
|
|
|
return;
|
|
|
|
|
|
|
|
entry = ring_buffer_event_data(event);
|
|
|
|
entry->nr = syscall_nr;
|
|
|
|
entry->ret = syscall_get_return_value(current, regs);
|
|
|
|
|
2009-09-03 02:17:06 +08:00
|
|
|
if (!filter_current_check_discard(buffer, sys_data->exit_event,
|
|
|
|
entry, event))
|
|
|
|
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
|
2009-03-07 12:52:59 +08:00
|
|
|
}
|
|
|
|
|
2009-08-11 04:52:47 +08:00
|
|
|
int reg_event_syscall_enter(void *ptr)
|
2009-03-07 12:52:59 +08:00
|
|
|
{
|
2009-08-11 04:52:47 +08:00
|
|
|
int ret = 0;
|
|
|
|
int num;
|
|
|
|
char *name;
|
|
|
|
|
|
|
|
name = (char *)ptr;
|
|
|
|
num = syscall_name_to_nr(name);
|
2009-08-25 05:40:22 +08:00
|
|
|
if (num < 0 || num >= NR_syscalls)
|
2009-08-11 04:52:47 +08:00
|
|
|
return -ENOSYS;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
|
|
if (!sys_refcount_enter)
|
2009-08-25 05:43:14 +08:00
|
|
|
ret = register_trace_sys_enter(ftrace_syscall_enter);
|
2009-08-11 04:52:47 +08:00
|
|
|
if (ret) {
|
|
|
|
pr_info("event trace: Could not activate"
|
|
|
|
"syscall entry trace point");
|
|
|
|
} else {
|
|
|
|
set_bit(num, enabled_enter_syscalls);
|
|
|
|
sys_refcount_enter++;
|
|
|
|
}
|
|
|
|
mutex_unlock(&syscall_trace_lock);
|
|
|
|
return ret;
|
2009-03-07 12:52:59 +08:00
|
|
|
}
|
|
|
|
|
2009-08-11 04:52:47 +08:00
|
|
|
void unreg_event_syscall_enter(void *ptr)
|
2009-03-07 12:52:59 +08:00
|
|
|
{
|
2009-08-11 04:52:47 +08:00
|
|
|
int num;
|
|
|
|
char *name;
|
2009-03-07 12:52:59 +08:00
|
|
|
|
2009-08-11 04:52:47 +08:00
|
|
|
name = (char *)ptr;
|
|
|
|
num = syscall_name_to_nr(name);
|
2009-08-25 05:40:22 +08:00
|
|
|
if (num < 0 || num >= NR_syscalls)
|
2009-08-11 04:52:47 +08:00
|
|
|
return;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
|
|
sys_refcount_enter--;
|
|
|
|
clear_bit(num, enabled_enter_syscalls);
|
|
|
|
if (!sys_refcount_enter)
|
2009-08-25 05:43:14 +08:00
|
|
|
unregister_trace_sys_enter(ftrace_syscall_enter);
|
2009-08-11 04:52:47 +08:00
|
|
|
mutex_unlock(&syscall_trace_lock);
|
|
|
|
}
|
2009-03-07 12:52:59 +08:00
|
|
|
|
2009-08-11 04:52:47 +08:00
|
|
|
int reg_event_syscall_exit(void *ptr)
|
2009-03-07 12:52:59 +08:00
|
|
|
{
|
2009-08-11 04:52:47 +08:00
|
|
|
int ret = 0;
|
|
|
|
int num;
|
|
|
|
char *name;
|
|
|
|
|
|
|
|
name = (char *)ptr;
|
|
|
|
num = syscall_name_to_nr(name);
|
2009-08-25 05:40:22 +08:00
|
|
|
if (num < 0 || num >= NR_syscalls)
|
2009-08-11 04:52:47 +08:00
|
|
|
return -ENOSYS;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
|
|
if (!sys_refcount_exit)
|
2009-08-25 05:43:14 +08:00
|
|
|
ret = register_trace_sys_exit(ftrace_syscall_exit);
|
2009-08-11 04:52:47 +08:00
|
|
|
if (ret) {
|
|
|
|
pr_info("event trace: Could not activate"
|
|
|
|
"syscall exit trace point");
|
|
|
|
} else {
|
|
|
|
set_bit(num, enabled_exit_syscalls);
|
|
|
|
sys_refcount_exit++;
|
2009-03-07 12:52:59 +08:00
|
|
|
}
|
2009-08-11 04:52:47 +08:00
|
|
|
mutex_unlock(&syscall_trace_lock);
|
|
|
|
return ret;
|
|
|
|
}
|
2009-03-07 12:52:59 +08:00
|
|
|
|
2009-08-11 04:52:47 +08:00
|
|
|
void unreg_event_syscall_exit(void *ptr)
|
|
|
|
{
|
|
|
|
int num;
|
|
|
|
char *name;
|
2009-03-07 12:52:59 +08:00
|
|
|
|
2009-08-11 04:52:47 +08:00
|
|
|
name = (char *)ptr;
|
|
|
|
num = syscall_name_to_nr(name);
|
2009-08-25 05:40:22 +08:00
|
|
|
if (num < 0 || num >= NR_syscalls)
|
2009-08-11 04:52:47 +08:00
|
|
|
return;
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
|
|
sys_refcount_exit--;
|
|
|
|
clear_bit(num, enabled_exit_syscalls);
|
|
|
|
if (!sys_refcount_exit)
|
2009-08-25 05:43:14 +08:00
|
|
|
unregister_trace_sys_exit(ftrace_syscall_exit);
|
2009-08-11 04:52:47 +08:00
|
|
|
mutex_unlock(&syscall_trace_lock);
|
2009-03-07 12:52:59 +08:00
|
|
|
}
|
2009-08-11 04:52:47 +08:00
|
|
|
|
|
|
|
struct trace_event event_syscall_enter = {
|
|
|
|
.trace = print_syscall_enter,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct trace_event event_syscall_exit = {
|
|
|
|
.trace = print_syscall_exit,
|
|
|
|
};
|
2009-08-11 04:53:02 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_EVENT_PROFILE
|
2009-08-12 02:22:53 +08:00
|
|
|
|
2009-08-25 05:40:22 +08:00
|
|
|
/* Count of syscalls with enter/exit profiling enabled. */
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;

/* One bit per syscall number; set while profiling of that syscall is on. */
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
|
|
|
|
|
|
|
|
static void prof_syscall_enter(struct pt_regs *regs, long id)
|
|
|
|
{
|
|
|
|
struct syscall_metadata *sys_data;
|
2009-09-18 12:10:28 +08:00
|
|
|
struct syscall_trace_enter *rec;
|
|
|
|
unsigned long flags;
|
|
|
|
char *raw_data;
|
2009-08-11 04:53:02 +08:00
|
|
|
int syscall_nr;
|
2009-08-12 02:22:53 +08:00
|
|
|
int size;
|
2009-09-18 12:10:28 +08:00
|
|
|
int cpu;
|
2009-08-11 04:53:02 +08:00
|
|
|
|
|
|
|
syscall_nr = syscall_get_nr(current, regs);
|
|
|
|
if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
|
|
|
|
return;
|
|
|
|
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
|
|
if (!sys_data)
|
|
|
|
return;
|
|
|
|
|
2009-08-12 02:22:53 +08:00
|
|
|
/* get the size after alignment with the u32 buffer size field */
|
|
|
|
size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
|
|
|
|
size = ALIGN(size + sizeof(u32), sizeof(u64));
|
|
|
|
size -= sizeof(u32);
|
|
|
|
|
2009-09-18 12:10:28 +08:00
|
|
|
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
|
|
|
|
"profile buffer not large enough"))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Protect the per cpu buffer, begin the rcu read side */
|
|
|
|
local_irq_save(flags);
|
2009-08-12 02:22:53 +08:00
|
|
|
|
2009-09-18 12:10:28 +08:00
|
|
|
cpu = smp_processor_id();
|
|
|
|
|
|
|
|
if (in_nmi())
|
|
|
|
raw_data = rcu_dereference(trace_profile_buf_nmi);
|
|
|
|
else
|
|
|
|
raw_data = rcu_dereference(trace_profile_buf);
|
|
|
|
|
|
|
|
if (!raw_data)
|
|
|
|
goto end;
|
2009-08-12 02:22:53 +08:00
|
|
|
|
2009-09-18 12:10:28 +08:00
|
|
|
raw_data = per_cpu_ptr(raw_data, cpu);
|
|
|
|
|
|
|
|
/* zero the dead bytes from align to not leak stack to user */
|
|
|
|
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
|
|
|
|
|
|
|
|
rec = (struct syscall_trace_enter *) raw_data;
|
|
|
|
tracing_generic_entry_update(&rec->ent, 0, 0);
|
|
|
|
rec->ent.type = sys_data->enter_id;
|
|
|
|
rec->nr = syscall_nr;
|
|
|
|
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
|
|
|
|
(unsigned long *)&rec->args);
|
|
|
|
perf_tpcounter_event(sys_data->enter_id, 0, 1, rec, size);
|
|
|
|
|
|
|
|
end:
|
|
|
|
local_irq_restore(flags);
|
2009-08-11 04:53:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int reg_prof_syscall_enter(char *name)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
int num;
|
|
|
|
|
|
|
|
num = syscall_name_to_nr(name);
|
2009-08-25 05:40:22 +08:00
|
|
|
if (num < 0 || num >= NR_syscalls)
|
2009-08-11 04:53:02 +08:00
|
|
|
return -ENOSYS;
|
|
|
|
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
|
|
if (!sys_prof_refcount_enter)
|
2009-08-25 05:43:14 +08:00
|
|
|
ret = register_trace_sys_enter(prof_syscall_enter);
|
2009-08-11 04:53:02 +08:00
|
|
|
if (ret) {
|
|
|
|
pr_info("event trace: Could not activate"
|
|
|
|
"syscall entry trace point");
|
|
|
|
} else {
|
|
|
|
set_bit(num, enabled_prof_enter_syscalls);
|
|
|
|
sys_prof_refcount_enter++;
|
|
|
|
}
|
|
|
|
mutex_unlock(&syscall_trace_lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void unreg_prof_syscall_enter(char *name)
|
|
|
|
{
|
|
|
|
int num;
|
|
|
|
|
|
|
|
num = syscall_name_to_nr(name);
|
2009-08-25 05:40:22 +08:00
|
|
|
if (num < 0 || num >= NR_syscalls)
|
2009-08-11 04:53:02 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
|
|
sys_prof_refcount_enter--;
|
|
|
|
clear_bit(num, enabled_prof_enter_syscalls);
|
|
|
|
if (!sys_prof_refcount_enter)
|
2009-08-25 05:43:14 +08:00
|
|
|
unregister_trace_sys_enter(prof_syscall_enter);
|
2009-08-11 04:53:02 +08:00
|
|
|
mutex_unlock(&syscall_trace_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void prof_syscall_exit(struct pt_regs *regs, long ret)
|
|
|
|
{
|
|
|
|
struct syscall_metadata *sys_data;
|
2009-09-18 12:10:28 +08:00
|
|
|
struct syscall_trace_exit *rec;
|
|
|
|
unsigned long flags;
|
2009-08-11 04:53:02 +08:00
|
|
|
int syscall_nr;
|
2009-09-18 12:10:28 +08:00
|
|
|
char *raw_data;
|
|
|
|
int size;
|
|
|
|
int cpu;
|
2009-08-11 04:53:02 +08:00
|
|
|
|
|
|
|
syscall_nr = syscall_get_nr(current, regs);
|
|
|
|
if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
|
|
|
|
return;
|
|
|
|
|
|
|
|
sys_data = syscall_nr_to_meta(syscall_nr);
|
|
|
|
if (!sys_data)
|
|
|
|
return;
|
|
|
|
|
2009-09-18 12:10:28 +08:00
|
|
|
/* We can probably do that at build time */
|
|
|
|
size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
|
|
|
|
size -= sizeof(u32);
|
2009-08-12 02:22:53 +08:00
|
|
|
|
2009-09-18 12:10:28 +08:00
|
|
|
/*
|
|
|
|
* Impossible, but be paranoid with the future
|
|
|
|
* How to put this check outside runtime?
|
|
|
|
*/
|
|
|
|
if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
|
|
|
|
"exit event has grown above profile buffer size"))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Protect the per cpu buffer, begin the rcu read side */
|
|
|
|
local_irq_save(flags);
|
|
|
|
cpu = smp_processor_id();
|
|
|
|
|
|
|
|
if (in_nmi())
|
|
|
|
raw_data = rcu_dereference(trace_profile_buf_nmi);
|
|
|
|
else
|
|
|
|
raw_data = rcu_dereference(trace_profile_buf);
|
|
|
|
|
|
|
|
if (!raw_data)
|
|
|
|
goto end;
|
|
|
|
|
|
|
|
raw_data = per_cpu_ptr(raw_data, cpu);
|
|
|
|
|
|
|
|
/* zero the dead bytes from align to not leak stack to user */
|
|
|
|
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
|
|
|
|
|
|
|
|
rec = (struct syscall_trace_exit *)raw_data;
|
|
|
|
|
|
|
|
tracing_generic_entry_update(&rec->ent, 0, 0);
|
|
|
|
rec->ent.type = sys_data->exit_id;
|
|
|
|
rec->nr = syscall_nr;
|
|
|
|
rec->ret = syscall_get_return_value(current, regs);
|
|
|
|
|
|
|
|
perf_tpcounter_event(sys_data->exit_id, 0, 1, rec, size);
|
|
|
|
|
|
|
|
end:
|
|
|
|
local_irq_restore(flags);
|
2009-08-11 04:53:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int reg_prof_syscall_exit(char *name)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
int num;
|
|
|
|
|
|
|
|
num = syscall_name_to_nr(name);
|
2009-08-25 05:40:22 +08:00
|
|
|
if (num < 0 || num >= NR_syscalls)
|
2009-08-11 04:53:02 +08:00
|
|
|
return -ENOSYS;
|
|
|
|
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
|
|
if (!sys_prof_refcount_exit)
|
2009-08-25 05:43:14 +08:00
|
|
|
ret = register_trace_sys_exit(prof_syscall_exit);
|
2009-08-11 04:53:02 +08:00
|
|
|
if (ret) {
|
|
|
|
pr_info("event trace: Could not activate"
|
|
|
|
"syscall entry trace point");
|
|
|
|
} else {
|
|
|
|
set_bit(num, enabled_prof_exit_syscalls);
|
|
|
|
sys_prof_refcount_exit++;
|
|
|
|
}
|
|
|
|
mutex_unlock(&syscall_trace_lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void unreg_prof_syscall_exit(char *name)
|
|
|
|
{
|
|
|
|
int num;
|
|
|
|
|
|
|
|
num = syscall_name_to_nr(name);
|
2009-08-25 05:40:22 +08:00
|
|
|
if (num < 0 || num >= NR_syscalls)
|
2009-08-11 04:53:02 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
mutex_lock(&syscall_trace_lock);
|
|
|
|
sys_prof_refcount_exit--;
|
|
|
|
clear_bit(num, enabled_prof_exit_syscalls);
|
|
|
|
if (!sys_prof_refcount_exit)
|
2009-08-25 05:43:14 +08:00
|
|
|
unregister_trace_sys_exit(prof_syscall_exit);
|
2009-08-11 04:53:02 +08:00
|
|
|
mutex_unlock(&syscall_trace_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|