From fe8e5b5a60f8427940d33b205e127aecfb0bca10 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 3 Oct 2009 14:55:18 +0200 Subject: [PATCH 1/8] tracing: Check total refcount before releasing bufs in profile_enable failure When we call the profile_enable() callback of an event, we release the shared perf event tracing buffers unconditionnaly in the failure path. This is wrong because there may be other users of these. Then check the total refcount before doing this. Reported-by: Paul Mackerras Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Li Zefan --- kernel/trace/trace_event_profile.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index dd44b8768867..e52784b7b844 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -31,7 +31,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) if (atomic_inc_return(&event->profile_count)) return 0; - if (!total_profile_count++) { + if (!total_profile_count) { buf = (char *)alloc_percpu(profile_buf_t); if (!buf) goto fail_buf; @@ -46,14 +46,19 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) } ret = event->profile_enable(); - if (!ret) + if (!ret) { + total_profile_count++; return 0; + } - kfree(trace_profile_buf_nmi); fail_buf_nmi: - kfree(trace_profile_buf); + if (!total_profile_count) { + kfree(trace_profile_buf_nmi); + kfree(trace_profile_buf); + trace_profile_buf_nmi = NULL; + trace_profile_buf = NULL; + } fail_buf: - total_profile_count--; atomic_dec(&event->profile_count); return ret; From 75fb4090b39a3d7bf9ac77a28665c991ec5eaadc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 3 Oct 2009 15:08:54 +0200 Subject: [PATCH 2/8] tracing: Use free_percpu instead of kfree In the event->profile_enable() failure path, we release the per cpu buffers using kfree which is wrong because they are per cpu pointers. Although free_percpu only wraps kfree for now, that may change in the future so lets use the correct way. Reported-by: Paul Mackerras Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Li Zefan --- kernel/trace/trace_event_profile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index e52784b7b844..8d5c171cc998 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -53,8 +53,8 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) fail_buf_nmi: if (!total_profile_count) { - kfree(trace_profile_buf_nmi); - kfree(trace_profile_buf); + free_percpu(trace_profile_buf_nmi); + free_percpu(trace_profile_buf); trace_profile_buf_nmi = NULL; trace_profile_buf = NULL; } From b0f56f1a63b7b968e6feeeefeace24bc8e0a4a65 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 1 Oct 2009 13:33:28 +0900 Subject: [PATCH 3/8] trace: Fix missing assignment in trace_ctxwake_* The state char variable S should be reassigned, if S == 0. We are missing the state of the task that is going to sleep for the context switch events (in the raw mode). Fortunately the problem arises with the sched_switch/wake_up tracers, not the sched trace events. The formers are legacy now. But still, that was buggy. Signed-off-by: Hiroshi Shimamoto Cc: Steven Rostedt Acked-by: Frederic Weisbecker LKML-Reference: <4AC43118.6050409@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index f572f44c6e1e..cda766f9f421 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -883,7 +883,7 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - task_state_char(field->prev_state); + S = task_state_char(field->prev_state); T = task_state_char(field->next_state); if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", field->prev_pid, @@ -918,7 +918,7 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - task_state_char(field->prev_state); + S = task_state_char(field->prev_state); T = task_state_char(field->next_state); SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); From 829b876dfc94ea8be3a47e200d06f1f217bb104f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 27 Sep 2009 07:02:07 -0400 Subject: [PATCH 4/8] tracing: fix transposed numbers of lock_depth and preempt_count The lock_depth and preempt_count numbers in the latency format is transposed. Signed-off-by: Steven Rostedt --- kernel/trace/trace_output.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cda766f9f421..ed17565826b0 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -486,16 +486,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) hardirq ? 'h' : softirq ? 's' : '.')) return 0; - if (entry->lock_depth < 0) - ret = trace_seq_putc(s, '.'); + if (entry->preempt_count) + ret = trace_seq_printf(s, "%x", entry->preempt_count); else - ret = trace_seq_printf(s, "%d", entry->lock_depth); + ret = trace_seq_putc(s, '.'); + if (!ret) return 0; - if (entry->preempt_count) - return trace_seq_printf(s, "%x", entry->preempt_count); - return trace_seq_putc(s, '.'); + if (entry->lock_depth < 0) + return trace_seq_putc(s, '.'); + + return trace_seq_printf(s, "%d", entry->lock_depth); } static int From e7247a15ff3bbdab0a8b402dffa1171e5c05a8e0 Mon Sep 17 00:00:00 2001 From: "jolsa@redhat.com" Date: Wed, 7 Oct 2009 19:00:35 +0200 Subject: [PATCH 5/8] tracing: correct module boundaries for ftrace_release When the module is about the unload we release its call records. The ftrace_release function was given wrong values representing the module core boundaries, thus not releasing its call records. Plus making ftrace_release function module specific. Signed-off-by: Jiri Olsa LKML-Reference: <1254934835-363-3-git-send-email-jolsa@redhat.com> Cc: stable@kernel.org Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index cd3d2abaf30a..0b4f97d24d7f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -241,7 +241,7 @@ extern void ftrace_enable_daemon(void); # define ftrace_set_filter(buf, len, reset) do { } while (0) # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) -static inline void ftrace_release(void *start, unsigned long size) { } +static inline void ftrace_release_mod(struct module *mod) {} static inline int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 46592feab5a6..c701476a648b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2658,19 +2658,17 @@ static int ftrace_convert_nops(struct module *mod, } #ifdef CONFIG_MODULES -void ftrace_release(void *start, void *end) +void ftrace_release_mod(struct module *mod) { struct dyn_ftrace *rec; struct ftrace_page *pg; - unsigned long s = (unsigned long)start; - unsigned long e = (unsigned long)end; - if (ftrace_disabled || !start || start == end) + if (ftrace_disabled) return; mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { - if ((rec->ip >= s) && (rec->ip < e)) { + if (within_module_core(rec->ip, mod)) { /* * rec->ip is changed in ftrace_free_rec() * It should not between s and e if record was freed. @@ -2702,9 +2700,7 @@ static int ftrace_module_notify(struct notifier_block *self, mod->num_ftrace_callsites); break; case MODULE_STATE_GOING: - ftrace_release(mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); + ftrace_release_mod(mod); break; } From 3279ba37db5d65c4ab0dcdee3b211ccb85bb563f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 7 Oct 2009 16:57:56 -0400 Subject: [PATCH 6/8] ftrace: check for failure for all conversions Due to legacy code from back when the dynamic tracer used a daemon, only core kernel code was checking for failures. This is no longer the case. We must check for failures any time we perform text modifications. Cc: stable@kernel.org Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c701476a648b..f136fe5da07a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1074,14 +1074,9 @@ static void ftrace_replace_code(int enable) failed = __ftrace_replace_code(rec, enable); if (failed) { rec->flags |= FTRACE_FL_FAILED; - if ((system_state == SYSTEM_BOOTING) || - !core_kernel_text(rec->ip)) { - ftrace_free_rec(rec); - } else { - ftrace_bug(failed, rec->ip); - /* Stop processing */ - return; - } + ftrace_bug(failed, rec->ip); + /* Stop processing */ + return; } } while_for_each_ftrace_rec(); } From c8647b28726b09b087155417bb698e7b3789f8a0 Mon Sep 17 00:00:00 2001 From: Zhenwen Xu Date: Thu, 8 Oct 2009 09:21:46 +0800 Subject: [PATCH 7/8] tracing: fix warning on kernel/trace/trace_branch.c andtrace_hw_branches.c fix warnings that caused the API change of trace_buffer_lock_reserve() change files: kernel/trace/trace_hw_branch.c kernel/trace/trace_branch.c Signed-off-by: Zhenwen Xu LKML-Reference: <20091008012146.GA4170@helight> Signed-off-by: Steven Rostedt --- kernel/trace/trace_branch.c | 2 +- kernel/trace/trace_hw_branches.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 7a7a9fd249a9..216e2dd302a0 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -54,7 +54,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) goto out; pc = preempt_count(); - event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, + event = trace_buffer_lock_reserve(tr->buffer, TRACE_BRANCH, sizeof(*entry), flags, pc); if (!event) goto out; diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 23b63859130e..69543a905cd5 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -165,6 +165,7 @@ void trace_hw_branch(u64 from, u64 to) struct ftrace_event_call *call = &event_hw_branch; struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; + struct ring_buffer *buf; struct hw_branch_entry *entry; unsigned long irq1; int cpu; @@ -180,7 +181,8 @@ void trace_hw_branch(u64 from, u64 to) if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; - event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES, + buf = tr->buffer; + event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES, sizeof(*entry), 0, 0); if (!event) goto out; @@ -189,8 +191,8 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.type = TRACE_HW_BRANCHES; entry->from = from; entry->to = to; - if (!filter_check_discard(call, entry, tr->buffer, event)) - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, buf, event)) + trace_buffer_unlock_commit(buf, event, 0, 0); out: atomic_dec(&tr->data[cpu]->disabled); From 8f6e8a314ab37cadd72da5ace9027f2d04aba854 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 7 Oct 2009 21:53:41 -0400 Subject: [PATCH 8/8] tracing: user local buffer variable for trace branch tracer Just using the tr->buffer for the API to trace_buffer_lock_reserve is not good enough. This is because the tr->buffer may change, and we do not want to commit with a different buffer that we reserved from. This patch uses a local variable to hold the buffer that was used to reserve and commit with. Signed-off-by: Steven Rostedt --- kernel/trace/trace_branch.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 216e2dd302a0..4a194f08f88c 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -34,6 +34,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_array *tr = branch_tracer; struct ring_buffer_event *event; struct trace_branch *entry; + struct ring_buffer *buffer; unsigned long flags; int cpu, pc; const char *p; @@ -54,7 +55,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) goto out; pc = preempt_count(); - event = trace_buffer_lock_reserve(tr->buffer, TRACE_BRANCH, + buffer = tr->buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, sizeof(*entry), flags, pc); if (!event) goto out; @@ -74,8 +76,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; - if (!filter_check_discard(call, entry, tr->buffer, event)) - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, buffer, event)) + ring_buffer_unlock_commit(buffer, event); out: atomic_dec(&tr->data[cpu]->disabled);