trace,smp: Trace all smp_call_function*() invocations
(Ab)use the trace_ipi_send_cpu*() family to trace all smp_call_function*() invocations, not only those that result in an actual IPI. The queued entries log their callback function, while the actual IPIs are traced with generic_smp_call_function_single_interrupt() as their callback.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
This commit is contained in:
parent
68e2d17c9e
commit
5c3124975e
66
kernel/smp.c
66
kernel/smp.c
|
@ -104,18 +104,20 @@ void __init call_function_init(void)
|
|||
}
|
||||
|
||||
static __always_inline void
|
||||
send_call_function_single_ipi(int cpu, smp_call_func_t func)
|
||||
send_call_function_single_ipi(int cpu)
|
||||
{
|
||||
if (call_function_single_prep_ipi(cpu)) {
|
||||
trace_ipi_send_cpu(cpu, _RET_IP_, func);
|
||||
trace_ipi_send_cpu(cpu, _RET_IP_,
|
||||
generic_smp_call_function_single_interrupt);
|
||||
arch_send_call_function_single_ipi(cpu);
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline void
|
||||
send_call_function_ipi_mask(struct cpumask *mask, smp_call_func_t func)
|
||||
send_call_function_ipi_mask(struct cpumask *mask)
|
||||
{
|
||||
trace_ipi_send_cpumask(mask, _RET_IP_, func);
|
||||
trace_ipi_send_cpumask(mask, _RET_IP_,
|
||||
generic_smp_call_function_single_interrupt);
|
||||
arch_send_call_function_ipi_mask(mask);
|
||||
}
|
||||
|
||||
|
@ -316,25 +318,6 @@ static __always_inline void csd_unlock(struct __call_single_data *csd)
|
|||
smp_store_release(&csd->node.u_flags, 0);
|
||||
}
|
||||
|
||||
static __always_inline void
|
||||
raw_smp_call_single_queue(int cpu, struct llist_node *node, smp_call_func_t func)
|
||||
{
|
||||
/*
|
||||
* The list addition should be visible to the target CPU when it pops
|
||||
* the head of the list to pull the entry off it in the IPI handler
|
||||
* because of normal cache coherency rules implied by the underlying
|
||||
* llist ops.
|
||||
*
|
||||
* If IPIs can go out of order to the cache coherency protocol
|
||||
* in an architecture, sufficient synchronisation should be added
|
||||
* to arch code to make it appear to obey cache coherency WRT
|
||||
* locking and barrier primitives. Generic code isn't really
|
||||
* equipped to do the right thing...
|
||||
*/
|
||||
if (llist_add(node, &per_cpu(call_single_queue, cpu)))
|
||||
send_call_function_single_ipi(cpu, func);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
|
||||
|
||||
void __smp_call_single_queue(int cpu, struct llist_node *node)
|
||||
|
@ -354,10 +337,23 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
|
|||
func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
|
||||
sched_ttwu_pending : csd->func;
|
||||
|
||||
raw_smp_call_single_queue(cpu, node, func);
|
||||
} else {
|
||||
raw_smp_call_single_queue(cpu, node, NULL);
|
||||
trace_ipi_send_cpu(cpu, _RET_IP_, func);
|
||||
}
|
||||
|
||||
/*
|
||||
* The list addition should be visible to the target CPU when it pops
|
||||
* the head of the list to pull the entry off it in the IPI handler
|
||||
* because of normal cache coherency rules implied by the underlying
|
||||
* llist ops.
|
||||
*
|
||||
* If IPIs can go out of order to the cache coherency protocol
|
||||
* in an architecture, sufficient synchronisation should be added
|
||||
* to arch code to make it appear to obey cache coherency WRT
|
||||
* locking and barrier primitives. Generic code isn't really
|
||||
* equipped to do the right thing...
|
||||
*/
|
||||
if (llist_add(node, &per_cpu(call_single_queue, cpu)))
|
||||
send_call_function_single_ipi(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -732,9 +728,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
|
|||
int cpu, last_cpu, this_cpu = smp_processor_id();
|
||||
struct call_function_data *cfd;
|
||||
bool wait = scf_flags & SCF_WAIT;
|
||||
int nr_cpus = 0, nr_queued = 0;
|
||||
bool run_remote = false;
|
||||
bool run_local = false;
|
||||
int nr_cpus = 0;
|
||||
|
||||
lockdep_assert_preemption_disabled();
|
||||
|
||||
|
@ -776,8 +772,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
|
|||
for_each_cpu(cpu, cfd->cpumask) {
|
||||
call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
|
||||
|
||||
if (cond_func && !cond_func(cpu, info))
|
||||
if (cond_func && !cond_func(cpu, info)) {
|
||||
__cpumask_clear_cpu(cpu, cfd->cpumask);
|
||||
continue;
|
||||
}
|
||||
|
||||
csd_lock(csd);
|
||||
if (wait)
|
||||
|
@ -793,17 +791,25 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
|
|||
nr_cpus++;
|
||||
last_cpu = cpu;
|
||||
}
|
||||
nr_queued++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Trace each smp_call_function_*() as an IPI, actual IPIs
|
||||
* will be traced with func==generic_smp_call_function_single_interrupt().
|
||||
*/
|
||||
if (nr_queued)
|
||||
trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);
|
||||
|
||||
/*
|
||||
* Choose the most efficient way to send an IPI. Note that the
|
||||
* number of CPUs might be zero due to concurrent changes to the
|
||||
* provided mask.
|
||||
*/
|
||||
if (nr_cpus == 1)
|
||||
send_call_function_single_ipi(last_cpu, func);
|
||||
send_call_function_single_ipi(last_cpu);
|
||||
else if (likely(nr_cpus > 1))
|
||||
send_call_function_ipi_mask(cfd->cpumask_ipi, func);
|
||||
send_call_function_ipi_mask(cfd->cpumask_ipi);
|
||||
}
|
||||
|
||||
if (run_local && (!cond_func || cond_func(this_cpu, info))) {
|
||||
|
|
Loading…
Reference in New Issue