2005-04-17 06:20:36 +08:00
|
|
|
/*
 * SMP support for ppc.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
 * deal of code from the sparc and intel versions.
 *
 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
 *
 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
|
|
|
|
|
|
|
|
#undef DEBUG
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/cache.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
#include <linux/sysdev.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/notifier.h>
|
2005-12-13 03:56:47 +08:00
|
|
|
#include <linux/topology.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#include <asm/ptrace.h>
|
|
|
|
#include <asm/atomic.h>
|
|
|
|
#include <asm/irq.h>
|
|
|
|
#include <asm/page.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/prom.h>
|
|
|
|
#include <asm/smp.h>
|
|
|
|
#include <asm/time.h>
|
|
|
|
#include <asm/machdep.h>
|
2008-07-27 13:24:52 +08:00
|
|
|
#include <asm/cputhreads.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <asm/cputable.h>
|
|
|
|
#include <asm/system.h>
|
2005-09-27 11:51:59 +08:00
|
|
|
#include <asm/mpic.h>
|
2005-11-11 18:15:21 +08:00
|
|
|
#include <asm/vdso_datapage.h>
|
2005-11-05 07:33:55 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
#include <asm/paca.h>
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * DBG(): forwards its arguments to udbg_printf() when DEBUG is defined,
 * and expands to nothing otherwise.
 */
#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
|
|
|
|
|
2011-03-08 11:40:04 +08:00
|
|
|
|
|
|
|
/* Store all idle threads, this can be reused instead of creating
|
|
|
|
* a new thread. Also avoids complicated thread destroy functionality
|
|
|
|
* for idle threads.
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
/*
|
|
|
|
* Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
|
|
|
|
* removed after init for !CONFIG_HOTPLUG_CPU.
|
|
|
|
*/
|
|
|
|
static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
|
|
|
|
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
|
|
|
|
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
|
|
|
|
#else
|
|
|
|
static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
|
|
|
|
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
|
|
|
|
#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
|
|
|
|
#endif
|
|
|
|
|
2005-11-15 12:16:38 +08:00
|
|
|
/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably the thread_info used while starting a secondary cpu - confirm.
 */
struct thread_info *secondary_ti;
|
|
|
|
|
2010-04-26 23:32:41 +08:00
|
|
|
/*
 * Per-cpu cpumask_var_t variables cpu_sibling_map and cpu_core_map.
 * Both are exported with EXPORT_PER_CPU_SYMBOL.
 */
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-11-05 07:33:55 +08:00
|
|
|
/*
 * SMP operations for this machine.  The muxed IPI code in this file
 * raises interrupts through smp_ops->cause_ipi().
 */
struct smp_ops_t *smp_ops;
|
|
|
|
|
2009-06-19 07:30:07 +08:00
|
|
|
/* Can't be static due to PowerMac hackery */
|
|
|
|
volatile unsigned int cpu_callin_map[NR_CPUS];
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
 * Initialised to 1.  NOTE(review): not used in the visible part of this
 * file; presumably records whether SMT was enabled at boot - confirm.
 */
int smt_enabled_at_boot = 1;
|
|
|
|
|
2005-12-04 15:39:43 +08:00
|
|
|
/*
 * When non-NULL, debug_ipi_action() calls this with get_irq_regs()
 * and returns without calling debugger_ipi().
 */
static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
|
|
|
|
|
2005-11-05 07:33:55 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2011-04-12 05:46:19 +08:00
|
|
|
/*
 * Let cpu @nr leave its start-up spin loop.
 *
 * The processor is currently spinning, waiting for its paca cpu_start
 * field to become non-zero.  After cpu_start is set, the processor
 * continues on to secondary_start.
 *
 * @nr must be in the range 0..NR_CPUS-1 (enforced with BUG_ON).
 * Always returns 0.
 */
int __devinit smp_generic_kick_cpu(int nr)
{
	BUG_ON(nr < 0 || nr >= NR_CPUS);

	/* Writing the flag is what releases the spinning processor. */
	paca[nr].cpu_start = 1;

	/* Full SMP barrier issued right after the store. */
	smp_mb();

	return 0;
}
|
2005-11-05 07:33:55 +08:00
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-11-15 04:11:49 +08:00
|
|
|
static irqreturn_t call_function_action(int irq, void *data)
|
|
|
|
{
|
|
|
|
generic_smp_call_function_interrupt();
|
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static irqreturn_t reschedule_action(int irq, void *data)
|
|
|
|
{
|
2011-04-05 23:23:39 +08:00
|
|
|
scheduler_ipi();
|
2008-11-15 04:11:49 +08:00
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static irqreturn_t call_function_single_action(int irq, void *data)
|
|
|
|
{
|
|
|
|
generic_smp_call_function_single_interrupt();
|
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
2011-05-25 04:34:18 +08:00
|
|
|
static irqreturn_t debug_ipi_action(int irq, void *data)
|
2008-11-15 04:11:49 +08:00
|
|
|
{
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
if (crash_ipi_function_ptr) {
|
|
|
|
crash_ipi_function_ptr(get_irq_regs());
|
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_DEBUGGER
|
|
|
|
debugger_ipi(get_irq_regs());
|
|
|
|
#endif /* CONFIG_DEBUGGER */
|
|
|
|
|
2008-11-15 04:11:49 +08:00
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static irq_handler_t smp_ipi_action[] = {
|
|
|
|
[PPC_MSG_CALL_FUNCTION] = call_function_action,
|
|
|
|
[PPC_MSG_RESCHEDULE] = reschedule_action,
|
|
|
|
[PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
|
|
|
|
[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
|
|
|
|
};
|
|
|
|
|
|
|
|
const char *smp_ipi_name[] = {
|
|
|
|
[PPC_MSG_CALL_FUNCTION] = "ipi call function",
|
|
|
|
[PPC_MSG_RESCHEDULE] = "ipi reschedule",
|
|
|
|
[PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
|
|
|
|
[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
|
|
|
|
};
|
|
|
|
|
|
|
|
/* optional function to request ipi, for controllers with >= 4 ipis */
|
|
|
|
int smp_request_message_ipi(int virq, int msg)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
|
|
|
|
if (msg == PPC_MSG_DEBUGGER_BREAK) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
err = request_irq(virq, smp_ipi_action[msg], IRQF_DISABLED|IRQF_PERCPU,
|
|
|
|
smp_ipi_name[msg], 0);
|
|
|
|
WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
|
|
|
|
virq, smp_ipi_name[msg], err);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2011-05-11 03:29:42 +08:00
|
|
|
#ifdef CONFIG_PPC_SMP_MUXED_IPI
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimally marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelihood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implementation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
/*
 * Per-cpu bookkeeping for IPIs multiplexed over a single interrupt.
 *
 * @messages: current messages; smp_muxed_ipi_message_pass() addresses
 *            this int as an array of bytes indexed by message number
 * @data:     data for cause ipi, i.e. the value passed as the second
 *            argument of smp_ops->cause_ipi()
 */
struct cpu_messages {
	int messages;		/* current messages */
	unsigned long data;	/* data for cause ipi */
};
|
|
|
|
/* One struct cpu_messages per cpu, defined with the shared-aligned per-cpu macro. */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
|
|
|
|
|
|
|
|
void smp_muxed_ipi_set_data(int cpu, unsigned long data)
|
|
|
|
{
|
|
|
|
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
|
|
|
|
|
|
|
|
info->data = data;
|
|
|
|
}
|
|
|
|
|
|
|
|
void smp_muxed_ipi_message_pass(int cpu, int msg)
|
|
|
|
{
|
|
|
|
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
|
2011-05-11 03:29:46 +08:00
|
|
|
char *message = (char *)&info->messages;
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
|
2011-05-11 03:29:46 +08:00
|
|
|
message[msg] = 1;
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
mb();
|
|
|
|
smp_ops->cause_ipi(cpu, info->data);
|
|
|
|
}
|
|
|
|
|
|
|
|
void smp_muxed_ipi_resend(void)
|
|
|
|
{
|
|
|
|
struct cpu_messages *info = &__get_cpu_var(ipi_message);
|
|
|
|
|
2011-05-11 03:29:46 +08:00
|
|
|
if (info->messages)
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
smp_ops->cause_ipi(smp_processor_id(), info->data);
|
|
|
|
}
|
|
|
|
|
|
|
|
irqreturn_t smp_ipi_demux(void)
|
|
|
|
{
|
|
|
|
struct cpu_messages *info = &__get_cpu_var(ipi_message);
|
2011-05-11 03:29:46 +08:00
|
|
|
unsigned int all;
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
|
|
|
|
mb(); /* order any irq clear */
|
2011-05-11 03:29:46 +08:00
|
|
|
|
|
|
|
do {
|
|
|
|
all = xchg_local(&info->messages, 0);
|
|
|
|
|
|
|
|
#ifdef __BIG_ENDIAN
|
|
|
|
if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
generic_smp_call_function_interrupt();
|
2011-05-11 03:29:46 +08:00
|
|
|
if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
|
2011-05-20 13:36:52 +08:00
|
|
|
scheduler_ipi();
|
2011-05-11 03:29:46 +08:00
|
|
|
if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
generic_smp_call_function_single_interrupt();
|
2011-05-11 03:29:46 +08:00
|
|
|
if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
debug_ipi_action(0, NULL);
|
2011-05-11 03:29:46 +08:00
|
|
|
#else
|
|
|
|
#error Unsupported ENDIAN
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimally marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelihood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implementation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
#endif
|
2011-05-11 03:29:46 +08:00
|
|
|
} while (info->messages);
|
|
|
|
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimally marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelihood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implementation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
return IRQ_HANDLED;
|
|
|
|
}
|
2011-05-11 03:29:42 +08:00
|
|
|
#endif /* CONFIG_PPC_SMP_MUXED_IPI */
|
powerpc: Consolidate ipi message mux and demux
Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.
The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger). However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu. To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops. Since these lines will be contended they are optimally marked as
shared_aligned and take a full cache line for each cpu. Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.
This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).
I put a call to reschedule_action to increase the likelihood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.
The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code. The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number. While currently only the doorbell
implementation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call. I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.
The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature. The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.
Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context. Add the missing calls.
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2011-05-11 03:29:39 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
void smp_send_reschedule(int cpu)
|
|
|
|
{
|
2006-07-04 12:09:36 +08:00
|
|
|
if (likely(smp_ops))
|
|
|
|
smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2008-06-26 17:22:13 +08:00
|
|
|
void arch_send_call_function_single_ipi(int cpu)
|
|
|
|
{
|
|
|
|
smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
|
|
|
|
}
|
|
|
|
|
2009-09-24 23:34:45 +08:00
|
|
|
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
|
2008-06-26 17:22:13 +08:00
|
|
|
{
|
|
|
|
unsigned int cpu;
|
|
|
|
|
2009-09-24 23:34:45 +08:00
|
|
|
for_each_cpu(cpu, mask)
|
2008-06-26 17:22:13 +08:00
|
|
|
smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
|
|
|
|
}
|
|
|
|
|
2011-05-11 03:29:06 +08:00
|
|
|
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
|
|
|
|
void smp_send_debugger_break(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2011-05-11 03:29:06 +08:00
|
|
|
int cpu;
|
|
|
|
int me = raw_smp_processor_id();
|
|
|
|
|
|
|
|
if (unlikely(!smp_ops))
|
|
|
|
return;
|
|
|
|
|
|
|
|
for_each_online_cpu(cpu)
|
|
|
|
if (cpu != me)
|
|
|
|
smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2005-12-04 15:39:43 +08:00
|
|
|
#ifdef CONFIG_KEXEC
|
|
|
|
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
|
|
|
|
{
|
|
|
|
crash_ipi_function_ptr = crash_ipi_callback;
|
2011-05-11 03:29:06 +08:00
|
|
|
if (crash_ipi_callback) {
|
2005-12-04 15:39:43 +08:00
|
|
|
mb();
|
2011-05-11 03:29:06 +08:00
|
|
|
smp_send_debugger_break();
|
2005-12-04 15:39:43 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
static void stop_this_cpu(void *dummy)
|
|
|
|
{
|
2009-11-25 19:48:52 +08:00
|
|
|
/* Remove this CPU */
|
|
|
|
set_cpu_online(smp_processor_id(), false);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
local_irq_disable();
|
|
|
|
while (1)
|
|
|
|
;
|
|
|
|
}
|
|
|
|
|
2007-09-18 07:43:40 +08:00
|
|
|
void smp_send_stop(void)
|
|
|
|
{
|
2008-06-06 17:18:06 +08:00
|
|
|
smp_call_function(stop_this_cpu, NULL, 0);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * thread_info pointer for each cpu, indexed by logical cpu number.
 * The boot cpu's slot is filled in smp_prepare_boot_cpu(); a secondary's
 * slot is filled when its idle task is set up, and is read back by
 * __cpu_up() just before the cpu is kicked.
 */
struct thread_info *current_set[NR_CPUS];
|
|
|
|
|
|
|
|
/*
 * Store the PVR special-purpose register, as read on the executing cpu,
 * into the cpu_pvr per-cpu slot belonging to cpu @id.
 */
static void __devinit smp_store_cpu_info(int id)
{
	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
}
|
|
|
|
|
|
|
|
/*
 * Early SMP setup run on the boot cpu: record the boot cpu's info, mark it
 * as called in, allocate the sibling/core maps for every possible cpu and
 * seed them with the boot cpu, then call the platform probe hook if any.
 *
 * @max_cpus is passed by value; the assignments at the end of the function
 * are therefore not visible to the caller.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int cpu;

	DBG("smp_prepare_cpus\n");

	/*
	 * setup_cpu may need to be called on the boot cpu.  We haven't
	 * spun any cpus up, but let's be paranoid.
	 */
	BUG_ON(boot_cpuid != smp_processor_id());

	/* Fixup boot cpu */
	smp_store_cpu_info(boot_cpuid);
	cpu_callin_map[boot_cpuid] = 1;

	/*
	 * NOTE(review): the allocation results are not checked here --
	 * confirm whether failure is possible in the supported configs.
	 */
	for_each_possible_cpu(cpu) {
		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
	}

	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));

	if (!smp_ops)
		max_cpus = 1;
	else if (smp_ops->probe)
		max_cpus = smp_ops->probe();
	else
		max_cpus = NR_CPUS;
}
|
|
|
|
|
|
|
|
/*
 * Record the currently running task as the boot cpu's task: in the paca
 * on 64-bit, and in current_set[] as a thread_info pointer.  Must be
 * called on the boot cpu (enforced with BUG_ON).
 */
void __devinit smp_prepare_boot_cpu(void)
{
	BUG_ON(smp_processor_id() != boot_cpuid);

#ifdef CONFIG_PPC64
	paca[boot_cpuid].__current = current;
#endif
	current_set[boot_cpuid] = task_thread_info(current);
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
/* State of each CPU during hotplug phases */
|
2011-04-01 06:23:37 +08:00
|
|
|
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
int generic_cpu_disable(void)
|
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
|
|
|
|
if (cpu == boot_cpuid)
|
|
|
|
return -EBUSY;
|
|
|
|
|
2009-09-24 23:34:48 +08:00
|
|
|
set_cpu_online(cpu, false);
|
2005-11-10 10:37:51 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
2005-11-11 18:15:21 +08:00
|
|
|
vdso_data->processorCount--;
|
2005-11-10 11:26:12 +08:00
|
|
|
#endif
|
2011-02-11 10:05:17 +08:00
|
|
|
migrate_irqs();
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void generic_cpu_die(unsigned int cpu)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < 100; i++) {
|
2005-05-01 23:58:47 +08:00
|
|
|
smp_rmb();
|
2005-04-17 06:20:36 +08:00
|
|
|
if (per_cpu(cpu_state, cpu) == CPU_DEAD)
|
|
|
|
return;
|
|
|
|
msleep(100);
|
|
|
|
}
|
|
|
|
printk(KERN_ERR "CPU%d didn't die...\n", cpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
void generic_mach_cpu_die(void)
|
|
|
|
{
|
|
|
|
unsigned int cpu;
|
|
|
|
|
|
|
|
local_irq_disable();
|
2011-02-10 15:46:50 +08:00
|
|
|
idle_task_exit();
|
2005-04-17 06:20:36 +08:00
|
|
|
cpu = smp_processor_id();
|
|
|
|
printk(KERN_DEBUG "CPU%d offline\n", cpu);
|
|
|
|
__get_cpu_var(cpu_state) = CPU_DEAD;
|
2005-05-01 23:58:47 +08:00
|
|
|
smp_wmb();
|
2005-04-17 06:20:36 +08:00
|
|
|
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
|
|
|
|
cpu_relax();
|
|
|
|
}
|
2011-04-01 06:23:37 +08:00
|
|
|
|
|
|
|
void generic_set_cpu_dead(unsigned int cpu)
|
|
|
|
{
|
|
|
|
per_cpu(cpu_state, cpu) = CPU_DEAD;
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
2011-03-08 11:40:04 +08:00
|
|
|
struct create_idle {
|
|
|
|
struct work_struct work;
|
|
|
|
struct task_struct *idle;
|
|
|
|
struct completion done;
|
|
|
|
int cpu;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Work handler: fork the idle task for the cpu named in the enclosing
 * struct create_idle, store the result there and signal completion.
 */
static void __cpuinit do_fork_idle(struct work_struct *work)
{
	struct create_idle *req = container_of(work, struct create_idle, work);

	req->idle = fork_idle(req->cpu);
	complete(&req->done);
}
|
|
|
|
|
|
|
|
/*
 * Make sure @cpu has an idle task and point the per-cpu bookkeeping
 * (paca on 64-bit, thread_info->cpu, current_set[]) at it.
 *
 * An idle task returned by get_idle_for_cpu() is re-initialised with
 * init_idle(); otherwise a new one is forked from a workqueue.
 *
 * Returns 0 on success or the negative errno carried by the failed fork.
 */
static int __cpuinit create_idle(unsigned int cpu)
{
	struct thread_info *ti;
	struct create_idle c_idle = {
		.cpu	= cpu,
		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
	};
	INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);

	c_idle.idle = get_idle_for_cpu(cpu);

	/* We can't use kernel_thread since we must avoid to
	 * reschedule the child. We use a workqueue because
	 * we want to fork from a kernel thread, not whatever
	 * userspace process happens to be trying to online us.
	 */
	if (!c_idle.idle) {
		schedule_work(&c_idle.work);
		wait_for_completion(&c_idle.done);
	} else
		init_idle(c_idle.idle, cpu);

	/*
	 * The work item has either completed or was never queued; release
	 * the on-stack object set up by INIT_WORK_ONSTACK() before any
	 * return path leaves this frame.
	 */
	destroy_work_on_stack(&c_idle.work);

	if (IS_ERR(c_idle.idle)) {
		pr_err("Failed fork for CPU %u: %li\n", cpu, PTR_ERR(c_idle.idle));
		return PTR_ERR(c_idle.idle);
	}
	ti = task_thread_info(c_idle.idle);

#ifdef CONFIG_PPC64
	paca[cpu].__current = c_idle.idle;
	paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
	ti->cpu = cpu;
	current_set[cpu] = ti;

	return 0;
}
|
|
|
|
|
2007-01-11 15:15:34 +08:00
|
|
|
/*
 * Bring secondary @cpu up: create its idle task, kick it through the
 * platform smp_ops, wait for it to call in, hand over the timebase if the
 * platform provides a hook, and finally wait for it to mark itself online.
 *
 * Returns 0 on success, -EINVAL when there are no smp_ops or the platform
 * reports the cpu as not bootable, -ENOENT when the cpu never calls in, or
 * the error returned by create_idle() / kick_cpu().
 */
int __cpuinit __cpu_up(unsigned int cpu)
{
	int rc, timeout;

	if (!smp_ops)
		return -EINVAL;
	if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))
		return -EINVAL;

	/* Make sure we have an idle thread */
	rc = create_idle(cpu);
	if (rc)
		return rc;

	secondary_ti = current_set[cpu];

	/* Make sure callin-map entry is 0 (can be leftover from a CPU
	 * hotplug)
	 */
	cpu_callin_map[cpu] = 0;

	/* The information for processor bringup must
	 * be written out to main store before we release
	 * the processor.
	 */
	smp_mb();

	/* wake up cpus */
	DBG("smp: kicking cpu %d\n", cpu);
	rc = smp_ops->kick_cpu(cpu);
	if (rc) {
		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
		return rc;
	}

	/*
	 * wait to see if the cpu made a callin (is actually up).
	 * use this value that I found through experimentation.
	 * -- Cort
	 *
	 * Before SYSTEM_RUNNING: 50000 polls, 100us apart.
	 */
	if (system_state < SYSTEM_RUNNING)
		for (timeout = 50000; timeout && !cpu_callin_map[cpu]; timeout--)
			udelay(100);
#ifdef CONFIG_HOTPLUG_CPU
	else
		/*
		 * CPUs can take much longer to come up in the
		 * hotplug case.  Wait five seconds.
		 */
		for (timeout = 5000; timeout && !cpu_callin_map[cpu]; timeout--)
			msleep(1);
#endif

	if (!cpu_callin_map[cpu]) {
		printk(KERN_ERR "Processor %u is stuck.\n", cpu);
		return -ENOENT;
	}

	DBG("Processor %u found.\n", cpu);

	if (smp_ops->give_timebase)
		smp_ops->give_timebase();

	/* Wait until cpu puts itself in the online map */
	while (!cpu_online(cpu))
		cpu_relax();

	return 0;
}
|
|
|
|
|
2008-07-27 13:24:54 +08:00
|
|
|
/* Return the value of the reg property corresponding to the given
|
|
|
|
* logical cpu.
|
|
|
|
*/
|
|
|
|
int cpu_to_core_id(int cpu)
|
|
|
|
{
|
|
|
|
struct device_node *np;
|
|
|
|
const int *reg;
|
|
|
|
int id = -1;
|
|
|
|
|
|
|
|
np = of_get_cpu_node(cpu, NULL);
|
|
|
|
if (!np)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
reg = of_get_property(np, "reg", NULL);
|
|
|
|
if (!reg)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
id = *reg;
|
|
|
|
out:
|
|
|
|
of_node_put(np);
|
|
|
|
return id;
|
|
|
|
}
|
|
|
|
|
2010-10-06 16:36:59 +08:00
|
|
|
/* Helper routines for cpu to core mapping */
|
|
|
|
int cpu_core_index_of_thread(int cpu)
|
|
|
|
{
|
|
|
|
return cpu >> threads_shift;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);
|
|
|
|
|
|
|
|
int cpu_first_thread_of_core(int core)
|
|
|
|
{
|
|
|
|
return core << threads_shift;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
|
|
|
|
|
2011-04-28 13:07:23 +08:00
|
|
|
/* Must be called when no change can occur to cpu_present_mask,
|
2008-07-27 13:24:53 +08:00
|
|
|
* i.e. during cpu online or offline.
|
|
|
|
*/
|
|
|
|
static struct device_node *cpu_to_l2cache(int cpu)
|
|
|
|
{
|
|
|
|
struct device_node *np;
|
2008-12-11 04:16:07 +08:00
|
|
|
struct device_node *cache;
|
2008-07-27 13:24:53 +08:00
|
|
|
|
|
|
|
if (!cpu_present(cpu))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
np = of_get_cpu_node(cpu, NULL);
|
|
|
|
if (np == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
2008-12-11 04:16:07 +08:00
|
|
|
cache = of_find_next_cache_node(np);
|
|
|
|
|
2008-07-27 13:24:53 +08:00
|
|
|
of_node_put(np);
|
|
|
|
|
2008-12-11 04:16:07 +08:00
|
|
|
return cache;
|
2008-07-27 13:24:53 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Activate a secondary processor. */
|
2011-02-10 15:45:24 +08:00
|
|
|
void __devinit start_secondary(void *unused)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
unsigned int cpu = smp_processor_id();
|
2008-07-27 13:24:53 +08:00
|
|
|
struct device_node *l2_cache;
|
2008-07-27 13:24:52 +08:00
|
|
|
int i, base;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
atomic_inc(&init_mm.mm_count);
|
|
|
|
current->active_mm = &init_mm;
|
|
|
|
|
|
|
|
smp_store_cpu_info(cpu);
|
2005-11-05 07:33:55 +08:00
|
|
|
set_dec(tb_ticks_per_jiffy);
|
2005-11-10 07:45:30 +08:00
|
|
|
preempt_disable();
|
2005-04-17 06:20:36 +08:00
|
|
|
cpu_callin_map[cpu] = 1;
|
|
|
|
|
2009-09-09 01:38:52 +08:00
|
|
|
if (smp_ops->setup_cpu)
|
|
|
|
smp_ops->setup_cpu(cpu);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (smp_ops->take_timebase)
|
|
|
|
smp_ops->take_timebase();
|
|
|
|
|
2007-09-21 11:26:03 +08:00
|
|
|
secondary_cpu_time_init();
|
|
|
|
|
2011-03-08 11:49:33 +08:00
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
if (system_state == SYSTEM_RUNNING)
|
|
|
|
vdso_data->processorCount++;
|
|
|
|
#endif
|
2008-06-26 17:22:13 +08:00
|
|
|
ipi_call_lock();
|
2008-09-07 22:57:22 +08:00
|
|
|
notify_cpu_starting(cpu);
|
2009-09-24 23:34:48 +08:00
|
|
|
set_cpu_online(cpu, true);
|
2008-07-27 13:24:52 +08:00
|
|
|
/* Update sibling maps */
|
2010-10-06 16:36:59 +08:00
|
|
|
base = cpu_first_thread_sibling(cpu);
|
2008-07-27 13:24:52 +08:00
|
|
|
for (i = 0; i < threads_per_core; i++) {
|
|
|
|
if (cpu_is_offline(base + i))
|
|
|
|
continue;
|
2010-04-26 23:32:41 +08:00
|
|
|
cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
|
|
|
|
cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
|
2008-07-27 13:24:53 +08:00
|
|
|
|
|
|
|
/* cpu_core_map should be a superset of
|
|
|
|
* cpu_sibling_map even if we don't have cache
|
|
|
|
* information, so update the former here, too.
|
|
|
|
*/
|
2010-04-26 23:32:41 +08:00
|
|
|
cpumask_set_cpu(cpu, cpu_core_mask(base + i));
|
|
|
|
cpumask_set_cpu(base + i, cpu_core_mask(cpu));
|
2008-07-27 13:24:52 +08:00
|
|
|
}
|
2008-07-27 13:24:53 +08:00
|
|
|
l2_cache = cpu_to_l2cache(cpu);
|
|
|
|
for_each_online_cpu(i) {
|
|
|
|
struct device_node *np = cpu_to_l2cache(i);
|
|
|
|
if (!np)
|
|
|
|
continue;
|
|
|
|
if (np == l2_cache) {
|
2010-04-26 23:32:41 +08:00
|
|
|
cpumask_set_cpu(cpu, cpu_core_mask(i));
|
|
|
|
cpumask_set_cpu(i, cpu_core_mask(cpu));
|
2008-07-27 13:24:53 +08:00
|
|
|
}
|
|
|
|
of_node_put(np);
|
|
|
|
}
|
|
|
|
of_node_put(l2_cache);
|
2008-06-26 17:22:13 +08:00
|
|
|
ipi_call_unlock();
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
local_irq_enable();
|
|
|
|
|
|
|
|
cpu_idle();
|
2011-02-10 15:45:24 +08:00
|
|
|
|
|
|
|
BUG();
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Profiling-timer hook.  This implementation ignores @multiplier and
 * always reports success (0).
 */
int setup_profiling_timer(unsigned int multiplier)
{
	(void)multiplier;

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Final SMP bring-up step: run the platform setup_cpu() hook for the boot
 * cpu while pinned to it, call the optional bringup_done() hook and dump
 * the NUMA cpu topology.  @max_cpus is not used.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
	cpumask_var_t saved_mask;

	/* We want the setup_cpu() here to be called from the boot cpu, but
	 * our init thread may have been "borrowed" by another CPU in the
	 * meantime, so we pin ourselves down to the boot cpu for a short
	 * while.
	 *
	 * NOTE(review): the alloc_cpumask_var() result is not checked before
	 * saved_mask is used -- confirm whether it can fail in the supported
	 * configurations.
	 */
	alloc_cpumask_var(&saved_mask, GFP_NOWAIT);
	cpumask_copy(saved_mask, tsk_cpus_allowed(current));
	set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));

	if (smp_ops && smp_ops->setup_cpu)
		smp_ops->setup_cpu(boot_cpuid);

	/* restore the affinity we had on entry */
	set_cpus_allowed_ptr(current, saved_mask);
	free_cpumask_var(saved_mask);

	if (smp_ops && smp_ops->bringup_done)
		smp_ops->bringup_done();

	dump_numa_cpu_topology();
}
|
|
|
|
|
2010-08-11 04:02:05 +08:00
|
|
|
int arch_sd_sibling_asym_packing(void)
|
|
|
|
{
|
|
|
|
if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
|
|
|
|
printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
|
|
|
|
return SD_ASYM_PACKING;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
|
int __cpu_disable(void)
|
|
|
|
{
|
2008-07-27 13:24:53 +08:00
|
|
|
struct device_node *l2_cache;
|
2008-07-27 13:24:52 +08:00
|
|
|
int cpu = smp_processor_id();
|
|
|
|
int base, i;
|
|
|
|
int err;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-07-27 13:24:52 +08:00
|
|
|
if (!smp_ops->cpu_disable)
|
|
|
|
return -ENOSYS;
|
|
|
|
|
|
|
|
err = smp_ops->cpu_disable();
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* Update sibling maps */
|
2010-10-06 16:36:59 +08:00
|
|
|
base = cpu_first_thread_sibling(cpu);
|
2008-07-27 13:24:52 +08:00
|
|
|
for (i = 0; i < threads_per_core; i++) {
|
2010-04-26 23:32:41 +08:00
|
|
|
cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
|
|
|
|
cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
|
|
|
|
cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
|
|
|
|
cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
|
2008-07-27 13:24:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
l2_cache = cpu_to_l2cache(cpu);
|
|
|
|
for_each_present_cpu(i) {
|
|
|
|
struct device_node *np = cpu_to_l2cache(i);
|
|
|
|
if (!np)
|
|
|
|
continue;
|
|
|
|
if (np == l2_cache) {
|
2010-04-26 23:32:41 +08:00
|
|
|
cpumask_clear_cpu(cpu, cpu_core_mask(i));
|
|
|
|
cpumask_clear_cpu(i, cpu_core_mask(cpu));
|
2008-07-27 13:24:53 +08:00
|
|
|
}
|
|
|
|
of_node_put(np);
|
2008-07-27 13:24:52 +08:00
|
|
|
}
|
2008-07-27 13:24:53 +08:00
|
|
|
of_node_put(l2_cache);
|
|
|
|
|
2008-07-27 13:24:52 +08:00
|
|
|
|
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void __cpu_die(unsigned int cpu)
|
|
|
|
{
|
|
|
|
if (smp_ops->cpu_die)
|
|
|
|
smp_ops->cpu_die(cpu);
|
|
|
|
}
|
2010-01-14 17:52:44 +08:00
|
|
|
|
|
|
|
static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex);
|
|
|
|
|
|
|
|
void cpu_hotplug_driver_lock()
|
|
|
|
{
|
|
|
|
mutex_lock(&powerpc_cpu_hotplug_driver_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
void cpu_hotplug_driver_unlock()
|
|
|
|
{
|
|
|
|
mutex_unlock(&powerpc_cpu_hotplug_driver_mutex);
|
|
|
|
}
|
2010-05-19 10:56:29 +08:00
|
|
|
|
|
|
|
void cpu_die(void)
|
|
|
|
{
|
|
|
|
if (ppc_md.cpu_die)
|
|
|
|
ppc_md.cpu_die();
|
2011-02-10 15:45:24 +08:00
|
|
|
|
|
|
|
/* If we return, we re-enter start_secondary */
|
|
|
|
start_secondary_resume();
|
2010-05-19 10:56:29 +08:00
|
|
|
}
|
2011-02-10 15:45:24 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|