2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* This file contains the 64-bit "server" PowerPC variant
|
|
|
|
* of the low level exception handling including exception
|
|
|
|
* vectors, exception return, part of the slb and stab
|
|
|
|
* handling and other fixed offset specific things.
|
|
|
|
*
|
|
|
|
* This file is meant to be #included from head_64.S due to
|
2011-03-31 09:57:33 +08:00
|
|
|
* position dependent assembly.
|
2009-06-03 05:17:38 +08:00
|
|
|
*
|
|
|
|
* Most of this originates from head_64.S and thus has the same
|
|
|
|
* copyright history.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupt handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appears to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
#include <asm/hw_irq.h>
|
2009-07-15 04:52:52 +08:00
|
|
|
#include <asm/exception-64s.h>
|
2010-11-18 23:06:17 +08:00
|
|
|
#include <asm/ptrace.h>
|
2014-12-10 02:56:52 +08:00
|
|
|
#include <asm/cpuidle.h>
|
2009-07-15 04:52:52 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* We lay out physical memory as follows:
|
|
|
|
* 0x0000 - 0x00ff : Secondary processor spin code
|
2012-11-02 14:21:43 +08:00
|
|
|
* 0x0100 - 0x17ff : pSeries Interrupt prologs
|
|
|
|
* 0x1800 - 0x3fff : interrupt support, common interrupt prologs
|
|
|
|
* 0x4000 - 0x5fff : pSeries interrupts with IR=1,DR=1
|
|
|
|
* 0x6000 - 0x6fff : more interrupt support including for IR=1,DR=1
|
2009-06-03 05:17:38 +08:00
|
|
|
* 0x7000 - 0x7fff : FWNMI data area
|
2012-11-02 14:21:43 +08:00
|
|
|
* 0x8000 - 0x8fff : Initial (CPU0) segment table
|
|
|
|
* 0x9000 - : Early init and support code
|
2009-06-03 05:17:38 +08:00
|
|
|
*/
|
2012-11-02 14:16:01 +08:00
|
|
|
/* Syscall routine is used twice, in reloc-off and reloc-on paths */
|
|
|
|
/*
 * SYSCALL_PSERIES_1: first stage of the system call entry sequence.
 *
 * Inside the CPU_FTR_REAL_LE feature section, r0 is compared with
 * 0x1ebe and, on a match, control goes forward to local label 1
 * (presumably the one emitted by SYSCALL_PSERIES_3, the fast LE/BE
 * switch -- confirm at the expansion sites).
 *
 * Otherwise: the incoming r13 is copied to r9, r13 is reloaded via
 * GET_PACA, and SRR0 is read into r11.  The macro ends by defining
 * local label 0.
 */
#define SYSCALL_PSERIES_1 \
|
|
|
|
BEGIN_FTR_SECTION \
|
|
|
|
cmpdi r0,0x1ebe ; \
|
|
|
|
beq- 1f ; \
|
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
|
|
|
|
mr r9,r13 ; \
|
|
|
|
GET_PACA(r13) ; \
|
|
|
|
mfspr r11,SPRN_SRR0 ; \
|
|
|
|
0:
|
|
|
|
|
|
|
|
/*
 * SYSCALL_PSERIES_2_RFID: enter system_call_entry by means of rfid.
 *
 * r12 receives the current SRR1.  SRR0 is then overwritten with the
 * address produced by LOAD_HANDLER(r10, system_call_entry) on top of
 * PACAKBASE(r13), and SRR1 with the value loaded from PACAKMSR(r13),
 * before rfid is executed.  r10 is used as scratch throughout.
 * Expects r13 to point at the PACA (it is used as the base register
 * for both loads).
 */
#define SYSCALL_PSERIES_2_RFID \
|
|
|
|
mfspr r12,SPRN_SRR1 ; \
|
|
|
|
ld r10,PACAKBASE(r13) ; \
|
|
|
|
LOAD_HANDLER(r10, system_call_entry) ; \
|
|
|
|
mtspr SPRN_SRR0,r10 ; \
|
|
|
|
ld r10,PACAKMSR(r13) ; \
|
|
|
|
mtspr SPRN_SRR1,r10 ; \
|
|
|
|
rfid ; \
|
|
|
|
b . ; /* prevent speculative execution */
|
|
|
|
|
|
|
|
/*
 * SYSCALL_PSERIES_3: out-of-line tail that defines local label 1.
 *
 * Reads SRR1 into r12, flips the MSR_LE bit with xori, writes the
 * result back to SRR1 and executes rfid, so the interrupted context
 * resumes with the opposite endianness setting.  r12 is clobbered.
 */
#define SYSCALL_PSERIES_3 \
|
|
|
|
/* Fast LE/BE switch system call */ \
|
|
|
|
1: mfspr r12,SPRN_SRR1 ; \
|
|
|
|
xori r12,r12,MSR_LE ; \
|
|
|
|
mtspr SPRN_SRR1,r12 ; \
|
|
|
|
rfid ; /* return to userspace */ \
|
|
|
|
b . ; /* prevent speculative execution */
|
|
|
|
|
2012-11-02 14:21:28 +08:00
|
|
|
#if defined(CONFIG_RELOCATABLE)
|
|
|
|
/*
|
2015-05-26 13:46:54 +08:00
|
|
|
* We can't branch directly so we do it via the CTR which
|
|
|
|
* is volatile across system calls.
|
2012-11-02 14:21:28 +08:00
|
|
|
*/
|
|
|
|
/*
 * SYSCALL_PSERIES_2_DIRECT (CONFIG_RELOCATABLE variant).
 *
 * Register use, in order:
 *   r10 <- LR (mflr)
 *   r12 <- address built by LOAD_HANDLER(r12, system_call_entry) on
 *          top of PACAKBASE(r13); moved to CTR
 *   r12 <- SRR1
 *   r13 <- MSR_RI, used only as the mtmsrd source operand, then
 *          reloaded with GET_PACA
 * and finally bctr branches to the address placed in CTR.
 */
#define SYSCALL_PSERIES_2_DIRECT \
|
|
|
|
mflr r10 ; \
|
|
|
|
ld r12,PACAKBASE(r13) ; \
|
2015-05-26 13:46:54 +08:00
|
|
|
LOAD_HANDLER(r12, system_call_entry) ; \
|
2013-02-27 18:45:52 +08:00
|
|
|
mtctr r12 ; \
|
2012-11-02 14:21:28 +08:00
|
|
|
mfspr r12,SPRN_SRR1 ; \
|
|
|
|
/* Re-use of r13... No spare regs to do this */ \
|
|
|
|
li r13,MSR_RI ; \
|
|
|
|
mtmsrd r13,1 ; \
|
|
|
|
GET_PACA(r13) ; /* get r13 back */ \
|
2013-02-27 18:45:52 +08:00
|
|
|
bctr ;
|
2012-11-02 14:21:28 +08:00
|
|
|
#else
|
|
|
|
/* We can branch directly */
|
|
|
|
/*
 * SYSCALL_PSERIES_2_DIRECT (non-relocatable variant).
 *
 * r12 <- SRR1, r10 <- MSR_RI as the mtmsrd source operand, then a
 * plain relative branch to system_call_common.  r13 is left untouched
 * here because r10 is available as scratch.
 */
#define SYSCALL_PSERIES_2_DIRECT \
|
|
|
|
mfspr r12,SPRN_SRR1 ; \
|
|
|
|
li r10,MSR_RI ; \
|
|
|
|
mtmsrd r10,1 ; /* Set RI (EE=0) */ \
|
2015-05-26 13:46:55 +08:00
|
|
|
b system_call_common ;
|
2012-11-02 14:21:28 +08:00
|
|
|
#endif
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the start of the interrupt handlers for pSeries.
|
|
|
|
* This code runs with relocation off.
|
|
|
|
* Code from here to __end_interrupts gets copied down to real
|
|
|
|
* address 0x100 when we are running a relocatable kernel.
|
|
|
|
* Therefore any relative branches in this section must only
|
|
|
|
* branch to labels in this section.
|
|
|
|
*/
|
|
|
|
/* Move the location counter to offset 0x100 and export the label that
 * marks the first byte of the interrupt handler area.
 */
. = 0x100
|
|
|
|
.globl __start_interrupts
|
|
|
|
__start_interrupts:
|
|
|
|
|
2011-01-24 15:42:41 +08:00
|
|
|
/*
 * system_reset_pSeries: first handler after __start_interrupts.
 *
 * r13 is handed to SET_SCRATCH0 first (the same call in
 * machine_check_pSeries_1 is annotated "save r13"), which frees r13
 * for use as a temporary.  When CONFIG_PPC_P7_NAP is set, a feature
 * section then reads SRR1 into r13 to decide whether this is a wakeup
 * from a powersave state.
 */
.globl system_reset_pSeries;
|
|
|
|
system_reset_pSeries:
|
2012-12-07 05:51:04 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
2011-01-24 15:42:41 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
#ifdef CONFIG_PPC_P7_NAP
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
/* Running native on arch 2.06 or later, check if we are
|
2014-12-10 02:56:53 +08:00
|
|
|
* waking up from nap/sleep/winkle.
|
2011-01-24 15:42:41 +08:00
|
|
|
*/
|
|
|
|
mfspr r13,SPRN_SRR1
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
|
|
|
/* Rotate SRR1 (in r13) left by 16 and keep only the two low-order
 * bits; the record form also sets cr0 from the result.
 */
rlwinm. r13,r13,47-31,30,31
|
|
|
|
/* Field is zero: skip the wakeup handling and go to label 9 */
beq 9f
|
|
|
|
|
|
|
2014-12-10 02:56:52 +08:00
|
|
|
/* Keep "field vs. 2" in cr3; it is tested again further down */
cmpwi cr3,r13,2
|
|
|
|
|
|
|
2014-12-10 02:56:53 +08:00
|
|
|
/*
|
|
|
|
* Check if last bit of HSPRG0 is set. This indicates whether we are
|
|
|
|
* waking up from winkle.
|
|
|
|
*/
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
|
|
|
/* r13 <- value returned by GET_PACA (presumably read from HSPRG0,
 * given the write-back to SPRN_HSPRG0 below -- confirm in the macro).
 */
GET_PACA(r13)
|
2014-12-10 02:56:53 +08:00
|
|
|
/* r5 <- least significant bit of r13 (all other bits cleared) */
clrldi r5,r13,63
|
|
|
|
/* Strip that bit from r13 so it can be used as a base address */
clrrdi r13,r13,1
|
|
|
|
/* cr4 records whether the stripped bit was set */
cmpwi cr4,r5,1
|
|
|
|
/* Write the cleaned value back to HSPRG0 */
mtspr SPRN_HSPRG0,r13
|
|
|
|
|
|
|
2014-12-10 02:56:52 +08:00
|
|
|
/* Dispatch on the idle state byte stored in the PACA */
lbz r0,PACA_THREAD_IDLE_STATE(r13)
|
|
|
|
cmpwi cr2,r0,PNV_THREAD_NAP
|
|
|
|
bgt cr2,8f /* Either sleep or Winkle */
|
|
|
|
|
|
|
|
|
|
|
/* Waking up from nap should not cause hypervisor state loss */
|
|
|
|
/* cr3 "greater than" means the SRR1 wake field was above 2: spin here */
bgt cr3,.
|
|
|
|
|
|
|
|
|
|
|
/* Waking up from nap */
|
|
|
|
li r0,PNV_THREAD_RUNNING
|
|
|
|
stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
|
|
|
|
2013-10-08 00:47:53 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
2012-02-03 08:54:17 +08:00
|
|
|
/* Publish KVM_HWTHREAD_IN_KERNEL in this thread's hwthread_state byte */
li r0,KVM_HWTHREAD_IN_KERNEL
|
|
|
|
stb r0,HSTATE_HWTHREAD_STATE(r13)
|
|
|
|
/* Order setting hwthread_state vs. testing hwthread_req */
|
|
|
|
sync
|
|
|
|
/* hwthread_req == 0: nothing requested, continue at local label 1 */
lbz r0,HSTATE_HWTHREAD_REQ(r13)
|
|
|
|
cmpwi r0,0
|
|
|
|
beq 1f
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
the none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
|
|
|
/* Reached only when HSTATE_HWTHREAD_REQ was non-zero */
b kvm_start_guest
|
|
|
|
1:
|
|
|
|
#endif
|
|
|
|
|
powerpc/powernv: Return to cpu offline loop when finished in KVM guest
When a secondary hardware thread has finished running a KVM guest, we
currently put that thread into nap mode using a nap instruction in
the KVM code. This changes the code so that instead of doing a nap
instruction directly, we instead cause the call to power7_nap() that
put the thread into nap mode to return. The reason for doing this is
to avoid the KVM code having to know what low-power mode to
put the thread into.
In the case of a secondary thread used to run a KVM guest, the thread
will be offline from the point of view of the host kernel, and the
relevant power7_nap() call is the one in pnv_smp_cpu_disable().
In this case we don't want to clear pending IPIs in the offline loop
in that function, since that might cause us to miss the wakeup for
the next time the thread needs to run a guest. To tell whether or
not to clear the interrupt, we use the SRR1 value returned from
power7_nap(), and check if it indicates an external interrupt. We
arrange that the return from power7_nap() when we have finished running
a guest returns 0, so pending interrupts don't get flushed in that
case.
Note that it is important that a secondary thread that has finished
executing in the guest, or that didn't have a guest to run, should
not return to power7_nap's caller while the kvm_hstate.hwthread_req
flag in the PACA is non-zero, because the return from power7_nap
will reenable the MMU, and the MMU might still be in guest context.
In this situation we spin at low priority in real mode waiting for
hwthread_req to become zero.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2014-12-03 11:48:40 +08:00
|
|
|
/* Return SRR1 from power7_nap() */
|
|
|
|
mfspr r3,SPRN_SRR1
|
2014-12-10 02:56:52 +08:00
|
|
|
/* cr3 was set by the earlier "cmpwi cr3,r13,2" on the SRR1 wake field:
 * equal selects power7_wakeup_loss, anything else power7_wakeup_noloss.
 */
beq cr3,2f
|
2014-02-04 13:04:35 +08:00
|
|
|
b power7_wakeup_noloss
|
|
|
|
2: b power7_wakeup_loss
|
2014-02-26 08:08:25 +08:00
|
|
|
|
|
|
|
|
|
/* Fast Sleep wakeup on PowerNV */
|
|
|
|
/* Target of "bgt cr2,8f": idle state byte was above PNV_THREAD_NAP */
8: GET_PACA(r13)
|
2014-02-04 13:04:35 +08:00
|
|
|
b power7_wakeup_tb_loss
|
2014-02-26 08:08:25 +08:00
|
|
|
|
KVM: PPC: Allow book3s_hv guests to use SMT processor modes
This lifts the restriction that book3s_hv guests can only run one
hardware thread per core, and allows them to use up to 4 threads
per core on POWER7. The host still has to run single-threaded.
This capability is advertised to qemu through a new KVM_CAP_PPC_SMT
capability. The return value of the ioctl querying this capability
is the number of vcpus per virtual CPU core (vcore), currently 4.
To use this, the host kernel should be booted with all threads
active, and then all the secondary threads should be offlined.
This will put the secondary threads into nap mode. KVM will then
wake them from nap mode and use them for running guest code (while
they are still offline). To wake the secondary threads, we send
them an IPI using a new xics_wake_cpu() function, implemented in
arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage
we assume that the platform has a XICS interrupt controller and
we are using icp-native.c to drive it. Since the woken thread will
need to acknowledge and clear the IPI, we also export the base
physical address of the XICS registers using kvmppc_set_xics_phys()
for use in the low-level KVM book3s code.
When a vcpu is created, it is assigned to a virtual CPU core.
The vcore number is obtained by dividing the vcpu number by the
number of threads per core in the host. This number is exported
to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes
to run the guest in single-threaded mode, it should make all vcpu
numbers be multiples of the number of threads per core.
We distinguish three states of a vcpu: runnable (i.e., ready to execute
the guest), blocked (that is, idle), and busy in host. We currently
implement a policy that the vcore can run only when all its threads
are runnable or blocked. This way, if a vcpu needs to execute elsewhere
in the kernel or in qemu, it can do so without being starved of CPU
by the other vcpus.
When a vcore starts to run, it executes in the context of one of the
vcpu threads. The other vcpu threads all go to sleep and stay asleep
until something happens requiring the vcpu thread to return to qemu,
or to wake up to run the vcore (this can happen when another vcpu
thread goes from busy in host state to blocked).
It can happen that a vcpu goes from blocked to runnable state (e.g.
because of an interrupt), and the vcore it belongs to is already
running. In that case it can start to run immediately as long as
none of the vcpus in the vcore have started to exit the guest.
We send the next free thread in the vcore an IPI to get it to start
to execute the guest. It synchronizes with the other threads via
the vcore->entry_exit_count field to make sure that it doesn't go
into the guest if the other vcpus are exiting by the time that it
is ready to actually enter the guest.
Note that there is no fixed relationship between the hardware thread
number and the vcpu number. Hardware threads are assigned to vcpus
as they become runnable, so we will always use the lower-numbered
hardware threads in preference to higher-numbered threads if not all
the vcpus in the vcore are runnable, regardless of which vcpus are
runnable.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
|
|
|
/* Target of "beq 9f": the SRR1 wake field was zero, so this is not a
 * powersave wakeup and the regular prolog below is used.
 */
9:
|
powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits
This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to
indicate that we have a usable hypervisor mode, and another to indicate
that the processor conforms to PowerISA version 2.06. We also add
another bit to indicate that the processor conforms to ISA version 2.01
and set that for PPC970 and derivatives.
Some PPC970 chips (specifically those in Apple machines) have a
hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode
is not useful in the sense that there is no way to run any code in
supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1
bits in HID4 are always 0, and we use that as a way of detecting that
hypervisor mode is not useful.
Where we have a feature section in assembly code around code that
only applies on POWER7 in hypervisor mode, we use a construct like
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
The definition of END_FTR_SECTION_IFSET is such that the code will
be enabled (not overwritten with nops) only if all bits in the
provided mask are set.
Note that the CPU feature check in __tlbie() only needs to check the
ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called
if we are running bare-metal, i.e. in hypervisor mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:26:11 +08:00
|
|
|
/* The wakeup check above is kept only when both CPU_FTR_HVMODE and
 * CPU_FTR_ARCH_206 are set.
 */
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
|
2011-01-24 15:42:41 +08:00
|
|
|
#endif /* CONFIG_PPC_P7_NAP */
|
2011-06-29 08:18:26 +08:00
|
|
|
/* Standard prolog for vector 0x100, using the PACA_EXGEN save area and
 * continuing in system_reset_common.
 */
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
|
|
|
|
NOTEST, 0x100)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
. = 0x200
|
2011-06-29 08:18:26 +08:00
|
|
|
machine_check_pSeries_1:
|
|
|
|
/* This is moved out of line as it can be patched by FW, but
|
|
|
|
* some code path might still want to branch into the original
|
|
|
|
* vector
|
|
|
|
*/
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
|
|
|
SET_SCRATCH0(r13) /* save r13 */
|
2013-10-30 22:34:31 +08:00
|
|
|
#ifdef CONFIG_PPC_P7_NAP
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
/* Running native on arch 2.06 or later, check if we are
|
|
|
|
* waking up from nap. We only handle no state loss and
|
|
|
|
* supervisor state loss. We do -not- handle hypervisor
|
|
|
|
* state loss at this time.
|
|
|
|
*/
|
|
|
|
mfspr r13,SPRN_SRR1
|
|
|
|
rlwinm. r13,r13,47-31,30,31
|
2014-03-11 13:26:18 +08:00
|
|
|
OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
|
2013-10-30 22:34:31 +08:00
|
|
|
beq 9f
|
|
|
|
|
2014-03-11 13:26:18 +08:00
|
|
|
mfspr r13,SPRN_SRR1
|
|
|
|
rlwinm. r13,r13,47-31,30,31
|
2013-10-30 22:34:31 +08:00
|
|
|
/* waking up from powersave (nap) state */
|
|
|
|
cmpwi cr1,r13,2
|
|
|
|
/* Total loss of HV state is fatal. let's just stay stuck here */
|
2014-03-11 13:26:18 +08:00
|
|
|
OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
|
2013-10-30 22:34:31 +08:00
|
|
|
bgt cr1,.
|
|
|
|
9:
|
2014-03-11 13:26:18 +08:00
|
|
|
OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
|
2013-10-30 22:34:31 +08:00
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
|
|
|
|
#endif /* CONFIG_PPC_P7_NAP */
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
EXCEPTION_PROLOG_0(PACA_EXMC)
|
powerpc/book3s: handle machine check in Linux host.
Move machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and handover the high level info to OS.
This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without losing context. One thing to note
here is that, if we get another machine check while the ME bit is off, then we
risk a checkstop. Hence we restrict ourselves to saving only the MCE information
and the registers saved in the PACA_EXMC save area before we turn the ME bit on. We use
paca->in_mce flag to differentiate between first entry and nested machine check
entry which helps proper use of emergency stack. We increment paca->in_mce
every time we enter in early machine check handler and decrement it while
leaving. When we enter machine check early handler first time (paca->in_mce ==
0), we are sure nobody is using MC emergency stack and allocate a stack frame
at the start of the emergency stack. During subsequent entry (paca->in_mce >
0), we know that r1 points inside emergency stack and we allocate separate
stack frame accordingly. This prevents us from clobbering MCE information
during nested machine checks.
The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.
This is the code flow:
Machine Check Interrupt
|
V
0x200 vector ME=0, IR=0, DR=0
|
V
+-----------------------------------------------+
|machine_check_pSeries_early: | ME=0, IR=0, DR=0
| Alloc frame on emergency stack |
| Save srr1, srr0, dar and dsisr on stack |
+-----------------------------------------------+
|
(ME=1, IR=0, DR=0, RFID)
|
V
machine_check_handle_early ME=1, IR=0, DR=0
|
V
+-----------------------------------------------+
| machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0
| Things to do: (in next patches) |
| Flush SLB for SLB errors |
| Flush TLB for TLB errors |
| Decode and save MCE info |
+-----------------------------------------------+
|
(Fall through existing exception handler routine.)
|
V
machine_check_pSeries ME=1, IR=0, DR=0
|
(ME=1, IR=1, DR=1, RFID)
|
V
machine_check_common ME=1, IR=1, DR=1
.
.
.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-10-30 22:34:08 +08:00
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
b machine_check_pSeries_early
|
|
|
|
FTR_SECTION_ELSE
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
b machine_check_pSeries_0
|
powerpc/book3s: handle machine check in Linux host.
Move machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and handover the high level info to OS.
This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without losing context. One thing to note
here is that, if we get another machine check while the ME bit is off, then we
risk a checkstop. Hence we restrict ourselves to saving only the MCE information
and the registers saved in the PACA_EXMC save area before we turn the ME bit on. We use
paca->in_mce flag to differentiate between first entry and nested machine check
entry which helps proper use of emergency stack. We increment paca->in_mce
every time we enter in early machine check handler and decrement it while
leaving. When we enter machine check early handler first time (paca->in_mce ==
0), we are sure nobody is using MC emergency stack and allocate a stack frame
at the start of the emergency stack. During subsequent entry (paca->in_mce >
0), we know that r1 points inside emergency stack and we allocate separate
stack frame accordingly. This prevents us from clobbering MCE information
during nested machine checks.
The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.
This is the code flow:
Machine Check Interrupt
|
V
0x200 vector ME=0, IR=0, DR=0
|
V
+-----------------------------------------------+
|machine_check_pSeries_early: | ME=0, IR=0, DR=0
| Alloc frame on emergency stack |
| Save srr1, srr0, dar and dsisr on stack |
+-----------------------------------------------+
|
(ME=1, IR=0, DR=0, RFID)
|
V
machine_check_handle_early ME=1, IR=0, DR=0
|
V
+-----------------------------------------------+
| machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0
| Things to do: (in next patches) |
| Flush SLB for SLB errors |
| Flush TLB for TLB errors |
| Decode and save MCE info |
+-----------------------------------------------+
|
(Fall through existing exception handler routine.)
|
V
machine_check_pSeries ME=1, IR=0, DR=0
|
(ME=1, IR=1, DR=1, RFID)
|
V
machine_check_common ME=1, IR=1, DR=1
.
.
.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-10-30 22:34:08 +08:00
|
|
|
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
 * Data access exception vector, pinned at fixed offset 0x300.
 * After HMT_MEDIUM_PPR_DISCARD, r13 is stashed with SET_SCRATCH0 and the
 * remainder of the entry sequence is produced by EXCEPTION_PROLOG_PSERIES,
 * which is given PACA_EXGEN, data_access_common, EXC_STD, KVMTEST and 0x300.
 * NOTE(review): data_access_common is presumably the common handler the
 * prolog transfers to -- confirm against the macro definition.
 */
. = 0x300
|
|
|
|
.globl data_access_pSeries
|
|
|
|
data_access_pSeries:
|
2012-12-07 05:51:04 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
2011-04-05 11:59:58 +08:00
|
|
|
SET_SCRATCH0(r13)
|
2011-06-29 08:18:26 +08:00
|
|
|
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
|
KVM: PPC: Implement MMIO emulation support for Book3S HV guests
This provides the low-level support for MMIO emulation in Book3S HV
guests. When the guest tries to map a page which is not covered by
any memslot, that page is taken to be an MMIO emulation page. Instead
of inserting a valid HPTE, we insert an HPTE that has the valid bit
clear but another hypervisor software-use bit set, which we call
HPTE_V_ABSENT, to indicate that this is an absent page. An
absent page is treated much like a valid page as far as guest hcalls
(H_ENTER, H_REMOVE, H_READ etc.) are concerned, except of course that
an absent HPTE doesn't need to be invalidated with tlbie since it
was never valid as far as the hardware is concerned.
When the guest accesses a page for which there is an absent HPTE, it
will take a hypervisor data storage interrupt (HDSI) since we now set
the VPM1 bit in the LPCR. Our HDSI handler for HPTE-not-present faults
looks up the hash table and if it finds an absent HPTE mapping the
requested virtual address, will switch to kernel mode and handle the
fault in kvmppc_book3s_hv_page_fault(), which at present just calls
kvmppc_hv_emulate_mmio() to set up the MMIO emulation.
This is based on an earlier patch by Benjamin Herrenschmidt, but since
heavily reworked.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-12-12 20:36:37 +08:00
|
|
|
KVMTEST, 0x300)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
 * Data access SLB exception vector, pinned at fixed offset 0x380.
 * r13 is stashed with SET_SCRATCH0, the two-part prolog runs against the
 * PACA_EXSLB save area, and control then passes to slb_miss_realmode with
 * r3 holding DAR and r12 holding SRR1.
 */
. = 0x380
|
|
|
|
.globl data_access_slb_pSeries
|
|
|
|
data_access_slb_pSeries:
|
2012-12-07 05:51:04 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
2011-04-05 11:59:58 +08:00
|
|
|
SET_SCRATCH0(r13)
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
EXCEPTION_PROLOG_0(PACA_EXSLB)
|
KVM: PPC: Implement MMIO emulation support for Book3S HV guests
This provides the low-level support for MMIO emulation in Book3S HV
guests. When the guest tries to map a page which is not covered by
any memslot, that page is taken to be an MMIO emulation page. Instead
of inserting a valid HPTE, we insert an HPTE that has the valid bit
clear but another hypervisor software-use bit set, which we call
HPTE_V_ABSENT, to indicate that this is an absent page. An
absent page is treated much like a valid page as far as guest hcalls
(H_ENTER, H_REMOVE, H_READ etc.) are concerned, except of course that
an absent HPTE doesn't need to be invalidated with tlbie since it
was never valid as far as the hardware is concerned.
When the guest accesses a page for which there is an absent HPTE, it
will take a hypervisor data storage interrupt (HDSI) since we now set
the VPM1 bit in the LPCR. Our HDSI handler for HPTE-not-present faults
looks up the hash table and if it finds an absent HPTE mapping the
requested virtual address, will switch to kernel mode and handle the
fault in kvmppc_book3s_hv_page_fault(), which at present just calls
kvmppc_hv_emulate_mmio() to set up the MMIO emulation.
This is based on an earlier patch by Benjamin Herrenschmidt, but since
heavily reworked.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-12-12 20:36:37 +08:00
|
|
|
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
|
2009-06-03 05:17:38 +08:00
|
|
|
/* Park r3 in the EXSLB save area so it can be reused to carry DAR */
std r3,PACA_EXSLB+EX_R3(r13)
|
|
|
|
mfspr r3,SPRN_DAR
|
|
|
|
#ifdef __DISABLED__
|
|
|
|
/* Keep that around for when we re-implement dynamic VSIDs */
|
|
|
|
cmpdi r3,0
|
|
|
|
bge slb_miss_user_pseries
|
|
|
|
#endif /* __DISABLED__ */
|
2011-06-29 08:18:26 +08:00
|
|
|
mfspr r12,SPRN_SRR1
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifndef CONFIG_RELOCATABLE
|
2014-02-04 13:04:35 +08:00
|
|
|
/* Non-relocatable build: a plain direct branch is used */
b slb_miss_realmode
|
2009-06-03 05:17:38 +08:00
|
|
|
#else
|
|
|
|
/*
|
2014-02-04 13:04:52 +08:00
|
|
|
* We can't just use a direct branch to slb_miss_realmode
|
2009-06-03 05:17:38 +08:00
|
|
|
* because the distance from here to there depends on where
|
|
|
|
* the kernel ends up being put.
|
|
|
|
*/
|
|
|
|
/* Copy CTR into r11 before CTR is overwritten for the indirect branch */
mfctr r11
|
|
|
|
/*
 * r10 = PACAKBASE(r13), then passed through LOAD_HANDLER.
 * NOTE(review): LOAD_HANDLER presumably combines that base with the
 * offset of slb_miss_realmode -- confirm against the macro definition.
 */
ld r10,PACAKBASE(r13)
|
2014-02-04 13:04:52 +08:00
|
|
|
LOAD_HANDLER(r10, slb_miss_realmode)
|
2009-06-03 05:17:38 +08:00
|
|
|
mtctr r10
|
|
|
|
bctr
|
|
|
|
#endif
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Instruction access exception entry, generated by STD_EXCEPTION_PSERIES.
 * NOTE(review): the two 0x400 arguments are presumably the vector location
 * and the trap number -- confirm against the macro definition.
 */
STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
 * Instruction access SLB exception vector, pinned at fixed offset 0x480.
 * r13 is stashed with SET_SCRATCH0, the two-part prolog runs against the
 * PACA_EXSLB save area (with KVMTEST_PR), and control then passes to
 * slb_miss_realmode with r3 holding SRR0 and r12 holding SRR1.
 */
. = 0x480
|
|
|
|
.globl instruction_access_slb_pSeries
|
|
|
|
instruction_access_slb_pSeries:
|
2012-12-07 05:51:04 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
2011-04-05 11:59:58 +08:00
|
|
|
SET_SCRATCH0(r13)
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
EXCEPTION_PROLOG_0(PACA_EXSLB)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
|
2009-06-03 05:17:38 +08:00
|
|
|
/* Park r3 in the EXSLB save area so it can be reused to carry SRR0 */
std r3,PACA_EXSLB+EX_R3(r13)
|
|
|
|
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
|
|
|
|
#ifdef __DISABLED__
|
|
|
|
/* Keep that around for when we re-implement dynamic VSIDs */
|
|
|
|
cmpdi r3,0
|
|
|
|
bge slb_miss_user_pseries
|
|
|
|
#endif /* __DISABLED__ */
|
2011-06-29 08:18:26 +08:00
|
|
|
mfspr r12,SPRN_SRR1
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifndef CONFIG_RELOCATABLE
|
2014-02-04 13:04:35 +08:00
|
|
|
/* Non-relocatable build: a plain direct branch is used */
b slb_miss_realmode
|
2009-06-03 05:17:38 +08:00
|
|
|
#else
|
|
|
|
/*
 * Relocatable build: reach slb_miss_realmode indirectly through CTR.
 * CTR is first copied into r11 because mtctr below overwrites it.
 */
mfctr r11
|
|
|
|
/*
 * r10 = PACAKBASE(r13), then passed through LOAD_HANDLER.
 * NOTE(review): LOAD_HANDLER presumably combines that base with the
 * offset of slb_miss_realmode -- confirm against the macro definition.
 */
ld r10,PACAKBASE(r13)
|
2014-02-04 13:04:52 +08:00
|
|
|
LOAD_HANDLER(r10, slb_miss_realmode)
|
2009-06-03 05:17:38 +08:00
|
|
|
mtctr r10
|
|
|
|
bctr
|
|
|
|
#endif
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/* We open code these as we can't have a ". = x" (even with
|
|
|
|
* x = ".") within a feature section
|
|
|
|
*/
|
2011-04-05 12:20:31 +08:00
|
|
|
. = 0x500;
|
2011-04-05 12:27:11 +08:00
|
|
|
.globl hardware_interrupt_pSeries;
|
|
|
|
.globl hardware_interrupt_hv;
|
2011-04-05 12:20:31 +08:00
|
|
|
hardware_interrupt_pSeries:
|
2011-04-05 12:27:11 +08:00
|
|
|
hardware_interrupt_hv:
|
powerpc: Fix "attempt to move .org backwards" error
Building a 64-bit powerpc kernel with PR KVM enabled currently gives
this error:
AS arch/powerpc/kernel/head_64.o
arch/powerpc/kernel/exceptions-64s.S: Assembler messages:
arch/powerpc/kernel/exceptions-64s.S:258: Error: attempt to move .org backwards
make[2]: *** [arch/powerpc/kernel/head_64.o] Error 1
This happens because the MASKABLE_EXCEPTION_PSERIES macro turns into
33 instructions, but we only have space for 32 at the decrementer
interrupt vector (from 0x900 to 0x980).
In the code generated by the MASKABLE_EXCEPTION_PSERIES macro, we
currently have two instances of the HMT_MEDIUM macro, which has the
effect of setting the SMT thread priority to medium. One is the
first instruction, and is overwritten by a no-op on processors where
we save the PPR (processor priority register), that is, POWER7 or
later. The other is after we have saved the PPR.
In order to reduce the code at 0x900 by one instruction, we omit the
first HMT_MEDIUM. On processors without SMT this will have no effect
since HMT_MEDIUM is a no-op there. On POWER5 and RS64 machines this
will mean that the first few instructions take a little longer in the
case where a decrementer interrupt occurs when the hardware thread is
running at low SMT priority. On POWER6 and later machines, the
hardware automatically boosts the thread priority when a decrementer
interrupt is taken if the thread priority was below medium, so this
change won't make any difference.
The alternative would be to branch out of line after saving the CFAR.
However, that would incur an extra overhead on all processors, whereas
the approach adopted here only adds overhead on older threaded processors.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-04-26 01:51:40 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
2011-04-05 12:20:31 +08:00
|
|
|
BEGIN_FTR_SECTION
|
2011-06-29 08:18:26 +08:00
|
|
|
_MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt,
|
|
|
|
EXC_HV, SOFTEN_TEST_HV)
|
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
FTR_SECTION_ELSE
|
|
|
|
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt,
|
KVM: PPC: book3s_hv: Add support for PPC970-family processors
This adds support for running KVM guests in supervisor mode on those
PPC970 processors that have a usable hypervisor mode. Unfortunately,
Apple G5 machines have supervisor mode disabled (MSR[HV] is forced to
1), but the YDL PowerStation does have a usable hypervisor mode.
There are several differences between the PPC970 and POWER7 in how
guests are managed. These differences are accommodated using the
CPU_FTR_ARCH_201 (PPC970) and CPU_FTR_ARCH_206 (POWER7) CPU feature
bits. Notably, on PPC970:
* The LPCR, LPID or RMOR registers don't exist, and the functions of
those registers are provided by bits in HID4 and one bit in HID0.
* External interrupts can be directed to the hypervisor, but unlike
POWER7 they are masked by MSR[EE] in non-hypervisor modes and use
SRR0/1 not HSRR0/1.
* There is no virtual RMA (VRMA) mode; the guest must use an RMO
(real mode offset) area.
* The TLB entries are not tagged with the LPID, so it is necessary to
flush the whole TLB on partition switch. Furthermore, when switching
partitions we have to ensure that no other CPU is executing the tlbie
or tlbsync instructions in either the old or the new partition,
otherwise undefined behaviour can occur.
* The PMU has 8 counters (PMC registers) rather than 6.
* The DSCR, PURR, SPURR, AMR, AMOR, UAMOR registers don't exist.
* The SLB has 64 entries rather than 32.
* There is no mediated external interrupt facility, so if we switch to
a guest that has a virtual external interrupt pending but the guest
has MSR[EE] = 0, we have to arrange to have an interrupt pending for
it so that we can get control back once it re-enables interrupts. We
do that by sending ourselves an IPI with smp_send_reschedule after
hard-disabling interrupts.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:40:08 +08:00
|
|
|
EXC_STD, SOFTEN_TEST_HV_201)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
|
powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits
This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to
indicate that we have a usable hypervisor mode, and another to indicate
that the processor conforms to PowerISA version 2.06. We also add
another bit to indicate that the processor conforms to ISA version 2.01
and set that for PPC970 and derivatives.
Some PPC970 chips (specifically those in Apple machines) have a
hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode
is not useful in the sense that there is no way to run any code in
supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1
bits in HID4 are always 0, and we use that as a way of detecting that
hypervisor mode is not useful.
Where we have a feature section in assembly code around code that
only applies on POWER7 in hypervisor mode, we use a construct like
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
The definition of END_FTR_SECTION_IFSET is such that the code will
be enabled (not overwritten with nops) only if all bits in the
provided mask are set.
Note that the CPU feature check in __tlbie() only needs to check the
ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called
if we are running bare-metal, i.e. in hypervisor mode.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:26:11 +08:00
|
|
|
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
|
2011-04-05 12:20:31 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Alignment exception vector. Both numeric arguments are 0x600 here.
 * NOTE(review): presumably (vector location, vector number, handler
 * name stem) -- confirm against the STD_EXCEPTION_PSERIES definition.
 */
STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/*
 * KVM handler stub for vector 0x600; it is passed PACA_EXGEN and EXC_STD.
 * NOTE(review): the _PR suffix suggests this is only live for PR-mode
 * KVM (the blame notes say HV-mode guests take alignment interrupts
 * themselves) -- confirm against the macro definition.
 */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Program check exception vector. Both numeric arguments are 0x700 here.
 * NOTE(review): presumably (vector location, vector number, handler
 * name stem) -- confirm against the STD_EXCEPTION_PSERIES definition.
 */
STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/*
 * KVM handler stub for vector 0x700; it is passed PACA_EXGEN and EXC_STD.
 * NOTE(review): the _PR suffix suggests this is only live for PR-mode
 * KVM (the blame notes say HV-mode guests take program interrupts
 * themselves) -- confirm against the macro definition.
 */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * FP-unavailable exception vector (per the handler name stem). Both
 * numeric arguments are 0x800 here.
 * NOTE(review): presumably (vector location, vector number, handler
 * name stem) -- confirm against the STD_EXCEPTION_PSERIES definition.
 */
STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/*
 * KVM handler stub for vector 0x800; it is passed PACA_EXGEN and EXC_STD.
 * NOTE(review): the _PR suffix suggests this is only live for PR-mode
 * KVM -- confirm against the macro definition.
 */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
|
2011-04-05 12:20:31 +08:00
|
|
|
|
powerpc: Fix "attempt to move .org backwards" error
Building a 64-bit powerpc kernel with PR KVM enabled currently gives
this error:
AS arch/powerpc/kernel/head_64.o
arch/powerpc/kernel/exceptions-64s.S: Assembler messages:
arch/powerpc/kernel/exceptions-64s.S:258: Error: attempt to move .org backwards
make[2]: *** [arch/powerpc/kernel/head_64.o] Error 1
This happens because the MASKABLE_EXCEPTION_PSERIES macro turns into
33 instructions, but we only have space for 32 at the decrementer
interrupt vector (from 0x900 to 0x980).
In the code generated by the MASKABLE_EXCEPTION_PSERIES macro, we
currently have two instances of the HMT_MEDIUM macro, which has the
effect of setting the SMT thread priority to medium. One is the
first instruction, and is overwritten by a no-op on processors where
we save the PPR (processor priority register), that is, POWER7 or
later. The other is after we have saved the PPR.
In order to reduce the code at 0x900 by one instruction, we omit the
first HMT_MEDIUM. On processors without SMT this will have no effect
since HMT_MEDIUM is a no-op there. On POWER5 and RS64 machines this
will mean that the first few instructions take a little longer in the
case where a decrementer interrupt occurs when the hardware thread is
running at low SMT priority. On POWER6 and later machines, the
hardware automatically boosts the thread priority when a decrementer
interrupt is taken if the thread priority was below medium, so this
change won't make any difference.
The alternative would be to branch out of line after saving the CFAR.
However, that would incur an extra overhead on all processors, whereas
the approach adopted here only adds overhead on older threaded processors.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-04-26 01:51:40 +08:00
|
|
|
/*
 * Decrementer vector. The entry is open-coded (explicit origin, .globl
 * and label, then the underscore-prefixed macro) instead of going
 * through the MASKABLE_EXCEPTION_PSERIES wrapper used for the 0xa00
 * vector. Per the blame notes for this hunk, the wrapper expanded to 33
 * instructions while only 32 fit between 0x900 and 0x980, so the leading
 * HMT_MEDIUM was dropped here to save one instruction. Do not add
 * instructions to this entry without re-checking that budget.
 */
. = 0x900
|
|
|
|
.globl decrementer_pSeries
|
|
|
|
decrementer_pSeries:
|
|
|
|
_MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD, SOFTEN_TEST_PR)
|
|
|
|
|
2012-07-26 21:56:11 +08:00
|
|
|
/*
 * Hypervisor decrementer vector, instantiated with the _HV macro variant.
 * Note the second argument (0x982) differs from the first (0x980).
 * NOTE(review): presumably the first is the vector location and the
 * second a vector number tagged to mark the hypervisor flavour --
 * confirm against the STD_EXCEPTION_HV definition.
 */
STD_EXCEPTION_HV(0x980, 0x982, hdecrementer)
|
2011-04-05 12:20:31 +08:00
|
|
|
|
2012-11-15 02:49:46 +08:00
|
|
|
/*
 * Doorbell vector at 0xa00, instantiated with the MASKABLE macro variant.
 * NOTE(review): presumably the MASKABLE variant adds the soft-disable
 * (lazy interrupt masking) check -- confirm against the macro definition.
 */
MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/*
 * KVM handler stub for vector 0xa00; it is passed PACA_EXGEN and EXC_STD.
 * NOTE(review): the _PR suffix suggests this is only live for PR-mode
 * KVM -- confirm against the macro definition.
 */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Vector 0xb00, handler name stem trap_0b. Both numeric arguments are
 * 0xb00 here.
 * NOTE(review): the generic name suggests no architected exception is
 * expected at this vector -- confirm.
 */
STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/*
 * KVM handler stub for vector 0xb00; it is passed PACA_EXGEN and EXC_STD.
 * NOTE(review): the _PR suffix suggests this is only live for PR-mode
 * KVM -- confirm against the macro definition.
 */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
 * System call vector (0xc00). With CONFIG_KVM_BOOK3S_64_HANDLER the
 * entry first saves r9/r10 and the PPR into the PACA and runs KVMTEST;
 * otherwise it only sets the SMT thread priority to medium. Both paths
 * then continue into the SYSCALL_PSERIES_* macro sequence.
 */
. = 0xc00
|
|
|
|
.globl system_call_pSeries
|
|
|
|
system_call_pSeries:
|
2014-11-03 12:46:42 +08:00
|
|
|
/*
|
|
|
|
* If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
|
|
|
|
* that support it) before changing to HMT_MEDIUM. That allows the KVM
|
|
|
|
* code to save that value into the guest state (it is the guest's PPR
|
|
|
|
* value). Otherwise just change to HMT_MEDIUM as userspace has
|
|
|
|
* already saved the PPR.
|
|
|
|
*/
|
2011-06-29 08:18:26 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
|
|
|
/* Stash r13 in scratch slot 0; it is reloaded by GET_SCRATCH0 below. */
SET_SCRATCH0(r13)
|
|
|
|
/* r13 now serves as the base for the PACA_EXGEN accesses that follow. */
GET_PACA(r13)
|
|
|
|
/* Free up r9 by saving it in the EX_R9 slot of the EXGEN area. */
std r9,PACA_EXGEN+EX_R9(r13)
|
2014-11-03 12:46:42 +08:00
|
|
|
/* Read PPR into r9 (gated on CPU_FTR_HAS_PPR) before HMT_MEDIUM alters the priority. */
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
|
|
|
|
HMT_MEDIUM;
|
2011-06-29 08:18:26 +08:00
|
|
|
/* Save r10 too. NOTE(review): presumably KVMTEST needs it as scratch -- confirm. */
std r10,PACA_EXGEN+EX_R10(r13)
|
2014-11-03 12:46:42 +08:00
|
|
|
/* Record the PPR value read above in the EX_PPR slot so the KVM code can pick it up. */
OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
|
2011-06-29 08:18:26 +08:00
|
|
|
/* r9 = CR. NOTE(review): presumably KVMTEST clobbers CR and the KVM path expects it in r9 -- confirm. */
mfcr r9
|
|
|
|
/* NOTE(review): presumably diverts to the 0xc00 KVM_HANDLER below when a guest made the call -- confirm. */
KVMTEST(0xc00)
|
|
|
|
/* Reload r13 from scratch slot 0. */
GET_SCRATCH0(r13)
|
2014-11-03 12:46:42 +08:00
|
|
|
#else
|
|
|
|
/* No KVM handler configured: just set the thread priority to medium. */
HMT_MEDIUM;
|
2011-06-29 08:18:26 +08:00
|
|
|
#endif
|
2012-11-02 14:16:01 +08:00
|
|
|
/*
 * Three-part syscall entry sequence shared by both configurations.
 * NOTE(review): the _RFID suffix on part 2 presumably selects an
 * rfid-based transfer -- confirm against the macro definitions.
 */
SYSCALL_PSERIES_1
|
|
|
|
SYSCALL_PSERIES_2_RFID
|
|
|
|
SYSCALL_PSERIES_3
|
2011-06-29 08:18:26 +08:00
|
|
|
/* KVM handler stub for vector 0xc00; the same vector number is given to KVMTEST above. */
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Single-step exception vector. Both numeric arguments are 0xd00 here.
 * NOTE(review): presumably (vector location, vector number, handler
 * name stem) -- confirm against the STD_EXCEPTION_PSERIES definition.
 */
STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
/*
 * KVM handler stub for vector 0xd00; it is passed PACA_EXGEN and EXC_STD.
 * NOTE(review): the _PR suffix suggests this is only live for PR-mode
 * KVM (the blame notes say HV-mode guests take single-step interrupts
 * themselves) -- confirm against the macro definition.
 */
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
|
2011-04-05 12:27:11 +08:00
|
|
|
|
|
|
|
/* At 0xe??? we have a bunch of hypervisor exceptions; we branch
|
|
|
|
* out of line to handle them
|
|
|
|
*/
|
|
|
|
/*
 * 0xe00 vector: first stage of the hypervisor data storage entry.
 * Only the opening part of the prolog runs here; the rest of the
 * first-level handler lives out of line at h_data_storage_hv. The
 * blame notes below explain why the prolog is split this way.
 */
. = 0xe00
|
2013-08-15 13:22:18 +08:00
|
|
|
hv_data_storage_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
/* Stash r13 in scratch slot 0 before the prolog runs. */
SET_SCRATCH0(r13)
|
|
|
|
/*
 * Must run before the branch: per the blame notes, this part of the
 * prolog captures CFAR, which branching first would corrupt on POWER7
 * and later.
 */
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2011-04-05 12:27:11 +08:00
|
|
|
/* Continue in the out-of-line handler. */
b h_data_storage_hv
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Fixed vector slot at 0xe20 (hypervisor instruction storage, judging by
 * the label and the branch target).  The slot holds only a stub:
 * SET_SCRATCH0 is given r13, EXCEPTION_PROLOG_0 is invoked with PACA_EXGEN,
 * and the remainder of the first-level handler is reached by branching to
 * h_instr_storage_hv.  EXCEPTION_PROLOG_0 sits ahead of the branch so the
 * CFAR can be captured before the branch corrupts it.
 * The next slot begins at 0xe40, so this stub has to fit in 0x20 bytes.
 * NOTE(review): the macro bodies are not visible here; confirm the exact
 * register usage against their definitions.
 */
. = 0xe20
|
2013-08-15 13:22:18 +08:00
|
|
|
hv_instr_storage_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2011-04-05 12:27:11 +08:00
|
|
|
b h_instr_storage_hv
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Fixed vector slot at 0xe40 (hypervisor emulation assist interrupt).
 * Only a stub lives here: SET_SCRATCH0 is given r13, EXCEPTION_PROLOG_0 is
 * invoked with PACA_EXGEN, and the rest of the first-level handler is
 * reached by branching to emulation_assist_hv.  EXCEPTION_PROLOG_0 sits
 * ahead of the branch so the CFAR can be captured before the branch
 * corrupts it.
 * The next slot begins at 0xe60, so this stub has to fit in 0x20 bytes.
 * NOTE(review): the macro bodies are not visible here; confirm the exact
 * register usage against their definitions.
 */
. = 0xe40
|
2013-08-15 13:22:18 +08:00
|
|
|
emulation_assist_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2011-04-05 12:27:11 +08:00
|
|
|
b emulation_assist_hv
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Fixed vector slot at 0xe60 (hypervisor maintenance interrupt on POWER7
 * and later).  Only a stub lives here: SET_SCRATCH0 is given r13,
 * EXCEPTION_PROLOG_0 is invoked with PACA_EXGEN, and control then branches
 * to hmi_exception_early, where handling continues.  EXCEPTION_PROLOG_0
 * sits ahead of the branch so the CFAR can be captured before the branch
 * corrupts it.
 * The next slot begins at 0xe80, so this stub has to fit in 0x20 bytes.
 * NOTE(review): the macro bodies are not visible here; confirm the exact
 * register usage against their definitions.
 */
. = 0xe60
|
2013-08-15 13:22:18 +08:00
|
|
|
hv_exception_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2014-07-29 21:10:01 +08:00
|
|
|
b hmi_exception_early
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
|
2012-11-15 02:49:45 +08:00
|
|
|
/*
 * Fixed vector slot at 0xe80 (hypervisor doorbell, judging by the label
 * and the branch target).  Only a stub lives here: SET_SCRATCH0 is given
 * r13, EXCEPTION_PROLOG_0 is invoked with PACA_EXGEN, and the rest of the
 * first-level handler is reached by branching to h_doorbell_hv.
 * EXCEPTION_PROLOG_0 sits ahead of the branch so the CFAR can be captured
 * before the branch corrupts it.
 * NOTE(review): the macro bodies are not visible here; confirm the exact
 * register usage against their definitions.
 */
. = 0xe80
|
2013-08-15 13:22:18 +08:00
|
|
|
hv_doorbell_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2012-11-15 02:49:45 +08:00
|
|
|
b h_doorbell_hv
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/* We need to deal with the Altivec unavailable exception
|
|
|
|
* here, which is at 0xf20 and thus in the middle of the
|
|
|
|
* prolog code of the Performance Monitor one. A little
|
|
|
|
* trickery is thus necessary.
|
|
|
|
*/
|
|
|
|
/*
 * Fixed vector slot at 0xf00 (performance monitor).  Only a stub lives
 * here: SET_SCRATCH0 is given r13, EXCEPTION_PROLOG_0 is invoked with
 * PACA_EXGEN, and the rest of the first-level handler is reached by
 * branching to performance_monitor_pSeries.  EXCEPTION_PROLOG_0 sits ahead
 * of the branch so the CFAR can be captured before the branch corrupts it.
 * The next slot begins at 0xf20, so this stub has to fit in 0x20 bytes.
 * NOTE(review): the macro bodies are not visible here; confirm the exact
 * register usage against their definitions.
 */
. = 0xf00
|
2013-08-15 13:22:17 +08:00
|
|
|
performance_monitor_pseries_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2009-06-03 05:17:38 +08:00
|
|
|
b performance_monitor_pSeries
|
|
|
|
|
|
|
|
/*
 * Fixed vector slot at 0xf20 (Altivec unavailable).  Only a stub lives
 * here: SET_SCRATCH0 is given r13, EXCEPTION_PROLOG_0 is invoked with
 * PACA_EXGEN, and the rest of the first-level handler is reached by
 * branching to altivec_unavailable_pSeries.  EXCEPTION_PROLOG_0 sits ahead
 * of the branch so the CFAR can be captured before the branch corrupts it.
 * The next slot begins at 0xf40, so this stub has to fit in 0x20 bytes.
 * NOTE(review): the macro bodies are not visible here; confirm the exact
 * register usage against their definitions.
 */
. = 0xf20
|
2013-08-15 13:22:17 +08:00
|
|
|
altivec_unavailable_pseries_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2009-06-03 05:17:38 +08:00
|
|
|
b altivec_unavailable_pSeries
|
|
|
|
|
|
|
|
/*
 * Fixed vector slot at 0xf40 (VSX unavailable, judging by the label and
 * the branch target).  Only a stub lives here: SET_SCRATCH0 is given r13,
 * EXCEPTION_PROLOG_0 is invoked with PACA_EXGEN, and the rest of the
 * first-level handler is reached by branching to vsx_unavailable_pSeries.
 * EXCEPTION_PROLOG_0 sits ahead of the branch so the CFAR can be captured
 * before the branch corrupts it.
 * The next slot begins at 0xf60, so this stub has to fit in 0x20 bytes.
 * NOTE(review): the macro bodies are not visible here; confirm the exact
 * register usage against their definitions.
 */
. = 0xf40
|
2013-08-15 13:22:17 +08:00
|
|
|
vsx_unavailable_pseries_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2009-06-03 05:17:38 +08:00
|
|
|
b vsx_unavailable_pSeries
|
|
|
|
|
2013-02-14 00:21:38 +08:00
|
|
|
/*
 * Fixed vector slot at 0xf60 (facility unavailable, judging by the label
 * and the branch target).  Only a stub lives here: SET_SCRATCH0 is given
 * r13, EXCEPTION_PROLOG_0 is invoked with PACA_EXGEN, and the rest of the
 * first-level handler is reached by branching to
 * facility_unavailable_pSeries.
 * The next slot begins at 0xf80, so this stub has to fit in 0x20 bytes.
 * NOTE(review): presumably EXCEPTION_PROLOG_0 precedes the branch for the
 * same CFAR-preservation reason as the neighbouring vectors; confirm
 * against the macro definition.
 */
. = 0xf60
|
2013-08-15 13:22:17 +08:00
|
|
|
facility_unavailable_trampoline:
|
2013-02-14 00:21:38 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2013-06-25 15:47:56 +08:00
|
|
|
b facility_unavailable_pSeries
|
2013-02-14 00:21:38 +08:00
|
|
|
|
2013-06-25 15:47:57 +08:00
|
|
|
/*
 * Fixed vector slot at 0xf80 (hypervisor facility unavailable, judging by
 * the label and the branch target).  Only a stub lives here: SET_SCRATCH0
 * is given r13, EXCEPTION_PROLOG_0 is invoked with PACA_EXGEN, and the
 * rest of the first-level handler is reached by branching to
 * facility_unavailable_hv.
 * NOTE(review): presumably EXCEPTION_PROLOG_0 precedes the branch for the
 * same CFAR-preservation reason as the neighbouring vectors; confirm
 * against the macro definition.
 */
. = 0xf80
|
2013-08-15 13:22:17 +08:00
|
|
|
hv_facility_unavailable_trampoline:
|
2013-06-25 15:47:57 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
|
|
|
b facility_unavailable_hv
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * Built only when CONFIG_CBE_RAS is defined: STD_EXCEPTION_HV emits the
 * cbe_system_error entry with arguments 0x1200 and 0x1202, and
 * KVM_HANDLER_SKIP emits the matching KVM hook for 0x1202 using the
 * PACA_EXGEN area and the EXC_HV variant.
 * NOTE(review): presumably the first STD_EXCEPTION_HV argument is the
 * vector address and the second is the trap number seen by KVM; confirm
 * against the macro definitions.
 */
#ifdef CONFIG_CBE_RAS
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
|
2011-09-13 12:15:31 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202)
|
2009-06-03 05:17:38 +08:00
|
|
|
#endif /* CONFIG_CBE_RAS */
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * Emits the instruction_breakpoint entry through STD_EXCEPTION_PSERIES
 * with 0x1300 passed for both numeric arguments.
 * NOTE(review): presumably these are the vector address and the trap
 * number; confirm against the macro definition.
 */
STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2012-09-10 08:35:26 +08:00
|
|
|
. = 0x1500
|
2012-11-01 02:58:36 +08:00
|
|
|
.global denorm_exception_hv
|
2012-09-10 08:35:26 +08:00
|
|
|
denorm_exception_hv:
|
2012-12-07 05:51:04 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
2012-09-10 08:35:26 +08:00
|
|
|
mtspr SPRN_SPRG_HSCRATCH0,r13
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2013-08-12 14:12:06 +08:00
|
|
|
EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
|
2012-09-10 08:35:26 +08:00
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_DENORMALISATION
|
|
|
|
mfspr r10,SPRN_HSRR1
|
|
|
|
mfspr r11,SPRN_HSRR0 /* save HSRR0 */
|
|
|
|
andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
|
|
|
|
addi r11,r11,-4 /* HSRR0 is next instruction */
|
|
|
|
bne+ denorm_assist
|
|
|
|
#endif
|
|
|
|
|
2013-08-12 14:12:06 +08:00
|
|
|
KVMTEST(0x1500)
|
2012-09-10 08:35:26 +08:00
|
|
|
EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
|
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500)
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifdef CONFIG_CBE_RAS
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
|
2011-09-13 12:15:31 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
|
2009-06-03 05:17:38 +08:00
|
|
|
#endif /* CONFIG_CBE_RAS */
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700)
|
2011-06-29 08:18:26 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifdef CONFIG_CBE_RAS
|
2011-04-05 12:27:11 +08:00
|
|
|
STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
|
2011-09-13 12:15:31 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
|
2012-11-02 10:53:36 +08:00
|
|
|
#else
|
|
|
|
. = 0x1800
|
2009-06-03 05:17:38 +08:00
|
|
|
#endif /* CONFIG_CBE_RAS */
|
|
|
|
|
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*** Out of line interrupts support ***/
|
|
|
|
|
2012-11-02 10:53:36 +08:00
|
|
|
.align 7
|
2011-06-29 08:18:26 +08:00
|
|
|
/* moved from 0x200 */
|
powerpc/book3s: handle machine check in Linux host.
Move machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and handover the high level info to OS.
This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without losing context. One thing to note
here is that, if we get another machine check while ME bit is off then we risk a
checkstop. Hence we restrict ourselves to save only MCE information and
registers saved in the PACA_EXMC save area before we turn the ME bit on. We use
paca->in_mce flag to differentiate between first entry and nested machine check
entry which helps proper use of emergency stack. We increment paca->in_mce
every time we enter in early machine check handler and decrement it while
leaving. When we enter machine check early handler first time (paca->in_mce ==
0), we are sure nobody is using MC emergency stack and allocate a stack frame
at the start of the emergency stack. During subsequent entry (paca->in_mce >
0), we know that r1 points inside emergency stack and we allocate separate
stack frame accordingly. This prevents us from clobbering MCE information
during nested machine checks.
The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.
This is the code flow:
Machine Check Interrupt
|
V
0x200 vector ME=0, IR=0, DR=0
|
V
+-----------------------------------------------+
|machine_check_pSeries_early: | ME=0, IR=0, DR=0
| Alloc frame on emergency stack |
| Save srr1, srr0, dar and dsisr on stack |
+-----------------------------------------------+
|
(ME=1, IR=0, DR=0, RFID)
|
V
machine_check_handle_early ME=1, IR=0, DR=0
|
V
+-----------------------------------------------+
| machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0
| Things to do: (in next patches) |
| Flush SLB for SLB errors |
| Flush TLB for TLB errors |
| Decode and save MCE info |
+-----------------------------------------------+
|
(Fall through existing exception handler routine.)
|
V
machine_check_pSeries ME=1, IR=0, DR=0
|
(ME=1, IR=1, DR=1, RFID)
|
V
machine_check_common ME=1, IR=1, DR=1
.
.
.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-10-30 22:34:08 +08:00
|
|
|
machine_check_pSeries_early:
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
|
|
|
|
/*
|
|
|
|
* Register contents:
|
|
|
|
* R13 = PACA
|
|
|
|
* R9 = CR
|
|
|
|
* Original R9 to R13 is saved on PACA_EXMC
|
|
|
|
*
|
2014-06-11 16:48:02 +08:00
|
|
|
* Switch to mc_emergency stack and handle re-entrancy (we limit
|
|
|
|
* the nested MCE up to level 4 to avoid stack overflow).
|
|
|
|
* Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
|
powerpc/book3s: handle machine check in Linux host.
Move machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and handover the high level info to OS.
This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without losing context. One thing to note
here is that, if we get another machine check while ME bit is off then we risk a
checkstop. Hence we restrict ourselves to save only MCE information and
registers saved in the PACA_EXMC save area before we turn the ME bit on. We use
paca->in_mce flag to differentiate between first entry and nested machine check
entry which helps proper use of emergency stack. We increment paca->in_mce
every time we enter in early machine check handler and decrement it while
leaving. When we enter machine check early handler first time (paca->in_mce ==
0), we are sure nobody is using MC emergency stack and allocate a stack frame
at the start of the emergency stack. During subsequent entry (paca->in_mce >
0), we know that r1 points inside emergency stack and we allocate separate
stack frame accordingly. This prevents us from clobbering MCE information
during nested machine checks.
The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.
This is the code flow:
Machine Check Interrupt
|
V
0x200 vector ME=0, IR=0, DR=0
|
V
+-----------------------------------------------+
|machine_check_pSeries_early: | ME=0, IR=0, DR=0
| Alloc frame on emergency stack |
| Save srr1, srr0, dar and dsisr on stack |
+-----------------------------------------------+
|
(ME=1, IR=0, DR=0, RFID)
|
V
machine_check_handle_early ME=1, IR=0, DR=0
|
V
+-----------------------------------------------+
| machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0
| Things to do: (in next patches) |
| Flush SLB for SLB errors |
| Flush TLB for TLB errors |
| Decode and save MCE info |
+-----------------------------------------------+
|
(Fall through existing exception handler routine.)
|
V
machine_check_pSeries ME=1, IR=0, DR=0
|
(ME=1, IR=1, DR=1, RFID)
|
V
machine_check_common ME=1, IR=1, DR=1
.
.
.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-10-30 22:34:08 +08:00
|
|
|
*
|
|
|
|
* We use paca->in_mce to check whether this is the first entry or
|
|
|
|
* nested machine check. We increment paca->in_mce to track nested
|
|
|
|
* machine checks.
|
|
|
|
*
|
|
|
|
* If this is the first entry then set stack pointer to
|
|
|
|
* paca->mc_emergency_sp, otherwise r1 is already pointing to
|
|
|
|
* stack frame on mc_emergency stack.
|
|
|
|
*
|
|
|
|
* NOTE: We are here with MSR_ME=0 (off), which means we risk a
|
|
|
|
* checkstop if we get another machine check exception before we do
|
|
|
|
* rfid with MSR_ME=1.
|
|
|
|
*/
|
|
|
|
mr r11,r1 /* Save r1 */
|
|
|
|
lhz r10,PACA_IN_MCE(r13)
|
|
|
|
cmpwi r10,0 /* Are we in nested machine check */
|
|
|
|
bne 0f /* Yes, we are. */
|
|
|
|
/* First machine check entry */
|
|
|
|
ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
|
|
|
|
0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
|
|
|
|
addi r10,r10,1 /* increment paca->in_mce */
|
|
|
|
sth r10,PACA_IN_MCE(r13)
|
2014-06-11 16:48:02 +08:00
|
|
|
/* Limit nested MCE to level 4 to avoid stack overflow */
|
|
|
|
cmpwi r10,4
|
|
|
|
bgt 2f /* Check if we hit limit of 4 */
|
powerpc/book3s: handle machine check in Linux host.
Move machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and handover the high level info to OS.
This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without losing context. One thing to note
here is that, if we get another machine check while ME bit is off then we risk a
checkstop. Hence we restrict ourselves to save only MCE information and
registers saved in the PACA_EXMC save area before we turn the ME bit on. We use
paca->in_mce flag to differentiate between first entry and nested machine check
entry which helps proper use of emergency stack. We increment paca->in_mce
every time we enter in early machine check handler and decrement it while
leaving. When we enter machine check early handler first time (paca->in_mce ==
0), we are sure nobody is using MC emergency stack and allocate a stack frame
at the start of the emergency stack. During subsequent entry (paca->in_mce >
0), we know that r1 points inside emergency stack and we allocate separate
stack frame accordingly. This prevents us from clobbering MCE information
during nested machine checks.
The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.
This is the code flow:
Machine Check Interrupt
|
V
0x200 vector ME=0, IR=0, DR=0
|
V
+-----------------------------------------------+
|machine_check_pSeries_early: | ME=0, IR=0, DR=0
| Alloc frame on emergency stack |
| Save srr1, srr0, dar and dsisr on stack |
+-----------------------------------------------+
|
(ME=1, IR=0, DR=0, RFID)
|
V
machine_check_handle_early ME=1, IR=0, DR=0
|
V
+-----------------------------------------------+
| machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0
| Things to do: (in next patches) |
| Flush SLB for SLB errors |
| Flush TLB for TLB errors |
| Decode and save MCE info |
+-----------------------------------------------+
|
(Fall through existing exception handler routine.)
|
V
machine_check_pSeries ME=1, IR=0, DR=0
|
(ME=1, IR=1, DR=1, RFID)
|
V
machine_check_common ME=1, IR=1, DR=1
.
.
.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-10-30 22:34:08 +08:00
|
|
|
std r11,GPR1(r1) /* Save r1 on the stack. */
|
|
|
|
std r11,0(r1) /* make stack chain pointer */
|
|
|
|
mfspr r11,SPRN_SRR0 /* Save SRR0 */
|
|
|
|
std r11,_NIP(r1)
|
|
|
|
mfspr r11,SPRN_SRR1 /* Save SRR1 */
|
|
|
|
std r11,_MSR(r1)
|
|
|
|
mfspr r11,SPRN_DAR /* Save DAR */
|
|
|
|
std r11,_DAR(r1)
|
|
|
|
mfspr r11,SPRN_DSISR /* Save DSISR */
|
|
|
|
std r11,_DSISR(r1)
|
|
|
|
std r9,_CCR(r1) /* Save CR in stackframe */
|
|
|
|
/* Save r9 through r13 from EXMC save area to stack frame. */
|
|
|
|
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
|
|
|
|
mfmsr r11 /* get MSR value */
|
|
|
|
ori r11,r11,MSR_ME /* turn on ME bit */
|
|
|
|
ori r11,r11,MSR_RI /* turn on RI bit */
|
|
|
|
ld r12,PACAKBASE(r13) /* get high part of &label */
|
|
|
|
LOAD_HANDLER(r12, machine_check_handle_early)
|
2014-06-11 16:48:02 +08:00
|
|
|
1: mtspr SPRN_SRR0,r12
|
powerpc/book3s: handle machine check in Linux host.
Move machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and handover the high level info to OS.
This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without losing context. One thing to note
here is that, if we get another machine check while ME bit is off then we risk a
checkstop. Hence we restrict ourselves to save only MCE information and
registers saved in the PACA_EXMC save area before we turn the ME bit on. We use
paca->in_mce flag to differentiate between first entry and nested machine check
entry which helps proper use of emergency stack. We increment paca->in_mce
every time we enter in early machine check handler and decrement it while
leaving. When we enter machine check early handler first time (paca->in_mce ==
0), we are sure nobody is using MC emergency stack and allocate a stack frame
at the start of the emergency stack. During subsequent entry (paca->in_mce >
0), we know that r1 points inside emergency stack and we allocate separate
stack frame accordingly. This prevents us from clobbering MCE information
during nested machine checks.
The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.
This is the code flow:
Machine Check Interrupt
|
V
0x200 vector ME=0, IR=0, DR=0
|
V
+-----------------------------------------------+
|machine_check_pSeries_early: | ME=0, IR=0, DR=0
| Alloc frame on emergency stack |
| Save srr1, srr0, dar and dsisr on stack |
+-----------------------------------------------+
|
(ME=1, IR=0, DR=0, RFID)
|
V
machine_check_handle_early ME=1, IR=0, DR=0
|
V
+-----------------------------------------------+
| machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0
| Things to do: (in next patches) |
| Flush SLB for SLB errors |
| Flush TLB for TLB errors |
| Decode and save MCE info |
+-----------------------------------------------+
|
(Fall through existing exception handler routine.)
|
V
machine_check_pSeries ME=1, IR=0, DR=0
|
(ME=1, IR=1, DR=1, RFID)
|
V
machine_check_common ME=1, IR=1, DR=1
.
.
.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-10-30 22:34:08 +08:00
|
|
|
mtspr SPRN_SRR1,r11
|
|
|
|
rfid
|
|
|
|
b . /* prevent speculative execution */
|
2014-06-11 16:48:02 +08:00
|
|
|
2:
|
|
|
|
/* Stack overflow. Stay on emergency stack and panic.
|
|
|
|
* Keep the ME bit off while panic-ing, so that if we hit
|
|
|
|
* another machine check we checkstop.
|
|
|
|
*/
|
|
|
|
addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
|
|
|
|
ld r11,PACAKMSR(r13)
|
|
|
|
ld r12,PACAKBASE(r13)
|
|
|
|
LOAD_HANDLER(r12, unrecover_mce)
|
|
|
|
li r10,MSR_ME
|
|
|
|
andc r11,r11,r10 /* Turn off MSR_ME */
|
|
|
|
b 1b
|
|
|
|
b . /* prevent speculative execution */
|
powerpc/book3s: handle machine check in Linux host.
Move machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and handover the high level info to OS.
This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without losing context. One thing to note
here is that, if we get another machine check while ME bit is off then we risk a
checkstop. Hence we restrict ourselves to save only MCE information and
registers saved in the PACA_EXMC save area before we turn the ME bit on. We use
paca->in_mce flag to differentiate between first entry and nested machine check
entry which helps proper use of emergency stack. We increment paca->in_mce
every time we enter in early machine check handler and decrement it while
leaving. When we enter machine check early handler first time (paca->in_mce ==
0), we are sure nobody is using MC emergency stack and allocate a stack frame
at the start of the emergency stack. During subsequent entry (paca->in_mce >
0), we know that r1 points inside emergency stack and we allocate separate
stack frame accordingly. This prevents us from clobbering MCE information
during nested machine checks.
The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.
This is the code flow:
Machine Check Interrupt
|
V
0x200 vector ME=0, IR=0, DR=0
|
V
+-----------------------------------------------+
|machine_check_pSeries_early: | ME=0, IR=0, DR=0
| Alloc frame on emergency stack |
| Save srr1, srr0, dar and dsisr on stack |
+-----------------------------------------------+
|
(ME=1, IR=0, DR=0, RFID)
|
V
machine_check_handle_early ME=1, IR=0, DR=0
|
V
+-----------------------------------------------+
| machine_check_early (r3=pt_regs) | ME=1, IR=0, DR=0
| Things to do: (in next patches) |
| Flush SLB for SLB errors |
| Flush TLB for TLB errors |
| Decode and save MCE info |
+-----------------------------------------------+
|
(Fall through existing exception handler routine.)
|
V
machine_check_pSeries ME=1, IR=0, DR=0
|
(ME=1, IR=1, DR=1, RFID)
|
V
machine_check_common ME=1, IR=1, DR=1
.
.
.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-10-30 22:34:08 +08:00
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
|
|
|
|
|
2011-06-29 08:18:26 +08:00
|
|
|
machine_check_pSeries:
|
|
|
|
.globl machine_check_fwnmi
|
|
|
|
machine_check_fwnmi:
|
2012-12-07 05:51:04 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
2011-06-29 08:18:26 +08:00
|
|
|
SET_SCRATCH0(r13) /* save r13 */
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
EXCEPTION_PROLOG_0(PACA_EXMC)
|
|
|
|
machine_check_pSeries_0:
|
|
|
|
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200)
|
|
|
|
EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD)
|
2011-06-29 08:18:26 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
|
KVM: PPC: Implement MMIO emulation support for Book3S HV guests
This provides the low-level support for MMIO emulation in Book3S HV
guests. When the guest tries to map a page which is not covered by
any memslot, that page is taken to be an MMIO emulation page. Instead
of inserting a valid HPTE, we insert an HPTE that has the valid bit
clear but another hypervisor software-use bit set, which we call
HPTE_V_ABSENT, to indicate that this is an absent page. An
absent page is treated much like a valid page as far as guest hcalls
(H_ENTER, H_REMOVE, H_READ etc.) are concerned, except of course that
an absent HPTE doesn't need to be invalidated with tlbie since it
was never valid as far as the hardware is concerned.
When the guest accesses a page for which there is an absent HPTE, it
will take a hypervisor data storage interrupt (HDSI) since we now set
the VPM1 bit in the LPCR. Our HDSI handler for HPTE-not-present faults
looks up the hash table and if it finds an absent HPTE mapping the
requested virtual address, will switch to kernel mode and handle the
fault in kvmppc_book3s_hv_page_fault(), which at present just calls
kvmppc_hv_emulate_mmio() to set up the MMIO emulation.
This is based on an earlier patch by Benjamin Herrenschmidt, but since
heavily reworked.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-12-12 20:36:37 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
|
|
|
|
KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
|
|
|
|
KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
|
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
|
2011-06-29 08:18:26 +08:00
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
|
|
|
|
|
2012-09-10 08:35:26 +08:00
|
|
|
#ifdef CONFIG_PPC_DENORMALISATION
|
|
|
|
/* denorm_assist: copy each FP/VSX register onto itself (which, per the */
/* comments below, is what denormalises it), then restore the registers */
/* saved in PACA_EXGEN and return with HRFID. */
/* On entry r11 is written to HSRR0 and r9 supplies CR field 0; both are */
/* presumably set up by the caller, which is not visible here - TODO confirm. */
denorm_assist:
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
/*
|
|
|
|
* To denormalise we need to move a copy of the register to itself.
|
|
|
|
* For POWER6 do that here for all FP regs.
|
|
|
|
*/
|
|
|
|
mfmsr r10
|
|
|
|
/* ori sets MSR_FP, MSR_FE0 and MSR_FE1 ... */
ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1)
|
|
|
|
/* ... and xori flips FE0/FE1 back to 0: net result is FP set, FE0/FE1 clear. */
xori r10,r10,(MSR_FE0|MSR_FE1)
|
|
|
|
mtmsrd r10
|
|
|
|
sync
|
2013-05-30 05:33:18 +08:00
|
|
|
|
|
|
|
/* Each FMRn(x) doubles the previous macro: FMR32(0) emits "fmr r,r" */
/* for 32 consecutive register numbers starting at 0. */
#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
|
|
|
|
#define FMR4(n) FMR2(n) ; FMR2(n+2)
|
|
|
|
#define FMR8(n) FMR4(n) ; FMR4(n+4)
|
|
|
|
#define FMR16(n) FMR8(n) ; FMR8(n+8)
|
|
|
|
#define FMR32(n) FMR16(n) ; FMR16(n+16)
|
|
|
|
FMR32(0)
|
|
|
|
|
|
2012-09-10 08:35:26 +08:00
|
|
|
FTR_SECTION_ELSE
|
|
|
|
/*
|
|
|
|
* To denormalise we need to move a copy of the register to itself.
|
|
|
|
* For POWER7 do that here for the first 32 VSX registers only.
|
|
|
|
*/
|
|
|
|
mfmsr r10
|
|
|
|
/* Set MSR_VSX in the MSR image (the bit lives in the upper halfword, hence oris/@h). */
oris r10,r10,MSR_VSX@h
|
|
|
|
mtmsrd r10
|
|
|
|
sync
|
2013-05-30 05:33:18 +08:00
|
|
|
|
|
|
|
/* Same doubling scheme as FMRn: XVCPSGNDP32(x) emits XVCPSGNDP(r,r,r) */
/* for the 32 consecutive register numbers x .. x+31. */
#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
|
|
|
|
#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
|
|
|
|
#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
|
|
|
|
#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
|
|
|
|
#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
|
|
|
|
XVCPSGNDP32(0)
|
|
|
|
|
|
2012-09-10 08:35:26 +08:00
|
|
|
/* NOTE(review): presumably the FMR path is used when CPU_FTR_ARCH_206 is */
/* clear and the VSX path otherwise - confirm against the feature-section */
/* macro definitions, which are outside this view. */
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
|
2013-05-30 05:33:19 +08:00
|
|
|
|
|
|
|
|
/* NOTE(review): this branch appears to be kept only when */
/* CPU_FTR_ARCH_207S is clear, skipping the upper-register pass below. */
BEGIN_FTR_SECTION
|
|
|
|
b denorm_done
|
|
|
|
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
|
|
|
|
/*
|
|
|
|
* To denormalise we need to move a copy of the register to itself.
|
|
|
|
* For POWER8 we need to do that for all 64 VSX registers.
|
|
|
|
*/
|
|
|
|
/* Register numbers 32..63; 0..31 were already handled by XVCPSGNDP32(0). */
XVCPSGNDP32(32)
|
|
|
|
denorm_done:
|
2012-09-10 08:35:26 +08:00
|
|
|
/* Return address for HRFID comes from r11. */
mtspr SPRN_HSRR0,r11
|
|
|
|
/* Field mask 0x80 selects CR field 0 only; it is reloaded from r9. */
mtcrf 0x80,r9
|
|
|
|
ld r9,PACA_EXGEN+EX_R9(r13)
|
2012-12-07 05:51:04 +08:00
|
|
|
RESTORE_PPR_PACA(PACA_EXGEN, r10)
|
2013-08-12 14:12:06 +08:00
|
|
|
/* r10 is used as scratch here; its saved value is reloaded further down. */
BEGIN_FTR_SECTION
|
|
|
|
ld r10,PACA_EXGEN+EX_CFAR(r13)
|
|
|
|
mtspr SPRN_CFAR,r10
|
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
|
2012-09-10 08:35:26 +08:00
|
|
|
ld r10,PACA_EXGEN+EX_R10(r13)
|
|
|
|
ld r11,PACA_EXGEN+EX_R11(r13)
|
|
|
|
ld r12,PACA_EXGEN+EX_R12(r13)
|
|
|
|
/* r13 is the base for every load above, so it must be reloaded last. */
ld r13,PACA_EXGEN+EX_R13(r13)
|
|
|
|
HRFID
|
|
|
|
/* Branch-to-self; presumably never reached after HRFID - TODO confirm intent. */
b .
|
|
|
|
#endif
|
|
|
|
|
2011-06-29 08:18:26 +08:00
|
|
|
.align 7
|
2011-04-05 12:27:11 +08:00
|
|
|
/* moved from 0xe00 */
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
STD_EXCEPTION_HV_OOL(0xe02, h_data_storage)
|
2011-06-29 08:18:26 +08:00
|
|
|
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02)
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage)
|
2011-06-29 08:18:26 +08:00
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22)
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
STD_EXCEPTION_HV_OOL(0xe42, emulation_assist)
|
2011-06-29 08:18:26 +08:00
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42)
|
2014-07-29 21:10:01 +08:00
|
|
|
MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception)
|
2011-06-29 08:18:26 +08:00
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
|
2014-07-29 21:10:01 +08:00
|
|
|
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
|
2012-11-15 02:49:45 +08:00
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/* moved from 0xf00 */
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00)
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
|
KVM: PPC: Add support for Book3S processors in hypervisor mode
This adds support for KVM running on 64-bit Book 3S processors,
specifically POWER7, in hypervisor mode. Using hypervisor mode means
that the guest can use the processor's supervisor mode. That means
that the guest can execute privileged instructions and access privileged
registers itself without trapping to the host. This gives excellent
performance, but does mean that KVM cannot emulate a processor
architecture other than the one that the hardware implements.
This code assumes that the guest is running paravirtualized using the
PAPR (Power Architecture Platform Requirements) interface, which is the
interface that IBM's PowerVM hypervisor uses. That means that existing
Linux distributions that run on IBM pSeries machines will also run
under KVM without modification. In order to communicate the PAPR
hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code
to include/linux/kvm.h.
Currently the choice between book3s_hv support and book3s_pr support
(i.e. the existing code, which runs the guest in user mode) has to be
made at kernel configuration time, so a given kernel binary can only
do one or the other.
This new book3s_hv code doesn't support MMIO emulation at present.
Since we are running paravirtualized guests, this isn't a serious
restriction.
With the guest running in supervisor mode, most exceptions go straight
to the guest. We will never get data or instruction storage or segment
interrupts, alignment interrupts, decrementer interrupts, program
interrupts, single-step interrupts, etc., coming to the hypervisor from
the guest. Therefore this introduces a new KVMTEST_NONHV macro for the
exception entry path so that we don't have to do the KVM test on entry
to those exception handlers.
We do however get hypervisor decrementer, hypervisor data storage,
hypervisor instruction storage, and hypervisor emulation assist
interrupts, so we have to handle those.
In hypervisor mode, real-mode accesses can access all of RAM, not just
a limited amount. Therefore we put all the guest state in the vcpu.arch
and use the shadow_vcpu in the PACA only for temporary scratch space.
We allocate the vcpu with kzalloc rather than vzalloc, and we don't use
anything in the kvmppc_vcpu_book3s struct, so we don't allocate it.
We don't have a shared page with the guest, but we still need a
kvm_vcpu_arch_shared struct to store the values of various registers,
so we include one in the vcpu_arch struct.
The POWER7 processor has a restriction that all threads in a core have
to be in the same partition. MMU-on kernel code counts as a partition
(partition 0), so we have to do a partition switch on every entry to and
exit from the guest. At present we require the host and guest to run
in single-thread mode because of this hardware restriction.
This code allocates a hashed page table for the guest and initializes
it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We
require that the guest memory is allocated using 16MB huge pages, in
order to simplify the low-level memory management. This also means that
we can get away without tracking paging activity in the host for now,
since huge pages can't be paged or swapped.
This also adds a few new exports needed by the book3s_hv code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
|
2013-06-25 15:47:56 +08:00
|
|
|
STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
|
2013-02-14 00:21:38 +08:00
|
|
|
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60)
|
2013-06-25 15:47:57 +08:00
|
|
|
STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable)
|
|
|
|
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
|
2012-11-15 02:49:48 +08:00
|
|
|
* An interrupt came in while soft-disabled. We set paca->irq_happened, then:
|
|
|
|
* - If it was a decrementer interrupt, we bump the dec to max and return.
|
|
|
|
* - If it was a doorbell we return immediately since doorbells are edge
|
|
|
|
* triggered and won't automatically refire.
|
2014-07-29 21:10:01 +08:00
|
|
|
* - If it was a HMI we return immediately since we handled it in realmode
|
|
|
|
* and it won't refire.
|
2012-11-15 02:49:48 +08:00
|
|
|
* - else we hard disable and return.
|
|
|
|
* This is called with r10 containing the value to OR to the paca field.
|
2009-06-03 05:17:38 +08:00
|
|
|
*/
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
#define MASKED_INTERRUPT(_H) \
|
|
|
|
masked_##_H##interrupt: \
|
|
|
|
std r11,PACA_EXGEN+EX_R11(r13); \
|
|
|
|
lbz r11,PACAIRQHAPPENED(r13); \
|
|
|
|
or r11,r11,r10; \
|
|
|
|
stb r11,PACAIRQHAPPENED(r13); \
|
2012-11-15 02:49:48 +08:00
|
|
|
cmpwi r10,PACA_IRQ_DEC; \
|
|
|
|
bne 1f; \
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
lis r10,0x7fff; \
|
|
|
|
ori r10,r10,0xffff; \
|
|
|
|
mtspr SPRN_DEC,r10; \
|
|
|
|
b 2f; \
|
2012-11-15 02:49:48 +08:00
|
|
|
1: cmpwi r10,PACA_IRQ_DBELL; \
|
2014-07-29 21:10:01 +08:00
|
|
|
beq 2f; \
|
|
|
|
cmpwi r10,PACA_IRQ_HMI; \
|
2012-11-15 02:49:48 +08:00
|
|
|
beq 2f; \
|
|
|
|
mfspr r10,SPRN_##_H##SRR1; \
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
rldicl r10,r10,48,1; /* clear MSR_EE */ \
|
|
|
|
rotldi r10,r10,16; \
|
|
|
|
mtspr SPRN_##_H##SRR1,r10; \
|
|
|
|
2: mtcrf 0x80,r9; \
|
|
|
|
ld r9,PACA_EXGEN+EX_R9(r13); \
|
|
|
|
ld r10,PACA_EXGEN+EX_R10(r13); \
|
|
|
|
ld r11,PACA_EXGEN+EX_R11(r13); \
|
|
|
|
GET_SCRATCH0(r13); \
|
|
|
|
##_H##rfid; \
|
2009-06-03 05:17:38 +08:00
|
|
|
b .
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
|
|
|
|
MASKED_INTERRUPT()
|
|
|
|
MASKED_INTERRUPT(H)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
/*
|
|
|
|
* Called from arch_local_irq_enable when an interrupt needs
|
2012-11-15 02:49:48 +08:00
|
|
|
* to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
|
|
|
|
* which kind of interrupt. MSR:EE is already off. We generate a
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
* stackframe as if a real interrupt had happened.
|
|
|
|
*
|
|
|
|
* Note: While MSR:EE is off, we need to make sure that _MSR
|
|
|
|
* in the generated frame has EE set to 1 or the exception
|
|
|
|
* handler will not properly re-enable interrupts.
|
|
|
|
*/
|
|
|
|
_GLOBAL(__replay_interrupt)
|
|
|
|
/* We are going to jump to the exception common code which
|
|
|
|
* will retrieve various register values from the PACA which
|
|
|
|
* we don't give a damn about, so we don't bother storing them.
|
|
|
|
*/
|
|
|
|
mfmsr r12
|
|
|
|
mflr r11
|
|
|
|
mfcr r9
|
|
|
|
ori r12,r12,MSR_EE
|
2012-11-15 02:49:48 +08:00
|
|
|
cmpwi r3,0x900
|
|
|
|
beq decrementer_common
|
|
|
|
cmpwi r3,0x500
|
|
|
|
beq hardware_interrupt_common
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
cmpwi r3,0xe80
|
|
|
|
beq h_doorbell_common
|
|
|
|
FTR_SECTION_ELSE
|
|
|
|
cmpwi r3,0xa00
|
|
|
|
beq doorbell_super_common
|
|
|
|
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
|
|
|
|
blr
|
2011-04-05 12:20:31 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifdef CONFIG_PPC_PSERIES
|
|
|
|
/*
|
|
|
|
* Vectors for the FWNMI option. Share common code.
|
|
|
|
*/
|
|
|
|
.globl system_reset_fwnmi
|
|
|
|
.align 7
|
|
|
|
system_reset_fwnmi:
|
2012-12-07 05:51:04 +08:00
|
|
|
HMT_MEDIUM_PPR_DISCARD
|
2011-04-05 11:59:58 +08:00
|
|
|
SET_SCRATCH0(r13) /* save r13 */
|
2011-06-29 08:18:26 +08:00
|
|
|
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
|
|
|
|
NOTEST, 0x100)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
#endif /* CONFIG_PPC_PSERIES */
|
|
|
|
|
|
|
|
#ifdef __DISABLED__
|
|
|
|
/*
|
|
|
|
* This is used for when the SLB miss handler has to go virtual,
|
|
|
|
* which currently no longer happens, but will once we re-implement
|
|
|
|
* dynamic VSIDs for shared page tables
|
|
|
|
*/
|
|
|
|
slb_miss_user_pseries:
|
|
|
|
std r10,PACA_EXGEN+EX_R10(r13)
|
|
|
|
std r11,PACA_EXGEN+EX_R11(r13)
|
|
|
|
std r12,PACA_EXGEN+EX_R12(r13)
|
2011-04-05 11:59:58 +08:00
|
|
|
GET_SCRATCH0(r10)
|
2009-06-03 05:17:38 +08:00
|
|
|
ld r11,PACA_EXSLB+EX_R9(r13)
|
|
|
|
ld r12,PACA_EXSLB+EX_R3(r13)
|
|
|
|
std r10,PACA_EXGEN+EX_R13(r13)
|
|
|
|
std r11,PACA_EXGEN+EX_R9(r13)
|
|
|
|
std r12,PACA_EXGEN+EX_R3(r13)
|
|
|
|
clrrdi r12,r13,32
|
|
|
|
mfmsr r10
|
|
|
|
mfspr r11,SRR0 /* save SRR0 */
|
|
|
|
ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
|
|
|
|
ori r10,r10,MSR_IR|MSR_DR|MSR_RI
|
|
|
|
mtspr SRR0,r12
|
|
|
|
mfspr r12,SRR1 /* and SRR1 */
|
|
|
|
mtspr SRR1,r10
|
|
|
|
rfid
|
|
|
|
b . /* prevent spec. execution */
|
|
|
|
#endif /* __DISABLED__ */
|
|
|
|
|
2013-09-20 12:52:50 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
|
|
|
kvmppc_skip_interrupt:
|
|
|
|
/*
|
|
|
|
* Here all GPRs are unchanged from when the interrupt happened
|
|
|
|
* except for r13, which is saved in SPRG_SCRATCH0.
* We bump SRR0 by 4 (one instruction), put r13 back and rfid, so
* that execution resumes at the instruction following the one
* SRR0 pointed at. r13 is the only register used as scratch.
|
|
|
|
*/
|
|
|
|
mfspr r13, SPRN_SRR0 /* r13 = saved SRR0 */
|
|
|
|
addi r13, r13, 4 /* step over one 4-byte instruction */
|
|
|
|
mtspr SPRN_SRR0, r13 /* write back the advanced address */
|
|
|
|
GET_SCRATCH0(r13) /* restore r13 from the scratch SPRG */
|
|
|
|
rfid
|
|
|
|
b . /* not reached; prevent spec. execution */
|
|
|
|
|
|
|
|
kvmppc_skip_Hinterrupt:
|
|
|
|
/*
|
|
|
|
* Here all GPRs are unchanged from when the interrupt happened
|
|
|
|
* except for r13, which is saved in SPRG_SCRATCH0.
* Hypervisor variant of the skip path: we bump HSRR0 by 4 (one
* instruction), put r13 back and hrfid, so that execution resumes
* at the instruction following the one HSRR0 pointed at.
|
|
|
|
*/
|
|
|
|
mfspr r13, SPRN_HSRR0 /* r13 = saved HSRR0 */
|
|
|
|
addi r13, r13, 4 /* step over one 4-byte instruction */
|
|
|
|
mtspr SPRN_HSRR0, r13 /* write back the advanced address */
|
|
|
|
GET_SCRATCH0(r13) /* restore r13 from the scratch SPRG */
|
|
|
|
hrfid
|
|
|
|
b . /* not reached; prevent spec. execution */
|
|
|
|
#endif
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* Code from here down to __end_handlers is invoked from the
|
|
|
|
* exception prologs above. Because the prologs assemble the
|
|
|
|
* addresses of these handlers using the LOAD_HANDLER macro,
|
2012-11-05 14:10:35 +08:00
|
|
|
* which uses an ori instruction, these handlers must be in
|
|
|
|
* the first 64k of the kernel image.
|
2009-06-03 05:17:38 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*** Common interrupt handlers ***/
|
|
|
|
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception)
|
2009-06-03 05:17:38 +08:00
|
|
|
|
2012-03-01 07:52:01 +08:00
|
|
|
STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt)
|
|
|
|
STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt)
|
2012-11-15 02:49:46 +08:00
|
|
|
#ifdef CONFIG_PPC_DOORBELL
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception)
|
2012-11-15 02:49:46 +08:00
|
|
|
#else
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception)
|
2012-11-15 02:49:46 +08:00
|
|
|
#endif
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt)
|
2014-07-29 21:10:01 +08:00
|
|
|
STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception)
|
2012-11-15 02:49:45 +08:00
|
|
|
#ifdef CONFIG_PPC_DOORBELL
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception)
|
2012-11-15 02:49:45 +08:00
|
|
|
#else
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)
|
2012-11-15 02:49:45 +08:00
|
|
|
#endif
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)
|
2009-06-03 05:17:38 +08:00
|
|
|
#ifdef CONFIG_ALTIVEC
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception)
|
2009-06-03 05:17:38 +08:00
|
|
|
#else
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)
|
2009-06-03 05:17:38 +08:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_CBE_RAS
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
|
2009-06-03 05:17:38 +08:00
|
|
|
#endif /* CONFIG_CBE_RAS */
|
|
|
|
|
2012-11-02 14:21:43 +08:00
|
|
|
/*
|
|
|
|
* Relocation-on interrupts: A subset of the interrupts can be delivered
|
|
|
|
* with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
|
|
|
|
* it. Addresses are the same as the original interrupt addresses, but
|
|
|
|
* offset by 0xc000000000004000.
|
|
|
|
* It's impossible to receive interrupts below 0x300 via this mechanism.
|
|
|
|
* KVM: None of these traps are from the guest; anything that escalated
|
|
|
|
* to HV=1 from HV=0 is delivered via real mode handlers.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This uses the standard macro, since the original 0x300 vector
|
|
|
|
* only has extra guff for STAB-based processors -- which never
|
|
|
|
* come here.
|
|
|
|
*/
|
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x4300, 0x300, data_access)
|
|
|
|
. = 0x4380
|
|
|
|
.globl data_access_slb_relon_pSeries
|
|
|
|
data_access_slb_relon_pSeries:
|
|
|
|
SET_SCRATCH0(r13)
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
EXCEPTION_PROLOG_0(PACA_EXSLB)
|
2012-11-02 14:21:43 +08:00
|
|
|
EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
|
|
|
|
std r3,PACA_EXSLB+EX_R3(r13)
|
|
|
|
mfspr r3,SPRN_DAR
|
|
|
|
mfspr r12,SPRN_SRR1
|
|
|
|
#ifndef CONFIG_RELOCATABLE
|
2014-02-04 13:04:35 +08:00
|
|
|
b slb_miss_realmode
|
2012-11-02 14:21:43 +08:00
|
|
|
#else
|
|
|
|
/*
|
2014-02-04 13:04:52 +08:00
|
|
|
* We can't just use a direct branch to slb_miss_realmode
|
2012-11-02 14:21:43 +08:00
|
|
|
* because the distance from here to there depends on where
|
|
|
|
* the kernel ends up being put.
|
|
|
|
*/
|
|
|
|
mfctr r11
|
|
|
|
ld r10,PACAKBASE(r13)
|
2014-02-04 13:04:52 +08:00
|
|
|
LOAD_HANDLER(r10, slb_miss_realmode)
|
2012-11-02 14:21:43 +08:00
|
|
|
mtctr r10
|
|
|
|
bctr
|
|
|
|
#endif
|
|
|
|
|
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x4400, 0x400, instruction_access)
|
|
|
|
/*
 * Relocation-on instruction-access SLB miss vector, placed at 0x4480.
 * State is saved through the PACA_EXSLB area; r3 is stashed there and
 * then loaded with SRR0, r12 is loaded with SRR1, and control passes
 * to slb_miss_realmode.
 */
. = 0x4480
|
|
|
|
.globl instruction_access_slb_relon_pSeries
|
|
|
|
instruction_access_slb_relon_pSeries:
|
|
|
|
SET_SCRATCH0(r13)
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
EXCEPTION_PROLOG_0(PACA_EXSLB)
|
2012-11-02 14:21:43 +08:00
|
|
|
EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
|
|
|
|
/* Preserve the interrupted r3 before it is reused for the fault address. */
std r3,PACA_EXSLB+EX_R3(r13)
|
|
|
|
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
|
|
|
|
mfspr r12,SPRN_SRR1
|
|
|
|
#ifndef CONFIG_RELOCATABLE
|
2014-02-04 13:04:35 +08:00
|
|
|
b slb_miss_realmode
|
2012-11-02 14:21:43 +08:00
|
|
|
#else
|
|
|
|
/*
 * Same indirect branch as in data_access_slb_relon_pSeries: with
 * CONFIG_RELOCATABLE the handler is reached through CTR rather than
 * a direct branch. r11 keeps the previous CTR value; r10 is loaded
 * from PACAKBASE and run through LOAD_HANDLER to form the target.
 */
mfctr r11
|
|
|
|
ld r10,PACAKBASE(r13)
|
2014-02-04 13:04:52 +08:00
|
|
|
LOAD_HANDLER(r10, slb_miss_realmode)
|
2012-11-02 14:21:43 +08:00
|
|
|
mtctr r10
|
|
|
|
bctr
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Relocation-on external interrupt vector at 0x4500.
 * hardware_interrupt_relon_pSeries and hardware_interrupt_relon_hv are
 * two names for the same address. The body is a feature section keyed
 * on CPU_FTR_HVMODE: the first alternative uses 0x502 with
 * EXC_HV/SOFTEN_TEST_HV, the second uses 0x500 with
 * EXC_STD/SOFTEN_TEST_PR.
 * NOTE(review): per the _IFSET suffix the first alternative should be
 * the one kept when the feature bit is set -- confirm against the
 * feature-fixup macro definitions.
 */
. = 0x4500
|
|
|
|
.globl hardware_interrupt_relon_pSeries;
|
|
|
|
.globl hardware_interrupt_relon_hv;
|
|
|
|
hardware_interrupt_relon_pSeries:
|
|
|
|
hardware_interrupt_relon_hv:
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
_MASKABLE_RELON_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV)
|
|
|
|
FTR_SECTION_ELSE
|
|
|
|
_MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR)
|
2013-04-25 23:30:57 +08:00
|
|
|
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
|
2012-11-02 14:21:43 +08:00
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x4600, 0x600, alignment)
|
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x4700, 0x700, program_check)
|
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable)
|
|
|
|
MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer)
|
|
|
|
STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer)
|
2012-11-15 02:49:46 +08:00
|
|
|
MASKABLE_RELON_EXCEPTION_PSERIES(0x4a00, 0xa00, doorbell_super)
|
2012-11-02 14:21:43 +08:00
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b)
|
|
|
|
|
|
|
|
. = 0x4c00
|
|
|
|
.globl system_call_relon_pSeries
|
|
|
|
system_call_relon_pSeries:
|
|
|
|
HMT_MEDIUM
|
|
|
|
SYSCALL_PSERIES_1
|
|
|
|
SYSCALL_PSERIES_2_DIRECT
|
|
|
|
SYSCALL_PSERIES_3
|
|
|
|
|
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
|
|
|
|
|
|
|
|
. = 0x4e00
|
2013-06-25 15:47:54 +08:00
|
|
|
b . /* Can't happen, see v2.07 Book III-S section 6.5 */
|
2012-11-02 14:21:43 +08:00
|
|
|
|
|
|
|
. = 0x4e20
|
2013-06-25 15:47:54 +08:00
|
|
|
b . /* Can't happen, see v2.07 Book III-S section 6.5 */
|
2012-11-02 14:21:43 +08:00
|
|
|
|
|
|
|
. = 0x4e40
|
2013-08-15 13:22:18 +08:00
|
|
|
emulation_assist_relon_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2012-11-02 14:21:43 +08:00
|
|
|
b emulation_assist_relon_hv
|
|
|
|
|
|
|
|
. = 0x4e60
|
2013-06-25 15:47:54 +08:00
|
|
|
b . /* Can't happen, see v2.07 Book III-S section 6.5 */
|
2012-11-02 14:21:43 +08:00
|
|
|
|
2012-11-15 02:49:45 +08:00
|
|
|
. = 0x4e80
|
2013-08-15 13:22:18 +08:00
|
|
|
h_doorbell_relon_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2012-11-15 02:49:45 +08:00
|
|
|
b h_doorbell_relon_hv
|
2012-11-02 14:21:43 +08:00
|
|
|
|
|
|
|
. = 0x4f00
|
2013-08-15 13:22:17 +08:00
|
|
|
performance_monitor_relon_pseries_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2012-11-02 14:21:43 +08:00
|
|
|
b performance_monitor_relon_pSeries
|
|
|
|
|
|
|
|
. = 0x4f20
|
2013-08-15 13:22:17 +08:00
|
|
|
altivec_unavailable_relon_pseries_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2012-11-02 14:21:43 +08:00
|
|
|
b altivec_unavailable_relon_pSeries
|
|
|
|
|
|
|
|
. = 0x4f40
|
2013-08-15 13:22:17 +08:00
|
|
|
vsx_unavailable_relon_pseries_trampoline:
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2012-11-02 14:21:43 +08:00
|
|
|
b vsx_unavailable_relon_pSeries
|
|
|
|
|
2013-02-14 00:21:38 +08:00
|
|
|
. = 0x4f60
|
2013-08-15 13:22:17 +08:00
|
|
|
facility_unavailable_relon_trampoline:
|
2013-02-14 00:21:38 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2013-06-25 15:47:56 +08:00
|
|
|
b facility_unavailable_relon_pSeries
|
2013-02-14 00:21:38 +08:00
|
|
|
|
2013-06-25 15:47:57 +08:00
|
|
|
. = 0x4f80
|
2013-08-15 13:22:17 +08:00
|
|
|
hv_facility_unavailable_relon_trampoline:
|
2013-06-25 15:47:57 +08:00
|
|
|
SET_SCRATCH0(r13)
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
2013-08-09 15:29:27 +08:00
|
|
|
b hv_facility_unavailable_relon_hv
|
2013-06-25 15:47:57 +08:00
|
|
|
|
2012-11-02 14:21:43 +08:00
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
|
|
|
|
#ifdef CONFIG_PPC_DENORMALISATION
|
|
|
|
. = 0x5500
|
|
|
|
b denorm_exception_hv
|
|
|
|
#endif
|
|
|
|
STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
|
|
|
|
|
|
|
|
/* Other future vectors */
|
|
|
|
.align 7
|
|
|
|
.globl __end_interrupts
|
|
|
|
__end_interrupts:
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
.align 7
|
|
|
|
system_call_entry:
|
|
|
|
b system_call_common
|
|
|
|
|
2012-03-01 09:45:27 +08:00
|
|
|
ppc64_runlatch_on_trampoline:
|
2014-02-04 13:04:35 +08:00
|
|
|
b __ppc64_runlatch_on
|
2012-03-01 09:45:27 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* Here r13 points to the paca, r9 contains the saved CR,
|
|
|
|
* SRR0 and SRR1 are saved in r11 and r12,
|
|
|
|
* r9 - r13 are saved in paca->exgen.
|
|
|
|
*/
|
|
|
|
/*
 * data_access_common: common handler for the 0x300 vector.
 * DAR and DSISR are parked in the PACA_EXGEN save area before
 * EXCEPTION_PROLOG_COMMON runs (r10 is the only scratch used for
 * that), then reloaded as r3 = DAR and r4 = DSISR. r12 is reloaded
 * from the _MSR slot at r1 and r5 is set to 0x300, the same number
 * given to the prolog, before branching to do_hash_page.
 */
.align 7
|
|
|
|
.globl data_access_common
|
|
|
|
data_access_common:
|
|
|
|
mfspr r10,SPRN_DAR
|
|
|
|
std r10,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
mfspr r10,SPRN_DSISR
|
|
|
|
/* DSISR is stored and later reloaded as a 32-bit word (stw/lwz). */
stw r10,PACA_EXGEN+EX_DSISR(r13)
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2012-03-07 13:48:45 +08:00
|
|
|
/* Reload r12 from the _MSR slot at r1 after the macro above. */
ld r12,_MSR(r1)
|
2009-06-03 05:17:38 +08:00
|
|
|
ld r3,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
lwz r4,PACA_EXGEN+EX_DSISR(r13)
|
|
|
|
li r5,0x300
|
2014-02-04 13:04:35 +08:00
|
|
|
b do_hash_page /* Try to handle as hpte fault */
|
2009-06-03 05:17:38 +08:00
|
|
|
|
2011-04-05 12:27:11 +08:00
|
|
|
/*
 * h_data_storage_common: common handler for the 0xe00 vector.
 * HDAR and HDSISR are parked in the EX_DAR/EX_DSISR slots of
 * PACA_EXGEN, then the exception is reported through
 * unknown_exception() with r3 = r1 + STACK_FRAME_OVERHEAD, and the
 * handler leaves via ret_from_except.
 * NOTE(review): the parked HDAR/HDSISR values are not read back
 * anywhere in this block.
 */
.align 7
|
2012-11-02 11:11:51 +08:00
|
|
|
.globl h_data_storage_common
|
2011-04-05 12:27:11 +08:00
|
|
|
h_data_storage_common:
|
2012-11-02 11:11:51 +08:00
|
|
|
mfspr r10,SPRN_HDAR
|
|
|
|
std r10,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
mfspr r10,SPRN_HDSISR
|
|
|
|
stw r10,PACA_EXGEN+EX_DSISR(r13)
|
|
|
|
EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2012-11-02 11:11:51 +08:00
|
|
|
/* r3 = r1 + STACK_FRAME_OVERHEAD, the argument for the call below. */
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl unknown_exception
|
|
|
|
b ret_from_except
|
2011-04-05 12:27:11 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * instruction_access_common: common handler for the 0x400 vector.
 * Mirrors data_access_common but takes its arguments from the frame:
 * r3 = _NIP(r1), r4 = bits of the saved MSR, r5 = 0x400, then
 * branches to do_hash_page.
 */
.align 7
|
|
|
|
.globl instruction_access_common
|
|
|
|
instruction_access_common:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2012-03-07 13:48:45 +08:00
|
|
|
ld r12,_MSR(r1)
|
2009-06-03 05:17:38 +08:00
|
|
|
ld r3,_NIP(r1)
|
|
|
|
/*
 * r4 = r12 & 0x58200000 (andis. masks with the immediate shifted
 * left by 16). NOTE(review): presumably these are the fault-status
 * bits of the saved SRR1 image standing in for DSISR -- confirm
 * against the ISA.
 */
andis. r4,r12,0x5820
|
|
|
|
li r5,0x400
|
2014-02-04 13:04:35 +08:00
|
|
|
b do_hash_page /* Try to handle as hpte fault */
|
2009-06-03 05:17:38 +08:00
|
|
|
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
|
2011-04-05 12:27:11 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* This is the common user SLB miss handler, used when going to virtual
|
|
|
|
* mode for SLB misses. It is currently not used.
|
|
|
|
*/
|
|
|
|
#ifdef __DISABLED__
|
|
|
|
.align 7
|
|
|
|
.globl slb_miss_user_common
|
|
|
|
slb_miss_user_common:
|
|
|
|
mflr r10
|
|
|
|
std r3,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
stw r9,PACA_EXGEN+EX_CCR(r13)
|
|
|
|
std r10,PACA_EXGEN+EX_LR(r13)
|
|
|
|
std r11,PACA_EXGEN+EX_SRR0(r13)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl slb_allocate_user
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
ld r10,PACA_EXGEN+EX_LR(r13)
|
|
|
|
ld r3,PACA_EXGEN+EX_R3(r13)
|
|
|
|
lwz r9,PACA_EXGEN+EX_CCR(r13)
|
|
|
|
ld r11,PACA_EXGEN+EX_SRR0(r13)
|
|
|
|
mtlr r10
|
|
|
|
beq- slb_miss_fault
|
|
|
|
|
|
|
|
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
|
|
|
|
beq- unrecov_user_slb
|
|
|
|
mfmsr r10
|
|
|
|
|
|
|
|
.machine push
|
|
|
|
.machine "power4"
|
|
|
|
mtcrf 0x80,r9
|
|
|
|
.machine pop
|
|
|
|
|
|
|
|
clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
|
|
|
|
mtmsrd r10,1
|
|
|
|
|
|
|
|
mtspr SRR0,r11
|
|
|
|
mtspr SRR1,r12
|
|
|
|
|
|
|
|
ld r9,PACA_EXGEN+EX_R9(r13)
|
|
|
|
ld r10,PACA_EXGEN+EX_R10(r13)
|
|
|
|
ld r11,PACA_EXGEN+EX_R11(r13)
|
|
|
|
ld r12,PACA_EXGEN+EX_R12(r13)
|
|
|
|
ld r13,PACA_EXGEN+EX_R13(r13)
|
|
|
|
rfid
|
|
|
|
b .
|
|
|
|
|
|
|
|
slb_miss_fault:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
|
|
|
|
ld r4,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
li r5,0
|
|
|
|
std r4,_DAR(r1)
|
|
|
|
std r5,_DSISR(r1)
|
|
|
|
b handle_page_fault
|
|
|
|
|
|
|
|
unrecov_user_slb:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2009-06-03 05:17:38 +08:00
|
|
|
1: addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl unrecoverable_exception
|
2009-06-03 05:17:38 +08:00
|
|
|
b 1b
|
|
|
|
|
|
|
|
#endif /* __DISABLED__ */
|
|
|
|
|
|
|
|
|
2013-12-10 03:10:15 +08:00
|
|
|
/*
|
|
|
|
* Machine check is different because we use a different
|
|
|
|
* save area: PACA_EXMC instead of PACA_EXGEN.
|
|
|
|
*/
|
|
|
|
/*
 * machine_check_common: common handler for the 0x200 vector.
 * DAR and DSISR are parked in PACA_EXMC, the same save area that
 * EXCEPTION_PROLOG_COMMON is given below, so nothing is written to
 * PACA_EXGEN. A machine check can interrupt another handler that has
 * parked its own DAR/DSISR in PACA_EXGEN and not yet read them back;
 * writing there would corrupt that handler's values.
 * The parked values are then copied into the _DAR/_DSISR frame slots
 * and machine_check_exception() is called with
 * r3 = r1 + STACK_FRAME_OVERHEAD; exit is via ret_from_except.
 */
.align 7
|
|
|
|
.globl machine_check_common
|
|
|
|
machine_check_common:
|
|
|
|
|
|
|
|
|
|
|
mfspr r10,SPRN_DAR
|
|
|
|
std r10,PACA_EXMC+EX_DAR(r13)
|
|
|
|
mfspr r10,SPRN_DSISR
|
|
|
|
stw r10,PACA_EXMC+EX_DSISR(r13)
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
|
|
|
|
FINISH_NAP
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2013-12-10 03:10:15 +08:00
|
|
|
/* Read back from the same PACA_EXMC slots written above. */
ld r3,PACA_EXMC+EX_DAR(r13)
|
|
|
|
lwz r4,PACA_EXMC+EX_DSISR(r13)
|
|
|
|
std r3,_DAR(r1)
|
|
|
|
std r4,_DSISR(r1)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2013-12-10 03:10:15 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl machine_check_exception
|
|
|
|
b ret_from_except
|
2013-12-10 03:10:15 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * alignment_common: common handler for the 0x600 vector.
 * DAR and DSISR are parked in PACA_EXGEN before the prolog, then
 * copied into the _DAR/_DSISR frame slots. alignment_exception() is
 * called with r3 = r1 + STACK_FRAME_OVERHEAD; exit is via
 * ret_from_except.
 */
.align 7
|
|
|
|
.globl alignment_common
|
|
|
|
alignment_common:
|
|
|
|
mfspr r10,SPRN_DAR
|
|
|
|
std r10,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
mfspr r10,SPRN_DSISR
|
|
|
|
stw r10,PACA_EXGEN+EX_DSISR(r13)
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
|
|
|
|
/* Move the parked DAR/DSISR from the PACA into the frame at r1. */
ld r3,PACA_EXGEN+EX_DAR(r13)
|
|
|
|
lwz r4,PACA_EXGEN+EX_DSISR(r13)
|
|
|
|
std r3,_DAR(r1)
|
|
|
|
std r4,_DSISR(r1)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2009-06-03 05:17:38 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl alignment_exception
|
|
|
|
b ret_from_except
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
 * program_check_common: common handler for the 0x700 vector.
 * After the common prolog it calls save_nvgprs, runs
 * RECONCILE_IRQ_STATE, and calls program_check_exception() with
 * r3 = r1 + STACK_FRAME_OVERHEAD; exit is via ret_from_except.
 */
.align 7
|
|
|
|
.globl program_check_common
|
|
|
|
program_check_common:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2012-02-21 05:32:30 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl program_check_exception
|
|
|
|
b ret_from_except
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/*
 * fp_unavailable_common: common handler for the 0x800 vector.
 * Three paths:
 *  - fall-through (the "bne 1f" not taken): calls
 *    kernel_fp_unavailable_exception() and then hits BUG_OPCODE;
 *  - 1: the "from user" path: calls load_up_fpu and leaves via
 *    fast_exception_return;
 *  - 2: (CONFIG_PPC_TRANSACTIONAL_MEM, CPU_FTR_TM only) reached from
 *    1: when the TM state bits in r12 are non-zero: calls
 *    fp_unavailable_tm() and leaves via ret_from_except.
 * NOTE(review): the "bne" relies on condition-register state left by
 * EXCEPTION_PROLOG_COMMON -- confirm in the macro definition.
 */
.align 7
|
|
|
|
.globl fp_unavailable_common
|
|
|
|
fp_unavailable_common:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
|
|
|
|
bne 1f /* if from user, just load it up */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2009-06-03 05:17:38 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl kernel_fp_unavailable_exception
|
2009-06-03 05:17:38 +08:00
|
|
|
/* Placed directly after the call; executed only if it returns. */
BUG_OPCODE
|
2013-02-14 00:21:40 +08:00
|
|
|
1:
|
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
/* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
|
|
|
|
* transaction), go do TM stuff
|
|
|
|
*/
|
|
|
|
rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
|
|
|
|
bne- 2f
|
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_TM)
|
|
|
|
#endif
|
2014-02-04 13:04:35 +08:00
|
|
|
bl load_up_fpu
|
2009-06-03 05:17:38 +08:00
|
|
|
b fast_exception_return
|
2013-02-14 00:21:40 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
2: /* User process was in a transaction */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2013-02-14 00:21:40 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl fp_unavailable_tm
|
|
|
|
b ret_from_except
|
2013-02-14 00:21:40 +08:00
|
|
|
#endif
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
 * altivec_unavailable_common: common handler for the 0xf20 vector.
 * With CONFIG_ALTIVEC, a CPU_FTR_ALTIVEC feature section wraps the
 * fast path: "beq 1f" skips it, otherwise load_up_altivec is called
 * and the handler leaves via fast_exception_return. Inside that
 * section, with CONFIG_PPC_TRANSACTIONAL_MEM and CPU_FTR_TM, non-zero
 * TM state bits in r12 divert to 2:, which calls
 * altivec_unavailable_tm().
 * Everything else -- label 1:, or a build without CONFIG_ALTIVEC --
 * falls to the tail, which calls altivec_unavailable_exception() with
 * r3 = r1 + STACK_FRAME_OVERHEAD and leaves via ret_from_except.
 * NOTE(review): the "beq" relies on condition-register state left by
 * EXCEPTION_PROLOG_COMMON -- confirm in the macro definition.
 */
.align 7
|
|
|
|
.globl altivec_unavailable_common
|
|
|
|
altivec_unavailable_common:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
|
|
|
|
#ifdef CONFIG_ALTIVEC
|
|
|
|
BEGIN_FTR_SECTION
|
|
|
|
beq 1f
|
2013-02-14 00:21:40 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
/* Nested section with its own id (69), inside the ALTIVEC section. */
BEGIN_FTR_SECTION_NESTED(69)
|
|
|
|
/* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
|
|
|
|
* transaction), go do TM stuff
|
|
|
|
*/
|
|
|
|
rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
|
|
|
|
bne- 2f
|
|
|
|
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
|
|
|
|
#endif
|
2014-02-04 13:04:35 +08:00
|
|
|
bl load_up_altivec
|
2009-06-03 05:17:38 +08:00
|
|
|
b fast_exception_return
|
2013-02-14 00:21:40 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
2: /* User process was in a transaction */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2013-02-14 00:21:40 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl altivec_unavailable_tm
|
|
|
|
b ret_from_except
|
2013-02-14 00:21:40 +08:00
|
|
|
#endif
|
2009-06-03 05:17:38 +08:00
|
|
|
1:
|
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
|
|
|
|
#endif
|
2014-02-04 13:04:35 +08:00
|
|
|
/* Tail: report the exception through the C handler. */
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2009-06-03 05:17:38 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl altivec_unavailable_exception
|
|
|
|
b ret_from_except
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
.align 7
|
|
|
|
.globl vsx_unavailable_common
|
|
|
|
vsx_unavailable_common:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
|
|
|
|
#ifdef CONFIG_VSX
|
|
|
|
BEGIN_FTR_SECTION
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
beq 1f
|
2013-02-14 00:21:40 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
BEGIN_FTR_SECTION_NESTED(69)
|
|
|
|
/* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
|
|
|
|
* transaction), go do TM stuff
|
|
|
|
*/
|
|
|
|
rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
|
|
|
|
bne- 2f
|
|
|
|
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
|
|
|
|
#endif
|
2014-02-04 13:04:35 +08:00
|
|
|
b load_up_vsx
|
2013-02-14 00:21:40 +08:00
|
|
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
|
|
|
2: /* User process was in a transaction */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2013-02-14 00:21:40 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl vsx_unavailable_tm
|
|
|
|
b ret_from_except
|
2013-02-14 00:21:40 +08:00
|
|
|
#endif
|
2009-06-03 05:17:38 +08:00
|
|
|
1:
|
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
|
|
|
|
#endif
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2009-06-03 05:17:38 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl vsx_unavailable_exception
|
|
|
|
b ret_from_except
|
2009-06-03 05:17:38 +08:00
|
|
|
|
2014-02-04 13:06:46 +08:00
|
|
|
STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
|
|
|
|
STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
|
2013-02-14 00:21:38 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
.align 7
|
|
|
|
.globl __end_handlers
|
|
|
|
__end_handlers:
|
|
|
|
|
2013-01-10 14:44:19 +08:00
|
|
|
/* Equivalents to the above handlers for relocation-on interrupt vectors */
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
|
|
|
|
MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
|
2013-01-10 14:44:19 +08:00
|
|
|
|
powerpc: Save CFAR before branching in interrupt entry paths
Some of the interrupt vectors on 64-bit POWER server processors are
only 32 bytes long, which is not enough for the full first-level
interrupt handler. For these we currently just have a branch to an
out-of-line handler. However, this means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors.
To fix this, we split the EXCEPTION_PROLOG_1 macro into two pieces:
EXCEPTION_PROLOG_0 contains the part up to the point where the CFAR
is saved in the PACA, and EXCEPTION_PROLOG_1 contains the rest. We
then put EXCEPTION_PROLOG_0 in the short interrupt vectors before
we branch to the out-of-line handler, which contains the rest of the
first-level interrupt handler. To facilitate this, we define new
_OOL (out of line) variants of STD_EXCEPTION_PSERIES, etc.
In order to get EXCEPTION_PROLOG_0 to be short enough, i.e., no more
than 6 instructions, it was necessary to move the stores that move
the PPR and CFAR values into the PACA into __EXCEPTION_PROLOG_1 and
to get rid of one of the two HMT_MEDIUM instructions. Previously
there was a HMT_MEDIUM_PPR_DISCARD before the prolog, which was
nop'd out on processors with the PPR (POWER7 and later), and then
another HMT_MEDIUM inside the HMT_MEDIUM_PPR_SAVE macro call inside
__EXCEPTION_PROLOG_1, which was nop'd out on processors without PPR.
Now the HMT_MEDIUM inside EXCEPTION_PROLOG_0 is there unconditionally
and the HMT_MEDIUM_PPR_DISCARD is not strictly necessary, although
this leaves it in for the interrupt vectors where there is room for
it.
Previously we had a handler for hypervisor maintenance interrupts at
0xe50, which doesn't leave enough room for the vector for hypervisor
emulation assist interrupts at 0xe40, since we need 8 instructions.
The 0xe50 vector was only used on POWER6, as the HMI vector was moved
to 0xe60 on POWER7. Since we don't support running in hypervisor mode
on POWER6, we just remove the handler at 0xe50.
This also changes denorm_exception_hv to use EXCEPTION_PROLOG_0
instead of open-coding it, and removes the HMT_MEDIUM_PPR_DISCARD
from the relocation-on vectors (since any CPU that supports
relocation-on interrupts also has the PPR).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2013-02-05 02:10:15 +08:00
|
|
|
STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
|
|
|
|
STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
|
|
|
|
STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
|
2013-06-25 15:47:56 +08:00
|
|
|
STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
|
2013-08-09 15:29:27 +08:00
|
|
|
STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
|
2013-01-10 14:44:19 +08:00
|
|
|
|
|
|
|
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
|
|
|
|
/*
|
|
|
|
* Data area reserved for FWNMI option.
|
|
|
|
* This address (0x7000) is fixed by the RPA.
|
|
|
|
*/
|
|
|
|
.= 0x7000
|
|
|
|
.globl fwnmi_data_area
|
|
|
|
fwnmi_data_area:
|
|
|
|
|
|
|
|
/* pseries and powernv need to keep the whole page from
|
|
|
|
* 0x7000 to 0x8000 free for use by the firmware
|
|
|
|
*/
|
|
|
|
. = 0x8000
|
|
|
|
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
|
|
|
|
|
2014-08-09 13:22:12 +08:00
|
|
|
/*
 * hmi_exception_early: early handling of the 0xe60 (HMI) interrupt.
 * Switches r1 to the emergency stack taken from PACAEMERGSP(r13), builds an
 * INT_FRAME_SIZE frame holding CR, HSRR0, HSRR1, r0 and the interrupted r1,
 * calls hmi_exception_realmode with r3 = r1 + STACK_FRAME_OVERHEAD, then
 * winds the frame back (HSRR0/HSRR1, CTR, XER, LR, CR, GPRs, r1) and falls
 * through to hmi_exception_after_realmode.
 * NOTE(review): r9 = saved CR and r13 = PACA on entry are assumed from how
 * they are used below - confirm against EXCEPTION_PROLOG_1.
 */
.globl hmi_exception_early
|
|
|
|
hmi_exception_early:
|
|
|
|
EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
|
|
|
|
mr r10,r1 /* Save r1 */
|
|
|
|
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
|
|
|
|
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
|
|
|
|
std r9,_CCR(r1) /* save CR in stackframe */
|
|
|
|
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
|
|
|
|
std r11,_NIP(r1) /* save HSRR0 in stackframe */
|
|
|
|
mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
|
|
|
|
std r12,_MSR(r1) /* save HSRR1 in stackframe */
|
|
|
|
std r10,0(r1) /* make stack chain pointer */
|
|
|
|
std r0,GPR0(r1) /* save r0 in stackframe */
|
|
|
|
std r10,GPR1(r1) /* save r1 in stackframe */
|
|
|
|
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
|
|
|
|
EXCEPTION_PROLOG_COMMON_3(0xe60)
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = frame address passed to the call below */
|
|
|
|
bl hmi_exception_realmode
|
|
|
|
/* Windup the stack. */
|
|
|
|
/* Move original HSRR0 and HSRR1 into the respective regs */
|
|
|
|
ld r9,_MSR(r1)
|
|
|
|
mtspr SPRN_HSRR1,r9
|
|
|
|
ld r3,_NIP(r1)
|
|
|
|
mtspr SPRN_HSRR0,r3
|
|
|
|
ld r9,_CTR(r1)
|
|
|
|
mtctr r9
|
|
|
|
ld r9,_XER(r1)
|
|
|
|
mtxer r9
|
|
|
|
ld r9,_LINK(r1)
|
|
|
|
mtlr r9
|
|
|
|
REST_GPR(0, r1)
|
|
|
|
REST_8GPRS(2, r1)
|
|
|
|
REST_GPR(10, r1)
|
|
|
|
ld r11,_CCR(r1) /* r11 is scratch for the saved CR here */
|
|
|
|
mtcr r11
|
|
|
|
REST_GPR(11, r1) /* r11 itself is restored only after CR is back */
|
|
|
|
REST_2GPRS(12, r1)
|
|
|
|
/* restore original r1. */
|
|
|
|
ld r1,GPR1(r1)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go to virtual mode and pull the HMI event information from
|
|
|
|
* firmware.
|
|
|
|
*/
|
|
|
|
/*
 * hmi_exception_after_realmode: reached by fall-through once the preceding
 * code has reloaded the interrupted r1. Re-runs the first part of the
 * first-level prolog with the PACA_EXGEN save area and branches to
 * hmi_exception_hv, which is defined outside this section.
 */
.globl hmi_exception_after_realmode
|
|
|
|
hmi_exception_after_realmode:
|
|
|
|
SET_SCRATCH0(r13) /* presumably stashes r13 in the scratch SPR - confirm against the macro */
|
|
|
|
EXCEPTION_PROLOG_0(PACA_EXGEN)
|
|
|
|
b hmi_exception_hv
|
|
|
|
|
2013-01-10 14:44:19 +08:00
|
|
|
|
2013-12-10 03:10:15 +08:00
|
|
|
#define MACHINE_CHECK_HANDLER_WINDUP \
|
|
|
|
/* Clear MSR_RI before setting SRR0 and SRR1. */\
|
|
|
|
li r0,MSR_RI; \
|
|
|
|
mfmsr r9; /* get MSR value */ \
|
|
|
|
andc r9,r9,r0; \
|
|
|
|
mtmsrd r9,1; /* Clear MSR_RI */ \
|
|
|
|
/* Move original SRR0 and SRR1 into the respective regs */ \
|
|
|
|
ld r9,_MSR(r1); \
|
|
|
|
mtspr SPRN_SRR1,r9; \
|
|
|
|
ld r3,_NIP(r1); \
|
|
|
|
mtspr SPRN_SRR0,r3; \
|
|
|
|
ld r9,_CTR(r1); \
|
|
|
|
mtctr r9; \
|
|
|
|
ld r9,_XER(r1); \
|
|
|
|
mtxer r9; \
|
|
|
|
ld r9,_LINK(r1); \
|
|
|
|
mtlr r9; \
|
|
|
|
REST_GPR(0, r1); \
|
|
|
|
REST_8GPRS(2, r1); \
|
|
|
|
REST_GPR(10, r1); \
|
|
|
|
ld r11,_CCR(r1); \
|
|
|
|
mtcr r11; \
|
|
|
|
/* Decrement paca->in_mce. */ \
|
|
|
|
lhz r12,PACA_IN_MCE(r13); \
|
|
|
|
subi r12,r12,1; \
|
|
|
|
sth r12,PACA_IN_MCE(r13); \
|
|
|
|
REST_GPR(11, r1); \
|
|
|
|
REST_2GPRS(12, r1); \
|
|
|
|
/* restore original r1. */ \
|
|
|
|
ld r1,GPR1(r1)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Handle machine check early in real mode. We come here with
|
|
|
|
* ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
|
|
|
|
*/
|
|
|
|
.align 7
|
|
|
|
.globl machine_check_handle_early
|
|
|
|
machine_check_handle_early:
|
|
|
|
std r0,GPR0(r1) /* Save r0 */
|
|
|
|
EXCEPTION_PROLOG_COMMON_3(0x200)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2013-12-10 03:10:15 +08:00
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-02-04 13:04:35 +08:00
|
|
|
bl machine_check_early
|
2014-06-11 16:47:56 +08:00
|
|
|
std r3,RESULT(r1) /* Save result */
|
2013-12-10 03:10:15 +08:00
|
|
|
ld r12,_MSR(r1)
|
|
|
|
#ifdef CONFIG_PPC_P7_NAP
|
|
|
|
/*
|
|
|
|
* Check if thread was in power saving mode. We come here when any
|
|
|
|
* of the following is true:
|
|
|
|
* a. thread wasn't in power saving mode
|
|
|
|
* b. thread was in power saving mode with no state loss or
|
|
|
|
* supervisor state loss
|
|
|
|
*
|
|
|
|
* Go back to nap again if (b) is true.
|
|
|
|
*/
|
|
|
|
rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
|
|
|
|
beq 4f /* No, it wasn't */
|
|
|
|
/* Thread was in power saving mode. Go back to nap again. */
|
|
|
|
cmpwi r11,2
|
|
|
|
bne 3f
|
|
|
|
/* Supervisor state loss */
|
|
|
|
li r0,1
|
|
|
|
stb r0,PACA_NAPSTATELOST(r13)
|
2014-02-04 13:04:35 +08:00
|
|
|
3: bl machine_check_queue_event
|
2013-12-10 03:10:15 +08:00
|
|
|
MACHINE_CHECK_HANDLER_WINDUP
|
|
|
|
GET_PACA(r13)
|
|
|
|
ld r1,PACAR1(r13)
|
2014-12-10 02:56:52 +08:00
|
|
|
li r3,PNV_THREAD_NAP
|
2014-02-04 13:04:35 +08:00
|
|
|
b power7_enter_nap_mode
|
2013-12-10 03:10:15 +08:00
|
|
|
4:
|
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* Check if we are coming from hypervisor userspace. If yes then we
|
|
|
|
* continue in host kernel in V mode to deliver the MC event.
|
|
|
|
*/
|
|
|
|
rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
|
|
|
|
beq 5f
|
|
|
|
andi. r11,r12,MSR_PR /* See if coming from user. */
|
|
|
|
bne 9f /* continue in V mode if we are. */
|
|
|
|
|
|
|
|
5:
|
2015-03-17 18:44:41 +08:00
|
|
|
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
|
2013-12-10 03:10:15 +08:00
|
|
|
/*
|
|
|
|
* We are coming from kernel context. Check if we are coming from
|
|
|
|
* guest. If yes, then we can continue. We will fall through
|
|
|
|
* do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
|
|
|
|
*/
|
|
|
|
lbz r11,HSTATE_IN_GUEST(r13)
|
|
|
|
cmpwi r11,0 /* Check if coming from guest */
|
|
|
|
bne 9f /* continue if we are. */
|
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* At this point we are not sure about what context we come from.
|
|
|
|
* Queue up the MCE event and return from the interrupt.
|
|
|
|
* But before that, check if this is an un-recoverable exception.
|
|
|
|
* If yes, then stay on emergency stack and panic.
|
|
|
|
*/
|
|
|
|
andi. r11,r12,MSR_RI
|
|
|
|
bne 2f
|
2014-06-11 16:47:56 +08:00
|
|
|
1: mfspr r11,SPRN_SRR0
|
|
|
|
ld r10,PACAKBASE(r13)
|
|
|
|
LOAD_HANDLER(r10,unrecover_mce)
|
|
|
|
mtspr SPRN_SRR0,r10
|
|
|
|
ld r10,PACAKMSR(r13)
|
|
|
|
/*
|
|
|
|
* We are going down. But there are chances that we might get hit by
|
|
|
|
* another MCE during panic path and we may run into unstable state
|
|
|
|
* with no way out. Hence, turn ME bit off while going down, so that
|
|
|
|
* when another MCE is hit during panic path, system will checkstop
|
|
|
|
* and hypervisor will get restarted cleanly by SP.
|
|
|
|
*/
|
|
|
|
li r3,MSR_ME
|
|
|
|
andc r10,r10,r3 /* Turn off MSR_ME */
|
|
|
|
mtspr SPRN_SRR1,r10
|
|
|
|
rfid
|
|
|
|
b .
|
2013-12-10 03:10:15 +08:00
|
|
|
2:
|
2014-06-11 16:47:56 +08:00
|
|
|
/*
|
|
|
|
* Check if we have successfully handled/recovered from error, if not
|
|
|
|
* then stay on emergency stack and panic.
|
|
|
|
*/
|
|
|
|
ld r3,RESULT(r1) /* Load result */
|
|
|
|
cmpdi r3,0 /* see if we handled MCE successfully */
|
|
|
|
|
|
|
|
beq 1b /* if !handled then panic */
|
2013-12-10 03:10:15 +08:00
|
|
|
/*
|
|
|
|
* Return from MC interrupt.
|
|
|
|
* Queue up the MCE event so that we can log it later, while
|
|
|
|
* returning from kernel or opal call.
|
|
|
|
*/
|
2014-02-04 13:04:35 +08:00
|
|
|
bl machine_check_queue_event
|
2013-12-10 03:10:15 +08:00
|
|
|
MACHINE_CHECK_HANDLER_WINDUP
|
|
|
|
rfid
|
|
|
|
9:
|
|
|
|
/* Deliver the machine check to host kernel in V mode. */
|
|
|
|
MACHINE_CHECK_HANDLER_WINDUP
|
|
|
|
b machine_check_pSeries
|
|
|
|
|
2014-06-11 16:47:56 +08:00
|
|
|
/*
 * unrecover_mce: terminal path for a machine check that cannot be recovered.
 * machine_check_handle_early arrives here through rfid after loading this
 * label into SRR0 and the PACAKMSR value with MSR_ME cleared into SRR1.
 * Control is not expected to leave this block.
 */
unrecover_mce:
|
|
|
|
/* Invoke machine_check_exception to print MCE event and panic. */
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = r1 + STACK_FRAME_OVERHEAD (presumably the saved-register area - confirm) */
|
2014-06-12 07:08:37 +08:00
|
|
|
bl machine_check_exception
|
2014-06-11 16:47:56 +08:00
|
|
|
/*
|
|
|
|
* We will not reach here. Even if we did, there is no way out. Call
|
|
|
|
* unrecoverable_exception and die.
|
|
|
|
*/
|
|
|
|
1: addi r3,r1,STACK_FRAME_OVERHEAD
|
2014-06-12 07:08:37 +08:00
|
|
|
bl unrecoverable_exception
|
2014-06-11 16:47:56 +08:00
|
|
|
b 1b /* call it again if unrecoverable_exception ever returns */
|
2013-03-25 09:31:31 +08:00
|
|
|
/*
|
|
|
|
* r13 points to the PACA, r9 contains the saved CR,
|
|
|
|
* r12 contains the saved SRR1, SRR0 is still ready for return
|
|
|
|
* r3 has the faulting address
|
|
|
|
* r9 - r13 are saved in paca->exslb.
|
|
|
|
* r3 is saved in paca->slb_r3
|
|
|
|
* We assume we aren't going to take any exceptions during this procedure.
|
|
|
|
*/
|
2014-02-04 13:04:52 +08:00
|
|
|
slb_miss_realmode:
|
2013-03-25 09:31:31 +08:00
|
|
|
mflr r10
|
|
|
|
#ifdef CONFIG_RELOCATABLE
|
|
|
|
mtctr r11
|
|
|
|
#endif
|
|
|
|
|
|
|
|
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
|
|
|
|
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
|
|
|
|
|
2014-02-04 13:04:35 +08:00
|
|
|
bl slb_allocate_realmode
|
2013-03-25 09:31:31 +08:00
|
|
|
|
|
|
|
/* All done -- return from exception. */
|
|
|
|
|
|
|
|
ld r10,PACA_EXSLB+EX_LR(r13)
|
|
|
|
ld r3,PACA_EXSLB+EX_R3(r13)
|
|
|
|
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
|
|
|
|
|
|
|
|
mtlr r10
|
|
|
|
|
|
|
|
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
|
|
|
|
beq- 2f
|
|
|
|
|
|
|
|
.machine push
|
|
|
|
.machine "power4"
|
|
|
|
mtcrf 0x80,r9
|
|
|
|
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
|
|
|
|
.machine pop
|
|
|
|
|
|
|
|
RESTORE_PPR_PACA(PACA_EXSLB, r9)
|
|
|
|
ld r9,PACA_EXSLB+EX_R9(r13)
|
|
|
|
ld r10,PACA_EXSLB+EX_R10(r13)
|
|
|
|
ld r11,PACA_EXSLB+EX_R11(r13)
|
|
|
|
ld r12,PACA_EXSLB+EX_R12(r13)
|
|
|
|
ld r13,PACA_EXSLB+EX_R13(r13)
|
|
|
|
rfid
|
|
|
|
b . /* prevent speculative execution */
|
|
|
|
|
|
|
|
2: mfspr r11,SPRN_SRR0
|
|
|
|
ld r10,PACAKBASE(r13)
|
|
|
|
LOAD_HANDLER(r10,unrecov_slb)
|
|
|
|
mtspr SPRN_SRR0,r10
|
|
|
|
ld r10,PACAKMSR(r13)
|
|
|
|
mtspr SPRN_SRR1,r10
|
|
|
|
rfid
|
|
|
|
b .
|
|
|
|
|
|
|
|
/*
 * unrecov_slb: reached from slb_miss_realmode when MSR_RI is clear in the
 * saved SRR1 (r12). That path loads this label into SRR0 and PACAKMSR into
 * SRR1 and issues rfid. Builds a full exception frame from the PACA_EXSLB
 * save area and then calls unrecoverable_exception in a loop.
 */
unrecov_slb:
|
|
|
|
EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
|
2014-07-15 19:15:38 +08:00
|
|
|
RECONCILE_IRQ_STATE(r10, r11)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs
|
2013-03-25 09:31:31 +08:00
|
|
|
1: addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = r1 + STACK_FRAME_OVERHEAD, passed to the call below */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl unrecoverable_exception
|
2013-03-25 09:31:31 +08:00
|
|
|
b 1b /* call it again if unrecoverable_exception ever returns */
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef CONFIG_PPC_970_NAP
|
|
|
|
/*
 * power4_fixup_nap: clears the bits given in r10 from r9, stores the result
 * to TI_LOCAL_FLAGS(r11), then copies the saved LR slot of the frame at r1
 * into its saved NIP slot so the interrupted code resumes at its return
 * address.
 * NOTE(review): r9 = current local flags, r10 = mask to clear and
 * r11 = thread_info pointer are assumed to be set up by the caller, which
 * is outside this section - confirm.
 */
power4_fixup_nap:
|
|
|
|
andc r9,r9,r10 /* r9 &= ~r10 */
|
|
|
|
std r9,TI_LOCAL_FLAGS(r11)
|
|
|
|
ld r10,_LINK(r1) /* make idle task do the */
|
|
|
|
std r10,_NIP(r1) /* equivalent of a blr */
|
|
|
|
blr
|
|
|
|
#endif
|
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* Hash table stuff
|
|
|
|
*/
|
|
|
|
.align 7
|
2014-02-04 13:06:11 +08:00
|
|
|
do_hash_page:
|
2009-06-03 05:17:38 +08:00
|
|
|
std r3,_DAR(r1)
|
|
|
|
std r4,_DSISR(r1)
|
|
|
|
|
2010-03-30 07:59:25 +08:00
|
|
|
andis. r0,r4,0xa410 /* weird error? */
|
2009-06-03 05:17:38 +08:00
|
|
|
bne- handle_page_fault /* if not, try to insert a HPTE */
|
2010-03-30 07:59:25 +08:00
|
|
|
andis. r0,r4,DSISR_DABRMATCH@h
|
|
|
|
bne- handle_dabr_fault
|
2012-07-05 12:41:35 +08:00
|
|
|
CURRENT_THREAD_INFO(r11, r1)
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
|
|
|
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
|
|
|
|
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
|
|
|
|
bne 77f /* then don't call hash_page now */
|
2009-06-03 05:17:38 +08:00
|
|
|
/*
|
|
|
|
* We need to set the _PAGE_USER bit if MSR_PR is set or if we are
|
|
|
|
* accessing a userspace segment (even from the kernel). We assume
|
|
|
|
* kernel addresses always have the high bit set.
|
|
|
|
*/
|
|
|
|
rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
|
|
|
|
rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
|
|
|
|
orc r0,r12,r0 /* MSR_PR | ~high_bit */
|
|
|
|
rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
|
|
|
|
ori r4,r4,1 /* add _PAGE_PRESENT */
|
|
|
|
rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* r3 contains the faulting address
|
|
|
|
* r4 contains the required access permissions
|
|
|
|
* r5 contains the trap number
|
2014-12-04 13:30:14 +08:00
|
|
|
* r6 contains dsisr
|
2009-06-03 05:17:38 +08:00
|
|
|
*
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
* at return r3 = 0 for success, 1 for page fault, negative for error
|
2009-06-03 05:17:38 +08:00
|
|
|
*/
|
2014-12-04 13:30:14 +08:00
|
|
|
ld r6,_DSISR(r1)
|
2014-02-04 13:04:35 +08:00
|
|
|
bl hash_page /* build HPTE if possible */
|
2009-06-03 05:17:38 +08:00
|
|
|
cmpdi r3,0 /* see if hash_page succeeded */
|
|
|
|
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
/* Success */
|
2009-06-03 05:17:38 +08:00
|
|
|
beq fast_exc_return_irq /* Return from exception on success */
|
|
|
|
|
powerpc: Rework lazy-interrupt handling
The current implementation of lazy interrupts handling has some
issues that this tries to address.
We don't do the various workarounds we need to do when re-enabling
interrupts in some cases such as when returning from an interrupt
and thus we may still lose or get delayed decrementer or doorbell
interrupts.
The current scheme also makes it much harder to handle the external
"edge" interrupts provided by some BookE processors when using the
EPR facility (External Proxy) and the Freescale Hypervisor.
Additionally, we tend to keep interrupts hard disabled in a number
of cases, such as decrementer interrupts, external interrupts, or
when a masked decrementer interrupt is pending. This is sub-optimal.
This is an attempt at fixing it all in one go by reworking the way
we do the lazy interrupt disabling from the ground up.
The base idea is to replace the "hard_enabled" field with a
"irq_happened" field in which we store a bit mask of what interrupt
occurred while soft-disabled.
When re-enabling, either via arch_local_irq_restore() or when returning
from an interrupt, we can now decide what to do by testing bits in that
field.
We then implement replaying of the missed interrupts either by
re-using the existing exception frame (in exception exit case) or via
the creation of a new one from an assembly trampoline (in the
arch_local_irq_enable case).
This removes the need to play with the decrementer to try to create
fake interrupts, among others.
In addition, this adds a few refinements:
- We no longer hard disable decrementer interrupts that occur
while soft-disabled. We now simply bump the decrementer back to max
(on BookS) or leave it stopped (on BookE) and continue with hard interrupts
enabled, which means that we'll potentially get better sample quality from
performance monitor interrupts.
- Timer, decrementer and doorbell interrupts now hard-enable
shortly after removing the source of the interrupt, which means
they no longer run entirely hard disabled. Again, this will improve
perf sample quality.
- On Book3E 64-bit, we now make the performance monitor interrupt
act as an NMI like Book3S (the necessary C code for that to work
appear to already be present in the FSL perf code, notably calling
nmi_enter instead of irq_enter). (This also fixes a bug where BookE
perfmon interrupts could clobber r14 ... oops)
- We could make "masked" decrementer interrupts act as NMIs when doing
timer-based perf sampling to improve the sample quality.
Signed-off-by-yet: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
v2:
- Add hard-enable to decrementer, timer and doorbells
- Fix CR clobber in masked irq handling on BookE
- Make embedded perf interrupt act as an NMI
- Add a PACA_HAPPENED_EE_EDGE for use by FSL if they want
to retrigger an interrupt without preventing hard-enable
v3:
- Fix or vs. ori bug on Book3E
- Fix enabling of interrupts for some exceptions on Book3E
v4:
- Fix resend of doorbells on return from interrupt on Book3E
v5:
- Rebased on top of my latest series, which involves some significant
rework of some aspects of the patch.
v6:
- 32-bit compile fix
- more compile fixes with various .config combos
- factor out the asm code to soft-disable interrupts
- remove the C wrapper around preempt_schedule_irq
v7:
- Fix a bug with hard irq state tracking on native power7
2012-03-06 15:27:59 +08:00
|
|
|
/* Error */
|
|
|
|
blt- 13f
|
2010-03-30 07:59:25 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
/* Here we have a page fault that hash_page can't handle. */
|
|
|
|
/*
 * handle_page_fault: hand the fault to the C page-fault code.
 *
 * Calls do_page_fault with
 *   r3 = r1 + STACK_FRAME_OVERHEAD (address of the saved register frame)
 *   r4 = saved DAR   (full 64-bit faulting address)
 *   r5 = saved DSISR
 * If do_page_fault returns 0 we leave through local label 12
 * (ret_from_except_lite, defined in handle_dabr_fault below).  Otherwise
 * the non-volatile GPRs are saved and bad_page_fault is called with
 *   r3 = frame address, r4 = saved DAR, r5 = do_page_fault's return value
 * before leaving through ret_from_except.
 */
handle_page_fault:
|
|
|
|
11: ld r4,_DAR(r1) /* r4 = faulting address saved in the frame */
|
|
|
|
ld r5,_DSISR(r1) /* r5 = saved DSISR */
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = saved register frame */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl do_page_fault
|
2009-06-03 05:17:38 +08:00
|
|
|
cmpdi r3,0 /* 0 => fault was handled */
|
2012-03-07 13:48:45 +08:00
|
|
|
beq+ 12f /* common case: quick exception return */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs /* failure path: complete the frame before calling C */
|
2009-06-03 05:17:38 +08:00
|
|
|
mr r5,r3 /* r5 = non-zero result from do_page_fault */
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = saved register frame */
|
|
|
|
ld r4,_DAR(r1) /* _DAR is a 64-bit slot: use ld (lwz would truncate the address) */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl bad_page_fault
|
|
|
|
b ret_from_except
|
2009-06-03 05:17:38 +08:00
|
|
|
|
2012-03-07 13:48:45 +08:00
|
|
|
/* We have a data breakpoint exception - handle it */
|
|
|
|
/*
 * handle_dabr_fault: save the non-volatile GPRs, then call do_break with
 *   r3 = r1 + STACK_FRAME_OVERHEAD (address of the saved register frame)
 *   r4 = saved DAR
 *   r5 = saved DSISR
 * and leave through ret_from_except_lite.
 *
 * Local label 12 below is also the exit used by handle_page_fault when
 * do_page_fault returns 0.
 */
handle_dabr_fault:
|
2014-02-04 13:04:35 +08:00
|
|
|
bl save_nvgprs /* complete the frame before calling C */
|
2012-03-07 13:48:45 +08:00
|
|
|
ld r4,_DAR(r1) /* r4 = address saved in the frame */
|
|
|
|
ld r5,_DSISR(r1) /* r5 = saved DSISR */
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = saved register frame */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl do_break
|
|
|
|
12: b ret_from_except_lite /* shared exit: also the target of "beq+ 12f" above */
|
2012-03-07 13:48:45 +08:00
|
|
|
|
2009-06-03 05:17:38 +08:00
|
|
|
|
|
|
|
/* We have a page fault that hash_page could handle but HV refused
|
|
|
|
* the PTE insertion
|
|
|
|
*/
|
2014-02-04 13:04:35 +08:00
|
|
|
/*
 * Reached from "blt- 13f" after hash_page, i.e. with a negative value in r3.
 * Calls low_hash_fault with
 *   r3 = r1 + STACK_FRAME_OVERHEAD (address of the saved register frame)
 *   r4 = saved DAR
 *   r5 = the negative hash_page result
 * and then leaves through ret_from_except.
 */
13: bl save_nvgprs
|
2009-06-03 05:17:38 +08:00
|
|
|
mr r5,r3 /* keep the hash_page result before r3 is reused */
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = saved register frame */
|
|
|
|
ld r4,_DAR(r1) /* r4 = faulting address saved in the frame */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl low_hash_fault
|
|
|
|
b ret_from_except
|
2009-06-03 05:17:38 +08:00
|
|
|
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
|
|
|
/*
|
|
|
|
* We come here as a result of a DSI at a point where we don't want
|
|
|
|
* to call hash_page, such as when we are accessing memory (possibly
|
|
|
|
* user memory) inside a PMU interrupt that occurred while interrupts
|
|
|
|
* were soft-disabled. We want to invoke the exception handler for
|
|
|
|
* the access, or panic if there isn't a handler.
|
|
|
|
*/
|
2014-02-04 13:04:35 +08:00
|
|
|
/*
 * Calls bad_page_fault with
 *   r3 = r1 + STACK_FRAME_OVERHEAD (address of the saved register frame)
 *   r4 = the value r3 held on entry
 *   r5 = SIGSEGV
 * and then leaves through ret_from_except.
 * NOTE(review): the branch to 77 is outside this view; r3 presumably still
 * holds the faulting address at that point - confirm against the caller.
 */
77: bl save_nvgprs
|
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
|
|
|
mr r4,r3 /* second argument = value of r3 on entry */
|
|
|
|
addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = saved register frame */
|
|
|
|
li r5,SIGSEGV /* third argument = SIGSEGV */
|
2014-02-04 13:04:35 +08:00
|
|
|
bl bad_page_fault
|
|
|
|
b ret_from_except
|
2014-07-15 18:25:02 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Here we have detected that the kernel stack pointer is bad.
|
|
|
|
* r9 contains the saved CR, r13 points to the paca,
|
|
|
|
* r10 contains the (bad) kernel stack pointer,
|
|
|
|
* r11 and r12 contain the saved SRR0 and SRR1.
|
|
|
|
* We switch to using an emergency stack, save the registers there,
|
|
|
|
* and call kernel_bad_stack(), which panics.
|
|
|
|
*/
|
|
|
|
/*
 * bad_stack: build a full register frame on the emergency stack and call
 * kernel_bad_stack on it, looping forever if that call ever returns.
 *
 * Besides the registers listed in the comment above, this code reads the
 * original r3 and r9-r13 (and, on CPUs with CPU_FTR_CFAR, the CFAR) from
 * EX_* offsets relative to r3.
 * NOTE(review): r3 presumably points at the paca exception save area set up
 * by the caller - that code is outside this view; confirm.
 */
bad_stack:
|
|
|
|
ld r1,PACAEMERGSP(r13) /* switch r1 to the emergency stack pointer kept in the paca */
|
|
|
|
subi r1,r1,64+INT_FRAME_SIZE /* reserve 64+INT_FRAME_SIZE bytes below it */
|
|
|
|
std r9,_CCR(r1) /* saved CR */
|
|
|
|
std r10,GPR1(r1) /* the bad stack pointer goes in the r1 slot */
|
|
|
|
std r11,_NIP(r1) /* saved SRR0 */
|
|
|
|
std r12,_MSR(r1) /* saved SRR1 */
|
|
|
|
mfspr r11,SPRN_DAR /* r11/r12 are free again: fetch DAR and DSISR */
|
|
|
|
mfspr r12,SPRN_DSISR
|
|
|
|
std r11,_DAR(r1)
|
|
|
|
std r12,_DSISR(r1)
|
|
|
|
mflr r10 /* capture LR, CTR and XER */
|
|
|
|
mfctr r11
|
|
|
|
mfxer r12
|
|
|
|
std r10,_LINK(r1)
|
|
|
|
std r11,_CTR(r1)
|
|
|
|
std r12,_XER(r1)
|
|
|
|
SAVE_GPR(0,r1) /* r0 and r2 (macro defined elsewhere) */
|
|
|
|
SAVE_GPR(2,r1)
|
|
|
|
ld r10,EX_R3(r3) /* original r3, read from the area r3 points at */
|
|
|
|
std r10,GPR3(r1)
|
|
|
|
SAVE_GPR(4,r1)
|
|
|
|
SAVE_4GPRS(5,r1) /* presumably r5-r8; macro defined elsewhere */
|
|
|
|
ld r9,EX_R9(r3) /* reload the original r9/r10 so the macro below stores them */
|
|
|
|
ld r10,EX_R10(r3)
|
|
|
|
SAVE_2GPRS(9,r1)
|
|
|
|
ld r9,EX_R11(r3) /* original r11-r13, staged through r9-r11 */
|
|
|
|
ld r10,EX_R12(r3)
|
|
|
|
ld r11,EX_R13(r3)
|
|
|
|
std r9,GPR11(r1)
|
|
|
|
std r10,GPR12(r1)
|
|
|
|
std r11,GPR13(r1)
|
|
|
|
BEGIN_FTR_SECTION /* the two instructions below apply only with CPU_FTR_CFAR */
|
|
|
|
ld r10,EX_CFAR(r3) /* saved CFAR ... */
|
|
|
|
std r10,ORIG_GPR3(r1) /* ... is stored in the ORIG_GPR3 slot */
|
|
|
|
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
|
|
|
|
SAVE_8GPRS(14,r1) /* presumably r14-r21 */
|
|
|
|
SAVE_10GPRS(22,r1) /* presumably r22-r31 */
|
|
|
|
lhz r12,PACA_TRAP_SAVE(r13) /* halfword trap value kept in the paca */
|
|
|
|
std r12,_TRAP(r1)
|
|
|
|
addi r11,r1,INT_FRAME_SIZE /* r11 = address just above the register frame */
|
|
|
|
std r11,0(r1) /* store that address at 0(r1) (presumably the back chain) */
|
|
|
|
li r12,0
|
|
|
|
std r12,0(r11) /* and write 0 at that address */
|
|
|
|
ld r2,PACATOC(r13) /* load r2 from the paca's TOC slot before calling C */
|
|
|
|
ld r11,exception_marker@toc(r2)
|
|
|
|
std r12,RESULT(r1) /* r12 is still 0: clear RESULT */
|
|
|
|
std r11,STACK_FRAME_OVERHEAD-16(r1) /* place the exception marker just below the saved registers */
|
|
|
|
1: addi r3,r1,STACK_FRAME_OVERHEAD /* r3 = saved register frame */
|
|
|
|
bl kernel_bad_stack
|
|
|
|
b 1b /* call again if kernel_bad_stack ever returns */
|