linux-sg2042/arch/powerpc/kernel/exceptions-64s.S

1205 lines
31 KiB
ArmAsm
Raw Normal View History

/*
* This file contains the 64-bit "server" PowerPC variant
* of the low level exception handling including exception
* vectors, exception return, part of the slb and stab
* handling and other fixed offset specific things.
*
* This file is meant to be #included from head_64.S due to
* position dependent assembly.
*
* Most of this originates from head_64.S and thus has the same
* copyright history.
*
*/
#include <asm/exception-64s.h>
#include <asm/ptrace.h>
/*
* We layout physical memory as follows:
* 0x0000 - 0x00ff : Secondary processor spin code
* 0x0100 - 0x2fff : pSeries Interrupt prologs
* 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
* 0x6000 - 0x6fff : Initial (CPU0) segment table
* 0x7000 - 0x7fff : FWNMI data area
* 0x8000 - : Early init and support code
*/
/*
* This is the start of the interrupt handlers for pSeries
* This code runs with relocation off.
* Code from here to __end_interrupts gets copied down to real
* address 0x100 when we are running a relocatable kernel.
* Therefore any relative branches in this section must only
* branch to labels in this section.
*/
. = 0x100
.globl __start_interrupts
__start_interrupts:
.globl system_reset_pSeries;
system_reset_pSeries:
HMT_MEDIUM;
SET_SCRATCH0(r13)
#ifdef CONFIG_PPC_P7_NAP
BEGIN_FTR_SECTION
/* Running native on arch 2.06 or later, check if we are
* waking up from nap. We only handle no state loss and
* supervisor state loss. We do -not- handle hypervisor
* state loss at this time.
*/
mfspr r13,SPRN_SRR1
KVM: PPC: Allow book3s_hv guests to use SMT processor modes This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded. This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4. To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code. When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core. We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus. When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy in host state to blocked). It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as the none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start to execute the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest. Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
rlwinm. r13,r13,47-31,30,31
beq 9f
/* waking up from powersave (nap) state */
cmpwi cr1,r13,2
/* Total loss of HV state is fatal, we could try to use the
* PIR to locate a PACA, then use an emergency stack etc...
* but for now, let's just stay stuck here
*/
KVM: PPC: Allow book3s_hv guests to use SMT processor modes This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded. This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4. To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code. When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core. We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus. When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy in host state to blocked). It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as the none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start to execute the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest. Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
bgt cr1,.
GET_PACA(r13)
#ifdef CONFIG_KVM_BOOK3S_64_HV
lbz r0,PACAPROCSTART(r13)
cmpwi r0,0x80
bne 1f
li r0,1
KVM: PPC: Allow book3s_hv guests to use SMT processor modes This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded. This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4. To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code. When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core. We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus. When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy in host state to blocked). It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as the none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start to execute the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest. Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:23:08 +08:00
stb r0,PACAPROCSTART(r13)
b kvm_start_guest
1:
#endif
beq cr1,2f
b .power7_wakeup_noloss
2: b .power7_wakeup_loss
9:
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif /* CONFIG_PPC_P7_NAP */
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
NOTEST, 0x100)
. = 0x200
machine_check_pSeries_1:
/* This is moved out of line as it can be patched by FW, but
* some code path might still want to branch into the original
* vector
*/
b machine_check_pSeries
. = 0x300
.globl data_access_pSeries
data_access_pSeries:
HMT_MEDIUM
SET_SCRATCH0(r13)
#ifndef CONFIG_POWER4_ONLY
BEGIN_FTR_SECTION
b data_access_check_stab
data_access_not_stab:
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
#endif
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVMTEST_PR, 0x300)
. = 0x380
.globl data_access_slb_pSeries
data_access_slb_pSeries:
HMT_MEDIUM
SET_SCRATCH0(r13)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_DAR
#ifdef __DISABLED__
/* Keep that around for when we re-implement dynamic VSIDs */
cmpdi r3,0
bge slb_miss_user_pseries
#endif /* __DISABLED__ */
mfspr r12,SPRN_SRR1
#ifndef CONFIG_RELOCATABLE
b .slb_miss_realmode
#else
/*
* We can't just use a direct branch to .slb_miss_realmode
* because the distance from here to there depends on where
* the kernel ends up being put.
*/
mfctr r11
ld r10,PACAKBASE(r13)
LOAD_HANDLER(r10, .slb_miss_realmode)
mtctr r10
bctr
#endif
STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
. = 0x480
.globl instruction_access_slb_pSeries
instruction_access_slb_pSeries:
HMT_MEDIUM
SET_SCRATCH0(r13)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
std r3,PACA_EXSLB+EX_R3(r13)
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
#ifdef __DISABLED__
/* Keep that around for when we re-implement dynamic VSIDs */
cmpdi r3,0
bge slb_miss_user_pseries
#endif /* __DISABLED__ */
mfspr r12,SPRN_SRR1
#ifndef CONFIG_RELOCATABLE
b .slb_miss_realmode
#else
mfctr r11
ld r10,PACAKBASE(r13)
LOAD_HANDLER(r10, .slb_miss_realmode)
mtctr r10
bctr
#endif
/* We open code these as we can't have a ". = x" (even with
* x = "." within a feature section
*/
. = 0x500;
.globl hardware_interrupt_pSeries;
.globl hardware_interrupt_hv;
hardware_interrupt_pSeries:
hardware_interrupt_hv:
BEGIN_FTR_SECTION
_MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt,
EXC_HV, SOFTEN_TEST_HV)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
FTR_SECTION_ELSE
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt,
KVM: PPC: book3s_hv: Add support for PPC970-family processors This adds support for running KVM guests in supervisor mode on those PPC970 processors that have a usable hypervisor mode. Unfortunately, Apple G5 machines have supervisor mode disabled (MSR[HV] is forced to 1), but the YDL PowerStation does have a usable hypervisor mode. There are several differences between the PPC970 and POWER7 in how guests are managed. These differences are accommodated using the CPU_FTR_ARCH_201 (PPC970) and CPU_FTR_ARCH_206 (POWER7) CPU feature bits. Notably, on PPC970: * The LPCR, LPID or RMOR registers don't exist, and the functions of those registers are provided by bits in HID4 and one bit in HID0. * External interrupts can be directed to the hypervisor, but unlike POWER7 they are masked by MSR[EE] in non-hypervisor modes and use SRR0/1 not HSRR0/1. * There is no virtual RMA (VRMA) mode; the guest must use an RMO (real mode offset) area. * The TLB entries are not tagged with the LPID, so it is necessary to flush the whole TLB on partition switch. Furthermore, when switching partitions we have to ensure that no other CPU is executing the tlbie or tlbsync instructions in either the old or the new partition, otherwise undefined behaviour can occur. * The PMU has 8 counters (PMC registers) rather than 6. * The DSCR, PURR, SPURR, AMR, AMOR, UAMOR registers don't exist. * The SLB has 64 entries rather than 32. * There is no mediated external interrupt facility, so if we switch to a guest that has a virtual external interrupt pending but the guest has MSR[EE] = 0, we have to arrange to have an interrupt pending for it so that we can get control back once it re-enables interrupts. We do that by sending ourselves an IPI with smp_send_reschedule after hard-disabling interrupts. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:40:08 +08:00
EXC_STD, SOFTEN_TEST_HV_201)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600)
STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700)
STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer)
STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00)
. = 0xc00
.globl system_call_pSeries
system_call_pSeries:
HMT_MEDIUM
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
SET_SCRATCH0(r13)
GET_PACA(r13)
std r9,PACA_EXGEN+EX_R9(r13)
std r10,PACA_EXGEN+EX_R10(r13)
mfcr r9
KVMTEST(0xc00)
GET_SCRATCH0(r13)
#endif
BEGIN_FTR_SECTION
cmpdi r0,0x1ebe
beq- 1f
END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
mr r9,r13
GET_PACA(r13)
mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1
ld r10,PACAKBASE(r13)
LOAD_HANDLER(r10, system_call_entry)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
rfid
b . /* prevent speculative execution */
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
/* Fast LE/BE switch system call */
1: mfspr r12,SPRN_SRR1
xori r12,r12,MSR_LE
mtspr SPRN_SRR1,r12
rfid /* return to userspace */
b .
STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
/* At 0xe??? we have a bunch of hypervisor exceptions, we branch
* out of line to handle them
*/
. = 0xe00
b h_data_storage_hv
. = 0xe20
b h_instr_storage_hv
. = 0xe40
b emulation_assist_hv
. = 0xe50
b hmi_exception_hv
. = 0xe60
b hmi_exception_hv
/* We need to deal with the Altivec unavailable exception
* here which is at 0xf20, thus in the middle of the
* prolog code of the PerformanceMonitor one. A little
* trickery is thus necessary
*/
performance_monitor_pSeries_1:
. = 0xf00
b performance_monitor_pSeries
altivec_unavailable_pSeries_1:
. = 0xf20
b altivec_unavailable_pSeries
vsx_unavailable_pSeries_1:
. = 0xf40
b vsx_unavailable_pSeries
#ifdef CONFIG_CBE_RAS
STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202)
#endif /* CONFIG_CBE_RAS */
STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
#ifdef CONFIG_CBE_RAS
STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
#endif /* CONFIG_CBE_RAS */
STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700)
#ifdef CONFIG_CBE_RAS
STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
#endif /* CONFIG_CBE_RAS */
. = 0x3000
/*** Out of line interrupts support ***/
/* moved from 0x200 */
machine_check_pSeries:
.globl machine_check_fwnmi
machine_check_fwnmi:
HMT_MEDIUM
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common,
EXC_STD, KVMTEST, 0x200)
KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
#ifndef CONFIG_POWER4_ONLY
/* moved from 0x300 */
data_access_check_stab:
GET_PACA(r13)
std r9,PACA_EXSLB+EX_R9(r13)
std r10,PACA_EXSLB+EX_R10(r13)
mfspr r10,SPRN_DAR
mfspr r9,SPRN_DSISR
srdi r10,r10,60
rlwimi r10,r9,16,0x20
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
#ifdef CONFIG_KVM_BOOK3S_PR
lbz r9,HSTATE_IN_GUEST(r13)
rlwimi r10,r9,8,0x300
#endif
mfcr r9
cmpwi r10,0x2c
beq do_stab_bolted_pSeries
mtcrf 0x80,r9
ld r9,PACA_EXSLB+EX_R9(r13)
ld r10,PACA_EXSLB+EX_R10(r13)
b data_access_not_stab
do_stab_bolted_pSeries:
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
GET_SCRATCH0(r10)
std r10,PACA_EXSLB+EX_R13(r13)
EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
#endif /* CONFIG_POWER4_ONLY */
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x300)
KVM_HANDLER_PR_SKIP(PACA_EXSLB, EXC_STD, 0x380)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
.align 7
/* moved from 0xe00 */
STD_EXCEPTION_HV(., 0xe02, h_data_storage)
KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02)
STD_EXCEPTION_HV(., 0xe22, h_instr_storage)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22)
STD_EXCEPTION_HV(., 0xe42, emulation_assist)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42)
STD_EXCEPTION_HV(., 0xe62, hmi_exception) /* need to flush cache ? */
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
/* moved from 0xf00 */
STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00)
STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 08:21:34 +08:00
KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
/*
* An interrupt came in while soft-disabled; clear EE in SRR1,
* clear paca->hard_enabled and return.
*/
masked_interrupt:
stb r10,PACAHARDIRQEN(r13)
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
mfspr r10,SPRN_SRR1
rldicl r10,r10,48,1 /* clear MSR_EE */
rotldi r10,r10,16
mtspr SPRN_SRR1,r10
ld r10,PACA_EXGEN+EX_R10(r13)
GET_SCRATCH0(r13)
rfid
b .
masked_Hinterrupt:
stb r10,PACAHARDIRQEN(r13)
mtcrf 0x80,r9
ld r9,PACA_EXGEN+EX_R9(r13)
mfspr r10,SPRN_HSRR1
rldicl r10,r10,48,1 /* clear MSR_EE */
rotldi r10,r10,16
mtspr SPRN_HSRR1,r10
ld r10,PACA_EXGEN+EX_R10(r13)
GET_SCRATCH0(r13)
hrfid
b .
#ifdef CONFIG_PPC_PSERIES
/*
* Vectors for the FWNMI option. Share common code.
*/
.globl system_reset_fwnmi
.align 7
system_reset_fwnmi:
HMT_MEDIUM
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
NOTEST, 0x100)
#endif /* CONFIG_PPC_PSERIES */
#ifdef __DISABLED__
/*
* This is used for when the SLB miss handler has to go virtual,
* which doesn't happen for now anymore but will once we re-implement
* dynamic VSIDs for shared page tables
*/
slb_miss_user_pseries:
std r10,PACA_EXGEN+EX_R10(r13)
std r11,PACA_EXGEN+EX_R11(r13)
std r12,PACA_EXGEN+EX_R12(r13)
GET_SCRATCH0(r10)
ld r11,PACA_EXSLB+EX_R9(r13)
ld r12,PACA_EXSLB+EX_R3(r13)
std r10,PACA_EXGEN+EX_R13(r13)
std r11,PACA_EXGEN+EX_R9(r13)
std r12,PACA_EXGEN+EX_R3(r13)
clrrdi r12,r13,32
mfmsr r10
mfspr r11,SRR0 /* save SRR0 */
ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
ori r10,r10,MSR_IR|MSR_DR|MSR_RI
mtspr SRR0,r12
mfspr r12,SRR1 /* and SRR1 */
mtspr SRR1,r10
rfid
b . /* prevent spec. execution */
#endif /* __DISABLED__ */
.align 7
.globl __end_interrupts
__end_interrupts:
/*
* Code from here down to __end_handlers is invoked from the
* exception prologs above. Because the prologs assemble the
* addresses of these handlers using the LOAD_HANDLER macro,
* which uses an addi instruction, these handlers must be in
* the first 32k of the kernel image.
*/
/*** Common interrupt handlers ***/
STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
.align 7
.globl machine_check_common
machine_check_common:
EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
FINISH_NAP
DISABLE_INTS
bl .save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl .machine_check_exception
b .ret_from_except
STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
#ifdef CONFIG_ALTIVEC
STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
#else
STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
#endif
#ifdef CONFIG_CBE_RAS
STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
#endif /* CONFIG_CBE_RAS */
.align 7
system_call_entry:
b system_call_common
/*
* Here we have detected that the kernel stack pointer is bad.
* R9 contains the saved CR, r13 points to the paca,
* r10 contains the (bad) kernel stack pointer,
* r11 and r12 contain the saved SRR0 and SRR1.
* We switch to using an emergency stack, save the registers there,
* and call kernel_bad_stack(), which panics.
*/
bad_stack:
ld r1,PACAEMERGSP(r13)
subi r1,r1,64+INT_FRAME_SIZE
std r9,_CCR(r1)
std r10,GPR1(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
mfspr r11,SPRN_DAR
mfspr r12,SPRN_DSISR
std r11,_DAR(r1)
std r12,_DSISR(r1)
mflr r10
mfctr r11
mfxer r12
std r10,_LINK(r1)
std r11,_CTR(r1)
std r12,_XER(r1)
SAVE_GPR(0,r1)
SAVE_GPR(2,r1)
ld r10,EX_R3(r3)
std r10,GPR3(r1)
SAVE_GPR(4,r1)
SAVE_4GPRS(5,r1)
ld r9,EX_R9(r3)
ld r10,EX_R10(r3)
SAVE_2GPRS(9,r1)
ld r9,EX_R11(r3)
ld r10,EX_R12(r3)
ld r11,EX_R13(r3)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
BEGIN_FTR_SECTION
ld r10,EX_CFAR(r3)
std r10,ORIG_GPR3(r1)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
SAVE_8GPRS(14,r1)
SAVE_10GPRS(22,r1)
lhz r12,PACA_TRAP_SAVE(r13)
std r12,_TRAP(r1)
addi r11,r1,INT_FRAME_SIZE
std r11,0(r1)
li r12,0
std r12,0(r11)
ld r2,PACATOC(r13)
ld r11,exception_marker@toc(r2)
std r12,RESULT(r1)
std r11,STACK_FRAME_OVERHEAD-16(r1)
1: addi r3,r1,STACK_FRAME_OVERHEAD
bl .kernel_bad_stack
b 1b
/*
* Here r13 points to the paca, r9 contains the saved CR,
* SRR0 and SRR1 are saved in r11 and r12,
* r9 - r13 are saved in paca->exgen.
*/
.align 7
.globl data_access_common
data_access_common:
mfspr r10,SPRN_DAR
std r10,PACA_EXGEN+EX_DAR(r13)
mfspr r10,SPRN_DSISR
stw r10,PACA_EXGEN+EX_DSISR(r13)
EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
ld r3,PACA_EXGEN+EX_DAR(r13)
lwz r4,PACA_EXGEN+EX_DSISR(r13)
li r5,0x300
b .do_hash_page /* Try to handle as hpte fault */
.align 7
.globl h_data_storage_common
h_data_storage_common:
mfspr r10,SPRN_HDAR
std r10,PACA_EXGEN+EX_DAR(r13)
mfspr r10,SPRN_HDSISR
stw r10,PACA_EXGEN+EX_DSISR(r13)
EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
bl .save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl .unknown_exception
b .ret_from_except
.align 7
.globl instruction_access_common
instruction_access_common:
EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
ld r3,_NIP(r1)
andis. r4,r12,0x5820
li r5,0x400
b .do_hash_page /* Try to handle as hpte fault */
STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
/*
* Here is the common SLB miss user that is used when going to virtual
* mode for SLB misses, that is currently not used
*/
#ifdef __DISABLED__
.align 7
.globl slb_miss_user_common
slb_miss_user_common:
mflr r10
std r3,PACA_EXGEN+EX_DAR(r13)
stw r9,PACA_EXGEN+EX_CCR(r13)
std r10,PACA_EXGEN+EX_LR(r13)
std r11,PACA_EXGEN+EX_SRR0(r13)
bl .slb_allocate_user
ld r10,PACA_EXGEN+EX_LR(r13)
ld r3,PACA_EXGEN+EX_R3(r13)
lwz r9,PACA_EXGEN+EX_CCR(r13)
ld r11,PACA_EXGEN+EX_SRR0(r13)
mtlr r10
beq- slb_miss_fault
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
beq- unrecov_user_slb
mfmsr r10
.machine push
.machine "power4"
mtcrf 0x80,r9
.machine pop
clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
mtmsrd r10,1
mtspr SRR0,r11
mtspr SRR1,r12
ld r9,PACA_EXGEN+EX_R9(r13)
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
ld r13,PACA_EXGEN+EX_R13(r13)
rfid
b .
slb_miss_fault:
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
ld r4,PACA_EXGEN+EX_DAR(r13)
li r5,0
std r4,_DAR(r1)
std r5,_DSISR(r1)
b handle_page_fault
unrecov_user_slb:
EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
DISABLE_INTS
bl .save_nvgprs
1: addi r3,r1,STACK_FRAME_OVERHEAD
bl .unrecoverable_exception
b 1b
#endif /* __DISABLED__ */
/*
* r13 points to the PACA, r9 contains the saved CR,
* r12 contain the saved SRR1, SRR0 is still ready for return
* r3 has the faulting address
* r9 - r13 are saved in paca->exslb.
* r3 is saved in paca->slb_r3
* We assume we aren't going to take any exceptions during this procedure.
*/
_GLOBAL(slb_miss_realmode)
mflr r10
#ifdef CONFIG_RELOCATABLE
mtctr r11
#endif
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
bl .slb_allocate_realmode
/* All done -- return from exception. */
ld r10,PACA_EXSLB+EX_LR(r13)
ld r3,PACA_EXSLB+EX_R3(r13)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
#ifdef CONFIG_PPC_ISERIES
BEGIN_FW_FTR_SECTION
ld r11,PACALPPACAPTR(r13)
ld r11,LPPACASRR0(r11) /* get SRR0 value */
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_ISERIES */
mtlr r10
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
beq- 2f
.machine push
.machine "power4"
mtcrf 0x80,r9
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
.machine pop
#ifdef CONFIG_PPC_ISERIES
BEGIN_FW_FTR_SECTION
mtspr SPRN_SRR0,r11
mtspr SPRN_SRR1,r12
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_ISERIES */
ld r9,PACA_EXSLB+EX_R9(r13)
ld r10,PACA_EXSLB+EX_R10(r13)
ld r11,PACA_EXSLB+EX_R11(r13)
ld r12,PACA_EXSLB+EX_R12(r13)
ld r13,PACA_EXSLB+EX_R13(r13)
rfid
b . /* prevent speculative execution */
2:
#ifdef CONFIG_PPC_ISERIES
BEGIN_FW_FTR_SECTION
b unrecov_slb
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
#endif /* CONFIG_PPC_ISERIES */
mfspr r11,SPRN_SRR0
ld r10,PACAKBASE(r13)
LOAD_HANDLER(r10,unrecov_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
rfid
b .
unrecov_slb:
EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
DISABLE_INTS
bl .save_nvgprs
1: addi r3,r1,STACK_FRAME_OVERHEAD
bl .unrecoverable_exception
b 1b
.align 7
.globl hardware_interrupt_common
.globl hardware_interrupt_entry
hardware_interrupt_common:
EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
FINISH_NAP
hardware_interrupt_entry:
DISABLE_INTS
BEGIN_FTR_SECTION
bl .ppc64_runlatch_on
END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
addi r3,r1,STACK_FRAME_OVERHEAD
bl .do_IRQ
b .ret_from_except_lite
#ifdef CONFIG_PPC_970_NAP
power4_fixup_nap:
andc r9,r9,r10
std r9,TI_LOCAL_FLAGS(r11)
ld r10,_LINK(r1) /* make idle task do the */
std r10,_NIP(r1) /* equivalent of a blr */
blr
#endif
.align 7
.globl alignment_common
alignment_common:
mfspr r10,SPRN_DAR
std r10,PACA_EXGEN+EX_DAR(r13)
mfspr r10,SPRN_DSISR
stw r10,PACA_EXGEN+EX_DSISR(r13)
EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
ld r3,PACA_EXGEN+EX_DAR(r13)
lwz r4,PACA_EXGEN+EX_DSISR(r13)
std r3,_DAR(r1)
std r4,_DSISR(r1)
bl .save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
ENABLE_INTS
bl .alignment_exception
b .ret_from_except
.align 7
.globl program_check_common
program_check_common:
EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
bl .save_nvgprs
DISABLE_INTS
addi r3,r1,STACK_FRAME_OVERHEAD
bl .program_check_exception
b .ret_from_except
.align 7
.globl fp_unavailable_common
fp_unavailable_common:
EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
bne 1f /* if from user, just load it up */
bl .save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
ENABLE_INTS
bl .kernel_fp_unavailable_exception
BUG_OPCODE
1: bl .load_up_fpu
b fast_exception_return
.align 7
.globl altivec_unavailable_common
altivec_unavailable_common:
EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
bl .load_up_altivec
b fast_exception_return
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
bl .save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
ENABLE_INTS
bl .altivec_unavailable_exception
b .ret_from_except
.align 7
.globl vsx_unavailable_common
vsx_unavailable_common:
EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
bne .load_up_vsx
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
bl .save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
ENABLE_INTS
bl .vsx_unavailable_exception
b .ret_from_except
.align 7
.globl __end_handlers
__end_handlers:
/*
* Return from an exception with minimal checks.
* The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
* If interrupts have been enabled, or anything has been
* done that might have changed the scheduling status of
* any task or sent any task a signal, you should use
* ret_from_except or ret_from_except_lite instead of this.
*/
fast_exc_return_irq: /* restores irq state too */
ld r3,SOFTE(r1)
TRACE_AND_RESTORE_IRQ(r3);
ld r12,_MSR(r1)
rldicl r4,r12,49,63 /* get MSR_EE to LSB */
stb r4,PACAHARDIRQEN(r13) /* restore paca->hard_enabled */
b 1f
.globl fast_exception_return
fast_exception_return:
ld r12,_MSR(r1)
1: ld r11,_NIP(r1)
andi. r3,r12,MSR_RI /* check if RI is set */
beq- unrecov_fer
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
andi. r3,r12,MSR_PR
beq 2f
ACCOUNT_CPU_USER_EXIT(r3, r4)
2:
#endif
ld r3,_CCR(r1)
ld r4,_LINK(r1)
ld r5,_CTR(r1)
ld r6,_XER(r1)
mtcr r3
mtlr r4
mtctr r5
mtxer r6
REST_GPR(0, r1)
REST_8GPRS(2, r1)
mfmsr r10
rldicl r10,r10,48,1 /* clear EE */
rldicr r10,r10,16,61 /* clear RI (LE is 0 already) */
mtmsrd r10,1
mtspr SPRN_SRR1,r12
mtspr SPRN_SRR0,r11
REST_4GPRS(10, r1)
ld r1,GPR1(r1)
rfid
b . /* prevent speculative execution */
unrecov_fer:
bl .save_nvgprs
1: addi r3,r1,STACK_FRAME_OVERHEAD
bl .unrecoverable_exception
b 1b
/*
* Hash table stuff
*/
.align 7
_STATIC(do_hash_page)
std r3,_DAR(r1)
std r4,_DSISR(r1)
andis. r0,r4,0xa410 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
andis. r0,r4,DSISR_DABRMATCH@h
bne- handle_dabr_fault
BEGIN_FTR_SECTION
andis. r0,r4,0x0020 /* Is it a segment table fault? */
bne- do_ste_alloc /* If so handle it */
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
powerpc: Allow perf_counters to access user memory at interrupt time This provides a mechanism to allow the perf_counters code to access user memory in a PMU interrupt routine. Such an access can cause various kinds of interrupt: SLB miss, MMU hash table miss, segment table miss, or TLB miss, depending on the processor. This commit only deals with 64-bit classic/server processors, which use an MMU hash table. 32-bit processors are already able to access user memory at interrupt time. Since we don't soft-disable on 32-bit, we avoid the possibility of reentering hash_page or the TLB miss handlers, since they run with interrupts disabled. On 64-bit processors, an SLB miss interrupt on a user address will update the slb_cache and slb_cache_ptr fields in the paca. This is OK except in the case where a PMU interrupt occurs in switch_slb, which also accesses those fields. To prevent this, we hard-disable interrupts in switch_slb. Interrupts are already soft-disabled at this point, and will get hard-enabled when they get soft-enabled later. This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice, and to make sure that it clears the slb_cache_ptr when called from other callers than switch_slb, the existing routine is renamed to __slb_flush_and_rebolt, which is called by switch_slb and the new version of slb_flush_and_rebolt. Similarly, switch_stab (used on POWER3 and RS64 processors) gets a hard_irq_disable() to protect the per-cpu variables used there and in ste_allocate. If a MMU hashtable miss interrupt occurs, normally we would call hash_page to look up the Linux PTE for the address and create a HPTE. However, hash_page is fairly complex and takes some locks, so to avoid the possibility of deadlock, we check the preemption count to see if we are in a (pseudo-)NMI handler, and if so, we don't call hash_page but instead treat it like a bad access that will get reported up through the exception table mechanism. An interrupt whose handler runs even though the interrupt occurred when soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI handler, which should use nmi_enter()/nmi_exit() rather than irq_enter()/irq_exit(). Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
clrrdi r11,r1,THREAD_SHIFT
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
bne 77f /* then don't call hash_page now */
/*
* On iSeries, we soft-disable interrupts here, then
* hard-enable interrupts so that the hash_page code can spin on
* the hash_table_lock without problems on a shared processor.
*/
DISABLE_INTS
/*
* Currently, trace_hardirqs_off() will be called by DISABLE_INTS
* and will clobber volatile registers when irq tracing is enabled
* so we need to reload them. It may be possible to be smarter here
* and move the irq tracing elsewhere but let's keep it simple for
* now
*/
#ifdef CONFIG_TRACE_IRQFLAGS
ld r3,_DAR(r1)
ld r4,_DSISR(r1)
ld r5,_TRAP(r1)
ld r12,_MSR(r1)
clrrdi r5,r5,4
#endif /* CONFIG_TRACE_IRQFLAGS */
/*
* We need to set the _PAGE_USER bit if MSR_PR is set or if we are
* accessing a userspace segment (even from the kernel). We assume
* kernel addresses always have the high bit set.
*/
rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
orc r0,r12,r0 /* MSR_PR | ~high_bit */
rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
ori r4,r4,1 /* add _PAGE_PRESENT */
rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
/*
* r3 contains the faulting address
* r4 contains the required access permissions
* r5 contains the trap number
*
* at return r3 = 0 for success
*/
bl .hash_page /* build HPTE if possible */
cmpdi r3,0 /* see if hash_page succeeded */
BEGIN_FW_FTR_SECTION
/*
* If we had interrupts soft-enabled at the point where the
* DSI/ISI occurred, and an interrupt came in during hash_page,
* handle it now.
* We jump to ret_from_except_lite rather than fast_exception_return
* because ret_from_except_lite will check for and handle pending
* interrupts if necessary.
*/
beq 13f
END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
BEGIN_FW_FTR_SECTION
/*
* Here we have interrupts hard-disabled, so it is sufficient
* to restore paca->{soft,hard}_enable and get out.
*/
beq fast_exc_return_irq /* Return from exception on success */
END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
/* For a hash failure, we don't bother re-enabling interrupts */
ble- 12f
/*
* hash_page couldn't handle it, set soft interrupt enable back
Fix IRQ flag handling naming Fix the IRQ flag handling naming. In linux/irqflags.h under one configuration, it maps: local_irq_enable() -> raw_local_irq_enable() local_irq_disable() -> raw_local_irq_disable() local_irq_save() -> raw_local_irq_save() ... and under the other configuration, it maps: raw_local_irq_enable() -> local_irq_enable() raw_local_irq_disable() -> local_irq_disable() raw_local_irq_save() -> local_irq_save() ... This is quite confusing. There should be one set of names expected of the arch, and this should be wrapped to give another set of names that are expected by users of this facility. Change this to have the arch provide: flags = arch_local_save_flags() flags = arch_local_irq_save() arch_local_irq_restore(flags) arch_local_irq_disable() arch_local_irq_enable() arch_irqs_disabled_flags(flags) arch_irqs_disabled() arch_safe_halt() Then linux/irqflags.h wraps these to provide: raw_local_save_flags(flags) raw_local_irq_save(flags) raw_local_irq_restore(flags) raw_local_irq_disable() raw_local_irq_enable() raw_irqs_disabled_flags(flags) raw_irqs_disabled() raw_safe_halt() with type checking on the flags 'arguments', and then wraps those to provide: local_save_flags(flags) local_irq_save(flags) local_irq_restore(flags) local_irq_disable() local_irq_enable() irqs_disabled_flags(flags) irqs_disabled() safe_halt() with tracing included if enabled. The arch functions can now all be inline functions rather than some of them having to be macros. Signed-off-by: David Howells <dhowells@redhat.com> [X86, FRV, MN10300] Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> [Tile] Signed-off-by: Michal Simek <monstr@monstr.eu> [Microblaze] Tested-by: Catalin Marinas <catalin.marinas@arm.com> [ARM] Acked-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com> [AVR] Acked-by: Tony Luck <tony.luck@intel.com> [IA-64] Acked-by: Hirokazu Takata <takata@linux-m32r.org> [M32R] Acked-by: Greg Ungerer <gerg@uclinux.org> [M68K/M68KNOMMU] Acked-by: Ralf Baechle <ralf@linux-mips.org> [MIPS] Acked-by: Kyle McMartin <kyle@mcmartin.ca> [PA-RISC] Acked-by: Paul Mackerras <paulus@samba.org> [PowerPC] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> [S390] Acked-by: Chen Liqin <liqin.chen@sunplusct.com> [Score] Acked-by: Matt Fleming <matt@console-pimps.org> [SH] Acked-by: David S. Miller <davem@davemloft.net> [Sparc] Acked-by: Chris Zankel <chris@zankel.net> [Xtensa] Reviewed-by: Richard Henderson <rth@twiddle.net> [Alpha] Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp> [H8300] Cc: starvik@axis.com [CRIS] Cc: jesper.nilsson@axis.com [CRIS] Cc: linux-cris-kernel@axis.com
2010-10-07 21:08:55 +08:00
* to what it was before the trap. Note that .arch_local_irq_restore
* handles any interrupts pending at this point.
*/
ld r3,SOFTE(r1)
TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f)
Fix IRQ flag handling naming Fix the IRQ flag handling naming. In linux/irqflags.h under one configuration, it maps: local_irq_enable() -> raw_local_irq_enable() local_irq_disable() -> raw_local_irq_disable() local_irq_save() -> raw_local_irq_save() ... and under the other configuration, it maps: raw_local_irq_enable() -> local_irq_enable() raw_local_irq_disable() -> local_irq_disable() raw_local_irq_save() -> local_irq_save() ... This is quite confusing. There should be one set of names expected of the arch, and this should be wrapped to give another set of names that are expected by users of this facility. Change this to have the arch provide: flags = arch_local_save_flags() flags = arch_local_irq_save() arch_local_irq_restore(flags) arch_local_irq_disable() arch_local_irq_enable() arch_irqs_disabled_flags(flags) arch_irqs_disabled() arch_safe_halt() Then linux/irqflags.h wraps these to provide: raw_local_save_flags(flags) raw_local_irq_save(flags) raw_local_irq_restore(flags) raw_local_irq_disable() raw_local_irq_enable() raw_irqs_disabled_flags(flags) raw_irqs_disabled() raw_safe_halt() with type checking on the flags 'arguments', and then wraps those to provide: local_save_flags(flags) local_irq_save(flags) local_irq_restore(flags) local_irq_disable() local_irq_enable() irqs_disabled_flags(flags) irqs_disabled() safe_halt() with tracing included if enabled. The arch functions can now all be inline functions rather than some of them having to be macros. Signed-off-by: David Howells <dhowells@redhat.com> [X86, FRV, MN10300] Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> [Tile] Signed-off-by: Michal Simek <monstr@monstr.eu> [Microblaze] Tested-by: Catalin Marinas <catalin.marinas@arm.com> [ARM] Acked-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com> [AVR] Acked-by: Tony Luck <tony.luck@intel.com> [IA-64] Acked-by: Hirokazu Takata <takata@linux-m32r.org> [M32R] Acked-by: Greg Ungerer <gerg@uclinux.org> [M68K/M68KNOMMU] Acked-by: Ralf Baechle <ralf@linux-mips.org> [MIPS] Acked-by: Kyle McMartin <kyle@mcmartin.ca> [PA-RISC] Acked-by: Paul Mackerras <paulus@samba.org> [PowerPC] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> [S390] Acked-by: Chen Liqin <liqin.chen@sunplusct.com> [Score] Acked-by: Matt Fleming <matt@console-pimps.org> [SH] Acked-by: David S. Miller <davem@davemloft.net> [Sparc] Acked-by: Chris Zankel <chris@zankel.net> [Xtensa] Reviewed-by: Richard Henderson <rth@twiddle.net> [Alpha] Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp> [H8300] Cc: starvik@axis.com [CRIS] Cc: jesper.nilsson@axis.com [CRIS] Cc: linux-cris-kernel@axis.com
2010-10-07 21:08:55 +08:00
bl .arch_local_irq_restore
b 11f
/* We have a data breakpoint exception - handle it */
handle_dabr_fault:
bl .save_nvgprs
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl .do_dabr
b .ret_from_except_lite
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
ENABLE_INTS
11: ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl .do_page_fault
cmpdi r3,0
beq+ 13f
bl .save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
lwz r4,_DAR(r1)
bl .bad_page_fault
b .ret_from_except
13: b .ret_from_except_lite
/* We have a page fault that hash_page could handle but HV refused
* the PTE insertion
*/
12: bl .save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
ld r4,_DAR(r1)
bl .low_hash_fault
b .ret_from_except
powerpc: Allow perf_counters to access user memory at interrupt time This provides a mechanism to allow the perf_counters code to access user memory in a PMU interrupt routine. Such an access can cause various kinds of interrupt: SLB miss, MMU hash table miss, segment table miss, or TLB miss, depending on the processor. This commit only deals with 64-bit classic/server processors, which use an MMU hash table. 32-bit processors are already able to access user memory at interrupt time. Since we don't soft-disable on 32-bit, we avoid the possibility of reentering hash_page or the TLB miss handlers, since they run with interrupts disabled. On 64-bit processors, an SLB miss interrupt on a user address will update the slb_cache and slb_cache_ptr fields in the paca. This is OK except in the case where a PMU interrupt occurs in switch_slb, which also accesses those fields. To prevent this, we hard-disable interrupts in switch_slb. Interrupts are already soft-disabled at this point, and will get hard-enabled when they get soft-enabled later. This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice, and to make sure that it clears the slb_cache_ptr when called from other callers than switch_slb, the existing routine is renamed to __slb_flush_and_rebolt, which is called by switch_slb and the new version of slb_flush_and_rebolt. Similarly, switch_stab (used on POWER3 and RS64 processors) gets a hard_irq_disable() to protect the per-cpu variables used there and in ste_allocate. If a MMU hashtable miss interrupt occurs, normally we would call hash_page to look up the Linux PTE for the address and create a HPTE. However, hash_page is fairly complex and takes some locks, so to avoid the possibility of deadlock, we check the preemption count to see if we are in a (pseudo-)NMI handler, and if so, we don't call hash_page but instead treat it like a bad access that will get reported up through the exception table mechanism. An interrupt whose handler runs even though the interrupt occurred when soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI handler, which should use nmi_enter()/nmi_exit() rather than irq_enter()/irq_exit(). Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
/*
* We come here as a result of a DSI at a point where we don't want
* to call hash_page, such as when we are accessing memory (possibly
* user memory) inside a PMU interrupt that occurred while interrupts
* were soft-disabled. We want to invoke the exception handler for
* the access, or panic if there isn't a handler.
*/
77: bl .save_nvgprs
mr r4,r3
addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl .bad_page_fault
b .ret_from_except
/* here we have a segment miss */
do_ste_alloc:
bl .ste_allocate /* try to insert stab entry */
cmpdi r3,0
bne- handle_page_fault
b fast_exception_return
/*
* r13 points to the PACA, r9 contains the saved CR,
* r11 and r12 contain the saved SRR0 and SRR1.
* r9 - r13 are saved in paca->exslb.
* We assume we aren't going to take any exceptions during this procedure.
* We assume (DAR >> 60) == 0xc.
*/
.align 7
_GLOBAL(do_stab_bolted)
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
/* Hash to the primary group */
ld r10,PACASTABVIRT(r13)
mfspr r11,SPRN_DAR
srdi r11,r11,28
rldimi r10,r11,7,52 /* r10 = first ste of the group */
/* Calculate VSID */
/* This is a kernel address, so protovsid = ESID */
ASM_VSID_SCRAMBLE(r11, r9, 256M)
rldic r9,r11,12,16 /* r9 = vsid << 12 */
/* Search the primary group for a free entry */
1: ld r11,0(r10) /* Test valid bit of the current ste */
andi. r11,r11,0x80
beq 2f
addi r10,r10,16
andi. r11,r10,0x70
bne 1b
/* Stick for only searching the primary group for now. */
/* At least for now, we use a very simple random castout scheme */
/* Use the TB as a random number ; OR in 1 to avoid entry 0 */
mftb r11
rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */
ori r11,r11,0x10
/* r10 currently points to an ste one past the group of interest */
/* make it point to the randomly selected entry */
subi r10,r10,128
or r10,r10,r11 /* r10 is the entry to invalidate */
isync /* mark the entry invalid */
ld r11,0(r10)
rldicl r11,r11,56,1 /* clear the valid bit */
rotldi r11,r11,8
std r11,0(r10)
sync
clrrdi r11,r11,28 /* Get the esid part of the ste */
slbie r11
2: std r9,8(r10) /* Store the vsid part of the ste */
eieio
mfspr r11,SPRN_DAR /* Get the new esid */
clrrdi r11,r11,28 /* Permits a full 32b of ESID */
ori r11,r11,0x90 /* Turn on valid and kp */
std r11,0(r10) /* Put new entry back into the stab */
sync
/* All done -- return from exception. */
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
andi. r10,r12,MSR_RI
beq- unrecov_slb
mtcrf 0x80,r9 /* restore CR */
mfmsr r10
clrrdi r10,r10,2
mtmsrd r10,1
mtspr SPRN_SRR0,r11
mtspr SPRN_SRR1,r12
ld r9,PACA_EXSLB+EX_R9(r13)
ld r10,PACA_EXSLB+EX_R10(r13)
ld r11,PACA_EXSLB+EX_R11(r13)
ld r12,PACA_EXSLB+EX_R12(r13)
ld r13,PACA_EXSLB+EX_R13(r13)
rfid
b . /* prevent speculative execution */
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
* Data area reserved for FWNMI option.
* This address (0x7000) is fixed by the RPA.
*/
.= 0x7000
.globl fwnmi_data_area
fwnmi_data_area:
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
/* iSeries does not use the FWNMI stuff, so it is safe to put
* this here, even if we later allow kernels that will boot on
* both pSeries and iSeries */
#ifdef CONFIG_PPC_ISERIES
. = LPARMAP_PHYS
.globl xLparMap
xLparMap:
.quad HvEsidsToMap /* xNumberEsids */
.quad HvRangesToMap /* xNumberRanges */
.quad STAB0_PAGE /* xSegmentTableOffs */
.zero 40 /* xRsvd */
/* xEsids (HvEsidsToMap entries of 2 quads) */
.quad PAGE_OFFSET_ESID /* xKernelEsid */
.quad PAGE_OFFSET_VSID /* xKernelVsid */
.quad VMALLOC_START_ESID /* xKernelEsid */
.quad VMALLOC_START_VSID /* xKernelVsid */
/* xRanges (HvRangesToMap entries of 3 quads) */
.quad HvPagesToMap /* xPages */
.quad 0 /* xOffset */
.quad PAGE_OFFSET_VSID << (SID_SHIFT - HW_PAGE_SHIFT) /* xVPN */
#endif /* CONFIG_PPC_ISERIES */
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/* pseries and powernv need to keep the whole page from
* 0x7000 to 0x8000 free for use by the firmware
*/
. = 0x8000
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
/*
* Space for CPU0's segment table.
*
* On iSeries, the hypervisor must fill in at least one entry before
* we get control (with relocate on). The address is given to the hv
* as a page number (see xLparMap above), so this must be at a
* fixed address (the linker can't compute (u64)&initial_stab >>
* PAGE_SHIFT).
*/
. = STAB0_OFFSET /* 0x8000 */
.globl initial_stab
initial_stab:
.space 4096
#ifdef CONFIG_PPC_POWERNV
_GLOBAL(opal_mc_secondary_handler)
HMT_MEDIUM
SET_SCRATCH0(r13)
GET_PACA(r13)
clrldi r3,r3,2
tovirt(r3,r3)
std r3,PACA_OPAL_MC_EVT(r13)
ld r13,OPAL_MC_SRR0(r3)
mtspr SPRN_SRR0,r13
ld r13,OPAL_MC_SRR1(r3)
mtspr SPRN_SRR1,r13
ld r3,OPAL_MC_GPR3(r3)
GET_SCRATCH0(r13)
b machine_check_pSeries
#endif /* CONFIG_PPC_POWERNV */