OpenCloudOS-Kernel/arch/powerpc/kernel/interrupt_64.S

753 lines
18 KiB
ArmAsm
Raw Normal View History

#include <asm/asm-offsets.h>
#include <asm/bug.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/exception-64s.h>
#else
#include <asm/exception-64e.h>
#endif
#include <asm/feature-fixups.h>
#include <asm/head-64.h>
#include <asm/hw_irq.h>
#include <asm/kup.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/ptrace.h>
.align 7
.macro DEBUG_SRR_VALID srr
#ifdef CONFIG_PPC_RFI_SRR_DEBUG
.ifc \srr,srr
mfspr r11,SPRN_SRR0
ld r12,_NIP(r1)
clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings When CONFIG_PPC_RFI_SRR_DEBUG=y we check the SRR values before returning from interrupts. This is done in asm using EMIT_BUG_ENTRY, and passing BUGFLAG_WARNING. However that fails to create an exception table entry for the warning, and so do_program_check() fails the exception table search and proceeds to call _exception(), resulting in an oops like: Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 2 PID: 1204 Comm: sigreturn_unali Tainted: P 5.16.0-rc2-00194-g91ca3d4f77c5 #12 NIP: c00000000000c5b0 LR: 0000000000000000 CTR: 0000000000000000 ... NIP [c00000000000c5b0] system_call_common+0x150/0x268 LR [0000000000000000] 0x0 Call Trace: [c00000000db73e10] [c00000000000c558] system_call_common+0xf8/0x268 (unreliable) ... Instruction dump: 7cc803a6 888d0931 2c240000 4082001c 38800000 988d0931 e8810170 e8a10178 7c9a03a6 7cbb03a6 7d7a02a6 e9810170 <7f0b6088> 7d7b02a6 e9810178 7f0b6088 We should instead use EMIT_WARN_ENTRY, which creates an exception table entry for the warning, allowing the warning to be correctly recognised, and the code to resume after printing the warning. Note however that because this warning is buried deep in the interrupt return path, we are not able to recover from it (due to MSR_RI being clear), so we still end up in die() with an unrecoverable exception. Fixes: 59dc5bfca0cb ("powerpc/64s: avoid reloading (H)SRR registers if they are still valid") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211221135101.2085547-2-mpe@ellerman.id.au
2021-12-21 21:51:00 +08:00
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
mfspr r11,SPRN_SRR1
ld r12,_MSR(r1)
100: tdne r11,r12
powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings When CONFIG_PPC_RFI_SRR_DEBUG=y we check the SRR values before returning from interrupts. This is done in asm using EMIT_BUG_ENTRY, and passing BUGFLAG_WARNING. However that fails to create an exception table entry for the warning, and so do_program_check() fails the exception table search and proceeds to call _exception(), resulting in an oops like: Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 2 PID: 1204 Comm: sigreturn_unali Tainted: P 5.16.0-rc2-00194-g91ca3d4f77c5 #12 NIP: c00000000000c5b0 LR: 0000000000000000 CTR: 0000000000000000 ... NIP [c00000000000c5b0] system_call_common+0x150/0x268 LR [0000000000000000] 0x0 Call Trace: [c00000000db73e10] [c00000000000c558] system_call_common+0xf8/0x268 (unreliable) ... Instruction dump: 7cc803a6 888d0931 2c240000 4082001c 38800000 988d0931 e8810170 e8a10178 7c9a03a6 7cbb03a6 7d7a02a6 e9810170 <7f0b6088> 7d7b02a6 e9810178 7f0b6088 We should instead use EMIT_WARN_ENTRY, which creates an exception table entry for the warning, allowing the warning to be correctly recognised, and the code to resume after printing the warning. Note however that because this warning is buried deep in the interrupt return path, we are not able to recover from it (due to MSR_RI being clear), so we still end up in die() with an unrecoverable exception. Fixes: 59dc5bfca0cb ("powerpc/64s: avoid reloading (H)SRR registers if they are still valid") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211221135101.2085547-2-mpe@ellerman.id.au
2021-12-21 21:51:00 +08:00
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
.else
mfspr r11,SPRN_HSRR0
ld r12,_NIP(r1)
clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings When CONFIG_PPC_RFI_SRR_DEBUG=y we check the SRR values before returning from interrupts. This is done in asm using EMIT_BUG_ENTRY, and passing BUGFLAG_WARNING. However that fails to create an exception table entry for the warning, and so do_program_check() fails the exception table search and proceeds to call _exception(), resulting in an oops like: Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 2 PID: 1204 Comm: sigreturn_unali Tainted: P 5.16.0-rc2-00194-g91ca3d4f77c5 #12 NIP: c00000000000c5b0 LR: 0000000000000000 CTR: 0000000000000000 ... NIP [c00000000000c5b0] system_call_common+0x150/0x268 LR [0000000000000000] 0x0 Call Trace: [c00000000db73e10] [c00000000000c558] system_call_common+0xf8/0x268 (unreliable) ... Instruction dump: 7cc803a6 888d0931 2c240000 4082001c 38800000 988d0931 e8810170 e8a10178 7c9a03a6 7cbb03a6 7d7a02a6 e9810170 <7f0b6088> 7d7b02a6 e9810178 7f0b6088 We should instead use EMIT_WARN_ENTRY, which creates an exception table entry for the warning, allowing the warning to be correctly recognised, and the code to resume after printing the warning. Note however that because this warning is buried deep in the interrupt return path, we are not able to recover from it (due to MSR_RI being clear), so we still end up in die() with an unrecoverable exception. Fixes: 59dc5bfca0cb ("powerpc/64s: avoid reloading (H)SRR registers if they are still valid") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211221135101.2085547-2-mpe@ellerman.id.au
2021-12-21 21:51:00 +08:00
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
mfspr r11,SPRN_HSRR1
ld r12,_MSR(r1)
100: tdne r11,r12
powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings When CONFIG_PPC_RFI_SRR_DEBUG=y we check the SRR values before returning from interrupts. This is done in asm using EMIT_BUG_ENTRY, and passing BUGFLAG_WARNING. However that fails to create an exception table entry for the warning, and so do_program_check() fails the exception table search and proceeds to call _exception(), resulting in an oops like: Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 2 PID: 1204 Comm: sigreturn_unali Tainted: P 5.16.0-rc2-00194-g91ca3d4f77c5 #12 NIP: c00000000000c5b0 LR: 0000000000000000 CTR: 0000000000000000 ... NIP [c00000000000c5b0] system_call_common+0x150/0x268 LR [0000000000000000] 0x0 Call Trace: [c00000000db73e10] [c00000000000c558] system_call_common+0xf8/0x268 (unreliable) ... Instruction dump: 7cc803a6 888d0931 2c240000 4082001c 38800000 988d0931 e8810170 e8a10178 7c9a03a6 7cbb03a6 7d7a02a6 e9810170 <7f0b6088> 7d7b02a6 e9810178 7f0b6088 We should instead use EMIT_WARN_ENTRY, which creates an exception table entry for the warning, allowing the warning to be correctly recognised, and the code to resume after printing the warning. Note however that because this warning is buried deep in the interrupt return path, we are not able to recover from it (due to MSR_RI being clear), so we still end up in die() with an unrecoverable exception. Fixes: 59dc5bfca0cb ("powerpc/64s: avoid reloading (H)SRR registers if they are still valid") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211221135101.2085547-2-mpe@ellerman.id.au
2021-12-21 21:51:00 +08:00
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
.endif
#endif
.endm
#ifdef CONFIG_PPC_BOOK3S
.macro system_call_vectored name trapnr
.globl system_call_vectored_\name
system_call_vectored_\name:
_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
SCV_INTERRUPT_TO_KERNEL
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
std r2,GPR2(r1)
LOAD_PACA_TOC()
mfcr r12
li r11,0
/* Save syscall parameters in r3-r8 */
SAVE_GPRS(3, 8, r1)
/* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
std r11,GPR10(r1)
std r11,GPR11(r1)
std r11,GPR12(r1)
std r9,GPR13(r1)
SAVE_NVGPRS(r1)
std r11,_XER(r1)
std r11,_LINK(r1)
std r11,_CTR(r1)
li r11,\trapnr
std r11,_TRAP(r1)
std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */
/* Calling convention has r3 = regs, r4 = orig r0 */
addi r3,r1,STACK_INT_FRAME_REGS
mr r4,r0
BEGIN_FTR_SECTION
HMT_MEDIUM
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/*
* scv enters with MSR[EE]=1 and is immediately considered soft-masked.
* The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
* and interrupts may be masked and pending already.
* system_call_exception() will call trace_hardirqs_off() which means
* interrupts could already have been blocked before trace_hardirqs_off,
* but this is the best we can do.
*/
powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S Zeroise user state in gprs (assign to zero) to reduce the influence of user registers on speculation within kernel syscall handlers. Clears occur at the very beginning of the sc and scv 0 interrupt handlers, with restores occurring following the execution of the syscall handler. Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all other interrupt sources. The remaining gprs are overwritten by entry macros to interrupt handlers, irrespective of whether or not a given handler consumes these register values. If an interrupt does not select the IMSR_R12 IOption, zeroise r12. Prior to this commit, r14-r31 are restored on a per-interrupt basis at exit, but now they are always restored on 64bit Book3S. Remove explicit REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear user registers on interrupt, and continue to depend on the return value of interrupt_exit_user_prepare to determine whether or not to restore non-volatiles. The mmap_bench benchmark in selftests should rapidly invoke pagefaults. See ~0.8% performance regression with this mitigation, but this indicates the worst-case performance due to heavier-weight interrupt handlers. This mitigation is able to be enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com
2022-12-01 15:10:17 +08:00
/*
* Zero user registers to prevent influencing speculative execution
* state of kernel code.
*/
SANITIZE_SYSCALL_GPRS()
bl system_call_exception
.Lsyscall_vectored_\name\()_exit:
addi r4,r1,STACK_INT_FRAME_REGS
li r5,1 /* scv */
bl syscall_exit_prepare
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
.Lsyscall_vectored_\name\()_rst_start:
lbz r11,PACAIRQHAPPENED(r13)
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
bne- syscall_vectored_\name\()_restart
li r11,IRQS_ENABLED
stb r11,PACAIRQSOFTMASK(r13)
li r11,0
stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
ld r2,_CCR(r1)
ld r4,_NIP(r1)
ld r5,_MSR(r1)
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
BEGIN_FTR_SECTION
HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S Zeroise user state in gprs (assign to zero) to reduce the influence of user registers on speculation within kernel syscall handlers. Clears occur at the very beginning of the sc and scv 0 interrupt handlers, with restores occurring following the execution of the syscall handler. Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all other interrupt sources. The remaining gprs are overwritten by entry macros to interrupt handlers, irrespective of whether or not a given handler consumes these register values. If an interrupt does not select the IMSR_R12 IOption, zeroise r12. Prior to this commit, r14-r31 are restored on a per-interrupt basis at exit, but now they are always restored on 64bit Book3S. Remove explicit REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear user registers on interrupt, and continue to depend on the return value of interrupt_exit_user_prepare to determine whether or not to restore non-volatiles. The mmap_bench benchmark in selftests should rapidly invoke pagefaults. See ~0.8% performance regression with this mitigation, but this indicates the worst-case performance due to heavier-weight interrupt handlers. This mitigation is able to be enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com
2022-12-01 15:10:17 +08:00
SANITIZE_RESTORE_NVGPRS()
cmpdi r3,0
bne .Lsyscall_vectored_\name\()_restore_regs
/* rfscv returns with LR->NIA and CTR->MSR */
mtlr r4
mtctr r5
/* Could zero these as per ABI, but we may consider a stricter ABI
* which preserves these if libc implementations can benefit, so
* restore them for now until further measurement is done. */
REST_GPR(0, r1)
REST_GPRS(4, 8, r1)
/* Zero volatile regs that may contain sensitive kernel data */
ZEROIZE_GPRS(9, 12)
mtspr SPRN_XER,r0
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
* The value of AMR only matters while we're in the kernel.
*/
mtcr r2
REST_GPRS(2, 3, r1)
REST_GPR(13, r1)
REST_GPR(1, r1)
RFSCV_TO_USER
b . /* prevent speculative execution */
.Lsyscall_vectored_\name\()_restore_regs:
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r5
ld r3,_CTR(r1)
ld r4,_LINK(r1)
ld r5,_XER(r1)
powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S Zeroise user state in gprs (assign to zero) to reduce the influence of user registers on speculation within kernel syscall handlers. Clears occur at the very beginning of the sc and scv 0 interrupt handlers, with restores occurring following the execution of the syscall handler. Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all other interrupt sources. The remaining gprs are overwritten by entry macros to interrupt handlers, irrespective of whether or not a given handler consumes these register values. If an interrupt does not select the IMSR_R12 IOption, zeroise r12. Prior to this commit, r14-r31 are restored on a per-interrupt basis at exit, but now they are always restored on 64bit Book3S. Remove explicit REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear user registers on interrupt, and continue to depend on the return value of interrupt_exit_user_prepare to determine whether or not to restore non-volatiles. The mmap_bench benchmark in selftests should rapidly invoke pagefaults. See ~0.8% performance regression with this mitigation, but this indicates the worst-case performance due to heavier-weight interrupt handlers. This mitigation is able to be enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com
2022-12-01 15:10:17 +08:00
HANDLER_RESTORE_NVGPRS()
REST_GPR(0, r1)
mtcr r2
mtctr r3
mtlr r4
mtspr SPRN_XER,r5
REST_GPRS(2, 13, r1)
REST_GPR(1, r1)
RFI_TO_USER
.Lsyscall_vectored_\name\()_rst_end:
syscall_vectored_\name\()_restart:
_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
GET_PACA(r13)
ld r1,PACA_EXIT_SAVE_R1(r13)
LOAD_PACA_TOC()
ld r3,RESULT(r1)
addi r4,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
bl syscall_exit_restart
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Lsyscall_vectored_\name\()_rst_start
1:
SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b)
RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart)
.endm
system_call_vectored common 0x3000
/*
* We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
* which is tested by system_call_exception when r0 is -1 (as set by vector
* entry code).
*/
system_call_vectored sigill 0x7ff0
#endif /* CONFIG_PPC_BOOK3S */
.balign IFETCH_ALIGN_BYTES
.globl system_call_common_real
system_call_common_real:
_ASM_NOKPROBE_SYMBOL(system_call_common_real)
ld r10,PACAKMSR(r13) /* get MSR value for kernel */
mtmsrd r10
.balign IFETCH_ALIGN_BYTES
.globl system_call_common
system_call_common:
_ASM_NOKPROBE_SYMBOL(system_call_common)
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
std r2,GPR2(r1)
#ifdef CONFIG_PPC_E500
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
LOAD_PACA_TOC()
mfcr r12
li r11,0
/* Save syscall parameters in r3-r8 */
SAVE_GPRS(3, 8, r1)
/* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
std r11,GPR10(r1)
std r11,GPR11(r1)
std r11,GPR12(r1)
std r9,GPR13(r1)
SAVE_NVGPRS(r1)
std r11,_XER(r1)
std r11,_CTR(r1)
mflr r10
/*
* This clears CR0.SO (bit 28), which is the error indication on
* return from this system call.
*/
rldimi r12,r11,28,(63-28)
li r11,0xc00
std r10,_LINK(r1)
std r11,_TRAP(r1)
std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */
/* Calling convention has r3 = regs, r4 = orig r0 */
addi r3,r1,STACK_INT_FRAME_REGS
mr r4,r0
#ifdef CONFIG_PPC_BOOK3S
li r11,1
stb r11,PACASRR_VALID(r13)
#endif
/*
* We always enter kernel from userspace with irq soft-mask enabled and
* nothing pending. system_call_exception() will call
* trace_hardirqs_off().
*/
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
#ifdef CONFIG_PPC_BOOK3S
li r12,-1 /* Set MSR_EE and MSR_RI */
mtmsrd r12,1
#else
wrteei 1
#endif
powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S Zeroise user state in gprs (assign to zero) to reduce the influence of user registers on speculation within kernel syscall handlers. Clears occur at the very beginning of the sc and scv 0 interrupt handlers, with restores occurring following the execution of the syscall handler. Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all other interrupt sources. The remaining gprs are overwritten by entry macros to interrupt handlers, irrespective of whether or not a given handler consumes these register values. If an interrupt does not select the IMSR_R12 IOption, zeroise r12. Prior to this commit, r14-r31 are restored on a per-interrupt basis at exit, but now they are always restored on 64bit Book3S. Remove explicit REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear user registers on interrupt, and continue to depend on the return value of interrupt_exit_user_prepare to determine whether or not to restore non-volatiles. The mmap_bench benchmark in selftests should rapidly invoke pagefaults. See ~0.8% performance regression with this mitigation, but this indicates the worst-case performance due to heavier-weight interrupt handlers. This mitigation is able to be enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com
2022-12-01 15:10:17 +08:00
/*
* Zero user registers to prevent influencing speculative execution
* state of kernel code.
*/
SANITIZE_SYSCALL_GPRS()
bl system_call_exception
.Lsyscall_exit:
addi r4,r1,STACK_INT_FRAME_REGS
li r5,0 /* !scv */
bl syscall_exit_prepare
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
#ifdef CONFIG_PPC_BOOK3S
.Lsyscall_rst_start:
lbz r11,PACAIRQHAPPENED(r13)
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
bne- syscall_restart
#endif
li r11,IRQS_ENABLED
stb r11,PACAIRQSOFTMASK(r13)
li r11,0
stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
ld r2,_CCR(r1)
ld r6,_LINK(r1)
mtlr r6
#ifdef CONFIG_PPC_BOOK3S
lbz r4,PACASRR_VALID(r13)
cmpdi r4,0
bne 1f
li r4,0
stb r4,PACASRR_VALID(r13)
#endif
ld r4,_NIP(r1)
ld r5,_MSR(r1)
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r5
1:
DEBUG_SRR_VALID srr
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S Zeroise user state in gprs (assign to zero) to reduce the influence of user registers on speculation within kernel syscall handlers. Clears occur at the very beginning of the sc and scv 0 interrupt handlers, with restores occurring following the execution of the syscall handler. Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all other interrupt sources. The remaining gprs are overwritten by entry macros to interrupt handlers, irrespective of whether or not a given handler consumes these register values. If an interrupt does not select the IMSR_R12 IOption, zeroise r12. Prior to this commit, r14-r31 are restored on a per-interrupt basis at exit, but now they are always restored on 64bit Book3S. Remove explicit REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear user registers on interrupt, and continue to depend on the return value of interrupt_exit_user_prepare to determine whether or not to restore non-volatiles. The mmap_bench benchmark in selftests should rapidly invoke pagefaults. See ~0.8% performance regression with this mitigation, but this indicates the worst-case performance due to heavier-weight interrupt handlers. This mitigation is able to be enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com
2022-12-01 15:10:17 +08:00
SANITIZE_RESTORE_NVGPRS()
cmpdi r3,0
bne .Lsyscall_restore_regs
/* Zero volatile regs that may contain sensitive kernel data */
ZEROIZE_GPR(0)
ZEROIZE_GPRS(4, 12)
mtctr r0
mtspr SPRN_XER,r0
.Lsyscall_restore_regs_cont:
BEGIN_FTR_SECTION
HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/*
* We don't need to restore AMR on the way back to userspace for KUAP.
* The value of AMR only matters while we're in the kernel.
*/
mtcr r2
REST_GPRS(2, 3, r1)
REST_GPR(13, r1)
REST_GPR(1, r1)
RFI_TO_USER
b . /* prevent speculative execution */
.Lsyscall_restore_regs:
ld r3,_CTR(r1)
ld r4,_XER(r1)
powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S Zeroise user state in gprs (assign to zero) to reduce the influence of user registers on speculation within kernel syscall handlers. Clears occur at the very beginning of the sc and scv 0 interrupt handlers, with restores occurring following the execution of the syscall handler. Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all other interrupt sources. The remaining gprs are overwritten by entry macros to interrupt handlers, irrespective of whether or not a given handler consumes these register values. If an interrupt does not select the IMSR_R12 IOption, zeroise r12. Prior to this commit, r14-r31 are restored on a per-interrupt basis at exit, but now they are always restored on 64bit Book3S. Remove explicit REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear user registers on interrupt, and continue to depend on the return value of interrupt_exit_user_prepare to determine whether or not to restore non-volatiles. The mmap_bench benchmark in selftests should rapidly invoke pagefaults. See ~0.8% performance regression with this mitigation, but this indicates the worst-case performance due to heavier-weight interrupt handlers. This mitigation is able to be enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS. Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Rohan McLure <rmclure@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com
2022-12-01 15:10:17 +08:00
HANDLER_RESTORE_NVGPRS()
mtctr r3
mtspr SPRN_XER,r4
REST_GPR(0, r1)
REST_GPRS(4, 12, r1)
b .Lsyscall_restore_regs_cont
.Lsyscall_rst_end:
#ifdef CONFIG_PPC_BOOK3S
syscall_restart:
_ASM_NOKPROBE_SYMBOL(syscall_restart)
GET_PACA(r13)
ld r1,PACA_EXIT_SAVE_R1(r13)
LOAD_PACA_TOC()
ld r3,RESULT(r1)
addi r4,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
bl syscall_exit_restart
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Lsyscall_rst_start
1:
SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b)
RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart)
#endif
/*
* If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
* touched, no exit work created, then this can be used.
*/
.balign IFETCH_ALIGN_BYTES
.globl fast_interrupt_return_srr
fast_interrupt_return_srr:
_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
kuap_check_amr r3, r4
ld r5,_MSR(r1)
andi. r0,r5,MSR_PR
#ifdef CONFIG_PPC_BOOK3S
beq 1f
kuap_user_restore r3, r4
b .Lfast_user_interrupt_return_srr
1: kuap_kernel_restore r3, r4
andi. r0,r5,MSR_RI
li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
bne+ .Lfast_kernel_interrupt_return_srr
addi r3,r1,STACK_INT_FRAME_REGS
bl unrecoverable_exception
b . /* should not get here */
#else
bne .Lfast_user_interrupt_return_srr
b .Lfast_kernel_interrupt_return_srr
#endif
.macro interrupt_return_macro srr
.balign IFETCH_ALIGN_BYTES
.globl interrupt_return_\srr
interrupt_return_\srr\():
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
ld r4,_MSR(r1)
andi. r0,r4,MSR_PR
beq interrupt_return_\srr\()_kernel
interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
addi r3,r1,STACK_INT_FRAME_REGS
bl interrupt_exit_user_prepare
#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
cmpdi r3,0
bne- .Lrestore_nvgprs_\srr
.Lrestore_nvgprs_\srr\()_cont:
#endif
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
#ifdef CONFIG_PPC_BOOK3S
.Linterrupt_return_\srr\()_user_rst_start:
lbz r11,PACAIRQHAPPENED(r13)
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
bne- interrupt_return_\srr\()_user_restart
#endif
li r11,IRQS_ENABLED
stb r11,PACAIRQSOFTMASK(r13)
li r11,0
stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
.Lfast_user_interrupt_return_\srr\():
SANITIZE_RESTORE_NVGPRS()
#ifdef CONFIG_PPC_BOOK3S
.ifc \srr,srr
lbz r4,PACASRR_VALID(r13)
.else
lbz r4,PACAHSRR_VALID(r13)
.endif
cmpdi r4,0
li r4,0
bne 1f
#endif
ld r11,_NIP(r1)
ld r12,_MSR(r1)
.ifc \srr,srr
mtspr SPRN_SRR0,r11
mtspr SPRN_SRR1,r12
1:
#ifdef CONFIG_PPC_BOOK3S
stb r4,PACASRR_VALID(r13)
#endif
.else
mtspr SPRN_HSRR0,r11
mtspr SPRN_HSRR1,r12
1:
#ifdef CONFIG_PPC_BOOK3S
stb r4,PACAHSRR_VALID(r13)
#endif
.endif
DEBUG_SRR_VALID \srr
#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
lbz r4,PACAIRQSOFTMASK(r13)
tdnei r4,IRQS_ENABLED
#endif
BEGIN_FTR_SECTION
ld r10,_PPR(r1)
mtspr SPRN_PPR,r10
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
FTR_SECTION_ELSE
ldarx r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
ld r3,_CCR(r1)
ld r4,_LINK(r1)
ld r5,_CTR(r1)
ld r6,_XER(r1)
li r0,0
REST_GPRS(7, 13, r1)
mtcr r3
mtlr r4
mtctr r5
mtspr SPRN_XER,r6
REST_GPRS(2, 6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
.ifc \srr,srr
RFI_TO_USER
.else
HRFI_TO_USER
.endif
b . /* prevent speculative execution */
.Linterrupt_return_\srr\()_user_rst_end:
#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
.Lrestore_nvgprs_\srr\():
REST_NVGPRS(r1)
b .Lrestore_nvgprs_\srr\()_cont
#endif
#ifdef CONFIG_PPC_BOOK3S
interrupt_return_\srr\()_user_restart:
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
GET_PACA(r13)
ld r1,PACA_EXIT_SAVE_R1(r13)
LOAD_PACA_TOC()
addi r3,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
bl interrupt_exit_user_restart
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Linterrupt_return_\srr\()_user_rst_start
1:
SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b)
RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart)
#endif
.balign IFETCH_ALIGN_BYTES
interrupt_return_\srr\()_kernel:
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
addi r3,r1,STACK_INT_FRAME_REGS
bl interrupt_exit_kernel_prepare
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
.Linterrupt_return_\srr\()_kernel_rst_start:
ld r11,SOFTE(r1)
cmpwi r11,IRQS_ENABLED
stb r11,PACAIRQSOFTMASK(r13)
powerpc/64/interrupt: Fix return to masked context after hard-mask irq becomes pending If a synchronous interrupt (e.g., hash fault) is taken inside an irqs-disabled region which has MSR[EE]=1, then an asynchronous interrupt that is PACA_IRQ_MUST_HARD_MASK (e.g., PMI) is taken inside the synchronous interrupt handler, then the synchronous interrupt will return with MSR[EE]=1 and the asynchronous interrupt fires again. If the asynchronous interrupt is a PMI and the original context does not have PMIs disabled (only Linux IRQs), the asynchronous interrupt will fire despite having the PMI marked soft pending. This can confuse the perf code and cause warnings. This patch changes the interrupt return so that irqs-disabled MSR[EE]=1 contexts will be returned to with MSR[EE]=0 if a PACA_IRQ_MUST_HARD_MASK interrupt has become pending in the meantime. The longer explanation for what happens: 1. local_irq_disable() 2. Hash fault interrupt fires, do_hash_fault handler runs 3. interrupt_enter_prepare() sets IRQS_ALL_DISABLED 4. interrupt_enter_prepare() sets MSR[EE]=1 5. PMU interrupt fires, masked handler runs 6. Masked handler marks PMI pending 7. Masked handler returns with PACA_IRQ_HARD_DIS set, MSR[EE]=0 8. do_hash_fault interrupt return handler runs 9. interrupt_exit_kernel_prepare() clears PACA_IRQ_HARD_DIS 10. interrupt returns with MSR[EE]=1 11. PMU interrupt fires, perf handler runs Fixes: 4423eb5ae32e ("powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220926054305.2671436-4-npiggin@gmail.com
2022-09-26 13:43:01 +08:00
beq .Linterrupt_return_\srr\()_soft_enabled
/*
* Returning to soft-disabled context.
* Check if a MUST_HARD_MASK interrupt has become pending, in which
* case we need to disable MSR[EE] in the return context.
powerpc/64s/interrupt: Fix clear of PACA_IRQS_HARD_DIS when returning to soft-masked context Commit a4cb3651a1743 ("powerpc/64s/interrupt: Fix lost interrupts when returning to soft-masked context") fixed the problem of pending irqs being cleared when clearing the HARD_DIS bit, but then it didn't clear the bit at all. This change clears HARD_DIS without affecting other bits in the mask. When an interrupt hits in a soft-masked section that has MSR[EE]=1, it can hard disable and set PACA_IRQS_HARD_DIS, which must be cleared when returning to the EE=1 caller (unless it was set due to a MUST_HARD_MASK interrupt becoming pending). Failure to clear this leaves the returned-to context running with MSR[EE]=1 and PACA_IRQS_HARD_DIS, which confuses irq assertions and could be dangerous for code that might test the flag. This was observed in a hash MMU kernel where a kernel hash fault hits in a local_irqs_disabled region that has EE=1. The hash fault also runs with EE=1, then as it returns, a decrementer hits in the restart section and the irq restart code hard-masks which sets the PACA_IRQ_HARD_DIS flag, which is not clear when the original context is returned to. Reported-by: Sachin Sant <sachinp@linux.ibm.com> Fixes: a4cb3651a1743 ("powerpc/64s/interrupt: Fix lost interrupts when returning to soft-masked context") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Tested-by: Sachin Sant <sachinp@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221022052207.471328-1-npiggin@gmail.com
2022-10-22 13:22:07 +08:00
*
* The MSR[EE] check catches among other things the short incoherency
* in hard_irq_disable() between clearing MSR[EE] and setting
* PACA_IRQ_HARD_DIS.
powerpc/64/interrupt: Fix return to masked context after hard-mask irq becomes pending If a synchronous interrupt (e.g., hash fault) is taken inside an irqs-disabled region which has MSR[EE]=1, then an asynchronous interrupt that is PACA_IRQ_MUST_HARD_MASK (e.g., PMI) is taken inside the synchronous interrupt handler, then the synchronous interrupt will return with MSR[EE]=1 and the asynchronous interrupt fires again. If the asynchronous interrupt is a PMI and the original context does not have PMIs disabled (only Linux IRQs), the asynchronous interrupt will fire despite having the PMI marked soft pending. This can confuse the perf code and cause warnings. This patch changes the interrupt return so that irqs-disabled MSR[EE]=1 contexts will be returned to with MSR[EE]=0 if a PACA_IRQ_MUST_HARD_MASK interrupt has become pending in the meantime. The longer explanation for what happens: 1. local_irq_disable() 2. Hash fault interrupt fires, do_hash_fault handler runs 3. interrupt_enter_prepare() sets IRQS_ALL_DISABLED 4. interrupt_enter_prepare() sets MSR[EE]=1 5. PMU interrupt fires, masked handler runs 6. Masked handler marks PMI pending 7. Masked handler returns with PACA_IRQ_HARD_DIS set, MSR[EE]=0 8. do_hash_fault interrupt return handler runs 9. interrupt_exit_kernel_prepare() clears PACA_IRQ_HARD_DIS 10. interrupt returns with MSR[EE]=1 11. PMU interrupt fires, perf handler runs Fixes: 4423eb5ae32e ("powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220926054305.2671436-4-npiggin@gmail.com
2022-09-26 13:43:01 +08:00
*/
ld r12,_MSR(r1)
andi. r10,r12,MSR_EE
beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
lbz r11,PACAIRQHAPPENED(r13)
andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
powerpc/64s/interrupt: Fix clear of PACA_IRQS_HARD_DIS when returning to soft-masked context Commit a4cb3651a1743 ("powerpc/64s/interrupt: Fix lost interrupts when returning to soft-masked context") fixed the problem of pending irqs being cleared when clearing the HARD_DIS bit, but then it didn't clear the bit at all. This change clears HARD_DIS without affecting other bits in the mask. When an interrupt hits in a soft-masked section that has MSR[EE]=1, it can hard disable and set PACA_IRQS_HARD_DIS, which must be cleared when returning to the EE=1 caller (unless it was set due to a MUST_HARD_MASK interrupt becoming pending). Failure to clear this leaves the returned-to context running with MSR[EE]=1 and PACA_IRQS_HARD_DIS, which confuses irq assertions and could be dangerous for code that might test the flag. This was observed in a hash MMU kernel where a kernel hash fault hits in a local_irqs_disabled region that has EE=1. The hash fault also runs with EE=1, then as it returns, a decrementer hits in the restart section and the irq restart code hard-masks which sets the PACA_IRQ_HARD_DIS flag, which is not clear when the original context is returned to. Reported-by: Sachin Sant <sachinp@linux.ibm.com> Fixes: a4cb3651a1743 ("powerpc/64s/interrupt: Fix lost interrupts when returning to soft-masked context") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Tested-by: Sachin Sant <sachinp@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221022052207.471328-1-npiggin@gmail.com
2022-10-22 13:22:07 +08:00
bne 1f // HARD_MASK is pending
// No HARD_MASK pending, clear possible HARD_DIS set by interrupt
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
stb r11,PACAIRQHAPPENED(r13)
b .Lfast_kernel_interrupt_return_\srr\()
powerpc/64/interrupt: Fix return to masked context after hard-mask irq becomes pending If a synchronous interrupt (e.g., hash fault) is taken inside an irqs-disabled region which has MSR[EE]=1, then an asynchronous interrupt that is PACA_IRQ_MUST_HARD_MASK (e.g., PMI) is taken inside the synchronous interrupt handler, then the synchronous interrupt will return with MSR[EE]=1 and the asynchronous interrupt fires again. If the asynchronous interrupt is a PMI and the original context does not have PMIs disabled (only Linux IRQs), the asynchronous interrupt will fire despite having the PMI marked soft pending. This can confuse the perf code and cause warnings. This patch changes the interrupt return so that irqs-disabled MSR[EE]=1 contexts will be returned to with MSR[EE]=0 if a PACA_IRQ_MUST_HARD_MASK interrupt has become pending in the meantime. The longer explanation for what happens: 1. local_irq_disable() 2. Hash fault interrupt fires, do_hash_fault handler runs 3. interrupt_enter_prepare() sets IRQS_ALL_DISABLED 4. interrupt_enter_prepare() sets MSR[EE]=1 5. PMU interrupt fires, masked handler runs 6. Masked handler marks PMI pending 7. Masked handler returns with PACA_IRQ_HARD_DIS set, MSR[EE]=0 8. do_hash_fault interrupt return handler runs 9. interrupt_exit_kernel_prepare() clears PACA_IRQ_HARD_DIS 10. interrupt returns with MSR[EE]=1 11. PMU interrupt fires, perf handler runs Fixes: 4423eb5ae32e ("powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220926054305.2671436-4-npiggin@gmail.com
2022-09-26 13:43:01 +08:00
powerpc/64s/interrupt: Fix clear of PACA_IRQS_HARD_DIS when returning to soft-masked context Commit a4cb3651a1743 ("powerpc/64s/interrupt: Fix lost interrupts when returning to soft-masked context") fixed the problem of pending irqs being cleared when clearing the HARD_DIS bit, but then it didn't clear the bit at all. This change clears HARD_DIS without affecting other bits in the mask. When an interrupt hits in a soft-masked section that has MSR[EE]=1, it can hard disable and set PACA_IRQS_HARD_DIS, which must be cleared when returning to the EE=1 caller (unless it was set due to a MUST_HARD_MASK interrupt becoming pending). Failure to clear this leaves the returned-to context running with MSR[EE]=1 and PACA_IRQS_HARD_DIS, which confuses irq assertions and could be dangerous for code that might test the flag. This was observed in a hash MMU kernel where a kernel hash fault hits in a local_irqs_disabled region that has EE=1. The hash fault also runs with EE=1, then as it returns, a decrementer hits in the restart section and the irq restart code hard-masks which sets the PACA_IRQ_HARD_DIS flag, which is not clear when the original context is returned to. Reported-by: Sachin Sant <sachinp@linux.ibm.com> Fixes: a4cb3651a1743 ("powerpc/64s/interrupt: Fix lost interrupts when returning to soft-masked context") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Tested-by: Sachin Sant <sachinp@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221022052207.471328-1-npiggin@gmail.com
2022-10-22 13:22:07 +08:00
1: /* Must clear MSR_EE from _MSR */
powerpc/64/interrupt: Fix return to masked context after hard-mask irq becomes pending If a synchronous interrupt (e.g., hash fault) is taken inside an irqs-disabled region which has MSR[EE]=1, then an asynchronous interrupt that is PACA_IRQ_MUST_HARD_MASK (e.g., PMI) is taken inside the synchronous interrupt handler, then the synchronous interrupt will return with MSR[EE]=1 and the asynchronous interrupt fires again. If the asynchronous interrupt is a PMI and the original context does not have PMIs disabled (only Linux IRQs), the asynchronous interrupt will fire despite having the PMI marked soft pending. This can confuse the perf code and cause warnings. This patch changes the interrupt return so that irqs-disabled MSR[EE]=1 contexts will be returned to with MSR[EE]=0 if a PACA_IRQ_MUST_HARD_MASK interrupt has become pending in the meantime. The longer explanation for what happens: 1. local_irq_disable() 2. Hash fault interrupt fires, do_hash_fault handler runs 3. interrupt_enter_prepare() sets IRQS_ALL_DISABLED 4. interrupt_enter_prepare() sets MSR[EE]=1 5. PMU interrupt fires, masked handler runs 6. Masked handler marks PMI pending 7. Masked handler returns with PACA_IRQ_HARD_DIS set, MSR[EE]=0 8. do_hash_fault interrupt return handler runs 9. interrupt_exit_kernel_prepare() clears PACA_IRQ_HARD_DIS 10. interrupt returns with MSR[EE]=1 11. PMU interrupt fires, perf handler runs Fixes: 4423eb5ae32e ("powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220926054305.2671436-4-npiggin@gmail.com
2022-09-26 13:43:01 +08:00
#ifdef CONFIG_PPC_BOOK3S
li r10,0
/* Clear valid before changing _MSR */
.ifc \srr,srr
stb r10,PACASRR_VALID(r13)
.else
stb r10,PACAHSRR_VALID(r13)
.endif
#endif
xori r12,r12,MSR_EE
std r12,_MSR(r1)
b .Lfast_kernel_interrupt_return_\srr\()
.Linterrupt_return_\srr\()_soft_enabled:
/*
* In the soft-enabled case, need to double-check that we have no
* pending interrupts that might have come in before we reached the
* restart section of code, and restart the exit so those can be
* handled.
*
* If there are none, it is be possible that the interrupt still
* has PACA_IRQ_HARD_DIS set, which needs to be cleared for the
* interrupted context. This clear will not clobber a new pending
* interrupt coming in, because we're in the restart section, so
* such would return to the restart location.
*/
#ifdef CONFIG_PPC_BOOK3S
lbz r11,PACAIRQHAPPENED(r13)
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
bne- interrupt_return_\srr\()_kernel_restart
#endif
powerpc/64/interrupt: Fix return to masked context after hard-mask irq becomes pending If a synchronous interrupt (e.g., hash fault) is taken inside an irqs-disabled region which has MSR[EE]=1, then an asynchronous interrupt that is PACA_IRQ_MUST_HARD_MASK (e.g., PMI) is taken inside the synchronous interrupt handler, then the synchronous interrupt will return with MSR[EE]=1 and the asynchronous interrupt fires again. If the asynchronous interrupt is a PMI and the original context does not have PMIs disabled (only Linux IRQs), the asynchronous interrupt will fire despite having the PMI marked soft pending. This can confuse the perf code and cause warnings. This patch changes the interrupt return so that irqs-disabled MSR[EE]=1 contexts will be returned to with MSR[EE]=0 if a PACA_IRQ_MUST_HARD_MASK interrupt has become pending in the meantime. The longer explanation for what happens: 1. local_irq_disable() 2. Hash fault interrupt fires, do_hash_fault handler runs 3. interrupt_enter_prepare() sets IRQS_ALL_DISABLED 4. interrupt_enter_prepare() sets MSR[EE]=1 5. PMU interrupt fires, masked handler runs 6. Masked handler marks PMI pending 7. Masked handler returns with PACA_IRQ_HARD_DIS set, MSR[EE]=0 8. do_hash_fault interrupt return handler runs 9. interrupt_exit_kernel_prepare() clears PACA_IRQ_HARD_DIS 10. interrupt returns with MSR[EE]=1 11. PMU interrupt fires, perf handler runs Fixes: 4423eb5ae32e ("powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220926054305.2671436-4-npiggin@gmail.com
2022-09-26 13:43:01 +08:00
li r11,0
stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
.Lfast_kernel_interrupt_return_\srr\():
SANITIZE_RESTORE_NVGPRS()
cmpdi cr1,r3,0
#ifdef CONFIG_PPC_BOOK3S
.ifc \srr,srr
lbz r4,PACASRR_VALID(r13)
.else
lbz r4,PACAHSRR_VALID(r13)
.endif
cmpdi r4,0
li r4,0
bne 1f
#endif
ld r11,_NIP(r1)
ld r12,_MSR(r1)
.ifc \srr,srr
mtspr SPRN_SRR0,r11
mtspr SPRN_SRR1,r12
1:
#ifdef CONFIG_PPC_BOOK3S
stb r4,PACASRR_VALID(r13)
#endif
.else
mtspr SPRN_HSRR0,r11
mtspr SPRN_HSRR1,r12
1:
#ifdef CONFIG_PPC_BOOK3S
stb r4,PACAHSRR_VALID(r13)
#endif
.endif
DEBUG_SRR_VALID \srr
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
FTR_SECTION_ELSE
ldarx r0,0,r1
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
ld r3,_LINK(r1)
ld r4,_CTR(r1)
ld r5,_XER(r1)
ld r6,_CCR(r1)
li r0,0
REST_GPRS(7, 12, r1)
mtlr r3
mtctr r4
mtspr SPRN_XER,r5
/*
* Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse
* the reliable stack unwinder later on. Clear it.
*/
std r0,STACK_INT_FRAME_MARKER(r1)
REST_GPRS(2, 5, r1)
bne- cr1,1f /* emulate stack store */
mtcr r6
REST_GPR(6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
.ifc \srr,srr
RFI_TO_KERNEL
.else
HRFI_TO_KERNEL
.endif
b . /* prevent speculative execution */
1: /*
* Emulate stack store with update. New r1 value was already calculated
* and updated in our interrupt regs by emulate_loadstore, but we can't
* store the previous value of r1 to the stack before re-loading our
* registers from it, otherwise they could be clobbered. Use
* PACA_EXGEN as temporary storage to hold the store data, as
* interrupts are disabled here so it won't be clobbered.
*/
mtcr r6
std r9,PACA_EXGEN+0(r13)
addi r9,r1,INT_FRAME_SIZE /* get original r1 */
REST_GPR(6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
std r9,0(r1) /* perform store component of stdu */
ld r9,PACA_EXGEN+0(r13)
.ifc \srr,srr
RFI_TO_KERNEL
.else
HRFI_TO_KERNEL
.endif
b . /* prevent speculative execution */
.Linterrupt_return_\srr\()_kernel_rst_end:
#ifdef CONFIG_PPC_BOOK3S
interrupt_return_\srr\()_kernel_restart:
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
GET_PACA(r13)
ld r1,PACA_EXIT_SAVE_R1(r13)
LOAD_PACA_TOC()
addi r3,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
bl interrupt_exit_kernel_restart
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Linterrupt_return_\srr\()_kernel_rst_start
1:
SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b)
RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart)
#endif
.endm
interrupt_return_macro srr
#ifdef CONFIG_PPC_BOOK3S
interrupt_return_macro hsrr
.globl __end_soft_masked
__end_soft_masked:
powerpc/64/asm: Do not reassign labels The LLVM integrated assembler really does not like us reassigning things to the same label: <instantiation>:7:9: error: invalid reassignment of non-absolute variable 'fs_label' This happens across a bunch of platforms: https://github.com/ClangBuiltLinux/linux/issues/1043 https://github.com/ClangBuiltLinux/linux/issues/1008 https://github.com/ClangBuiltLinux/linux/issues/920 https://github.com/ClangBuiltLinux/linux/issues/1050 There is no hope of getting this fixed in LLVM (see https://github.com/ClangBuiltLinux/linux/issues/1043#issuecomment-641571200 and https://bugs.llvm.org/show_bug.cgi?id=47798#c1 ) so if we want to build with LLVM_IAS, we need to hack around it ourselves. For us the big problem comes from this: \#define USE_FIXED_SECTION(sname) \ fs_label = start_##sname; \ fs_start = sname##_start; \ use_ftsec sname; \#define USE_TEXT_SECTION() fs_label = start_text; \ fs_start = text_start; \ .text and in particular fs_label. This works around it by not setting those 'variables' and requiring that users of the variables instead track for themselves what section they are in. This isn't amazing, by any stretch, but it gets us further in the compilation. Note that even though users have to keep track of the section, using a wrong one produces an error with both binutils and llvm which prevents from using wrong section at the compile time: llvm error example: AS arch/powerpc/kernel/head_64.o <unknown>:0: error: Cannot represent a difference across sections make[3]: *** [/home/aik/p/kernels-llvm/llvm/scripts/Makefile.build:388: arch/powerpc/kernel/head_64.o] Error 1 binutils error example: /home/aik/p/kernels-llvm/llvm/arch/powerpc/kernel/exceptions-64s.S: Assembler messages: /home/aik/p/kernels-llvm/llvm/arch/powerpc/kernel/exceptions-64s.S:1974: Error: can't resolve `system_call_common' {.text section} - `start_r eal_vectors' {.head.text.real_vectors section} make[3]: *** [/home/aik/p/kernels-llvm/llvm/scripts/Makefile.build:388: arch/powerpc/kernel/head_64.o] Error 1 Signed-off-by: Daniel Axtens <dja@axtens.net> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211221055904.555763-5-aik@ozlabs.ru
2021-12-21 13:59:02 +08:00
DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3S
_GLOBAL(ret_from_fork_scv)
bl schedule_tail
REST_NVGPRS(r1)
li r3,0 /* fork() return value */
b .Lsyscall_vectored_common_exit
#endif
_GLOBAL(ret_from_fork)
bl schedule_tail
REST_NVGPRS(r1)
li r3,0 /* fork() return value */
b .Lsyscall_exit
_GLOBAL(ret_from_kernel_thread)
bl schedule_tail
REST_NVGPRS(r1)
mtctr r14
mr r3,r15
#ifdef CONFIG_PPC64_ELF_ABI_V2
mr r12,r14
#endif
bctrl
li r3,0
b .Lsyscall_exit