[Spectre] Fix MIR verifier errors in retpoline thunks

Summary:
The main challenge here is that X86InstrInfo::AnalyzeBranch doesn't
understand the way we're using a CALL instruction as a branch, so we
can't list the CallTarget MBB as a successor of the entry block. If we
don't list it as a successor, then the AsmPrinter doesn't print a label
for the MBB.

Fix the issue by inserting our own label at the beginning of the call
target block. We can rely on the AsmPrinter to always emit it, even
though the block appears to be unreachable, but address-taken.

Fixes PR38391.

Reviewers: thegameg, chandlerc, echristo

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D53653

llvm-svn: 345426
This commit is contained in:
Reid Kleckner 2018-10-26 20:26:36 +00:00
parent e2ec68e177
commit 98d880fbd7
4 changed files with 36 additions and 20 deletions

View File

@ -74,7 +74,7 @@ private:
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
void populateThunk(MachineFunction &MF, unsigned Reg);
};
} // end anonymous namespace
@ -236,25 +236,33 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
}
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
Optional<unsigned> Reg) {
unsigned Reg) {
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
// generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
while (MF.size() > 1)
MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MCSymbol *TargetSym = MF.getContext().createTempSymbol();
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget);
Entry->addSuccessor(CallTarget);
Entry->addLiveIn(Reg);
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
// The MIR verifier thinks that the CALL in the entry block will fall through
// to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is
// the successor, but the MIR verifier doesn't know how to cope with that.
Entry->addSuccessor(CaptureSpec);
CallTarget->setHasAddressTaken();
// In the capture loop for speculation, we want to stop the processor from
// speculating as fast as possible. On Intel processors, the PAUSE instruction
@ -270,7 +278,10 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
CaptureSpec->setHasAddressTaken();
CaptureSpec->addSuccessor(CaptureSpec);
CallTarget->addLiveIn(Reg);
CallTarget->setHasAddressTaken();
CallTarget->setAlignment(4);
insertRegReturnAddrClobber(*CallTarget, *Reg);
insertRegReturnAddrClobber(*CallTarget, Reg);
CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}

View File

@ -1,8 +1,8 @@
; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
declare void @bar(i32)

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
; RUN: llc -verify-machineinstrs -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
; because there are no available scratch registers. The Linux kernel builds

View File

@ -1,8 +1,8 @@
; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
declare void @bar(i32)
@ -428,8 +428,9 @@ latch:
; X64-NEXT: lfence
; X64-NEXT: jmp [[CAPTURE_SPEC]]
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: [[CALL_TARGET]]: # Block address taken
; X64-NEXT: {{.*}} # Block address taken
; X64-NEXT: # %entry
; X64-NEXT: [[CALL_TARGET]]:
; X64-NEXT: movq %r11, (%rsp)
; X64-NEXT: retq
;
@ -446,8 +447,9 @@ latch:
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: {{.*}} # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: [[CALL_TARGET]]:
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: retl
;
@ -464,8 +466,9 @@ latch:
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: {{.*}} # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: [[CALL_TARGET]]:
; X86-NEXT: movl %ecx, (%esp)
; X86-NEXT: retl
;
@ -482,8 +485,9 @@ latch:
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: {{.*}} # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: [[CALL_TARGET]]:
; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: retl
;
@ -500,8 +504,9 @@ latch:
; X86-NEXT: lfence
; X86-NEXT: jmp [[CAPTURE_SPEC]]
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: {{.*}} # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: [[CALL_TARGET]]:
; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: retl