[ARM] implement LOAD_STACK_GUARD for remaining targets

Currently, LOAD_STACK_GUARD on ARM is only implemented for Mach-O targets;
all other targets rely on the generic support, which may result in the stack
canary value or address being spilled, or being kept in a callee-saved
register across function calls. In the latter case, the value still gets
spilled, only by the callee when it decides to free up that register.
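
For illustration only (do_work and demo are hypothetical names, not from this
patch), the problematic pattern looks roughly like this:

  // With the generic lowering, the canary value loaded in the prologue (or
  // the address it is loaded from) can stay live across the call to
  // do_work(), because it is compared again in the epilogue. It therefore
  // gets spilled, or parked in a callee-saved register that the callee may
  // spill in turn.
  extern void do_work(char *buf);

  void demo() {
    char buf[64]; // large enough for -fstack-protector to instrument
    do_work(buf); // a call between the canary store and the canary check
  }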

So let's implement LOAD_STACK_GUARD for the other targets as well. This
ensures that the load of the stack canary is fully rematerialized in the
epilogue, instead of being preserved across calls.
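
Mechanically, a target opts in via two hooks: useLoadStackGuardNode() makes
ISel emit the rematerializable LOAD_STACK_GUARD pseudo instead of an ordinary
load, and expandPostRAPseudo() lowers that pseudo after register allocation.
A minimal sketch of the pattern (the MyTarget* names are placeholders; the
real ARM versions appear in the diff below):

  bool MyTargetLowering::useLoadStackGuardNode() const {
    return true; // let ISel use the LOAD_STACK_GUARD pseudo
  }

  bool MyTargetInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
    if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
      expandLoadStackGuard(MI); // emit the target's real load sequence
      MI.getParent()->erase(MI);
      return true;
    }
    return false;
  }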

This code was split off from

  D112768: [ARM] implement support for TLS register based stack protector

for which it is a prerequisite.

Reviewed By: nickdesaulniers

Differential Revision: https://reviews.llvm.org/D112811
Ard Biesheuvel, 2021-11-08 22:22:04 +01:00 (committed by Ard Biesheuvel)
parent 9a3cb73460, commit 2caf85ad7a
9 changed files with 51 additions and 38 deletions

@@ -1682,8 +1682,6 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
-    assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
-           "LOAD_STACK_GUARD currently supported only for MachO.");
     expandLoadStackGuard(MI);
     MI.getParent()->erase(MI);
     return true;
   }
@@ -4883,8 +4881,6 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
   return true;
 }
 
-// LoadStackGuard has so far only been implemented for MachO. Different code
-// sequence is needed for other targets.
 void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
                                                 unsigned LoadImmOpc,
                                                 unsigned LoadOpc) const {
@@ -4896,12 +4892,25 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
   Register Reg = MI->getOperand(0).getReg();
   const GlobalValue *GV =
       cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+  bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
   MachineInstrBuilder MIB;
 
-  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
-      .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+  unsigned TargetFlags = ARMII::MO_NO_FLAG;
+  if (Subtarget.isTargetMachO()) {
+    TargetFlags |= ARMII::MO_NONLAZY;
+  } else if (Subtarget.isTargetCOFF()) {
+    if (GV->hasDLLImportStorageClass())
+      TargetFlags |= ARMII::MO_DLLIMPORT;
+    else if (IsIndirect)
+      TargetFlags |= ARMII::MO_COFFSTUB;
+  } else if (Subtarget.isGVInGOT(GV)) {
+    TargetFlags |= ARMII::MO_GOT;
+  }
 
-  if (Subtarget.isGVIndirectSymbol(GV)) {
+  BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+      .addGlobalAddress(GV, 0, TargetFlags);
+
+  if (IsIndirect) {
     MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
     MIB.addReg(Reg, RegState::Kill).addImm(0);
     auto Flags = MachineMemOperand::MOLoad |

@@ -20682,10 +20682,7 @@ bool ARMTargetLowering::shouldInsertFencesForAtomic(
   return InsertFencesForAtomic;
 }
 
-// This has so far only been implemented for MachO.
-bool ARMTargetLowering::useLoadStackGuardNode() const {
-  return Subtarget->isTargetMachO();
-}
+bool ARMTargetLowering::useLoadStackGuardNode() const { return true; }
 
 void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
   if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())

@@ -96,7 +96,10 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
   const TargetMachine &TM = MF.getTarget();
 
-  if (!Subtarget.useMovt()) {
+  const GlobalValue *GV =
+      cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+  if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) {
     if (TM.isPositionIndependent())
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
     else
@@ -109,9 +112,6 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
     return;
   }
 
-  const GlobalValue *GV =
-      cast<GlobalValue>((*MI->memoperands_begin())->getValue());
-
   if (!Subtarget.isGVIndirectSymbol(GV)) {
     expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12);
     return;

@@ -250,7 +250,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 void Thumb2InstrInfo::expandLoadStackGuard(
     MachineBasicBlock::iterator MI) const {
   MachineFunction &MF = *MI->getParent()->getParent();
-  if (MF.getTarget().isPositionIndependent())
+  const GlobalValue *GV =
+      cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+  if (MF.getSubtarget<ARMSubtarget>().isGVInGOT(GV))
+    expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::t2LDRi12);
+  else if (MF.getTarget().isPositionIndependent())
     expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12);
   else
     expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12);

@@ -21,13 +21,13 @@
 define void @layout_ssp() ssp {
 entry:
 ; Expected stack layout for ssp is
-; 176 large_char . Group 1, nested arrays, arrays >= ssp-buffer-size
-; 168 struct_large_char .
-; 164 scalar1 | Everything else
-; 160 scalar2
-; 156 scalar3
-; 152 addr-of
-; 148 small_nonchar
+; 180 large_char . Group 1, nested arrays, arrays >= ssp-buffer-size
+; 172 struct_large_char .
+; 168 scalar1 | Everything else
+; 164 scalar2
+; 160 scalar3
+; 156 addr-of
+; 152 small_nonchar
 ; 112 large_nonchar
 ; 110 small_char
 ; 108 struct_small_char
@@ -37,23 +37,23 @@ entry:
 ; CHECK: layout_ssp:
 ; CHECK: bl get_scalar1
-; CHECK: str r0, [sp, #164]
+; CHECK: str r0, [sp, #168]
 ; CHECK: bl end_scalar1
 ; CHECK: bl get_scalar2
-; CHECK: str r0, [sp, #160]
-; CHECK: bl end_scalar2
+; CHECK: str r0, [sp, #164]
+; CHECK: bl end_scalar
 ; CHECK: bl get_scalar3
-; CHECK: str r0, [sp, #156]
+; CHECK: str r0, [sp, #160]
 ; CHECK: bl end_scalar3
 ; CHECK: bl get_addrof
-; CHECK: str r0, [sp, #152]
+; CHECK: str r0, [sp, #156]
 ; CHECK: bl end_addrof
 ; CHECK: get_small_nonchar
-; CHECK: strh r0, [sp, #148]
+; CHECK: strh r0, [sp, #152]
 ; CHECK: bl end_small_nonchar
 ; CHECK: bl get_large_nonchar
@@ -65,11 +65,11 @@ entry:
 ; CHECK: bl end_small_char
 ; CHECK: bl get_large_char
-; CHECK: strb r0, [sp, #176]
+; CHECK: strb r0, [sp, #180]
 ; CHECK: bl end_large_char
 ; CHECK: bl get_struct_large_char
-; CHECK: strb r0, [sp, #168]
+; CHECK: strb r0, [sp, #172]
 ; CHECK: bl end_struct_large_char
 ; CHECK: bl get_struct_small_char

@@ -3,12 +3,11 @@
 ; Verify that the offset assigned to the stack protector is at the top of the
 ; frame, covering the locals.
 ; CHECK-LABEL: fn:
-; CHECK: sub sp, sp, #24
+; CHECK: sub sp, sp, #16
 ; CHECK-NEXT: sub sp, sp, #65536
 ; CHECK-NEXT: ldr r1, .LCPI0_0
-; CHECK-NEXT: str r1, [sp, #8]
 ; CHECK-NEXT: ldr r1, [r1]
 ; CHECK-NEXT: add lr, sp, #65536
-; CHECK-NEXT: str r1, [lr, #20]
+; CHECK-NEXT: str r1, [lr, #12]
 ; CHECK: .LCPI0_0:
 ; CHECK-NEXT: .long __stack_chk_guard

@@ -31,7 +31,7 @@ entry:
 ; NACL-LABEL: g:
 ; Ensure that use movw instead of constpool for the loop trip count. But don't
 ; match the __stack_chk_guard movw
-; NACL: movw r{{[1-9]}}, #
+; NACL: movw {{r[0-9]+|lr}}, #
 ; NACL: ldr
 ; NACL: sub
 ; NACL: str
@@ -49,7 +49,7 @@ entry:
 ; CHECK: sub
 ; CHECK: vst1
 ; CHECK: bne
-; NACL: movw r{{[1-9]}}, #
+; NACL: movw {{r[0-9]+|lr}}, #
 ; NACL: vld1
 ; NACL: sub
 ; NACL: vst1

@@ -5,7 +5,7 @@ target triple = "armv6kz-unknown-unknown-gnueabihf"
 ; Unfortunately, this test is sort of fragile... the original issue only
 ; shows up if scheduling happens in a very specific order. But including
 ; it anyway just to demonstrate the issue.
-; CHECK: pop {r4, lr}
+; CHECK: pop {r{{[0-9]+}}, lr}
 @e = external local_unnamed_addr constant [0 x i32 (i32, i32)*], align 4

@@ -12,7 +12,10 @@ entry:
 ; MINGW: ldr [[REG2:r[0-9]+]], {{\[}}[[REG]]]
 ; MINGW: ldr {{r[0-9]+}}, {{\[}}[[REG2]]]
 ; MINGW: bl other
-; MINGW: ldr {{r[0-9]+}}, {{\[}}[[REG2]]]
+; MINGW: movw [[REG3:r[0-9]+]], :lower16:.refptr.__stack_chk_guard
+; MINGW: movt [[REG3]], :upper16:.refptr.__stack_chk_guard
+; MINGW: ldr [[REG4:r[0-9]+]], {{\[}}[[REG3]]]
+; MINGW: ldr {{r[0-9]+}}, {{\[}}[[REG4]]]
 ; MINGW: bl __stack_chk_fail
 
   %c = alloca i8, align 1