[ARM] unwinding .pad instructions missing in execute-only prologue

If the stack pointer is altered for local variables and we are generating
Thumb2 execute-only code the .pad directive is missing.

Usually the size of the adjustment is stored in a PC-relative location
and loaded into a register which is then added to the stack pointer.
However, when we are generating execute-only code the size of the
adjustment is instead generated using the MOVW/MOVT instruction pair.

As a by-product of handling the execute-only case this also fixes an
existing issue that in the non-execute-only case the .pad directive was
generated against the load of the constant to a register instruction,
instead of the instruction which adds the register to the stack pointer.

Differential Revision: https://reviews.llvm.org/D76849
This commit is contained in:
Keith Walker 2020-03-26 09:40:28 +00:00
parent 08fab9ebec
commit 01dc10774e
6 changed files with 109 additions and 34 deletions

View File

@ -1084,16 +1084,26 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
unsigned Opc = MI->getOpcode();
unsigned SrcReg, DstReg;
if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
// Two special cases:
// 1) tPUSH does not have src/dst regs.
// 2) for Thumb1 code we sometimes materialize the constant via constpool
// load. Yes, this is pretty fragile, but for now I don't see better
// way... :(
switch (Opc) {
case ARM::tPUSH:
// special case: tPUSH does not have src/dst regs.
SrcReg = DstReg = ARM::SP;
} else {
break;
case ARM::tLDRpci:
case ARM::t2MOVi16:
case ARM::t2MOVTi16:
// special cases:
// 1) for Thumb1 code we sometimes materialize the constant via constpool
// load.
// 2) for Thumb2 execute only code we materialize the constant via
// immediate constants in 2 separate instructions (MOVW/MOVT).
SrcReg = ~0U;
DstReg = MI->getOperand(0).getReg();
break;
default:
SrcReg = MI->getOperand(1).getReg();
DstReg = MI->getOperand(0).getReg();
break;
}
// Try to figure out the unwinding opcode out of src / dst regs.
@ -1197,23 +1207,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
case ARM::tADDrSPi:
Offset = -MI->getOperand(2).getImm()*4;
break;
case ARM::tLDRpci: {
// Grab the constpool index and check, whether it corresponds to
// original or cloned constpool entry.
unsigned CPI = MI->getOperand(1).getIndex();
const MachineConstantPool *MCP = MF.getConstantPool();
if (CPI >= MCP->getConstants().size())
CPI = AFI->getOriginalCPIdx(CPI);
assert(CPI != -1U && "Invalid constpool index");
// Derive the actual offset.
const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
// FIXME: Check for user, it should be "add" instruction!
Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
case ARM::tADDhirr:
Offset =
-AFI->EHPrologueOffsetInRegs.lookup(MI->getOperand(2).getReg());
break;
}
}
if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) {
if (DstReg == FramePtr && FramePtr != ARM::SP)
@ -1233,17 +1231,46 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
} else if (DstReg == ARM::SP) {
MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
} else if (Opc == ARM::tMOVr) {
} else {
int64_t Offset = 0;
switch (Opc) {
case ARM::tMOVr:
// If a Thumb1 function spills r8-r11, we copy the values to low
// registers before pushing them. Record the copy so we can emit the
// correct ".save" later.
AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
} else {
break;
case ARM::tLDRpci: {
// Grab the constpool index and check, whether it corresponds to
// original or cloned constpool entry.
unsigned CPI = MI->getOperand(1).getIndex();
const MachineConstantPool *MCP = MF.getConstantPool();
if (CPI >= MCP->getConstants().size())
CPI = AFI->getOriginalCPIdx(CPI);
assert(CPI != -1U && "Invalid constpool index");
// Derive the actual offset.
const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
Offset = cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
break;
}
case ARM::t2MOVi16:
Offset = MI->getOperand(1).getImm();
AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
break;
case ARM::t2MOVTi16:
Offset = MI->getOperand(2).getImm();
AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16);
break;
default:
MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
}
}
}
}
// Simple pseudo-instructions have their lowering (with expansion to real
// instructions) auto-generated.

View File

@ -852,10 +852,13 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned ImmVal = (unsigned)MO.getImm();
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
unsigned MIFlags = MI.getFlags();
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
LO16.cloneMemRefs(MI);
HI16.cloneMemRefs(MI);
LO16.setMIFlags(MIFlags);
HI16.setMIFlags(MIFlags);
LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
if (isCC)
@ -867,6 +870,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned LO16Opc = 0;
unsigned HI16Opc = 0;
unsigned MIFlags = MI.getFlags();
if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
LO16Opc = ARM::t2MOVi16;
HI16Opc = ARM::t2MOVTi16;
@ -880,6 +884,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
LO16.setMIFlags(MIFlags);
HI16.setMIFlags(MIFlags);
switch (MO.getType()) {
case MachineOperand::MO_Immediate: {
unsigned Imm = MO.getImm();

View File

@ -245,6 +245,7 @@ public:
}
DenseMap<unsigned, unsigned> EHPrologueRemappedRegs;
DenseMap<unsigned, unsigned> EHPrologueOffsetInRegs;
void setPreservesR0() { PreservesR0 = true; }
bool getPreservesR0() const { return PreservesR0; }

View File

@ -88,8 +88,10 @@ emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
0, MIFlags);
}
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP)
.addReg(ARM::SP).addReg(ScratchReg, RegState::Kill)
.add(predOps(ARMCC::AL));
.addReg(ARM::SP)
.addReg(ScratchReg, RegState::Kill)
.add(predOps(ARMCC::AL))
.setMIFlags(MIFlags);
return;
}
// FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate

View File

@ -9,8 +9,8 @@ define void @vla_emergency_spill(i32 %n) {
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: .setfp r7, sp, #12
; CHECK-NEXT: add r7, sp, #12
; CHECK-NEXT: .pad #4100
; CHECK-NEXT: ldr r6, .LCPI0_0
; CHECK-NEXT: .pad #4100
; CHECK-NEXT: add sp, r6
; CHECK-NEXT: mov r6, sp
; CHECK-NEXT: adds r0, r0, #7
@ -59,8 +59,8 @@ define void @simple_emergency_spill(i32 %n) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: .pad #8196
; CHECK-NEXT: ldr r7, .LCPI1_0
; CHECK-NEXT: .pad #8196
; CHECK-NEXT: add sp, r7
; CHECK-NEXT: add r0, sp, #4
; CHECK-NEXT: ldr r1, .LCPI1_2
@ -119,8 +119,8 @@ define void @simple_emergency_spill_nor7(i32 %n) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .pad #8196
; CHECK-NEXT: ldr r6, .LCPI2_0
; CHECK-NEXT: .pad #8196
; CHECK-NEXT: add sp, r6
; CHECK-NEXT: add r0, sp, #4
; CHECK-NEXT: ldr r1, .LCPI2_2

View File

@ -0,0 +1,38 @@
; RUN: llc < %s -mtriple=thumbv8m.base-arm-none-eabi | FileCheck %s
; Literal-pool case: the stack adjustment for the 1600-byte local array
; (400 x i32) is loaded from a constant pool entry into a scratch register,
; and the .pad directive must be emitted against the "add sp" instruction
; rather than against the load of the constant.
define void @fn() {
entry:
; CHECK-LABEL: fn:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:  .save {r4, r5, r6, lr}
; CHECK-NEXT:  push {r4, r5, r6, lr}
; CHECK-NEXT:  ldr r6, .LCPI0_0
; CHECK-NEXT:  .pad #1600
; CHECK-NEXT:  add sp, r6
; The pool entry holds -1600 as an unsigned 32-bit value (0xFFFFF9C0).
; CHECK:       .LCPI0_0:
; CHECK-NEXT:  .long 4294965696
  %a = alloca [400 x i32], align 4
  %arraydecay = getelementptr inbounds [400 x i32], [400 x i32]* %a, i32 0, i32 0
  call void @bar(i32* %arraydecay)
  ret void
}
; Execute-only case (attribute group #0 enables +execute-only): the stack
; adjustment for the 1600-byte local array (400 x i32) is expected to be
; built with a movw/movt pair instead of a literal-pool load, and the .pad
; directive is expected immediately before the "add sp" that applies it.
define void @execute_only_fn() #0 {
entry:
; CHECK-LABEL: execute_only_fn:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; movw supplies the low half and movt the high half of the adjustment:
; (65535 << 16) | 63936 = 0xFFFFF9C0, i.e. -1600 as a 32-bit value.
; CHECK-NEXT: movw r6, #63936
; CHECK-NEXT: movt r6, #65535
; CHECK-NEXT: .pad #1600
; CHECK-NEXT: add sp, r6
%a = alloca [400 x i32], align 4
%arraydecay = getelementptr inbounds [400 x i32], [400 x i32]* %a, i32 0, i32 0
call void @bar(i32* %arraydecay)
ret void
}
declare dso_local void @bar(i32*)
attributes #0 = { noinline optnone "target-features"="+armv8-m.base,+execute-only,+thumb-mode" }