forked from OSchip/llvm-project
[ARM] Describe defs/uses of VLLDM and VLSTM
The VLLDM and VLSTM instructions are incompletely specified. They (potentially) write (or read, respectively) registers Q0-Q7, VPR, and FPSCR, but the compiler is unaware of it. In the new test case `cmse-vlldm-no-reorder.ll` case the compiler missed an anti-dependency and reordered a `VLLDM` ahead of the instruction, which stashed the return value from the non-secure call, effectively clobbering said value. This test case does not fail with upstream LLVM, because of scheduling differences and I couldn't find a test case for the VLSTM either. Differential Revision: https://reviews.llvm.org/D81586
This commit is contained in:
parent
510796d1a7
commit
adf7973fd3
|
@ -85,12 +85,15 @@ namespace {
|
|||
const BitVector &ClearRegs);
|
||||
void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
|
||||
const LivePhysRegs &LiveRegs,
|
||||
SmallVectorImpl<unsigned> &AvailableRegs);
|
||||
void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
|
||||
const LivePhysRegs &LiveRegs,
|
||||
SmallVectorImpl<unsigned> &ScratchRegs);
|
||||
void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, DebugLoc &DL);
|
||||
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
|
||||
const LivePhysRegs &LiveRegs);
|
||||
void CMSERestoreFPRegs(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
|
||||
SmallVectorImpl<unsigned> &AvailableRegs);
|
||||
|
@ -1202,17 +1205,17 @@ ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
|
|||
|
||||
void ARMExpandPseudo::CMSESaveClearFPRegs(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
|
||||
SmallVectorImpl<unsigned> &ScratchRegs) {
|
||||
const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
|
||||
if (STI->hasV8_1MMainlineOps())
|
||||
CMSESaveClearFPRegsV81(MBB, MBBI, DL);
|
||||
CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
|
||||
else
|
||||
CMSESaveClearFPRegsV8(MBB, MBBI, DL, ScratchRegs);
|
||||
CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
|
||||
}
|
||||
|
||||
// Save and clear FP registers if present
|
||||
void ARMExpandPseudo::CMSESaveClearFPRegsV8(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
|
||||
SmallVectorImpl<unsigned> &ScratchRegs) {
|
||||
const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
|
||||
if (!STI->hasFPRegs())
|
||||
return;
|
||||
|
||||
|
@ -1271,7 +1274,11 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
|
|||
// Lazy store all fp registers to the stack
|
||||
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
|
||||
.addReg(ARM::SP)
|
||||
.add(predOps(ARMCC::AL));
|
||||
.add(predOps(ARMCC::AL));
|
||||
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
|
||||
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
|
||||
VLSTM.addReg(R, RegState::Implicit |
|
||||
(LiveRegs.contains(R) ? 0 : RegState::Undef));
|
||||
|
||||
// Restore all arguments
|
||||
for (const auto &Regs : ClearedFPRegs) {
|
||||
|
@ -1343,7 +1350,8 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
|
|||
|
||||
void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
DebugLoc &DL) {
|
||||
DebugLoc &DL,
|
||||
const LivePhysRegs &LiveRegs) {
|
||||
BitVector ClearRegs(32, true);
|
||||
bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
|
||||
|
||||
|
@ -1358,9 +1366,13 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
|
|||
.add(predOps(ARMCC::AL));
|
||||
|
||||
// Lazy store all FP registers to the stack
|
||||
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
|
||||
.addReg(ARM::SP)
|
||||
.add(predOps(ARMCC::AL));
|
||||
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
|
||||
.addReg(ARM::SP)
|
||||
.add(predOps(ARMCC::AL));
|
||||
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
|
||||
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
|
||||
VLSTM.addReg(R, RegState::Implicit |
|
||||
(LiveRegs.contains(R) ? 0 : RegState::Undef));
|
||||
} else {
|
||||
// Push all the callee-saved registers (s16-s31).
|
||||
MachineInstrBuilder VPUSH =
|
||||
|
@ -1754,13 +1766,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
|
|||
static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, int JumpReg,
|
||||
bool Thumb1Only) {
|
||||
const LivePhysRegs &LiveRegs, bool Thumb1Only) {
|
||||
const DebugLoc &DL = MBBI->getDebugLoc();
|
||||
if (Thumb1Only) { // push Lo and Hi regs separately
|
||||
MachineInstrBuilder PushMIB =
|
||||
BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
|
||||
for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg)
|
||||
PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
|
||||
for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
|
||||
PushMIB.addReg(
|
||||
Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
|
||||
}
|
||||
|
||||
// Thumb1 can only tPUSH low regs, so we copy the high regs to the low
|
||||
// regs that we just saved and push the low regs again, taking care to
|
||||
|
@ -1773,7 +1787,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
|
|||
if (JumpReg == LoReg)
|
||||
continue;
|
||||
BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
|
||||
.addReg(HiReg, RegState::Undef)
|
||||
.addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
|
||||
.add(predOps(ARMCC::AL));
|
||||
--HiReg;
|
||||
}
|
||||
|
@ -1791,19 +1805,21 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
|
|||
if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
|
||||
int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
|
||||
BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
|
||||
.addReg(ARM::R8)
|
||||
.addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
|
||||
.add(predOps(ARMCC::AL));
|
||||
BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
|
||||
.add(predOps(ARMCC::AL))
|
||||
.addReg(LoReg);
|
||||
.addReg(LoReg, RegState::Kill);
|
||||
}
|
||||
} else { // push Lo and Hi registers with a single instruction
|
||||
MachineInstrBuilder PushMIB =
|
||||
BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
|
||||
.addReg(ARM::SP)
|
||||
.add(predOps(ARMCC::AL));
|
||||
for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
|
||||
PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
|
||||
for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
|
||||
PushMIB.addReg(
|
||||
Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1934,7 +1950,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
|||
case ARM::tBLXNS_CALL: {
|
||||
DebugLoc DL = MBBI->getDebugLoc();
|
||||
unsigned JumpReg = MBBI->getOperand(0).getReg();
|
||||
CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg,
|
||||
|
||||
// Figure out which registers are live at the point immediately before the
|
||||
// call. When we indiscriminately push a set of registers, the live
|
||||
// registers are added as ordinary use operands, whereas dead registers
|
||||
// are "undef".
|
||||
LivePhysRegs LiveRegs(*TRI);
|
||||
LiveRegs.addLiveOuts(MBB);
|
||||
for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
|
||||
LiveRegs.stepBackward(MI);
|
||||
LiveRegs.stepBackward(*MBBI);
|
||||
|
||||
CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
|
||||
AFI->isThumb1OnlyFunction());
|
||||
|
||||
SmallVector<unsigned, 16> ClearRegs;
|
||||
|
@ -1971,7 +1998,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
|||
.add(predOps(ARMCC::AL));
|
||||
}
|
||||
|
||||
CMSESaveClearFPRegs(MBB, MBBI, DL,
|
||||
CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
|
||||
ClearRegs); // save+clear FP regs with ClearRegs
|
||||
CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
|
||||
|
||||
|
|
|
@ -287,7 +287,6 @@ def : MnemonicAlias<"vstm", "vstmia">;
|
|||
//===----------------------------------------------------------------------===//
|
||||
// Lazy load / store multiple Instructions
|
||||
//
|
||||
let mayLoad = 1 in
|
||||
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
|
||||
NoItinerary, "vlldm${p}\t$Rn", "", []>,
|
||||
Requires<[HasV8MMainline, Has8MSecExt]> {
|
||||
|
@ -298,9 +297,9 @@ def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
|
|||
let Inst{15-12} = 0;
|
||||
let Inst{7-0} = 0;
|
||||
let mayLoad = 1;
|
||||
let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV];
|
||||
}
|
||||
|
||||
let mayStore = 1 in
|
||||
def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
|
||||
NoItinerary, "vlstm${p}\t$Rn", "", []>,
|
||||
Requires<[HasV8MMainline, Has8MSecExt]> {
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard %s -o - | \
|
||||
; RUN: FileCheck %s
|
||||
|
||||
@g = hidden local_unnamed_addr global float (...)* null, align 4
|
||||
@a = hidden local_unnamed_addr global float 0.000000e+00, align 4
|
||||
|
||||
define hidden void @f() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4
|
||||
%call = tail call nnan ninf nsz float %0() #1
|
||||
store float %call, float* @a, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: blxns r{{[0-9]+}}
|
||||
; CHECK: vmov r[[T:[0-9]+]], s0
|
||||
; CHECK: vlldm sp
|
||||
; CHECK: vmov s0, r[[T]]
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind "cmse_nonsecure_call" }
|
|
@ -0,0 +1,112 @@
|
|||
# RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard --run-pass=arm-pseudo %s -o - | \
|
||||
# RUN: FileCheck %s
|
||||
--- |
|
||||
; ModuleID = 'cmse-vlldm-no-reorder.ll'
|
||||
source_filename = "cmse-vlldm-no-reorder.ll"
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv8m.main"
|
||||
|
||||
@g = hidden local_unnamed_addr global float (...)* null, align 4
|
||||
@a = hidden local_unnamed_addr global float 0.000000e+00, align 4
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define hidden void @f() local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4
|
||||
%call = tail call nnan ninf nsz float %0() #2
|
||||
store float %call, float* @a, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #1
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="cortex-m33" }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind "cmse_nonsecure_call" }
|
||||
|
||||
...
|
||||
---
|
||||
name: f
|
||||
alignment: 2
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins: []
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 8
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: true
|
||||
hasCalls: true
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r7, $lr
|
||||
|
||||
$sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 8
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
renamable $r0 = t2MOVi32imm @g
|
||||
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from `float ()** bitcast (float (...)** @g to float ()**)`)
|
||||
tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0
|
||||
renamable $r0 = t2MOVi32imm @a
|
||||
VSTRS killed renamable $s0, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into @a)
|
||||
$sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc
|
||||
|
||||
...
|
||||
|
||||
# CHECK-LABEL: bb.0.entry:
|
||||
# CHECK: $sp = t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5, $r6, undef $r7, $r8, $r9, $r10, $r11
|
||||
# CHECK-NEXT: $r0 = t2BICri $r0, 1, 14 /* CC::al */, $noreg, $noreg
|
||||
# CHECK-NEXT: $sp = tSUBspi $sp, 34, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: VLSTM $sp, 14 /* CC::al */, $noreg, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $q0, implicit undef $q1, implicit undef $q2, implicit undef $q3, implicit undef $q4, implicit undef $q5, implicit undef $q6, implicit undef $q7
|
||||
# CHECK-NEXT: $r1 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r2 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r5 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r6 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r7 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r8 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r9 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r10 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r11 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: t2MSR_M 3072, $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
# CHECK-NEXT: tBLXNSr 14 /* CC::al */, $noreg, killed $r0, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0
|
||||
# CHECK-NEXT: $r12 = VMOVRS $s0, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: VLLDM $sp, 14 /* CC::al */, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv
|
||||
# CHECK-NEXT: $s0 = VMOVSR $r12, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $sp = tADDspi $sp, 34, 14 /* CC::al */, $noreg
|
||||
# CHECK-NEXT: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
|
||||
|
|
@ -62,7 +62,7 @@ body: |
|
|||
$sp = tSUBspi $sp, 34, 14, $noreg
|
||||
VLSTM $sp, 14, $noreg
|
||||
tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp
|
||||
VLLDM $sp, 14, $noreg
|
||||
VLLDM $sp, 14, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv
|
||||
$sp = tADDspi $sp, 34, 14, $noreg
|
||||
$sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
|
||||
$sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc
|
||||
|
|
Loading…
Reference in New Issue