[ARM] Describe defs/uses of VLLDM and VLSTM

The VLLDM and VLSTM instructions are incompletely specified.  They
(potentially) write (or read, respectively) registers Q0-Q7, VPR, and
FPSCR, but the compiler is unaware of it.

In the new test case `cmse-vlldm-no-reorder.ll` case the compiler
missed an anti-dependency and reordered a `VLLDM` ahead of the
instruction, which stashed the return value from the non-secure call,
effectively clobbering said value.

This test case does not fail with upstream LLVM, because of scheduling
differences and I couldn't find a test case for the VLSTM either.

Differential Revision: https://reviews.llvm.org/D81586
This commit is contained in:
Momchil Velikov 2020-06-23 15:24:50 +01:00
parent 510796d1a7
commit adf7973fd3
5 changed files with 182 additions and 23 deletions

View File

@ -85,12 +85,15 @@ namespace {
const BitVector &ClearRegs);
void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
const LivePhysRegs &LiveRegs,
SmallVectorImpl<unsigned> &AvailableRegs);
void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
const LivePhysRegs &LiveRegs,
SmallVectorImpl<unsigned> &ScratchRegs);
void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL);
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
const LivePhysRegs &LiveRegs);
void CMSERestoreFPRegs(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
SmallVectorImpl<unsigned> &AvailableRegs);
@ -1202,17 +1205,17 @@ ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
void ARMExpandPseudo::CMSESaveClearFPRegs(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
SmallVectorImpl<unsigned> &ScratchRegs) {
const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
if (STI->hasV8_1MMainlineOps())
CMSESaveClearFPRegsV81(MBB, MBBI, DL);
CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
else
CMSESaveClearFPRegsV8(MBB, MBBI, DL, ScratchRegs);
CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
}
// Save and clear FP registers if present
void ARMExpandPseudo::CMSESaveClearFPRegsV8(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
SmallVectorImpl<unsigned> &ScratchRegs) {
const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
if (!STI->hasFPRegs())
return;
@ -1271,7 +1274,11 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
// Lazy store all fp registers to the stack
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
.add(predOps(ARMCC::AL));
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
VLSTM.addReg(R, RegState::Implicit |
(LiveRegs.contains(R) ? 0 : RegState::Undef));
// Restore all arguments
for (const auto &Regs : ClearedFPRegs) {
@ -1343,7 +1350,8 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc &DL) {
DebugLoc &DL,
const LivePhysRegs &LiveRegs) {
BitVector ClearRegs(32, true);
bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
@ -1358,9 +1366,13 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
.add(predOps(ARMCC::AL));
// Lazy store all FP registers to the stack
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
VLSTM.addReg(R, RegState::Implicit |
(LiveRegs.contains(R) ? 0 : RegState::Undef));
} else {
// Push all the callee-saved registers (s16-s31).
MachineInstrBuilder VPUSH =
@ -1754,13 +1766,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int JumpReg,
bool Thumb1Only) {
const LivePhysRegs &LiveRegs, bool Thumb1Only) {
const DebugLoc &DL = MBBI->getDebugLoc();
if (Thumb1Only) { // push Lo and Hi regs separately
MachineInstrBuilder PushMIB =
BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg)
PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
PushMIB.addReg(
Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
}
// Thumb1 can only tPUSH low regs, so we copy the high regs to the low
// regs that we just saved and push the low regs again, taking care to
@ -1773,7 +1787,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
if (JumpReg == LoReg)
continue;
BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
.addReg(HiReg, RegState::Undef)
.addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
.add(predOps(ARMCC::AL));
--HiReg;
}
@ -1791,19 +1805,21 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
.addReg(ARM::R8)
.addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
.add(predOps(ARMCC::AL));
BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
.add(predOps(ARMCC::AL))
.addReg(LoReg);
.addReg(LoReg, RegState::Kill);
}
} else { // push Lo and Hi registers with a single instruction
MachineInstrBuilder PushMIB =
BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
PushMIB.addReg(
Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
}
}
}
@ -1934,7 +1950,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::tBLXNS_CALL: {
DebugLoc DL = MBBI->getDebugLoc();
unsigned JumpReg = MBBI->getOperand(0).getReg();
CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg,
// Figure out which registers are live at the point immediately before the
// call. When we indiscriminately push a set of registers, the live
// registers are added as ordinary use operands, whereas dead registers
// are "undef".
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(MBB);
for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
LiveRegs.stepBackward(MI);
LiveRegs.stepBackward(*MBBI);
CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
AFI->isThumb1OnlyFunction());
SmallVector<unsigned, 16> ClearRegs;
@ -1971,7 +1998,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.add(predOps(ARMCC::AL));
}
CMSESaveClearFPRegs(MBB, MBBI, DL,
CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
ClearRegs); // save+clear FP regs with ClearRegs
CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);

View File

@ -287,7 +287,6 @@ def : MnemonicAlias<"vstm", "vstmia">;
//===----------------------------------------------------------------------===//
// Lazy load / store multiple Instructions
//
let mayLoad = 1 in
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlldm${p}\t$Rn", "", []>,
Requires<[HasV8MMainline, Has8MSecExt]> {
@ -298,9 +297,9 @@ def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
let Inst{15-12} = 0;
let Inst{7-0} = 0;
let mayLoad = 1;
let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV];
}
let mayStore = 1 in
def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlstm${p}\t$Rn", "", []>,
Requires<[HasV8MMainline, Has8MSecExt]> {

View File

@ -0,0 +1,21 @@
; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard %s -o - | \
; RUN: FileCheck %s
@g = hidden local_unnamed_addr global float (...)* null, align 4
@a = hidden local_unnamed_addr global float 0.000000e+00, align 4
define hidden void @f() local_unnamed_addr #0 {
entry:
%0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4
%call = tail call nnan ninf nsz float %0() #1
store float %call, float* @a, align 4
ret void
}
; CHECK: blxns r{{[0-9]+}}
; CHECK: vmov r[[T:[0-9]+]], s0
; CHECK: vlldm sp
; CHECK: vmov s0, r[[T]]
attributes #0 = { nounwind }
attributes #1 = { nounwind "cmse_nonsecure_call" }

View File

@ -0,0 +1,112 @@
# RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard --run-pass=arm-pseudo %s -o - | \
# RUN: FileCheck %s
--- |
; ModuleID = 'cmse-vlldm-no-reorder.ll'
source_filename = "cmse-vlldm-no-reorder.ll"
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8m.main"
@g = hidden local_unnamed_addr global float (...)* null, align 4
@a = hidden local_unnamed_addr global float 0.000000e+00, align 4
; Function Attrs: nounwind
define hidden void @f() local_unnamed_addr #0 {
entry:
%0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4
%call = tail call nnan ninf nsz float %0() #2
store float %call, float* @a, align 4
ret void
}
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #1
attributes #0 = { nounwind "target-cpu"="cortex-m33" }
attributes #1 = { nounwind }
attributes #2 = { nounwind "cmse_nonsecure_call" }
...
---
name: f
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 8
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $r7, $lr
$sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
renamable $r0 = t2MOVi32imm @g
renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from `float ()** bitcast (float (...)** @g to float ()**)`)
tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0
renamable $r0 = t2MOVi32imm @a
VSTRS killed renamable $s0, killed renamable $r0, 0, 14 /* CC::al */, $noreg :: (store 4 into @a)
$sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r7, def $pc
...
# CHECK-LABEL: bb.0.entry:
# CHECK: $sp = t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5, $r6, undef $r7, $r8, $r9, $r10, $r11
# CHECK-NEXT: $r0 = t2BICri $r0, 1, 14 /* CC::al */, $noreg, $noreg
# CHECK-NEXT: $sp = tSUBspi $sp, 34, 14 /* CC::al */, $noreg
# CHECK-NEXT: VLSTM $sp, 14 /* CC::al */, $noreg, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $q0, implicit undef $q1, implicit undef $q2, implicit undef $q3, implicit undef $q4, implicit undef $q5, implicit undef $q6, implicit undef $q7
# CHECK-NEXT: $r1 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r2 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r5 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r6 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r7 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r8 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r9 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r10 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r11 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
# CHECK-NEXT: t2MSR_M 3072, $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr
# CHECK-NEXT: tBLXNSr 14 /* CC::al */, $noreg, killed $r0, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $s0
# CHECK-NEXT: $r12 = VMOVRS $s0, 14 /* CC::al */, $noreg
# CHECK-NEXT: VLLDM $sp, 14 /* CC::al */, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv
# CHECK-NEXT: $s0 = VMOVSR $r12, 14 /* CC::al */, $noreg
# CHECK-NEXT: $sp = tADDspi $sp, 34, 14 /* CC::al */, $noreg
# CHECK-NEXT: $sp = t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11

View File

@ -62,7 +62,7 @@ body: |
$sp = tSUBspi $sp, 34, 14, $noreg
VLSTM $sp, 14, $noreg
tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp
VLLDM $sp, 14, $noreg
VLLDM $sp, 14, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv
$sp = tADDspi $sp, 34, 14, $noreg
$sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
$sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc