forked from OSchip/llvm-project
ARM: allow copying of CPSR when all else fails.
As with x86 and AArch64, certain situations can arise where we need to spill CPSR in the middle of a calculation. These should be avoided where possible (MRS/MSR is rather expensive), which ARM is actually better at than the other two since it tries to Glue defs to uses, but as a last ditch effort, copying is better than crashing. rdar://problem/18011155 llvm-svn: 218789
This commit is contained in:
parent
2706eb031d
commit
5d72c5de02
|
@ -698,6 +698,49 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
|
|||
return Size;
|
||||
}
|
||||
|
||||
/// Emit an MRS before \p I in \p MBB that reads the status flags into
/// \p DestReg. MRS/MSR is expensive, so this is intended only as a fallback
/// for when a CPSR copy cannot be avoided.
///
/// \param KillSrc  whether the implicit CPSR use is marked as killed.
void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, bool KillSrc,
                                    const ARMSubtarget &Subtarget) const {
  // Start from the ARM-mode opcode and switch to the matching Thumb2 variant
  // (M-class or A/R-class) when compiling for Thumb.
  unsigned MRSOpc = ARM::MRS;
  if (Subtarget.isThumb())
    MRSOpc = Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR;

  MachineInstrBuilder Builder =
      BuildMI(MBB, I, I->getDebugLoc(), get(MRSOpc), DestReg);

  // The single A/R-class MRS always reads APSR, so it needs no selector.
  // M-class cores have many possible sources and take an explicit operand.
  if (Subtarget.isMClass())
    Builder.addImm(0x800);

  AddDefaultPred(Builder);

  // Model the read of CPSR as an implicit use, killed if the caller says so.
  const unsigned CPSRUseFlags = RegState::Implicit | getKillRegState(KillSrc);
  Builder.addReg(ARM::CPSR, CPSRUseFlags);
}
|
||||
|
||||
/// Emit an MSR before \p I in \p MBB that writes \p SrcReg into the status
/// flags. Counterpart of copyFromCPSR; likewise intended only as a fallback
/// because MRS/MSR is expensive.
///
/// \param KillSrc  whether \p SrcReg is marked as killed by the new
///                 instruction.
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned SrcReg, bool KillSrc,
                                  const ARMSubtarget &Subtarget) const {
  // Start from the ARM-mode opcode and switch to the matching Thumb2 variant
  // (M-class or A/R-class) when compiling for Thumb.
  unsigned MSROpc = ARM::MSR;
  if (Subtarget.isThumb())
    MSROpc = Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR;

  MachineInstrBuilder Builder =
      BuildMI(MBB, I, I->getDebugLoc(), get(MSROpc));

  // Destination selector: M-class and A/R-class MSR use different operand
  // values. The accompanying regression test expects both to print as the
  // "nzcvq" flags form.
  Builder.addImm(Subtarget.isMClass() ? 0x800 : 8);

  Builder.addReg(SrcReg, getKillRegState(KillSrc));

  AddDefaultPred(Builder);

  // Model the write of CPSR as an implicit definition.
  Builder.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}
|
||||
|
||||
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
|
@ -785,6 +828,12 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
Opc = ARM::VMOVS;
|
||||
BeginIdx = ARM::ssub_0;
|
||||
SubRegs = 2;
|
||||
} else if (SrcReg == ARM::CPSR) {
|
||||
copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
|
||||
return;
|
||||
} else if (DestReg == ARM::CPSR) {
|
||||
copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
|
||||
return;
|
||||
}
|
||||
|
||||
assert(Opc && "Impossible reg-to-reg copy");
|
||||
|
|
|
@ -156,6 +156,13 @@ public:
|
|||
unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const override;
|
||||
|
||||
/// Insert an MSR before \p I in \p MBB that copies \p SrcReg into CPSR.
/// The opcode and selector operand are chosen from \p Subtarget (ARM,
/// Thumb2 A/R-class, or Thumb2 M-class). \p KillSrc marks \p SrcReg as
/// killed by the emitted instruction.
void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
unsigned SrcReg, bool KillSrc,
|
||||
const ARMSubtarget &Subtarget) const;
|
||||
/// Insert an MRS before \p I in \p MBB that copies CPSR into \p DestReg.
/// The opcode is chosen from \p Subtarget (ARM, Thumb2 A/R-class, or
/// Thumb2 M-class). \p KillSrc marks the implicit CPSR use as killed.
void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
unsigned DestReg, bool KillSrc,
|
||||
const ARMSubtarget &Subtarget) const;
|
||||
|
||||
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const override;
|
||||
|
|
|
@ -182,7 +182,7 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind
|
|||
/// Return the register class to use when a value of class \p RC has to be
/// copied through another register class.
///
/// Every class other than CCR is returned unchanged. CCR (the status flags)
/// has no direct register-to-register move, so it is routed through a
/// general-purpose register; copyPhysReg handles the CPSR source and
/// destination cases by calling copyFromCPSR / copyToCPSR.
const TargetRegisterClass *
ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  // Previously this returned nullptr ("can't copy"), which left the
  // rGPR return below it unreachable and kept CPSR cross-copies impossible.
  if (RC == &ARM::CCRRegClass)
    return &ARM::rGPRRegClass; // CCR is copied via a GPR.
  return RC;
}
|
||||
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
; RUN: llc -mtriple=armv7s-apple-ios7.0 -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-ARM
|
||||
; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
|
||||
; RUN: llc -mtriple=thumbv7m-none-eabi -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
|
||||
|
||||
; In the ARM backend, most compares are glued to their uses so CPSR can't
|
||||
; escape. However, for long ADCS chains (and last ditch fallback) the dependency
|
||||
; is carried in the DAG because duplicating them can be more expensive than
|
||||
; copying CPSR.
|
||||
|
||||
; Crafting a test for this was a little tricky, in case it breaks here are some
|
||||
; notes on what I was trying to achieve:
|
||||
; + We want 2 long ADCS chains
|
||||
; + We want them to split after an initial common prefix (so that a single
|
||||
; CPSR is used twice).
|
||||
; + We want both chains to write CPSR post-split (so that the copy can't be
|
||||
; elided).
|
||||
; + We want the chains to be long enough that duplicating them is expensive.
|
||||
|
||||
define void @test_copy_cpsr(i128 %lhs, i128 %rhs, i128* %addr) {
|
||||
; CHECK-ARM: test_copy_cpsr:
|
||||
; CHECK-THUMB: test_copy_cpsr:
|
||||
|
||||
; CHECK-ARM: mrs [[TMP:r[0-9]+]], apsr @ encoding: [0x00,0x{{[0-9a-f]}}0,0x0f,0xe1]
|
||||
; CHECK-ARM: msr APSR_nzcvq, [[TMP]] @ encoding: [0x0{{[0-9a-f]}},0xf0,0x28,0xe1]
|
||||
|
||||
; In Thumb mode v7M and v7AR have different MRS/MSR instructions that happen
|
||||
; to overlap for the apsr case, so it's definitely worth checking both.
|
||||
; CHECK-THUMB: mrs [[TMP:r[0-9]+]], apsr @ encoding: [0xef,0xf3,0x00,0x8{{[0-9a-f]}}]
|
||||
; CHECK-THUMB: msr {{APSR|apsr}}_nzcvq, [[TMP]] @ encoding: [0x8{{[0-9a-f]}},0xf3,0x00,0x88]
|
||||
|
||||
%sum = add i128 %lhs, %rhs
|
||||
store volatile i128 %sum, i128* %addr
|
||||
|
||||
%rhs2.tmp1 = trunc i128 %rhs to i64
|
||||
%rhs2 = zext i64 %rhs2.tmp1 to i128
|
||||
|
||||
%sum2 = add i128 %lhs, %rhs2
|
||||
store volatile i128 %sum2, i128* %addr
|
||||
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue