From 5d72c5de02f7bf18aa4d62db1075ab4c0a8b40fc Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Wed, 1 Oct 2014 19:21:03 +0000
Subject: [PATCH] ARM: allow copying of CPSR when all else fails.

As with x86 and AArch64, certain situations can arise where we need to spill
CPSR in the middle of a calculation. These situations should be avoided where
possible (MRS/MSR is rather expensive); ARM is actually better at avoiding
them than the other two targets, since it tries to glue CPSR defs to their
uses. But as a last-ditch effort, copying is better than crashing.

rdar://problem/18011155

llvm-svn: 218789
---
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp    | 49 +++++++++++++++++++++
 llvm/lib/Target/ARM/ARMBaseInstrInfo.h      |  7 +++
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp |  2 +-
 llvm/test/CodeGen/ARM/copy-cpsr.ll          | 41 +++++++++++++++++
 4 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/ARM/copy-cpsr.ll

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8ea87f9930c0..ee0143e2e5d3 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -698,6 +698,49 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
   return Size;
 }
 
+void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned DestReg, bool KillSrc,
+                                    const ARMSubtarget &Subtarget) const {
+  unsigned Opc = Subtarget.isThumb()
+                     ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
+                     : ARM::MRS;
+
+  MachineInstrBuilder MIB =
+      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
+
+  // There is only 1 A/R class MRS instruction, and it always refers to
+  // APSR. However, there are lots of other possibilities on M-class cores.
+  if (Subtarget.isMClass())
+    MIB.addImm(0x800);
+
+  AddDefaultPred(MIB);
+
+  MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
+}
+
+void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned SrcReg, bool KillSrc,
+                                  const ARMSubtarget &Subtarget) const {
+  unsigned Opc = Subtarget.isThumb()
+                     ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
+                     : ARM::MSR;
+
+  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+
+  if (Subtarget.isMClass())
+    MIB.addImm(0x800);
+  else
+    MIB.addImm(8);
+
+  MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+  AddDefaultPred(MIB);
+
+  MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
+}
+
 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DestReg, unsigned SrcReg,
@@ -785,6 +828,12 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = ARM::VMOVS;
     BeginIdx = ARM::ssub_0;
     SubRegs = 2;
+  } else if (SrcReg == ARM::CPSR) {
+    copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
+    return;
+  } else if (DestReg == ARM::CPSR) {
+    copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
+    return;
   }
 
   assert(Opc && "Impossible reg-to-reg copy");
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 613c7afcf5a5..ab5dc661faf3 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -156,6 +156,13 @@ public:
   unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
                                     int &FrameIndex) const override;
 
+  void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                  unsigned SrcReg, bool KillSrc,
+                  const ARMSubtarget &Subtarget) const;
+  void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned DestReg, bool KillSrc,
+                    const ARMSubtarget &Subtarget) const;
+
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    DebugLoc DL, unsigned DestReg, unsigned SrcReg,
                    bool KillSrc) const override;
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 6207fadcccb1..a7bf4f5ead30 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -182,7 +182,7 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind
 const TargetRegisterClass *
 ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
   if (RC == &ARM::CCRRegClass)
-    return nullptr; // Can't copy CCR registers.
+    return &ARM::rGPRRegClass; // Can't copy CCR directly; go through a GPR.
   return RC;
 }
 
diff --git a/llvm/test/CodeGen/ARM/copy-cpsr.ll b/llvm/test/CodeGen/ARM/copy-cpsr.ll
new file mode 100644
index 000000000000..8b7dc038fc92
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/copy-cpsr.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
+; RUN: llc -mtriple=thumbv7m-none-eabi -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
+
+; In the ARM backend, most compares are glued to their uses so CPSR can't
+; escape. However, for long ADCS chains (and as a last-ditch fallback) the
+; dependency is carried in the DAG, because duplicating the chains can be
+; more expensive than copying CPSR.
+
+; Crafting a test for this was a little tricky; in case it breaks, here are
+; some notes on what I was trying to achieve:
+;   + We want 2 long ADCS chains.
+;   + We want them to split after an initial common prefix (so that a single
+;     CPSR is used twice).
+;   + We want both chains to write CPSR post-split (so that the copy can't be
+;     elided).
+;   + We want the chains to be long enough that duplicating them is expensive.
+
+define void @test_copy_cpsr(i128 %lhs, i128 %rhs, i128* %addr) {
+; CHECK-ARM: test_copy_cpsr:
+; CHECK-THUMB: test_copy_cpsr:
+
+; CHECK-ARM: mrs [[TMP:r[0-9]+]], apsr @ encoding: [0x00,0x{{[0-9a-f]}}0,0x0f,0xe1]
+; CHECK-ARM: msr APSR_nzcvq, [[TMP]] @ encoding: [0x0{{[0-9a-f]}},0xf0,0x28,0xe1]
+
+  ; In Thumb mode v7M and v7A/R have different MRS/MSR instructions that happen
+  ; to overlap for the apsr case, so it's definitely worth checking both.
+; CHECK-THUMB: mrs [[TMP:r[0-9]+]], apsr @ encoding: [0xef,0xf3,0x00,0x8{{[0-9a-f]}}]
+; CHECK-THUMB: msr {{APSR|apsr}}_nzcvq, [[TMP]] @ encoding: [0x8{{[0-9a-f]}},0xf3,0x00,0x88]
+
+  %sum = add i128 %lhs, %rhs
+  store volatile i128 %sum, i128* %addr
+
+  %rhs2.tmp1 = trunc i128 %rhs to i64
+  %rhs2 = zext i64 %rhs2.tmp1 to i128
+
+  %sum2 = add i128 %lhs, %rhs2
+  store volatile i128 %sum2, i128* %addr
+
+  ret void
+}
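A note for readers of the test's CHECK lines: the copy expands to an MRS that
saves the status register into a scratch GPR, plus an MSR that writes the
flags back after the intervening flag-clobbering code. Below is a minimal
sketch of the A/R-class (ARM mode) shape, consistent with the CHECK-ARM
patterns above; the use of r0 is purely illustrative, since the register
allocator picks the scratch register out of rGPR (which is what the
getCrossCopyRegClass change permits):

    mrs   r0, apsr        @ save the flags into a GPR; the A/R-class MRS
                          @ always reads APSR, so it takes no selector operand
    ...                   @ flag-clobbering instructions are scheduled here
    msr   APSR_nzcvq, r0  @ write N, Z, C, V, Q back; the mask value 8 from
                          @ MIB.addImm(8) selects the flags ('f') field

On M-class cores the same copy is built from t2MRS_M/t2MSR_M, where the 0x800
immediate selects plain APSR from among the many M-class special registers
(which is why copyFromCPSR only adds that operand on M-class).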