From 2aca0c622a22366d7e26e6002b910558fe641805 Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Mon, 28 Dec 2015 21:40:45 +0000 Subject: [PATCH] [Thumb] Fix assembler error 'cannot honor width suffix pop {lr}' Summary: * avoid generating POP {LR} in Thumb1 epilogues * combine MOV LR, Rx + BX LR -> BX Rx in a peephole optimization pass * combine POP {LR} + B + BX LR -> POP {PC} on v5T+ Test cases by Ana Pazos Differential Revision: http://reviews.llvm.org/D15707 llvm-svn: 256523 --- llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 27 +++++ llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 84 ++++++------- .../CodeGen/Thumb/thumb-shrink-wrapping.ll | 110 +++++++++++++++++- 3 files changed, 167 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index f16ce690859a..725b8383c961 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -151,6 +151,7 @@ namespace { bool MergeBaseUpdateLSDouble(MachineInstr &MI) const; bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); + bool CombineMovBx(MachineBasicBlock &MBB); }; char ARMLoadStoreOpt::ID = 0; } @@ -1825,6 +1826,30 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { return false; } +bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + if (MBBI == MBB.begin() || MBBI == MBB.end() || + MBBI->getOpcode() != ARM::tBX_RET) + return false; + + MachineBasicBlock::iterator Prev = MBBI; + --Prev; + if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR)) + return false; + + for (auto Use : Prev->uses()) + if (Use.isKill()) { + AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) + .addReg(Use.getReg(), RegState::Kill)) + .copyImplicitOps(&*MBBI); + MBB.erase(MBBI); + MBB.erase(Prev); + return true; + } + + llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?"); +} + bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { MF = &Fn; STI = &static_cast(Fn.getSubtarget()); @@ -1844,6 +1869,8 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { Modified |= LoadStoreMultipleOpti(MBB); if (STI->hasV5TOps()) Modified |= MergeReturnIntoLDM(MBB); + if (isThumb1) + Modified |= CombineMovBx(MBB); } Allocator.DestroyAll(); diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 8771c68e5931..93e0ac4aa320 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -433,14 +433,16 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, auto MBBI = MBB.getFirstTerminator(); bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; if (CanRestoreDirectly) { - if (MBBI != MBB.end()) + if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::tPOP_RET); else { - assert(MBB.back().getOpcode() == ARM::tPOP); + auto MBBI_prev = MBBI; + MBBI_prev--; + assert(MBBI_prev->getOpcode() == ARM::tPOP); assert(MBB.succ_size() == 1); if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) - MBBI--; // Replace the final tPOP with a tPOP_RET. + MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. else CanRestoreDirectly = false; } @@ -454,8 +456,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))); // Copy implicit ops and popped registers, if any. for (auto MO: MBBI->operands()) - if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && - MO.getReg() != ARM::LR) + if (MO.isReg() && (MO.isImplicit() || MO.isDef())) MIB.addOperand(MO); MIB.addReg(ARM::PC, RegState::Define); // Erase the old instruction (tBX_RET or tPOP). @@ -529,32 +530,26 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, .addReg(PopReg, RegState::Kill)); } - bool AddBx = false; - if (MBBI == MBB.end()) { - MachineInstr& Pop = MBB.back(); - assert(Pop.getOpcode() == ARM::tPOP); - Pop.RemoveOperand(Pop.findRegisterDefOperandIdx(ARM::LR)); - } else if (MBBI->getOpcode() == ARM::tPOP_RET) { + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { // We couldn't use the direct restoration above, so // perform the opposite conversion: tPOP_RET to tPOP. MachineInstrBuilder MIB = AddDefaultPred( BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); - unsigned Popped = 0; + bool Popped = false; for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && MO.getReg() != ARM::PC) { MIB.addOperand(MO); if (!MO.isImplicit()) - Popped++; + Popped = true; } // Is there anything left to pop? if (!Popped) MBB.erase(MIB.getInstr()); // Erase the old instruction. MBB.erase(MBBI); - MBBI = MBB.end(); - AddBx = true; + MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))); } assert(PopReg && "Do not know how to get LR"); @@ -563,31 +558,14 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - if (!TemporaryReg && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) { - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) - .addReg(PopReg, RegState::Kill); - AddDefaultPred(MIB); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction - MBB.erase(MBBI); - return true; - } + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); - if (AddBx && !TemporaryReg) { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) - .addReg(PopReg, RegState::Kill)); - } else { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(PopReg, RegState::Kill)); - } - if (TemporaryReg) { + if (TemporaryReg) AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(PopReg, RegState::Define) .addReg(TemporaryReg, RegState::Kill)); - if (AddBx) - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET))); - } return true; } @@ -645,28 +623,34 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); - bool NumRegs = false; + bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (Reg == ARM::LR && MBB.succ_empty()) { - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) + if (Reg == ARM::LR) { + if (MBB.succ_empty()) { + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!STI.hasV5TOps()) + continue; + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(&*MI); + MI = MBB.erase(MI); + } else + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. continue; - // ARMv4T requires BX, see emitEpilogue - if (!STI.hasV5TOps()) - continue; - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(&*MI); - MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); - NumRegs = true; + NeedsPop = true; } // It's illegal to emit pop instruction without operands. - if (NumRegs) + if (NeedsPop) MBB.insert(MI, &*MIB); else MF.DeleteMachineInstr(MIB); diff --git a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll index e68ca0bd78c9..fb4ee8dba7a9 100644 --- a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -159,6 +159,7 @@ declare i32 @doSomething(i32, i32*) ; DISABLE-V4T-NEXT: pop {r1} ; DISABLE-V4T-NEXT: bx r1 ; +; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end ; ENABLE-NEXT: bx lr define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { entry: @@ -270,7 +271,10 @@ for.end: ; preds = %for.body ; Next BB. ; SUM << 3. ; CHECK: lsls [[SUM]], [[SUM]], #3 -; ENABLE-NEXT: pop {r4, lr} +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 ; ; Duplicated epilogue. ; DISABLE-V5T: pop {r4, pc} @@ -285,6 +289,7 @@ for.end: ; preds = %for.body ; DISABLE-V4T-NEXT: pop {r1} ; DISABLE-V4T-NEXT: bx r1 ; +; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end ; ENABLE-NEXT: bx lr define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { entry: @@ -350,7 +355,10 @@ declare void @somethingElse(...) ; Next BB. ; SUM << 3. ; CHECK: lsls [[SUM]], [[SUM]], #3 -; ENABLE: pop {r4, lr} +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 ; ; Duplicated epilogue. ; DISABLE-V5T: pop {r4, pc} @@ -365,6 +373,7 @@ declare void @somethingElse(...) ; DISABLE-V4T-NEXT: pop {r1} ; DISABLE-V4T-NEXT: bx r1 ; +; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end ; ENABLE-NEXT: bx lr define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { entry: @@ -431,7 +440,10 @@ entry: ; ; Next BB. ; CHECK: movs r0, #0 -; ENABLE-NEXT: pop {r4, lr} +; ENABLE-V5T-NEXT: pop {r4, pc} +; ENABLE-V4T-NEXT: pop {r4} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 ; ; Duplicated epilogue. ; DISABLE-V5T-NEXT: pop {r4, pc} @@ -446,6 +458,7 @@ entry: ; DISABLE-V4T-NEXT: pop {r1} ; DISABLE-V4T-NEXT: bx r1 ; +; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end ; ENABLE-NEXT: bx lr define i32 @inlineAsm(i32 %cond, i32 %N) { entry: @@ -506,7 +519,10 @@ if.end: ; preds = %for.body, %if.else ; CHECK-NEXT: lsls r0, r0, #3 ; ; ENABLE-NEXT: add sp, #16 -; ENABLE-NEXT: pop {[[TMP]], lr} +; ENABLE-V5T-NEXT: pop {[[TMP]], pc} +; ENABLE-V4T-NEXT: pop {[[TMP]]} +; ENABLE-V4T-NEXT: pop {r1} +; ENABLE-V4T-NEXT: bx r1 ; ; Duplicated epilogue. ; DISABLE-V5T-NEXT: add sp, #16 @@ -518,6 +534,7 @@ if.end: ; preds = %for.body, %if.else ; CHECK: lsls r0, r1, #1 ; ; Epilogue code. +; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end ; ENABLE-NEXT: bx lr ; ; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end @@ -586,4 +603,89 @@ if.end: declare void @abort() #0 +define i32 @b_to_bx(i32 %value) { +; CHECK-LABEL: b_to_bx: +; DISABLE: push {r7, lr} +; CHECK: cmp r1, #49 +; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]] +; ENABLE: push {r7, lr} + +; CHECK: bl +; DISABLE-V5-NEXT: pop {r7, pc} +; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]] + +; ENABLE-V5-NEXT: pop {r7, pc} +; ENABLE-V4-NEXT: pop {r7} +; ENABLE-V4-NEXT: pop {r1} +; ENABLE-V4-NEXT: bx r1 + +; CHECK: [[ELSE_LABEL]]: @ %if.else +; CHECK-NEXT: lsls r0, r1, #1 +; DISABLE-V5-NEXT: pop {r7, pc} +; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end +; DISABLE-V4T-NEXT: pop {r7} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 + +; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end +; ENABLE-NEXT: bx lr + +entry: + %cmp = icmp slt i32 %value, 50 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %div = sdiv i32 5000, %value + br label %if.end + +if.else: + %mul = shl nsw i32 %value, 1 + br label %if.end + +if.end: + %value.addr.0 = phi i32 [ %div, %if.then ], [ %mul, %if.else ] + ret i32 %value.addr.0 +} + +define i1 @beq_to_bx(i32* %y, i32 %head) { +; CHECK-LABEL: beq_to_bx: +; DISABLE: push {r4, lr} +; CHECK: cmp r2, #0 +; CHECK-NEXT: beq [[EXIT_LABEL:LBB[0-9_]+]] +; ENABLE: push {r4, lr} + +; CHECK: tst r3, r4 +; ENABLE-NEXT: pop {r4} +; ENABLE-NEXT: pop {r3} +; ENABLE-NEXT: mov lr, r3 +; CHECK-NEXT: beq [[EXIT_LABEL]] + +; CHECK: str r1, [r2] +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: [[EXIT_LABEL]]: @ %cleanup +; ENABLE-NEXT: bx lr +; DISABLE-V5-NEXT: pop {r4, pc} +; DISABLE-V4T-NEXT: pop {r4} +; DISABLE-V4T-NEXT: pop {r1} +; DISABLE-V4T-NEXT: bx r1 + +entry: + %cmp = icmp eq i32* %y, null + br i1 %cmp, label %cleanup, label %if.end + +if.end: + %z = load i32, i32* %y, align 4 + %and = and i32 %z, 2 + %cmp2 = icmp eq i32 %and, 0 + br i1 %cmp2, label %cleanup, label %if.end4 + +if.end4: + store i32 %head, i32* %y, align 4 + br label %cleanup + +cleanup: + %retval.0 = phi i1 [ 0, %if.end4 ], [ 1, %entry ], [ 1, %if.end ] + ret i1 %retval.0 +} + attributes #0 = { noreturn nounwind }