diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 16a1c3c1bb55..84ae063941ca 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1229,10 +1229,30 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { } else { MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && - ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) - return false; + ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) { + + // We couldn't find an inc/dec to merge. But if the base is dead, we + // can still change to a writeback form as that will save us 2 bytes + // of code size. It can create WAW hazards though, so only do it if + // we're minimizing code size. + if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill) + return false; + + bool HighRegsUsed = false; + for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).getReg() >= ARM::R8) { + HighRegsUsed = true; + break; + } + + if (!HighRegsUsed) + MergeInstr = MBB.end(); + else + return false; + } } - MBB.erase(MergeInstr); + if (MergeInstr != MBB.end()) + MBB.erase(MergeInstr); unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) diff --git a/llvm/test/CodeGen/ARM/ldm-base-writeback.ll b/llvm/test/CodeGen/ARM/ldm-base-writeback.ll new file mode 100644 index 000000000000..375f58a24a19 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ldm-base-writeback.ll @@ -0,0 +1,21 @@ +; RUN: llc -O3 < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" +target triple = "armv7--linux-gnu" + +@a = global i32 0, align 4 +@b = global i32 0, align 4 +@c = global i32 0, align 4 + +; CHECK-LABEL: bar: +; CHECK: ldm r{{[0-9]}}!, {r0, r{{[0-9]}}, r{{[0-9]}}} 
+define void @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize { ; regression input for the ldm writeback pattern checked above; minsize matters because the new path in this patch is gated on optForMinSize(); the three arguments are unused
+  %1 = load i32, i32* @a, align 4 ; first of three i32 global loads (the expected ldm lists three registers)
+  %2 = load i32, i32* @b, align 4 ; second global load
+  %3 = load i32, i32* @c, align 4 ; third global load; this value is passed to both calls below
+  %4 = tail call i32 @baz(i32 %1, i32 %3) minsize optsize ; consumes the values loaded from @a and @c; result unused
+  %5 = tail call i32 @baz(i32 %2, i32 %3) minsize optsize ; consumes the values loaded from @b and @c; result unused
+  ret void
+}
+
+declare i32 @baz(i32,i32) minsize optsize ; declaration only; no body for @baz is provided in this test