forked from OSchip/llvm-project
[ARM] Shrink post-indexed LDR and STR to LDM/STM
A Thumb-2 post-indexed LDR instruction such as `ldr.w r0, [r1], #4` can be rewritten as `ldm.n r1!, {r0}`. LDMs can be more expensive than LDRs on some cores, so this has been enabled only in minsize mode. llvm-svn: 272002
This commit is contained in:
parent
695c6b476a
commit
53298a1808
|
@ -116,12 +116,14 @@ namespace {
|
|||
{ ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
|
||||
{ ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
|
||||
|
||||
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
|
||||
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
|
||||
|
@ -423,6 +425,46 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
|
|||
HasShift = true;
|
||||
OpNum = 4;
|
||||
break;
|
||||
case ARM::t2LDR_POST:
|
||||
case ARM::t2STR_POST: {
|
||||
if (!MBB.getParent()->getFunction()->optForMinSize())
|
||||
return false;
|
||||
|
||||
// We're creating a completely different type of load/store - LDM from LDR.
|
||||
// For this reason we can't reuse the logic at the end of this function; we
|
||||
// have to implement the MI building here.
|
||||
bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
|
||||
unsigned Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
|
||||
unsigned Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
|
||||
unsigned Offset = MI->getOperand(3).getImm();
|
||||
unsigned PredImm = MI->getOperand(4).getImm();
|
||||
unsigned PredReg = MI->getOperand(5).getReg();
|
||||
assert(isARMLowRegister(Rt));
|
||||
assert(isARMLowRegister(Rn));
|
||||
|
||||
if (Offset != 4)
|
||||
return false;
|
||||
|
||||
// Add the 16-bit load / store instruction.
|
||||
DebugLoc dl = MI->getDebugLoc();
|
||||
auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
|
||||
.addReg(Rn, RegState::Define)
|
||||
.addReg(Rn)
|
||||
.addImm(PredImm)
|
||||
.addReg(PredReg)
|
||||
.addReg(Rt, IsStore ? 0 : RegState::Define);
|
||||
|
||||
// Transfer memoperands.
|
||||
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
|
||||
|
||||
// Transfer MI flags.
|
||||
MIB.setMIFlags(MI->getFlags());
|
||||
|
||||
// Kill the old instruction.
|
||||
MI->eraseFromParent();
|
||||
++NumLdSts;
|
||||
return true;
|
||||
}
|
||||
case ARM::t2LDMIA: {
|
||||
unsigned BaseReg = MI->getOperand(0).getReg();
|
||||
assert(isARMLowRegister(BaseReg));
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
|
||||
target triple = "thumbv7m--linux-gnu"
|
||||
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: ldm r{{[0-9]}}!, {r[[x:[0-9]]]}
|
||||
; CHECK: add.w r[[x]], r[[x]], #3
|
||||
; CHECK: stm r{{[0-9]}}!, {r[[x]]}
|
||||
define void @f(i32 %n, i32* nocapture %a, i32* nocapture readonly %b) optsize minsize {
|
||||
%1 = icmp sgt i32 %n, 0
|
||||
br i1 %1, label %.lr.ph, label %._crit_edge
|
||||
|
||||
.lr.ph: ; preds = %.lr.ph, %0
|
||||
%i.04 = phi i32 [ %6, %.lr.ph ], [ 0, %0 ]
|
||||
%.03 = phi i32* [ %2, %.lr.ph ], [ %b, %0 ]
|
||||
%.012 = phi i32* [ %5, %.lr.ph ], [ %a, %0 ]
|
||||
%2 = getelementptr inbounds i32, i32* %.03, i32 1
|
||||
%3 = load i32, i32* %.03, align 4
|
||||
%4 = add nsw i32 %3, 3
|
||||
%5 = getelementptr inbounds i32, i32* %.012, i32 1
|
||||
store i32 %4, i32* %.012, align 4
|
||||
%6 = add nsw i32 %i.04, 1
|
||||
%exitcond = icmp eq i32 %6, %n
|
||||
br i1 %exitcond, label %._crit_edge, label %.lr.ph
|
||||
|
||||
._crit_edge: ; preds = %.lr.ph, %0
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: f_nominsize:
|
||||
; CHECK-NOT: ldm
|
||||
define void @f_nominsize(i32 %n, i32* nocapture %a, i32* nocapture readonly %b) optsize {
|
||||
%1 = icmp sgt i32 %n, 0
|
||||
br i1 %1, label %.lr.ph, label %._crit_edge
|
||||
|
||||
.lr.ph: ; preds = %.lr.ph, %0
|
||||
%i.04 = phi i32 [ %6, %.lr.ph ], [ 0, %0 ]
|
||||
%.03 = phi i32* [ %2, %.lr.ph ], [ %b, %0 ]
|
||||
%.012 = phi i32* [ %5, %.lr.ph ], [ %a, %0 ]
|
||||
%2 = getelementptr inbounds i32, i32* %.03, i32 1
|
||||
%3 = load i32, i32* %.03, align 4
|
||||
%4 = add nsw i32 %3, 3
|
||||
%5 = getelementptr inbounds i32, i32* %.012, i32 1
|
||||
store i32 %4, i32* %.012, align 4
|
||||
%6 = add nsw i32 %i.04, 1
|
||||
%exitcond = icmp eq i32 %6, %n
|
||||
br i1 %exitcond, label %._crit_edge, label %.lr.ph
|
||||
|
||||
._crit_edge: ; preds = %.lr.ph, %0
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue