[AArch64] Merge two adjacent str WZR into str XZR
Summary:
This change merges two adjacent 32-bit zero stores into a single 64-bit
zero store. For example,

  str wzr, [x0]
  str wzr, [x0, #4]

becomes

  str xzr, [x0]

As a result, four adjacent 32-bit zero stores reduce to a single stp:

  str wzr, [x0]
  str wzr, [x0, #4]
  str wzr, [x0, #8]
  str wzr, [x0, #12]

becomes

  stp xzr, xzr, [x0]

Reviewers: mcrosier, jmolloy, gberry, t.p.northover

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D16933

llvm-svn: 260682
This commit is contained in:
parent f034a8c7d7
commit 397eb7b0b3
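To make the transformation concrete before the diff: a hypothetical C++
function whose lowering produces the pattern above. The function name and
the exact codegen are assumptions for illustration, not part of this commit.

  // Illustrative only: zeroing four consecutive 32-bit slots, the pattern
  // this pass now rewrites.
  void zero_four(int *p) {
    p[0] = 0;
    p[1] = 0;
    p[2] = 0;
    p[3] = 0;
  }

  // Before this change, the backend could emit four narrow zero stores:
  //   str wzr, [x0]
  //   str wzr, [x0, #4]
  //   str wzr, [x0, #8]
  //   str wzr, [x0, #12]
  // With it, each adjacent pair widens to a 64-bit str xzr, and the
  // existing pairing pass then combines the two results into:
  //   stp xzr, xzr, [x0]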
@@ -235,10 +235,6 @@ static bool isNarrowStore(unsigned Opc) {
   }
 }
 
-static bool isNarrowStore(MachineInstr *MI) {
-  return isNarrowStore(MI->getOpcode());
-}
-
 static bool isNarrowLoad(unsigned Opc) {
   switch (Opc) {
   default:
@@ -386,6 +382,10 @@ static unsigned getMatchingWideOpcode(unsigned Opc) {
     return AArch64::STURHHi;
   case AArch64::STURHHi:
     return AArch64::STURWi;
+  case AArch64::STURWi:
+    return AArch64::STURXi;
+  case AArch64::STRWui:
+    return AArch64::STRXui;
   case AArch64::LDRHHui:
   case AArch64::LDRSHWui:
     return AArch64::LDRWui;
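With the two new rows above, the widened-opcode map forms a complete chain
for zero stores, so repeated merging doubles the access width step by step.
A self-contained sketch of the store cases shown, using a hypothetical plain
enum rather than the real AArch64 opcode namespace:

  #include <cstdio>

  // Hypothetical stand-in for the AArch64 opcode namespace.
  enum Opcode { STURBBi, STURHHi, STURWi, STURXi, STRWui, STRXui, InvalidOp };

  // Sketch of the store rows of getMatchingWideOpcode after this hunk:
  // merging two equal-width zero stores yields the next width up.
  static Opcode getMatchingWideOpcodeSketch(Opcode Opc) {
    switch (Opc) {
    case STURBBi: return STURHHi; // two 8-bit  -> one 16-bit (unscaled)
    case STURHHi: return STURWi;  // two 16-bit -> one 32-bit (unscaled)
    case STURWi:  return STURXi;  // two 32-bit -> one 64-bit (unscaled, new)
    case STRWui:  return STRXui;  // two 32-bit -> one 64-bit (scaled, new)
    default:      return InvalidOp;
    }
  }

  int main() {
    // Two str wzr widen to str xzr; the pairing pass can then form stp.
    std::printf("%d\n", getMatchingWideOpcodeSketch(STRWui) == STRXui); // 1
  }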
@@ -640,6 +640,16 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
          (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
 }
 
+static bool isPromotableZeroStoreOpcode(MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
+}
+
+static bool isPromotableZeroStoreInst(MachineInstr *MI) {
+  return (isPromotableZeroStoreOpcode(MI)) &&
+         getLdStRegOp(MI).getReg() == AArch64::WZR;
+}
+
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
                                       MachineBasicBlock::iterator MergeMI,
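The two new helpers deliberately split the test: isPromotableZeroStoreOpcode
gates on the opcode alone, before any operand is inspected (as in
tryToMergeLdStInst and optimizeBlock further down), while
isPromotableZeroStoreInst additionally requires the stored register to be
WZR. A minimal standalone sketch of that layering, with hypothetical
stand-in types in place of MachineInstr and the real enums:

  #include <cassert>

  // Hypothetical stand-ins for the real opcode and register enums.
  enum Opcode { STRBBui, STRHHui, STURBBi, STURHHi, STRWui, STURWi, STRXui };
  enum Reg { WZR, W1 };
  struct Inst { Opcode Opc; Reg StoredReg; };

  // Opcode-only test: narrow (8/16-bit) stores plus the newly added
  // 32-bit scaled/unscaled stores are candidates for widening.
  static bool isPromotableZeroStoreOpcode(const Inst &MI) {
    switch (MI.Opc) {
    case STRBBui: case STRHHui: case STURBBi: case STURHHi: // narrow stores
    case STRWui: case STURWi:                               // 32-bit stores
      return true;
    default:
      return false;
    }
  }

  // Full test: candidate opcode and the stored value is the zero register.
  static bool isPromotableZeroStoreInst(const Inst &MI) {
    return isPromotableZeroStoreOpcode(MI) && MI.StoredReg == WZR;
  }

  int main() {
    assert(isPromotableZeroStoreInst({STRWui, WZR}));    // str wzr: promotable
    assert(!isPromotableZeroStoreInst({STRWui, W1}));    // str w1: not zero
    assert(!isPromotableZeroStoreOpcode({STRXui, WZR})); // already 64-bit
  }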
@@ -775,12 +785,12 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
     MergeMI->eraseFromParent();
     return NextI;
   }
-  assert(isNarrowStore(Opc) && "Expected narrow store");
+  assert(isPromotableZeroStoreInst(I) && "Expected promotable zero store");
 
   // Construct the new instruction.
   MachineInstrBuilder MIB;
   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
-            .addOperand(getLdStRegOp(I))
+            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
             .addOperand(BaseRegOp)
             .addImm(OffsetImm)
             .setMemRefs(I->mergeMemRefsWith(*MergeMI));
@@ -1211,7 +1221,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
   int Offset = getLdStOffsetOp(FirstMI).getImm();
   int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
-  bool IsNarrowStore = isNarrowStore(Opc);
+  bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
 
   // Track which registers have been modified and used between the first insn
   // (inclusive) and the second insn.
@@ -1282,7 +1292,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
         continue;
       }
 
-      if (IsNarrowLoad || IsNarrowStore) {
+      if (IsNarrowLoad || IsPromotableZeroStore) {
         // If the alignment requirements of the scaled wide load/store
         // instruction can't express the offset of the scaled narrow
         // input, bail and keep looking.
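Context for the bail-out in this hunk: the scaled 64-bit store
(str xzr, [xN, #imm]) encodes #imm in units of 8 bytes, so a merged pair
whose low byte offset is not 8-byte aligned cannot use the scaled form. A
worked sketch of that check (hypothetical helper; the actual test lives in
findMatchingInsn):

  #include <cstdio>

  // Two adjacent 32-bit zero stores at byte offsets Lo and Lo+4 cover the
  // 64-bit slot [Lo, Lo+8). The scaled str xzr form scales its 12-bit
  // unsigned immediate by 8, so it can only express the merge when Lo is a
  // non-negative multiple of 8 within encoding range; otherwise the pass
  // must fall back to the unscaled form or keep looking.
  static bool scaledWideStoreCanEncode(int LoByteOffset) {
    return LoByteOffset % 8 == 0 && LoByteOffset >= 0 &&
           LoByteOffset <= 32760; // 4095 * 8
  }

  int main() {
    // str wzr,[x0,#8] + str wzr,[x0,#12] -> str xzr,[x0,#8]   (encodable)
    std::printf("%d\n", scaledWideStoreCanEncode(8)); // prints 1
    // str wzr,[x0,#4] + str wzr,[x0,#8]  -> needs stur xzr,[x0,#4]
    std::printf("%d\n", scaledWideStoreCanEncode(4)); // prints 0
  }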
@@ -1307,7 +1317,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
       // For narrow stores, allow only when the stored value is the same
       // (i.e., WZR).
       if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
-          (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
+          (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
         trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
         MemInsns.push_back(MI);
         continue;
@@ -1633,24 +1643,27 @@ bool AArch64LoadStoreOpt::isCandidateToMergeOrPair(MachineInstr *MI) {
 // store.
 bool AArch64LoadStoreOpt::tryToMergeLdStInst(
     MachineBasicBlock::iterator &MBBI) {
-  assert((isNarrowLoad(MBBI) || isNarrowStore(MBBI)) && "Expected narrow op.");
+  assert((isNarrowLoad(MBBI) || isPromotableZeroStoreOpcode(MBBI)) &&
+         "Expected narrow op.");
   MachineInstr *MI = MBBI;
   MachineBasicBlock::iterator E = MI->getParent()->end();
 
   if (!isCandidateToMergeOrPair(MI))
     return false;
 
-  // For narrow stores, find only the case where the stored value is WZR.
-  if (isNarrowStore(MI) && getLdStRegOp(MI).getReg() != AArch64::WZR)
+  // For promotable zero stores, the stored value should be WZR.
+  if (isPromotableZeroStoreOpcode(MI) &&
+      getLdStRegOp(MI).getReg() != AArch64::WZR)
     return false;
 
   // Look ahead up to LdStLimit instructions for a mergable instruction.
   LdStPairFlags Flags;
-  MachineBasicBlock::iterator MergeMI = findMatchingInsn(MBBI, Flags, LdStLimit);
+  MachineBasicBlock::iterator MergeMI =
+      findMatchingInsn(MBBI, Flags, LdStLimit);
   if (MergeMI != E) {
     if (isNarrowLoad(MI)) {
       ++NumNarrowLoadsPromoted;
-    } else if (isNarrowStore(MI)) {
+    } else if (isPromotableZeroStoreInst(MI)) {
       ++NumZeroStoresPromoted;
     }
   // Keeping the iterator straight is a pain, so we let the merge routine tell
@@ -1765,13 +1778,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
     case AArch64::LDRSHWui:
     case AArch64::STRBBui:
     case AArch64::STRHHui:
+    case AArch64::STRWui:
     // Unscaled instructions.
     case AArch64::LDURBBi:
     case AArch64::LDURHHi:
    case AArch64::LDURSBWi:
     case AArch64::LDURSHWi:
     case AArch64::STURBBi:
-    case AArch64::STURHHi: {
+    case AArch64::STURHHi:
+    case AArch64::STURWi: {
       if (tryToMergeLdStInst(MBBI)) {
         Modified = true;
         break;
@@ -352,6 +352,42 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: Strw_zero
+; CHECK: str xzr
+define void @Strw_zero(i32* nocapture %P, i32 %n) {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+  store i32 0, i32* %arrayidx2
+  ret void
+}
+
+; CHECK-LABEL: Strw_zero_4
+; CHECK: stp xzr
+define void @Strw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %add = add nsw i32 %n, 1
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %P, i64 %idxprom1
+  store i32 0, i32* %arrayidx2
+  %add3 = add nsw i32 %n, 2
+  %idxprom4 = sext i32 %add3 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %P, i64 %idxprom4
+  store i32 0, i32* %arrayidx5
+  %add6 = add nsw i32 %n, 3
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %P, i64 %idxprom7
+  store i32 0, i32* %arrayidx8
+  ret void
+}
+
 ; CHECK-LABEL: Sturb_zero
 ; CHECK: sturh wzr
 define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
@@ -404,3 +440,42 @@ entry:
   store i16 0, i16* %arrayidx9
   ret void
 }
+
+; CHECK-LABEL: Sturw_zero
+; CHECK: stur xzr
+define void @Sturw_zero(i32* nocapture %P, i32 %n) {
+entry:
+  %sub = add nsw i32 %n, -3
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %sub1 = add nsw i32 %n, -4
+  %idxprom2 = sext i32 %sub1 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+  store i32 0, i32* %arrayidx3
+  ret void
+}
+
+; CHECK-LABEL: Sturw_zero_4
+; CHECK: str xzr
+define void @Sturw_zero_4(i32* nocapture %P, i32 %n) {
+entry:
+  %sub = add nsw i32 %n, -3
+  %idxprom = sext i32 %sub to i64
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %idxprom
+  store i32 0, i32* %arrayidx
+  %sub1 = add nsw i32 %n, -4
+  %idxprom2 = sext i32 %sub1 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %P, i64 %idxprom2
+  store i32 0, i32* %arrayidx3
+  %sub4 = add nsw i32 %n, -2
+  %idxprom5 = sext i32 %sub4 to i64
+  %arrayidx6 = getelementptr inbounds i32, i32* %P, i64 %idxprom5
+  store i32 0, i32* %arrayidx6
+  %sub7 = add nsw i32 %n, -1
+  %idxprom8 = sext i32 %sub7 to i64
+  %arrayidx9 = getelementptr inbounds i32, i32* %P, i64 %idxprom8
+  store i32 0, i32* %arrayidx9
+  ret void
+}
+