[AArch64] Keep track of MIFlags in the LoadStoreOptimizer

Merging:

* $x26, $x25 = frame-setup LDPXi $sp, 0
* $sp = frame-destroy ADDXri $sp, 64, 0

into an LDPXpost should preserve the flags from both instructions, as
follows:

* frame-setup frame-destroy LDPXpost

Differential Revision: https://reviews.llvm.org/D44446

llvm-svn: 327533
This commit is contained in:
Francis Visoiu Mistrih 2018-03-14 17:10:58 +00:00
parent bec5df2d05
commit 084e7d8770
4 changed files with 124 additions and 7 deletions

View File

@ -1317,6 +1317,11 @@ public:
/// modify the memrefs of this MachineInstr.
std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other);
/// Return the MIFlags which represent both this MachineInstr and \p Other:
/// currently the bitwise union of the two instructions' flags. This should be
/// used when merging two MachineInstrs into one. This routine does not modify
/// the MIFlags of either MachineInstr.
uint8_t mergeFlagsWith(const MachineInstr& Other) const;
/// Clear this MachineInstr's memory reference descriptor list. This resets
/// the memrefs to their most conservative state. This should be used only
/// as a last resort since it greatly pessimizes our knowledge of the memory

View File

@ -381,6 +381,12 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
return std::make_pair(MemBegin, CombinedNumMemRefs);
}
uint8_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
  // For now, the merged flags are simply the union of the flags carried by
  // this instruction and by Other. If the flags get more complicated over
  // time, we might need more logic here.
  const auto OwnFlags = getFlags();
  const auto OtherFlags = Other.getFlags();
  return OwnFlags | OtherFlags;
}
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {

View File

@ -702,7 +702,8 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
.add(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
.setMemRefs(I->mergeMemRefsWith(*MergeMI))
.setMIFlags(I->mergeFlagsWith(*MergeMI));
(void)MIB;
DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
@ -818,7 +819,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
.add(RegOp1)
.add(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
.setMemRefs(I->mergeMemRefsWith(*Paired))
.setMIFlags(I->mergeFlagsWith(*Paired));
(void)MIB;
@ -913,7 +915,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
.addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
.add(StMO)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.setMIFlags(LoadI->getFlags());
} else {
// FIXME: Currently we disable this transformation in big-endian targets as
// performance and correctness are verified only in little-endian.
@ -954,7 +957,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
DestReg)
.add(StMO)
.addImm(AndMaskEncoded);
.addImm(AndMaskEncoded)
.setMIFlags(LoadI->getFlags());
} else {
BitExtMI =
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
@ -962,7 +966,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
DestReg)
.add(StMO)
.addImm(Immr)
.addImm(Imms);
.addImm(Imms)
.setMIFlags(LoadI->getFlags());
}
}
@ -1352,7 +1357,8 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*I))
.add(getLdStBaseOp(*I))
.addImm(Value)
.setMemRefs(I->memoperands_begin(), I->memoperands_end());
.setMemRefs(I->memoperands_begin(), I->memoperands_end())
.setMIFlags(I->mergeFlagsWith(*Update));
} else {
// Paired instruction.
int Scale = getMemScale(*I);
@ -1362,7 +1368,8 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*I, 1))
.add(getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands_begin(), I->memoperands_end());
.setMemRefs(I->memoperands_begin(), I->memoperands_end())
.setMIFlags(I->mergeFlagsWith(*Update));
}
(void)MIB;

View File

@ -0,0 +1,99 @@
# RUN: llc -run-pass=aarch64-ldst-opt -o - -mtriple=aarch64-- %s | FileCheck %s
# Check that we merge the MIFlags from both instructions into the final
# instruction.
---
name: case11
# CHECK-LABEL: name: case11
body: |
bb.0:
frame-setup STRWui $w1, $x0, 1 :: (store 4)
$w1 = frame-destroy LDRWui $x0, 1 :: (load 4)
; CHECK: frame-setup STRWui
; CHECK-NOT: frame-setup
; CHECK-NEXT: frame-destroy ORRWrs
; No merging happens here: the load is replaced by a new instruction, so make
; sure that instruction keeps the flags of the load it replaces.
RET_ReallyLR
...
---
name: case12
# CHECK-LABEL: name: case12
body: |
bb.0:
frame-setup STRWui $w1, $x0, 1 :: (store 4)
$w2 = frame-destroy LDRHHui $x0, 2 :: (load 2)
; CHECK: frame-setup STRWui
; CHECK-NOT: frame-setup
; CHECK-NEXT: frame-destroy ANDWri
; No merging happens here: the load is replaced by a new instruction, so make
; sure that instruction keeps the flags of the load it replaces.
RET_ReallyLR
...
---
name: case13
# CHECK-LABEL: name: case13
body: |
bb.0:
frame-setup STRWui $w1, $x0, 1 :: (store 4)
$w2 = frame-destroy LDRHHui $x0, 3 :: (load 2)
; CHECK: frame-setup STRWui
; CHECK-NOT: frame-setup
; CHECK-NEXT: frame-destroy UBFMWri
; No merging happens here: the load is replaced by a new instruction, so make
; sure that instruction keeps the flags of the load it replaces.
RET_ReallyLR
...
---
name: case2
# CHECK-LABEL: name: case2
body: |
bb.0:
frame-setup STRHHui $wzr, $x0, 0 :: (store 4)
frame-destroy STRHHui $wzr, $x0, 1 :: (store 4)
; CHECK: frame-setup frame-destroy STRWui
RET_ReallyLR
...
---
name: case3
# CHECK-LABEL: name: case3
body: |
bb.0:
$x0 = frame-setup LDRXui $x2, 0 :: (load 8)
$x1 = frame-destroy LDRXui $x2, 1 :: (load 8)
; CHECK: frame-setup frame-destroy LDPXi
RET_ReallyLR
...
---
name: case4
# CHECK-LABEL: name: case4
body: |
bb.0:
$x26, $x25 = frame-setup LDPXi $sp, 0
$sp = frame-destroy ADDXri $sp, 64, 0
; CHECK: = frame-setup frame-destroy LDPXpost
RET_ReallyLR
...
---
name: case41
# CHECK-LABEL: name: case41
body: |
bb.0:
$x26 = frame-setup LDRXui $sp, 0
$sp = frame-destroy ADDXri $sp, 64, 0
; CHECK: = frame-setup frame-destroy LDRXpost
RET_ReallyLR
...