forked from OSchip/llvm-project
[X86] Memory folding for commutative instructions.
This patch improves support for commutative instructions in the x86 memory folding implementation by attempting to fold a commuted version of the instruction if the original folding fails. If that folding fails as well, the instruction is 're-commuted' back to its original order before returning. This mainly helps the stack inliner better fold reloads of 3 (or more) operand instructions (VEX-encoded SSE, etc.), but because it is performed in the lowest-level foldMemoryOperandImpl implementation, it also replaces the X86InstrInfo::optimizeLoadInstr version and is now used by FastISel too. Differential Revision: http://reviews.llvm.org/D5701 llvm-svn: 219584
This commit is contained in:
parent
27adb1240f
commit
77ac26d279
|
@ -3337,7 +3337,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
|
|||
AM.getFullAddress(AddrOps);
|
||||
|
||||
MachineInstr *Result =
|
||||
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
|
||||
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment, /*AllowCommute=*/ true);
|
||||
if (!Result)
|
||||
return false;
|
||||
|
||||
|
|
|
@ -3926,9 +3926,6 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
|
|||
if (!DefMI->isSafeToMove(this, nullptr, SawStore))
|
||||
return nullptr;
|
||||
|
||||
// We try to commute MI if possible.
|
||||
unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
|
||||
for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
|
||||
// Collect information about virtual register operands of MI.
|
||||
unsigned SrcOperandId = 0;
|
||||
bool FoundSrcOperand = false;
|
||||
|
@ -3957,24 +3954,6 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
|
|||
return FoldMI;
|
||||
}
|
||||
|
||||
if (Idx == 1) {
|
||||
// MI was changed but it didn't help, commute it back!
|
||||
commuteInstruction(MI, false);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Check whether we can commute MI and enable folding.
|
||||
if (MI->isCommutable()) {
|
||||
MachineInstr *NewMI = commuteInstruction(MI, false);
|
||||
// Unable to commute.
|
||||
if (!NewMI) return nullptr;
|
||||
if (NewMI != MI) {
|
||||
// New instruction. It doesn't need to be kept.
|
||||
NewMI->eraseFromParent();
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -4134,7 +4113,7 @@ MachineInstr*
|
|||
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI, unsigned i,
|
||||
const SmallVectorImpl<MachineOperand> &MOs,
|
||||
unsigned Size, unsigned Align) const {
|
||||
unsigned Size, unsigned Align, bool AllowCommute) const {
|
||||
const DenseMap<unsigned,
|
||||
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
|
||||
bool isCallRegIndirect = Subtarget.callRegIndirect();
|
||||
|
@ -4231,6 +4210,46 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
|||
}
|
||||
}
|
||||
|
||||
// If the instruction and target operand are commutable, commute the instruction and try again.
|
||||
if (AllowCommute) {
|
||||
unsigned OriginalOpIdx = i, CommuteOpIdx1, CommuteOpIdx2;
|
||||
if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
|
||||
if ((CommuteOpIdx1 == OriginalOpIdx) || (CommuteOpIdx2 == OriginalOpIdx)) {
|
||||
MachineInstr* CommutedMI = commuteInstruction(MI, false);
|
||||
if (!CommutedMI) {
|
||||
// Unable to commute.
|
||||
return nullptr;
|
||||
}
|
||||
if (CommutedMI != MI) {
|
||||
// New instruction. We can't fold from this.
|
||||
CommutedMI->eraseFromParent();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Attempt to fold with the commuted version of the instruction.
|
||||
unsigned CommuteOpIdx = (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
|
||||
NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx, MOs, Size, Align, /*AllowCommute=*/ false);
|
||||
if (NewMI)
|
||||
return NewMI;
|
||||
|
||||
// Folding failed again - undo the commute before returning.
|
||||
MachineInstr* UncommutedMI = commuteInstruction(MI, false);
|
||||
if (!UncommutedMI) {
|
||||
// Unable to commute.
|
||||
return nullptr;
|
||||
}
|
||||
if (UncommutedMI != MI) {
|
||||
// New instruction. It doesn't need to be kept.
|
||||
UncommutedMI->eraseFromParent();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Return here to prevent duplicate fuse failure report.
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No fusion
|
||||
if (PrintFailedFusing && !MI->isCopy())
|
||||
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
|
||||
|
@ -4440,7 +4459,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
|
|||
|
||||
SmallVector<MachineOperand,4> MOs;
|
||||
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
|
||||
return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
|
||||
return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment, /*AllowCommute=*/ true);
|
||||
}
|
||||
|
||||
static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
|
||||
|
@ -4593,7 +4612,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
|||
break;
|
||||
}
|
||||
}
|
||||
return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
|
||||
return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment, /*AllowCommute=*/ true);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -404,7 +404,7 @@ public:
|
|||
MachineInstr* MI,
|
||||
unsigned OpNum,
|
||||
const SmallVectorImpl<MachineOperand> &MOs,
|
||||
unsigned Size, unsigned Alignment) const;
|
||||
unsigned Size, unsigned Alignment, bool AllowCommute) const;
|
||||
|
||||
void
|
||||
getUnconditionalBranch(MCInst &Branch,
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
; Function Attrs: nounwind readonly uwtable
|
||||
; Test body: computes (%a + %b) * (%a - %b) on <32 x double> vectors. The FileCheck directives inside the function expect a vmulpd that takes an %rsp-relative stack operand marked as a 32-byte folded reload, with no separate 32-byte vmovapd reload before or after it.
define <32 x double> @_Z14vstack_foldDv32_dS_(<32 x double> %a, <32 x double> %b) #0 {
|
||||
%1 = fadd <32 x double> %a, %b
|
||||
%2 = fsub <32 x double> %a, %b
|
||||
%3 = fmul <32 x double> %1, %2
|
||||
ret <32 x double> %3
|
||||
|
||||
;CHECK-NOT: vmovapd {{.*#+}} 32-byte Reload
|
||||
;CHECK: vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
|
||||
;CHECK-NOT: vmovapd {{.*#+}} 32-byte Reload
|
||||
}
|
Loading…
Reference in New Issue