forked from OSchip/llvm-project
[PowerPC] Improvements for BUILD_VECTOR Vol. 4
This is the final patch in the series of patches that improves BUILD_VECTOR handling on PowerPC. This adds a few peephole optimizations to remove redundant instructions. It also adds a large test case which encompasses a large set of code patterns that build vectors - this test case was the motivator for this series of patches.

Differential Revision: https://reviews.llvm.org/D26066

llvm-svn: 288800
This commit is contained in:
parent
bfd5ff155a
commit
15748f4921
|
@ -844,19 +844,9 @@ let Uses = [RM] in {
|
|||
def XXPERMDI : XX3Form_2<60, 10,
|
||||
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
|
||||
"xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
|
||||
let isCodeGenOnly = 1 in {
|
||||
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA, u2imm:$DM),
|
||||
let isCodeGenOnly = 1 in
|
||||
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
|
||||
"xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
|
||||
let D = 0 in
|
||||
def XXSPLTD0s : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA),
|
||||
"xxspltd $XT, $XA, 0", IIC_VecPerm, []>;
|
||||
let D = 1 in
|
||||
def XXSPLTD1s : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA),
|
||||
"xxspltd $XT, $XA, 1", IIC_VecPerm, []>;
|
||||
let D = 2 in
|
||||
def XXSWAPDs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vfrc:$XA),
|
||||
"xxswapd $XT, $XA", IIC_VecPerm, []>;
|
||||
}
|
||||
def XXSEL : XX4Form<60, 3,
|
||||
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
|
||||
"xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
|
||||
|
@ -928,6 +918,12 @@ def : InstAlias<"xxmrgld $XT, $XA, $XB",
|
|||
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
|
||||
def : InstAlias<"xxswapd $XT, $XB",
|
||||
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
|
||||
def : InstAlias<"xxspltd $XT, $XB, 0",
|
||||
(XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
|
||||
def : InstAlias<"xxspltd $XT, $XB, 1",
|
||||
(XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
|
||||
def : InstAlias<"xxswapd $XT, $XB",
|
||||
(XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
|
||||
|
||||
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
|
||||
|
||||
|
@ -2510,11 +2506,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
|
||||
(v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
|
||||
(v2i64 (XXSPLTD0s (LXSIBZX xoaddr:$src)))>;
|
||||
(v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
|
||||
(v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
|
||||
(v2i64 (XXSPLTD0s (VEXTSB2Ds (LXSIBZX xoaddr:$src))))>;
|
||||
(v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
|
||||
|
||||
// Build vectors from i16 loads
|
||||
def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
|
||||
|
@ -2522,11 +2518,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
|
|||
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
|
||||
(v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
|
||||
(v2i64 (XXSPLTD0s (LXSIHZX xoaddr:$src)))>;
|
||||
(v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
|
||||
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
|
||||
(v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
|
||||
(v2i64 (XXSPLTD0s (VEXTSH2Ds (LXSIHZX xoaddr:$src))))>;
|
||||
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
|
||||
|
||||
let Predicates = [IsBigEndian, HasP9Vector] in {
|
||||
// Scalar stores of i8
|
||||
|
@ -2760,9 +2756,11 @@ let AddedComplexity = 400 in {
|
|||
|
||||
// Build vectors of floating point converted to i64.
|
||||
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
|
||||
(v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
|
||||
(v2i64 (XXPERMDIs
|
||||
(COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
|
||||
def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
|
||||
(v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
|
||||
(v2i64 (XXPERMDIs
|
||||
(COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
|
||||
(v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
|
||||
def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
|
||||
|
|
|
@ -124,10 +124,40 @@ bool PPCMIPeephole::simplifyCode(void) {
|
|||
if (TrueReg1 == TrueReg2
|
||||
&& TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
|
||||
MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
|
||||
unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
|
||||
|
||||
// If this is a splat fed by a splatting load, the splat is
|
||||
// redundant. Replace with a copy. This doesn't happen directly due
|
||||
// to code in PPCDAGToDAGISel.cpp, but it can happen when converting
|
||||
// a load of a double to a vector of 64-bit integers.
|
||||
auto isConversionOfLoadAndSplat = [=]() -> bool {
|
||||
if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
|
||||
return false;
|
||||
unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
|
||||
if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
|
||||
MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
|
||||
if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
if (DefMI && (Immed == 0 || Immed == 3)) {
|
||||
if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
|
||||
DEBUG(dbgs()
|
||||
<< "Optimizing load-and-splat/splat "
|
||||
"to load-and-splat/copy: ");
|
||||
DEBUG(MI.dump());
|
||||
BuildMI(MBB, &MI, MI.getDebugLoc(),
|
||||
TII->get(PPC::COPY), MI.getOperand(0).getReg())
|
||||
.addOperand(MI.getOperand(1));
|
||||
ToErase = &MI;
|
||||
Simplified = true;
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a splat or a swap fed by another splat, we
|
||||
// can replace it with a copy.
|
||||
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
|
||||
if (DefOpc == PPC::XXPERMDI) {
|
||||
unsigned FeedImmed = DefMI->getOperand(3).getImm();
|
||||
unsigned FeedReg1
|
||||
= lookThruCopyLike(DefMI->getOperand(1).getReg());
|
||||
|
@ -170,8 +200,9 @@ bool PPCMIPeephole::simplifyCode(void) {
|
|||
ToErase = &MI;
|
||||
Simplified = true;
|
||||
}
|
||||
} else if ((Immed == 0 || Immed == 3) &&
|
||||
DefMI && DefMI->getOpcode() == PPC::XXPERMDIs) {
|
||||
} else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
|
||||
(DefMI->getOperand(2).getImm() == 0 ||
|
||||
DefMI->getOperand(2).getImm() == 3)) {
|
||||
// Splat fed by another splat - switch the output of the first
|
||||
// and remove the second.
|
||||
DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
|
||||
|
@ -190,17 +221,32 @@ bool PPCMIPeephole::simplifyCode(void) {
|
|||
unsigned MyOpcode = MI.getOpcode();
|
||||
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
|
||||
unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
|
||||
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
|
||||
break;
|
||||
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
|
||||
if (!DefMI)
|
||||
break;
|
||||
unsigned DefOpcode = DefMI->getOpcode();
|
||||
bool SameOpcode = (MyOpcode == DefOpcode) ||
|
||||
auto isConvertOfSplat = [=]() -> bool {
|
||||
if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
|
||||
return false;
|
||||
unsigned ConvReg = DefMI->getOperand(1).getReg();
|
||||
if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
|
||||
return false;
|
||||
MachineInstr *Splt = MRI->getVRegDef(ConvReg);
|
||||
return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
|
||||
Splt->getOpcode() == PPC::XXSPLTW);
|
||||
};
|
||||
bool AlreadySplat = (MyOpcode == DefOpcode) ||
|
||||
(MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
|
||||
(MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
|
||||
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs);
|
||||
// Splat fed by another splat - switch the output of the first
|
||||
// and remove the second.
|
||||
if (SameOpcode) {
|
||||
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
|
||||
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
|
||||
(MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
|
||||
(MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
|
||||
// If the instruction[s] that feed this splat have already splat
|
||||
// the value, this splat is redundant.
|
||||
if (AlreadySplat) {
|
||||
DEBUG(dbgs() << "Changing redundant splat to a copy: ");
|
||||
DEBUG(MI.dump());
|
||||
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
|
||||
|
@ -234,9 +280,64 @@ bool PPCMIPeephole::simplifyCode(void) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case PPC::XVCVDPSP: {
|
||||
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
|
||||
unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
|
||||
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
|
||||
break;
|
||||
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
|
||||
|
||||
// This can occur when building a vector of single precision or integer
|
||||
// values.
|
||||
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
|
||||
unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
|
||||
unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
|
||||
if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
|
||||
!TargetRegisterInfo::isVirtualRegister(DefsReg2))
|
||||
break;
|
||||
MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
|
||||
MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
|
||||
|
||||
if (!P1 || !P2)
|
||||
break;
|
||||
|
||||
// Remove the passed FRSP instruction if it only feeds this MI and
|
||||
// set any uses of that FRSP (in this MI) to the source of the FRSP.
|
||||
auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
|
||||
if (RoundInstr->getOpcode() == PPC::FRSP &&
|
||||
MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
|
||||
Simplified = true;
|
||||
unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
|
||||
unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
|
||||
MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
|
||||
for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
|
||||
if (Use.getOperand(i).isReg() &&
|
||||
Use.getOperand(i).getReg() == FRSPDefines)
|
||||
Use.getOperand(i).setReg(ConvReg1);
|
||||
DEBUG(dbgs() << "Removing redundant FRSP:\n");
|
||||
DEBUG(RoundInstr->dump());
|
||||
DEBUG(dbgs() << "As it feeds instruction:\n");
|
||||
DEBUG(MI.dump());
|
||||
DEBUG(dbgs() << "Through instruction:\n");
|
||||
DEBUG(DefMI->dump());
|
||||
RoundInstr->eraseFromParent();
|
||||
}
|
||||
};
|
||||
|
||||
// If the input to XVCVDPSP is a vector that was built (even
|
||||
// partially) out of FRSP's, the FRSP(s) can safely be removed
|
||||
// since this instruction performs the same operation.
|
||||
if (P1 != P2) {
|
||||
removeFRSPIfPossible(P1);
|
||||
removeFRSPIfPossible(P2);
|
||||
break;
|
||||
}
|
||||
removeFRSPIfPossible(P1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the last instruction was marked for elimination,
|
||||
// remove it now.
|
||||
if (ToErase) {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -156,14 +156,10 @@ define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
|
|||
entry:
|
||||
; CHECK-LABEL: test14
|
||||
; CHECK: lwz [[LD:[0-9]+]],
|
||||
; FIXME: mtvsrws 34, [[LD]]
|
||||
; CHECK: mtvsrws [[SPLT:[0-9]+]], [[LD]]
|
||||
; CHECK: xxspltw 34, [[SPLT]], 3
|
||||
; CHECK: mtvsrws 34, [[LD]]
|
||||
; CHECK-BE-LABEL: test14
|
||||
; CHECK-BE: lwz [[LD:[0-9]+]],
|
||||
; FIXME: mtvsrws 34, [[LD]]
|
||||
; CHECK-BE: mtvsrws [[SPLT:[0-9]+]], [[LD]]
|
||||
; CHECK-BE: xxspltw 34, [[SPLT]], 0
|
||||
; CHECK-BE: mtvsrws 34, [[LD]]
|
||||
%0 = load i32, i32* %b, align 4
|
||||
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
|
||||
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
|
|
|
@ -57,11 +57,11 @@ entry:
|
|||
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i64> %splat.splat
|
||||
; CHECK-LABEL: veculuc
|
||||
; CHECK: lxsibzx 34, 0, 3
|
||||
; CHECK-NEXT: xxspltd 34, 34, 0
|
||||
; CHECK: lxsibzx 0, 0, 3
|
||||
; CHECK-NEXT: xxspltd 34, 0, 0
|
||||
; CHECK-BE-LABEL: veculuc
|
||||
; CHECK-BE: lxsibzx 34, 0, 3
|
||||
; CHECK-BE-NEXT: xxspltd 34, 34, 0
|
||||
; CHECK-BE: lxsibzx 0, 0, 3
|
||||
; CHECK-BE-NEXT: xxspltd 34, 0, 0
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly
|
||||
|
@ -120,11 +120,11 @@ entry:
|
|||
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i64> %splat.splat
|
||||
; CHECK-LABEL: vecsluc
|
||||
; CHECK: lxsibzx 34, 0, 3
|
||||
; CHECK-NEXT: xxspltd 34, 34, 0
|
||||
; CHECK: lxsibzx 0, 0, 3
|
||||
; CHECK-NEXT: xxspltd 34, 0, 0
|
||||
; CHECK-BE-LABEL: vecsluc
|
||||
; CHECK-BE: lxsibzx 34, 0, 3
|
||||
; CHECK-BE-NEXT: xxspltd 34, 34, 0
|
||||
; CHECK-BE: lxsibzx 0, 0, 3
|
||||
; CHECK-BE-NEXT: xxspltd 34, 0, 0
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly
|
||||
|
@ -366,11 +366,11 @@ entry:
|
|||
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i64> %splat.splat
|
||||
; CHECK-LABEL: veculus
|
||||
; CHECK: lxsihzx 34, 0, 3
|
||||
; CHECK-NEXT: xxspltd 34, 34, 0
|
||||
; CHECK: lxsihzx 0, 0, 3
|
||||
; CHECK-NEXT: xxspltd 34, 0, 0
|
||||
; CHECK-BE-LABEL: veculus
|
||||
; CHECK-BE: lxsihzx 34, 0, 3
|
||||
; CHECK-BE-NEXT: xxspltd 34, 34, 0
|
||||
; CHECK-BE: lxsihzx 0, 0, 3
|
||||
; CHECK-BE-NEXT: xxspltd 34, 0, 0
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly
|
||||
|
@ -430,11 +430,11 @@ entry:
|
|||
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i64> %splat.splat
|
||||
; CHECK-LABEL: vecslus
|
||||
; CHECK: lxsihzx 34, 0, 3
|
||||
; CHECK-NEXT: xxspltd 34, 34, 0
|
||||
; CHECK: lxsihzx 0, 0, 3
|
||||
; CHECK-NEXT: xxspltd 34, 0, 0
|
||||
; CHECK-BE-LABEL: vecslus
|
||||
; CHECK-BE: lxsihzx 34, 0, 3
|
||||
; CHECK-BE-NEXT: xxspltd 34, 34, 0
|
||||
; CHECK-BE: lxsihzx 0, 0, 3
|
||||
; CHECK-BE-NEXT: xxspltd 34, 0, 0
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readonly
|
||||
|
|
Loading…
Reference in New Issue