forked from OSchip/llvm-project
[X86] Add a DAG combine to turn vbroadcast(vzload X) -> vbroadcast_load. Remove now unneeded isel patterns.
This commit is contained in:
parent 2b17438a92
commit bc65b68661
@@ -35146,6 +35146,22 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
       return N; // Return N so it doesn't get rechecked!
     }
 
+    // vbroadcast(vzload X) -> vbroadcast_load X
+    if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) {
+      MemSDNode *LN = cast<MemIntrinsicSDNode>(Src);
+      if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) {
+        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+        SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+        SDValue BcastLd =
+            DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
+                                    LN->getMemoryVT(), LN->getMemOperand());
+        DCI.CombineTo(N.getNode(), BcastLd);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+        DCI.recursivelyDeleteUnusedNodes(LN);
+        return N; // Return N so it doesn't get rechecked!
+      }
+    }
+
     return SDValue();
   }
   case X86ISD::BLENDI: {
@@ -1423,19 +1423,6 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                            AVX5128IBase, EVEX;
 }
 
-let Predicates = [HasAVX512] in {
-  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQZrm addr:$src)>;
-}
-
-let Predicates = [HasVLX] in {
-  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQZ128rm addr:$src)>;
-  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQZ256rm addr:$src)>;
-}
 let Predicates = [HasVLX, HasBWI] in {
   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
   // This means we'll encounter truncated i32 loads; match that here.
@@ -10873,8 +10860,6 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
           (VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
-          (VMOVDDUPZ128rm addr:$src)>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                    (v2f64 VR128X:$src0)),
@@ -7523,13 +7523,6 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, X86VBroadcastld32,
 defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, X86VBroadcastld64,
                                    v2i64, v4i64, NoVLX>;
 
-let Predicates = [HasAVX2, NoVLX] in {
-  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQrm addr:$src)>;
-  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
-            (VPBROADCASTQYrm addr:$src)>;
-}
 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
   // This means we'll encounter truncated i32 loads; match that here.
@@ -7621,8 +7614,6 @@ let Predicates = [HasAVX, NoVLX] in {
           (VMOVDDUPrr VR128:$src)>;
 def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
           (VMOVDDUPrm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
-          (VMOVDDUPrm addr:$src)>;
 }
 
 let Predicates = [HasAVX1Only] in {
Loading…
Reference in New Issue