[X86][AVX] Provide SubVectorBroadcast fallback if load fold fails (PR29133)

Fix for PR29133, matching the approach that was taken for AVX1 scalar broadcasts.
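
For context, PR29133 hit the case where the 128-bit load feeding a subvector
splat has additional users: the load then cannot be folded into
VBROADCASTF128/VBROADCASTI128 (which exist only in load form), and selection
of X86ISD::SUBV_BROADCAST could fail outright. A minimal IR sketch of that
shape (illustrative only; the function name is an assumption, not a line from
the committed test):

  ; The store is a second user of %v, so the load cannot be folded
  ; into the broadcast instruction.
  define <4 x double> @splat_2f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
    %v = load <2 x double>, <2 x double>* %p0
    store <2 x double> %v, <2 x double>* %p1
    %s = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
    ret <4 x double> %s
  }

With the fallback patterns below this now selects to a plain register
re-insertion, roughly: vmovaps (%rdi), %xmm0 / vmovaps %xmm0, (%rsi) /
vinsertf128 $1, %xmm0, %ymm0, %ymm0.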

llvm-svn: 279735
Simon Pilgrim  2016-08-25 12:45:16 +00:00
commit 0ad9f3e93b (parent 05cf9c22f1)
4 changed files with 1302 additions and 2 deletions

lib/Target/X86/X86ISelLowering.cpp

@@ -12987,8 +12987,7 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
     // lower to a VBROADCASTF128/VBROADCASTI128/etc.
     if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
       if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
-          areOnlyUsersOf(SubVec2.getNode(), {Op, Vec}) &&
-          !Ld->hasAnyUseOfValue(1)) {
+          areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
         return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
       }
     }
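
Note: result 1 of a LoadSDNode is its chain output, so the removed
!Ld->hasAnyUseOfValue(1) check rejected the combine whenever other nodes were
chained on the load, a conservative proxy for "the load cannot be folded away
at selection time". With the register fallback patterns added below,
X86ISD::SUBV_BROADCAST is selectable even when the load fold fails, so the
restriction can be dropped.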

lib/Target/X86/X86InstrAVX512.td

@@ -1026,6 +1026,21 @@ def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
           (VBROADCASTI32X4Z256rm addr:$src)>;
 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
           (VBROADCASTI32X4Z256rm addr:$src)>;
+
+// Provide fallback in case the load node that is used in the patterns above
+// is used by additional users, which prevents the pattern selection.
+def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
+          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v4f32 VR128X:$src), 1)>;
+def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
+          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v4i32 VR128X:$src), 1)>;
+def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
+          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v8i16 VR128X:$src), 1)>;
+def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
+          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v16i8 VR128X:$src), 1)>;
 }
 
 let Predicates = [HasVLX, HasDQI] in {
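
(The INSERT_SUBREG of an IMPLICIT_DEF places $src in the low 128 bits of an
otherwise-undefined 256-bit register; the VINSERT then copies the same
register into the upper 128 bits, producing the subvector splat entirely in
registers.)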
@@ -1042,6 +1057,15 @@ def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
           (VBROADCASTF32X4Z256rm addr:$src)>;
 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
           (VBROADCASTI32X4Z256rm addr:$src)>;
+
+// Provide fallback in case the load node that is used in the patterns above
+// is used by additional users, which prevents the pattern selection.
+def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
+          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v2f64 VR128X:$src), 1)>;
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
+          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v2i64 VR128X:$src), 1)>;
 }
 
 let Predicates = [HasDQI] in {
@@ -1057,6 +1081,15 @@ defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
                        v16f32_info, v8f32x_info>,
                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
+
+// Provide fallback in case the load node that is used in the patterns above
+// is used by additional users, which prevents the pattern selection.
+def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
+          (VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v2f64 VR128X:$src), 1)>;
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
+          (VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v2i64 VR128X:$src), 1)>;
 }
 
 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,

lib/Target/X86/X86InstrSSE.td

@@ -8662,6 +8662,51 @@ let Predicates = [HasAVX2] in {
   defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
   defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
 }
+
+//===----------------------------------------------------------------------===//
+// SubVector Broadcasts
+// Provide fallback in case the load node that is used in the patterns above
+// is used by additional users, which prevents the pattern selection.
+
+let Predicates = [HasAVX2, NoVLX] in {
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
+          (VINSERTI128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v2i64 VR128:$src), 1)>;
+def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
+          (VINSERTI128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v4i32 VR128:$src), 1)>;
+def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
+          (VINSERTI128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v8i16 VR128:$src), 1)>;
+def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
+          (VINSERTI128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v16i8 VR128:$src), 1)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
+          (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v2f64 VR128:$src), 1)>;
+def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
+          (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v4f32 VR128:$src), 1)>;
+}
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
+          (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v2i64 VR128:$src), 1)>;
+def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
+          (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v4i32 VR128:$src), 1)>;
+def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
+          (VINSERTF128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v8i16 VR128:$src), 1)>;
+def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
+          (VINSERTF128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+                         (v16i8 VR128:$src), 1)>;
+}
+
 //===----------------------------------------------------------------------===//
 // Variable Bit Shifts
 //

File diff suppressed because it is too large.
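
The suppressed diff is presumably the commit's regression-test update, which
accounts for the bulk of the 1302 added lines. A hedged sketch of the kind of
case the new AVX-512VL patterns cover (function name and CHECK lines are
assumptions, not contents of the suppressed file):

  define <8 x i32> @splat_4i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
  ; VLX: vmovdqa (%rdi), %xmm0
  ; VLX: vmovdqa %xmm0, (%rsi)
  ; VLX: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
    %v = load <4 x i32>, <4 x i32>* %p0
    store <4 x i32> %v, <4 x i32>* %p1
    %s = shufflevector <4 x i32> %v, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    ret <8 x i32> %s
  }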