forked from OSchip/llvm-project
[X86][AVX] Provide SubVectorBroadcast fallback if load fold fails (PR29133)
Fix for PR29133, matching the approach that was taken for AVX1 scalar broadcasts. llvm-svn: 279735
This commit is contained in:
parent
05cf9c22f1
commit
0ad9f3e93b
|
@ -12987,8 +12987,7 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
|
|||
// lower to a VBROADCASTF128/VBROADCASTI128/etc.
|
||||
if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
|
||||
if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
|
||||
areOnlyUsersOf(SubVec2.getNode(), {Op, Vec}) &&
|
||||
!Ld->hasAnyUseOfValue(1)) {
|
||||
areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
|
||||
return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1026,6 +1026,21 @@ def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
|
|||
(VBROADCASTI32X4Z256rm addr:$src)>;
|
||||
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
|
||||
(VBROADCASTI32X4Z256rm addr:$src)>;
|
||||
|
||||
// Provide fallback in case the load node that is used in the patterns above
|
||||
// is used by additional users, which prevents the pattern selection.
|
||||
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
|
||||
(VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||
(v4f32 VR128X:$src), 1)>;
|
||||
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
|
||||
(VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||
(v4i32 VR128X:$src), 1)>;
|
||||
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
|
||||
(VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||
(v8i16 VR128X:$src), 1)>;
|
||||
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
|
||||
(VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||
(v16i8 VR128X:$src), 1)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasVLX, HasDQI] in {
|
||||
|
@ -1042,6 +1057,15 @@ def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
|
|||
(VBROADCASTF32X4Z256rm addr:$src)>;
|
||||
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
|
||||
(VBROADCASTI32X4Z256rm addr:$src)>;
|
||||
|
||||
// Provide fallback in case the load node that is used in the patterns above
|
||||
// is used by additional users, which prevents the pattern selection.
|
||||
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
|
||||
(VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||
(v2f64 VR128X:$src), 1)>;
|
||||
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
|
||||
(VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||
(v2i64 VR128X:$src), 1)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasDQI] in {
|
||||
|
@ -1057,6 +1081,15 @@ defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
|
|||
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
|
||||
v16f32_info, v8f32x_info>,
|
||||
EVEX_V512, EVEX_CD8<32, CD8VT8>;
|
||||
|
||||
// Provide fallback in case the load node that is used in the patterns above
|
||||
// is used by additional users, which prevents the pattern selection.
|
||||
// Register-source fallback for a 128->256-bit f64 subvector broadcast: place
// $src in the low xmm half of an undefined ymm, then insert $src again at
// 128-bit lane index 1. v4f64 is a floating-point type, so use the FP-domain
// VINSERTF64x2 form; the v4i64 pattern that follows covers the integer form.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
|
||||
(VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||
(v2f64 VR128X:$src), 1)>;
|
||||
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
|
||||
(VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
|
||||
(v2i64 VR128X:$src), 1)>;
|
||||
}
|
||||
|
||||
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
|
||||
|
|
|
@ -8662,6 +8662,51 @@ let Predicates = [HasAVX2] in {
|
|||
defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
|
||||
defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SubVector Broadcasts
|
||||
// Provide fallback in case the load node that is used in the patterns above
|
||||
// is used by additional users, which prevents the pattern selection.
|
||||
|
||||
let Predicates = [HasAVX2, NoVLX] in {
|
||||
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
|
||||
(VINSERTI128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v2i64 VR128:$src), 1)>;
|
||||
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
|
||||
(VINSERTI128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v4i32 VR128:$src), 1)>;
|
||||
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
|
||||
(VINSERTI128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v8i16 VR128:$src), 1)>;
|
||||
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
|
||||
(VINSERTI128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v16i8 VR128:$src), 1)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
|
||||
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v2f64 VR128:$src), 1)>;
|
||||
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
|
||||
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v4f32 VR128:$src), 1)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
|
||||
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v2i64 VR128:$src), 1)>;
|
||||
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
|
||||
(VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v4i32 VR128:$src), 1)>;
|
||||
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
|
||||
(VINSERTF128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v8i16 VR128:$src), 1)>;
|
||||
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
|
||||
(VINSERTF128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
|
||||
(v16i8 VR128:$src), 1)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Variable Bit Shifts
|
||||
//
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue