forked from OSchip/llvm-project
Optimize shuffle nodes that fit the register form of the VBROADCAST instruction on AVX2.
llvm-svn: 159504
This commit is contained in:
parent
3af251dbf1
commit
9af899fa88
|
@ -5047,8 +5047,16 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
|
|||
|
||||
SDValue Sc = Op.getOperand(0);
|
||||
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
|
||||
Sc.getOpcode() != ISD::BUILD_VECTOR)
|
||||
return SDValue();
|
||||
Sc.getOpcode() != ISD::BUILD_VECTOR) {
|
||||
|
||||
if (!Subtarget->hasAVX2())
|
||||
return SDValue();
|
||||
|
||||
// Use the register form of the broadcast instruction available on AVX2.
|
||||
if (VT.is256BitVector())
|
||||
Sc = Extract128BitVector(Sc, 0, DAG, dl);
|
||||
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
|
||||
}
|
||||
|
||||
Ld = Sc.getOperand(0);
|
||||
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
|
||||
|
|
|
@ -7272,8 +7272,8 @@ let ExeDomain = SSEPackedSingle in {
|
|||
int_x86_avx2_vbroadcast_ss_ps_256>;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
|
||||
int_x86_avx2_vbroadcast_sd_pd_256>;
|
||||
def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
|
||||
int_x86_avx2_vbroadcast_sd_pd_256>;
|
||||
|
||||
let Predicates = [HasAVX2] in
|
||||
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
|
||||
|
@ -7684,6 +7684,31 @@ let Predicates = [HasAVX2] in {
|
|||
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
|
||||
(VPBROADCASTQYrm addr:$src)>;
|
||||
|
||||
// Register-source broadcast patterns: each one matches an X86VBroadcast node
// whose operand is a 128-bit vector register (VR128) and selects an rr-form
// broadcast instruction. Every element type appears twice: once with a result
// of the same 128-bit type, and once with the 256-bit result type (the
// instructions with a "Y" in their name).
def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
|
||||
(VPBROADCASTBrr VR128:$src)>;
|
||||
def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
|
||||
(VPBROADCASTBYrr VR128:$src)>;
|
||||
def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
|
||||
(VPBROADCASTWrr VR128:$src)>;
|
||||
def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
|
||||
(VPBROADCASTWYrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
|
||||
(VPBROADCASTDrr VR128:$src)>;
|
||||
def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
|
||||
(VPBROADCASTDYrr VR128:$src)>;
|
||||
def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
|
||||
(VPBROADCASTQrr VR128:$src)>;
|
||||
def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
|
||||
(VPBROADCASTQYrr VR128:$src)>;
|
||||
def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
|
||||
(VBROADCASTSSrr VR128:$src)>;
|
||||
def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
|
||||
(VBROADCASTSSYrr VR128:$src)>;
|
||||
// NOTE(review): the v2f64 result selects VPBROADCASTQrr (the same instruction
// as the v2i64 pattern) rather than a VBROADCASTSD variant; the only
// VBROADCASTSD register form visible in this change is the 256-bit
// VBROADCASTSDYrr used for v4f64 below - confirm this choice is intended.
def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
|
||||
(VPBROADCASTQrr VR128:$src)>;
|
||||
def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
|
||||
(VBROADCASTSDYrr VR128:$src)>;
|
||||
|
||||
// Provide fallback in case the load node that is used in the patterns above
|
||||
// is used by additional users, which prevents the pattern selection.
|
||||
let AddedComplexity = 20 in {
|
||||
|
@ -7694,7 +7719,7 @@ let Predicates = [HasAVX2] in {
|
|||
(VBROADCASTSSYrr
|
||||
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
|
||||
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
|
||||
(VBROADCASTSDrr
|
||||
(VBROADCASTSDYrr
|
||||
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
|
||||
|
||||
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
|
||||
|
@ -7704,7 +7729,7 @@ let Predicates = [HasAVX2] in {
|
|||
(VBROADCASTSSYrr
|
||||
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
|
||||
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
|
||||
(VBROADCASTSDrr
|
||||
(VBROADCASTSDYrr
|
||||
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -259,3 +259,99 @@ define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
|
|||
ret <4 x double> %wide
|
||||
}
|
||||
|
||||
; Splat test, <8 x float>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vbroadcastss before the ret.
;CHECK: _inreg8xfloat
|
||||
;CHECK: vbroadcastss
|
||||
;CHECK: ret
|
||||
define <8 x float> @_inreg8xfloat(<8 x float> %a) {
|
||||
%b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x float> %b
|
||||
}
|
||||
|
||||
; Splat test, <4 x float>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vbroadcastss before the ret.
;CHECK: _inreg4xfloat
|
||||
;CHECK: vbroadcastss
|
||||
;CHECK: ret
|
||||
define <4 x float> @_inreg4xfloat(<4 x float> %a) {
|
||||
%b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x float> %b
|
||||
}
|
||||
|
||||
; Splat test, <16 x i16>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vpbroadcastw before the ret.
;CHECK: _inreg16xi16
|
||||
;CHECK: vpbroadcastw
|
||||
;CHECK: ret
|
||||
define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
|
||||
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i16> %b
|
||||
}
|
||||
|
||||
; Splat test, <8 x i16>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vpbroadcastw before the ret.
;CHECK: _inreg8xi16
|
||||
;CHECK: vpbroadcastw
|
||||
;CHECK: ret
|
||||
define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
|
||||
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i16> %b
|
||||
}
|
||||
|
||||
|
||||
; Splat test, <4 x i64>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vpbroadcastq before the ret.
;CHECK: _inreg4xi64
|
||||
;CHECK: vpbroadcastq
|
||||
;CHECK: ret
|
||||
define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
|
||||
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i64> %b
|
||||
}
|
||||
|
||||
; Splat test, <2 x i64>: the all-zero shuffle mask copies element 0 of the
; argument %a into both lanes. The source is a function argument, not a load,
; and the directives below expect a vpbroadcastq before the ret.
;CHECK: _inreg2xi64
|
||||
;CHECK: vpbroadcastq
|
||||
;CHECK: ret
|
||||
define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
|
||||
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x i64> %b
|
||||
}
|
||||
|
||||
; Splat test, <4 x double>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vbroadcastsd before the ret.
;CHECK: _inreg4xdouble
|
||||
;CHECK: vbroadcastsd
|
||||
;CHECK: ret
|
||||
define <4 x double> @_inreg4xdouble(<4 x double> %a) {
|
||||
%b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x double> %b
|
||||
}
|
||||
|
||||
; Splat test, <2 x double>: the all-zero shuffle mask copies element 0 of the
; argument %a into both lanes. The source is a function argument, not a load.
; Unlike the <4 x double> case, the directives below expect vpbroadcastq
; (not vbroadcastsd) before the ret.
;CHECK: _inreg2xdouble
|
||||
;CHECK: vpbroadcastq
|
||||
;CHECK: ret
|
||||
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
|
||||
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
|
||||
ret <2 x double> %b
|
||||
}
|
||||
|
||||
; Splat test, <8 x i32>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vpbroadcastd before the ret.
;CHECK: _inreg8xi32
|
||||
;CHECK: vpbroadcastd
|
||||
;CHECK: ret
|
||||
define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
|
||||
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
|
||||
ret <8 x i32> %b
|
||||
}
|
||||
|
||||
; Splat test, <4 x i32>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vpbroadcastd before the ret.
;CHECK: _inreg4xi32
|
||||
;CHECK: vpbroadcastd
|
||||
;CHECK: ret
|
||||
define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
|
||||
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
ret <4 x i32> %b
|
||||
}
|
||||
|
||||
; Splat test, <32 x i8>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vpbroadcastb before the ret.
;CHECK: _inreg32xi8
|
||||
;CHECK: vpbroadcastb
|
||||
;CHECK: ret
|
||||
define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
|
||||
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
|
||||
ret <32 x i8> %b
|
||||
}
|
||||
|
||||
; Splat test, <16 x i8>: the all-zero shuffle mask copies element 0 of the
; argument %a into every lane. The source is a function argument, not a load,
; and the directives below expect a vpbroadcastb before the ret.
;CHECK: _inreg16xi8
|
||||
;CHECK: vpbroadcastb
|
||||
;CHECK: ret
|
||||
define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
|
||||
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
ret <16 x i8> %b
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue