forked from OSchip/llvm-project
[X86] Allow masked VBROADCAST instructions to be turned into BLENDM with a broadcast load to avoid a copy.
The BLENDM instructions allow two sources and an independent destination, while masked VBROADCAST has the destination tied to the source. llvm-svn: 372068
This commit is contained in:
parent
2cc57bedd5
commit
769dd59a27
|
@ -1123,50 +1123,103 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
|
|||
X86VectorVTInfo MaskInfo,
|
||||
X86VectorVTInfo DestInfo,
|
||||
X86VectorVTInfo SrcInfo,
|
||||
bit IsConvertibleToThreeAddress,
|
||||
SDPatternOperator UnmaskedOp = X86VBroadcast> {
|
||||
let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
|
||||
defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
|
||||
(outs MaskInfo.RC:$dst),
|
||||
(ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT
|
||||
(UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT
|
||||
(X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
|
||||
T8PD, EVEX, Sched<[SchedRR]>;
|
||||
let mayLoad = 1 in
|
||||
defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
|
||||
(outs MaskInfo.RC:$dst),
|
||||
(ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT (UnmaskedOp
|
||||
(SrcInfo.ScalarLdFrag addr:$src))))),
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT (X86VBroadcast
|
||||
(SrcInfo.ScalarLdFrag addr:$src)))))>,
|
||||
T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
|
||||
Sched<[SchedRM]>;
|
||||
}
|
||||
let hasSideEffects = 0 in
|
||||
def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set MaskInfo.RC:$dst,
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT
|
||||
(UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
|
||||
DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
|
||||
def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
|
||||
(ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
|
||||
"${dst} {${mask}} {z}, $src}"),
|
||||
[(set MaskInfo.RC:$dst,
|
||||
(vselect MaskInfo.KRCWM:$mask,
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT
|
||||
(X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
|
||||
MaskInfo.ImmAllZerosV))],
|
||||
DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
|
||||
let Constraints = "$src0 = $dst" in
|
||||
def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
|
||||
(ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
|
||||
SrcInfo.RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
|
||||
"${dst} {${mask}}, $src}"),
|
||||
[(set MaskInfo.RC:$dst,
|
||||
(vselect MaskInfo.KRCWM:$mask,
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT
|
||||
(X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
|
||||
MaskInfo.RC:$src0))],
|
||||
DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
|
||||
|
||||
let hasSideEffects = 0, mayLoad = 1 in
|
||||
def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
|
||||
(ins SrcInfo.ScalarMemOp:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set MaskInfo.RC:$dst,
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT
|
||||
(UnmaskedOp (SrcInfo.ScalarLdFrag addr:$src))))))],
|
||||
DestInfo.ExeDomain>, T8PD, EVEX,
|
||||
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
|
||||
|
||||
def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
|
||||
(ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
|
||||
"${dst} {${mask}} {z}, $src}"),
|
||||
[(set MaskInfo.RC:$dst,
|
||||
(vselect MaskInfo.KRCWM:$mask,
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT
|
||||
(X86VBroadcast (SrcInfo.ScalarLdFrag addr:$src))))),
|
||||
MaskInfo.ImmAllZerosV))],
|
||||
DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
|
||||
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
|
||||
|
||||
let Constraints = "$src0 = $dst",
|
||||
isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
|
||||
def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
|
||||
(ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
|
||||
SrcInfo.ScalarMemOp:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
|
||||
"${dst} {${mask}}, $src}"),
|
||||
[(set MaskInfo.RC:$dst,
|
||||
(vselect MaskInfo.KRCWM:$mask,
|
||||
(MaskInfo.VT
|
||||
(bitconvert
|
||||
(DestInfo.VT
|
||||
(X86VBroadcast (SrcInfo.ScalarLdFrag addr:$src))))),
|
||||
MaskInfo.RC:$src0))],
|
||||
DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
|
||||
EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
|
||||
}
|
||||
|
||||
// Helper multiclass to force the mask and broadcast result to the same type.
|
||||
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
|
||||
SchedWrite SchedRR, SchedWrite SchedRM,
|
||||
X86VectorVTInfo DestInfo,
|
||||
X86VectorVTInfo SrcInfo> :
|
||||
X86VectorVTInfo SrcInfo,
|
||||
bit IsConvertibleToThreeAddress> :
|
||||
avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
|
||||
DestInfo, DestInfo, SrcInfo>;
|
||||
DestInfo, DestInfo, SrcInfo,
|
||||
IsConvertibleToThreeAddress>;
|
||||
|
||||
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
|
||||
AVX512VLVectorVTInfo _> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
|
||||
WriteFShuffle256Ld, _.info512, _.info128>,
|
||||
WriteFShuffle256Ld, _.info512, _.info128, 1>,
|
||||
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
|
||||
_.info128>,
|
||||
EVEX_V512;
|
||||
|
@ -1174,7 +1227,7 @@ multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
|
|||
|
||||
let Predicates = [HasVLX] in {
|
||||
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
|
||||
WriteFShuffle256Ld, _.info256, _.info128>,
|
||||
WriteFShuffle256Ld, _.info256, _.info128, 1>,
|
||||
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
|
||||
_.info128>,
|
||||
EVEX_V256;
|
||||
|
@ -1185,7 +1238,7 @@ multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
|
|||
AVX512VLVectorVTInfo _> {
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
|
||||
WriteFShuffle256Ld, _.info512, _.info128>,
|
||||
WriteFShuffle256Ld, _.info512, _.info128, 1>,
|
||||
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
|
||||
_.info128>,
|
||||
EVEX_V512;
|
||||
|
@ -1193,12 +1246,12 @@ multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
|
|||
|
||||
let Predicates = [HasVLX] in {
|
||||
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
|
||||
WriteFShuffle256Ld, _.info256, _.info128>,
|
||||
WriteFShuffle256Ld, _.info256, _.info128, 1>,
|
||||
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
|
||||
_.info128>,
|
||||
EVEX_V256;
|
||||
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
|
||||
WriteFShuffle256Ld, _.info128, _.info128>,
|
||||
WriteFShuffle256Ld, _.info128, _.info128, 1>,
|
||||
avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
|
||||
_.info128>,
|
||||
EVEX_V128;
|
||||
|
@ -1283,30 +1336,34 @@ defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
|
|||
X86VBroadcast, GR64, HasAVX512>, VEX_W;
|
||||
|
||||
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
|
||||
AVX512VLVectorVTInfo _, Predicate prd> {
|
||||
AVX512VLVectorVTInfo _, Predicate prd,
|
||||
bit IsConvertibleToThreeAddress> {
|
||||
let Predicates = [prd] in {
|
||||
defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
|
||||
WriteShuffle256Ld, _.info512, _.info128>,
|
||||
WriteShuffle256Ld, _.info512, _.info128,
|
||||
IsConvertibleToThreeAddress>,
|
||||
EVEX_V512;
|
||||
}
|
||||
let Predicates = [prd, HasVLX] in {
|
||||
defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
|
||||
WriteShuffle256Ld, _.info256, _.info128>,
|
||||
WriteShuffle256Ld, _.info256, _.info128,
|
||||
IsConvertibleToThreeAddress>,
|
||||
EVEX_V256;
|
||||
defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
|
||||
WriteShuffleXLd, _.info128, _.info128>,
|
||||
WriteShuffleXLd, _.info128, _.info128,
|
||||
IsConvertibleToThreeAddress>,
|
||||
EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
|
||||
avx512vl_i8_info, HasBWI>;
|
||||
avx512vl_i8_info, HasBWI, 0>;
|
||||
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
|
||||
avx512vl_i16_info, HasBWI>;
|
||||
avx512vl_i16_info, HasBWI, 0>;
|
||||
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
|
||||
avx512vl_i32_info, HasAVX512>;
|
||||
avx512vl_i32_info, HasAVX512, 1>;
|
||||
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
|
||||
avx512vl_i64_info, HasAVX512>, VEX_W1X;
|
||||
avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
|
||||
|
||||
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
|
||||
X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
|
||||
|
@ -1612,12 +1669,12 @@ multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
|
|||
let Predicates = [HasDQI] in
|
||||
defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
|
||||
WriteShuffle256Ld, _Dst.info512,
|
||||
_Src.info512, _Src.info128, null_frag>,
|
||||
_Src.info512, _Src.info128, 0, null_frag>,
|
||||
EVEX_V512;
|
||||
let Predicates = [HasDQI, HasVLX] in
|
||||
defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
|
||||
WriteShuffle256Ld, _Dst.info256,
|
||||
_Src.info256, _Src.info128, null_frag>,
|
||||
_Src.info256, _Src.info128, 0, null_frag>,
|
||||
EVEX_V256;
|
||||
}
|
||||
|
||||
|
@ -1628,7 +1685,7 @@ multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
|
|||
let Predicates = [HasDQI, HasVLX] in
|
||||
defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
|
||||
WriteShuffleXLd, _Dst.info128,
|
||||
_Src.info128, _Src.info128, null_frag>,
|
||||
_Src.info128, _Src.info128, 0, null_frag>,
|
||||
EVEX_V128;
|
||||
}
|
||||
|
||||
|
@ -1913,7 +1970,7 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
|
|||
}
|
||||
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
|
||||
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
|
||||
let mayLoad = 1, hasSideEffects = 0 in {
|
||||
let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
|
||||
def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
|
|
|
@ -1177,40 +1177,62 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
|
|||
case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
|
||||
case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
|
||||
case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
|
||||
case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: {
|
||||
case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk:
|
||||
case X86::VBROADCASTSDZ256mk:
|
||||
case X86::VBROADCASTSDZmk:
|
||||
case X86::VBROADCASTSSZ128mk:
|
||||
case X86::VBROADCASTSSZ256mk:
|
||||
case X86::VBROADCASTSSZmk:
|
||||
case X86::VPBROADCASTDZ128mk:
|
||||
case X86::VPBROADCASTDZ256mk:
|
||||
case X86::VPBROADCASTDZmk:
|
||||
case X86::VPBROADCASTQZ128mk:
|
||||
case X86::VPBROADCASTQZ256mk:
|
||||
case X86::VPBROADCASTQZmk: {
|
||||
unsigned Opc;
|
||||
switch (MIOpc) {
|
||||
default: llvm_unreachable("Unreachable!");
|
||||
case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
|
||||
case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
|
||||
case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
|
||||
case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
|
||||
case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
|
||||
case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
|
||||
case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
|
||||
case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
|
||||
case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
|
||||
case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
|
||||
case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
|
||||
case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
|
||||
case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
|
||||
case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
|
||||
case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
|
||||
case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
|
||||
case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
|
||||
case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
|
||||
case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
|
||||
case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
|
||||
case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
|
||||
case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
|
||||
case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
|
||||
case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
|
||||
case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
|
||||
case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
|
||||
case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
|
||||
case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
|
||||
case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
|
||||
case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
|
||||
case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
|
||||
case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
|
||||
case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
|
||||
case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
|
||||
case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
|
||||
case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
|
||||
case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
|
||||
case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
|
||||
case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
|
||||
case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
|
||||
case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
|
||||
case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
|
||||
case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
|
||||
case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
|
||||
case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
|
||||
case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
|
||||
case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
|
||||
case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
|
||||
case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
|
||||
case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
|
||||
case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
|
||||
case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
|
||||
case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
|
||||
case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
|
||||
case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
|
||||
case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
|
||||
case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
|
||||
case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
|
||||
case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
|
||||
case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
|
||||
case X86::VBROADCASTSDZ256mk: Opc = X86::VBLENDMPDZ256rmbk; break;
|
||||
case X86::VBROADCASTSDZmk: Opc = X86::VBLENDMPDZrmbk; break;
|
||||
case X86::VBROADCASTSSZ128mk: Opc = X86::VBLENDMPSZ128rmbk; break;
|
||||
case X86::VBROADCASTSSZ256mk: Opc = X86::VBLENDMPSZ256rmbk; break;
|
||||
case X86::VBROADCASTSSZmk: Opc = X86::VBLENDMPSZrmbk; break;
|
||||
case X86::VPBROADCASTDZ128mk: Opc = X86::VPBLENDMDZ128rmbk; break;
|
||||
case X86::VPBROADCASTDZ256mk: Opc = X86::VPBLENDMDZ256rmbk; break;
|
||||
case X86::VPBROADCASTDZmk: Opc = X86::VPBLENDMDZrmbk; break;
|
||||
case X86::VPBROADCASTQZ128mk: Opc = X86::VPBLENDMQZ128rmbk; break;
|
||||
case X86::VPBROADCASTQZ256mk: Opc = X86::VPBLENDMQZ256rmbk; break;
|
||||
case X86::VPBROADCASTQZmk: Opc = X86::VPBLENDMQZrmbk; break;
|
||||
}
|
||||
|
||||
NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
|
||||
|
@ -1224,6 +1246,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
|
|||
.add(MI.getOperand(7));
|
||||
break;
|
||||
}
|
||||
|
||||
case X86::VMOVDQU8Z128rrk:
|
||||
case X86::VMOVDQU8Z256rrk:
|
||||
case X86::VMOVDQU8Zrrk:
|
||||
|
|
|
@ -4458,8 +4458,7 @@ define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) {
|
|||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vcmpgtps 4096(%rdi,%rax), %ymm0, %k1
|
||||
; CHECK-NEXT: vmovaps %ymm1, %ymm2
|
||||
; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 {%k1}
|
||||
; CHECK-NEXT: vblendmps {{.*}}(%rip){1to8}, %ymm1, %ymm2 {%k1}
|
||||
; CHECK-NEXT: vmovups %ymm2, 4096(%rdi,%rax)
|
||||
; CHECK-NEXT: addq $32, %rax
|
||||
; CHECK-NEXT: jne .LBB126_1
|
||||
|
|
Loading…
Reference in New Issue