[X86] Split WriteVecALU/WritePHAdd into XMM and YMM/ZMM scheduler classes

llvm-svn: 331453
This commit is contained in:
Simon Pilgrim 2018-05-03 13:27:10 +00:00
parent 2e54353b86
commit f7dd6069a5
16 changed files with 314 additions and 978 deletions

View File

@ -4684,16 +4684,16 @@ let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
loadv2i64, i128mem,
WritePHAdd, 0>, VEX_4V, VEX_WIG;
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
loadv2i64, i128mem,
WritePHAdd, 0>, VEX_4V, VEX_WIG;
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
loadv2i64, i128mem,
WritePHAdd, 0>, VEX_4V, VEX_WIG;
SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
loadv2i64, i128mem,
WritePHAdd, 0>, VEX_4V;
SchedWritePHAdd.XMM, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
@ -4705,10 +4705,10 @@ let isCommutable = 0 in {
SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
WritePHAdd, loadv2i64, 0>, VEX_4V, VEX_WIG;
SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
WritePHAdd, loadv2i64, 0>, VEX_4V, VEX_WIG;
SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
}
@ -4730,16 +4730,16 @@ let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
VR256, loadv4i64, i256mem,
WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
loadv4i64, i256mem,
WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
VR256, loadv4i64, i256mem,
WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
loadv4i64, i256mem,
WritePHAdd, 0>, VEX_4V, VEX_L;
SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
@ -4748,10 +4748,10 @@ let isCommutable = 0 in {
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
WritePHAdd>, VEX_4V, VEX_L, VEX_WIG;
SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
WritePHAdd>, VEX_4V, VEX_L, VEX_WIG;
SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}
@ -4759,13 +4759,13 @@ let isCommutable = 0 in {
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
memopv2i64, i128mem, WritePHAdd>;
memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
memopv2i64, i128mem, WritePHAdd>;
memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
memopv2i64, i128mem, WritePHAdd>;
memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
memopv2i64, i128mem, WritePHAdd>;
memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, memopv2i64>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
@ -4776,10 +4776,10 @@ let isCommutable = 0 in {
memopv2i64, i128mem, SchedWriteVarShuffle.XMM>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
WritePHAdd, memopv2i64>;
SchedWritePHAdd.XMM, memopv2i64>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
WritePHAdd, memopv2i64>;
SchedWritePHAdd.XMM, memopv2i64>;
defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
v16i8, VR128, memopv2i64, i128mem,
SchedWriteVecIMul.XMM>;

View File

@ -199,7 +199,8 @@ def : WriteRes<WriteVecLoad, [BWPort23]> { let Latency = 5; }
def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteVecMove, [BWPort015]>;
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
@ -365,9 +366,10 @@ def : WriteRes<WriteNop, []>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : BWWriteResPair<WriteFHAdd, [BWPort1,BWPort5], 5, [1,2], 3>;
defm : BWWriteResPair<WriteFHAdd, [BWPort1,BWPort5], 5, [1,2], 3, 5>;
defm : BWWriteResPair<WriteFHAddY, [BWPort1,BWPort5], 5, [1,2], 3, 6>;
defm : BWWriteResPair<WritePHAdd, [BWPort5,BWPort15], 3, [2,1], 3>;
defm : BWWriteResPair<WritePHAdd, [BWPort5,BWPort15], 3, [2,1], 3, 5>;
defm : BWWriteResPair<WritePHAddY, [BWPort5,BWPort15], 3, [2,1], 3, 6>;
// Remaining instrs.
@ -1087,55 +1089,6 @@ def: InstRW<[BWWriteResGroup74], (instregex "FCOM32m",
"FCOMP32m",
"FCOMP64m")>;
// BWWriteResGroup76: per-instruction scheduling override that occupies
// BWPort23 and BWPort15 for one cycle each (ResourceCycles has one entry per
// listed resource), with a latency of 7 and 2 micro-ops.
def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
// Bind the group to every instruction whose name matches one of the patterns
// below. All patterns end in "Yrm" and name vector-integer abs/add/avg/compare/
// max/min/sign/sub opcodes.
// NOTE(review): "Yrm" presumably denotes the 256-bit (YMM) load-folding forms —
// confirm against the instruction definitions.
def: InstRW<[BWWriteResGroup76], (instregex "VPABSBYrm",
"VPABSDYrm",
"VPABSWYrm",
"VPADDBYrm",
"VPADDDYrm",
"VPADDQYrm",
"VPADDSBYrm",
"VPADDSWYrm",
"VPADDUSBYrm",
"VPADDUSWYrm",
"VPADDWYrm",
"VPAVGBYrm",
"VPAVGWYrm",
"VPCMPEQBYrm",
"VPCMPEQDYrm",
"VPCMPEQQYrm",
"VPCMPEQWYrm",
"VPCMPGTBYrm",
"VPCMPGTDYrm",
"VPCMPGTWYrm",
"VPMAXSBYrm",
"VPMAXSDYrm",
"VPMAXSWYrm",
"VPMAXUBYrm",
"VPMAXUDYrm",
"VPMAXUWYrm",
"VPMINSBYrm",
"VPMINSDYrm",
"VPMINSWYrm",
"VPMINUBYrm",
"VPMINUDYrm",
"VPMINUWYrm",
"VPSIGNBYrm",
"VPSIGNDYrm",
"VPSIGNWYrm",
"VPSUBBYrm",
"VPSUBDYrm",
"VPSUBQYrm",
"VPSUBSBYrm",
"VPSUBSWYrm",
"VPSUBUSBYrm",
"VPSUBUSWYrm",
"VPSUBWYrm")>;
def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
let Latency = 7;
let NumMicroOps = 2;
@ -1415,18 +1368,6 @@ def: InstRW<[BWWriteResGroup109], (instregex "VPSLLVDYrm",
"VPSRAVDYrm",
"VPSRLVDYrm")>;
// BWWriteResGroup110: scheduling override using BWPort5 (2 cycles), BWPort23
// (1 cycle) and BWPort15 (1 cycle), with a latency of 9 and 4 micro-ops.
def BWWriteResGroup110 : SchedWriteRes<[BWPort5,BWPort23,BWPort15]> {
let Latency = 9;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
// Applied to the horizontal add/sub opcodes (D, W and saturating SW variants)
// whose names end in "Yrm".
// NOTE(review): presumably the 256-bit memory-operand forms — confirm.
def: InstRW<[BWWriteResGroup110], (instregex "VPHADDDYrm",
"VPHADDSWYrm",
"VPHADDWYrm",
"VPHSUBDYrm",
"VPHSUBSWYrm",
"VPHSUBWYrm")>;
def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 4;

View File

@ -197,7 +197,8 @@ def : WriteRes<WriteVecMove, [HWPort015]>;
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>;
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecALUY, [HWPort15], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteVecIMulY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
@ -609,6 +610,7 @@ def : InstRW<[HWWriteFXTRACT], (instrs FXTRACT)>;
defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
//=== Floating Point XMM and YMM Instructions ===//
@ -940,105 +942,14 @@ def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm",
"BLSI(32|64)rm",
"BLSMSK(32|64)rm",
"BLSR(32|64)rm",
"MOVBE(16|32|64)rm")>;
// HWWriteResGroup16_1: scheduling override that occupies HWPort23 and HWPort15
// for one cycle each, with a latency of 7 and 2 micro-ops.
def HWWriteResGroup16_1 : SchedWriteRes<[HWPort23,HWPort15]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
// Bound to vector-integer abs/add/avg/compare/max/min/sign/sub opcodes ending
// in "rm"; the optional "(V?)" prefix makes each pattern cover both the
// V-prefixed and unprefixed spelling of the opcode name.
// NOTE(review): presumably these are the 128-bit load-folding forms (the
// "Yrm" names are handled by HWWriteResGroup16_2) — confirm.
def: InstRW<[HWWriteResGroup16_1], (instregex "(V?)PABSBrm",
"(V?)PABSDrm",
"(V?)PABSWrm",
"(V?)PADDBrm",
"(V?)PADDDrm",
"(V?)PADDQrm",
"(V?)PADDSBrm",
"(V?)PADDSWrm",
"(V?)PADDUSBrm",
"(V?)PADDUSWrm",
"(V?)PADDWrm",
"(V?)PAVGBrm",
"(V?)PAVGWrm",
"(V?)PCMPEQBrm",
"(V?)PCMPEQDrm",
"(V?)PCMPEQQrm",
"(V?)PCMPEQWrm",
"(V?)PCMPGTBrm",
"(V?)PCMPGTDrm",
"(V?)PCMPGTWrm",
"(V?)PMAXSBrm",
"(V?)PMAXSDrm",
"(V?)PMAXSWrm",
"(V?)PMAXUBrm",
"(V?)PMAXUDrm",
"(V?)PMAXUWrm",
"(V?)PMINSBrm",
"(V?)PMINSDrm",
"(V?)PMINSWrm",
"(V?)PMINUBrm",
"(V?)PMINUDrm",
"(V?)PMINUWrm",
"(V?)PSIGNBrm",
"(V?)PSIGNDrm",
"(V?)PSIGNWrm",
"(V?)PSUBBrm",
"(V?)PSUBDrm",
"(V?)PSUBQrm",
"(V?)PSUBSBrm",
"(V?)PSUBSWrm",
"(V?)PSUBUSBrm",
"(V?)PSUBUSWrm",
"(V?)PSUBWrm")>;
// HWWriteResGroup16_2: same resources, micro-op count and resource cycles as
// HWWriteResGroup16_1 (HWPort23 + HWPort15, 2 micro-ops, one cycle each), but
// with a latency of 8 instead of 7.
def HWWriteResGroup16_2 : SchedWriteRes<[HWPort23,HWPort15]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
// Bound to the "Yrm"-suffixed counterparts of the opcodes listed for
// HWWriteResGroup16_1.
// NOTE(review): presumably the 256-bit (YMM) load-folding forms — confirm.
def: InstRW<[HWWriteResGroup16_2], (instregex "VPABSBYrm",
"VPABSDYrm",
"VPABSWYrm",
"VPADDBYrm",
"VPADDDYrm",
"VPADDQYrm",
"VPADDSBYrm",
"VPADDSWYrm",
"VPADDUSBYrm",
"VPADDUSWYrm",
"VPADDWYrm",
"VPAVGBYrm",
"VPAVGWYrm",
"VPCMPEQBYrm",
"VPCMPEQDYrm",
"VPCMPEQQYrm",
"VPCMPEQWYrm",
"VPCMPGTBYrm",
"VPCMPGTDYrm",
"VPCMPGTWYrm",
"VPMAXSBYrm",
"VPMAXSDYrm",
"VPMAXSWYrm",
"VPMAXUBYrm",
"VPMAXUDYrm",
"VPMAXUWYrm",
"VPMINSBYrm",
"VPMINSDYrm",
"VPMINSWYrm",
"VPMINUBYrm",
"VPMINUDYrm",
"VPMINUWYrm",
"VPSIGNBYrm",
"VPSIGNDYrm",
"VPSIGNWYrm",
"VPSUBBYrm",
"VPSUBDYrm",
"VPSUBQYrm",
"VPSUBSBYrm",
"VPSUBSWYrm",
"VPSUBUSBYrm",
"VPSUBUSWYrm",
"VPSUBWYrm")>;
"MOVBE(16|32|64)rm",
"MMX_PABS(B|D|W)rm",
"MMX_P(ADD|SUB)(B|D|W|Q)irm",
"MMX_P(ADD|SUB)(U?)S(B|W)irm",
"MMX_PAVG(B|W)irm",
"MMX_PCMP(EQ|GT)(B|D|W)irm",
"MMX_P(MAX|MIN)(SW|UB)irm",
"MMX_PSIGN(B|D|W)rm")>;
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 7;
@ -1514,18 +1425,6 @@ def HWWriteResGroup64 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
}
def: InstRW<[HWWriteResGroup64], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>;
// HWWriteResGroup64_1: scheduling override using HWPort5 (2 cycles), HWPort23
// (1 cycle) and HWPort15 (1 cycle), with a latency of 10 and 4 micro-ops.
def HWWriteResGroup64_1 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
let Latency = 10;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
// Applied to the horizontal add/sub opcodes (D, W and saturating SW variants)
// whose names end in "Yrm".
// NOTE(review): presumably the 256-bit memory-operand forms — confirm.
def: InstRW<[HWWriteResGroup64_1], (instregex "VPHADDDYrm",
"VPHADDSWYrm",
"VPHADDWYrm",
"VPHSUBDYrm",
"VPHSUBSWYrm",
"VPHSUBWYrm")>;
def HWWriteResGroup65 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
let Latency = 8;
let NumMicroOps = 4;

View File

@ -177,7 +177,8 @@ def : WriteRes<WriteVecMove, [SBPort05]>;
defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>;
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
defm : SBWriteResPair<WriteVecALU, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
@ -222,6 +223,7 @@ def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 6>;
defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
////////////////////////////////////////////////////////////////////////////////
// String instructions.
@ -406,47 +408,12 @@ def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
"MMX_PADDQirr",
"MMX_PALIGNRrri",
"MMX_PSIGN(B|D|W)rr",
"(V?)PABSBrr",
"(V?)PABSDrr",
"(V?)PABSWrr",
def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNRrri",
"(V?)PACKSSDWrr",
"(V?)PACKSSWBrr",
"(V?)PACKUSDWrr",
"(V?)PACKUSWBrr",
"(V?)PADDBrr",
"(V?)PADDDrr",
"(V?)PADDQrr",
"(V?)PADDSBrr",
"(V?)PADDSWrr",
"(V?)PADDUSBrr",
"(V?)PADDUSWrr",
"(V?)PADDWrr",
"(V?)PALIGNRrri",
"(V?)PAVGBrr",
"(V?)PAVGWrr",
"(V?)PCMPEQBrr",
"(V?)PCMPEQDrr",
"(V?)PCMPEQQrr",
"(V?)PCMPEQWrr",
"(V?)PCMPGTBrr",
"(V?)PCMPGTDrr",
"(V?)PCMPGTWrr",
"(V?)PMAXSBrr",
"(V?)PMAXSDrr",
"(V?)PMAXSWrr",
"(V?)PMAXUBrr",
"(V?)PMAXUDrr",
"(V?)PMAXUWrr",
"(V?)PMINSBrr",
"(V?)PMINSDrr",
"(V?)PMINSWrr",
"(V?)PMINUBrr",
"(V?)PMINUDrr",
"(V?)PMINUWrr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@ -462,19 +429,8 @@ def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
"(V?)PSHUFDri",
"(V?)PSHUFHWri",
"(V?)PSHUFLWri",
"(V?)PSIGNBrr",
"(V?)PSIGNDrr",
"(V?)PSIGNWrr",
"(V?)PSLLDQri",
"(V?)PSRLDQri",
"(V?)PSUBBrr",
"(V?)PSUBDrr",
"(V?)PSUBQrr",
"(V?)PSUBSBrr",
"(V?)PSUBSWrr",
"(V?)PSUBUSBrr",
"(V?)PSUBUSWrr",
"(V?)PSUBWrr",
"(V?)PUNPCKHBWrr",
"(V?)PUNPCKHDQrr",
"(V?)PUNPCKHQDQrr",
@ -604,6 +560,12 @@ def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr",
"MMX_PADD(B|D|W)irr",
"MMX_P(ADD|SUB)(U?)S(B|W)irr",
"MMX_PAVG(B|W)irr",
"MMX_PCMP(EQ|GT)(B|D|W)irr",
"MMX_P(MAX|MIN)(SW|UB)irr",
"MMX_PSUB(B|D|Q|W)irr",
"PUSHFS64",
"(V?)CVTDQ2PS(Y?)rr")>;
@ -954,44 +916,11 @@ def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm",
"(V?)PABSBrm",
"(V?)PABSDrm",
"(V?)PABSWrm",
"(V?)PACKSSDWrm",
def: InstRW<[SBWriteResGroup59], (instregex "(V?)PACKSSDWrm",
"(V?)PACKSSWBrm",
"(V?)PACKUSDWrm",
"(V?)PACKUSWBrm",
"(V?)PADDBrm",
"(V?)PADDDrm",
"(V?)PADDQrm",
"(V?)PADDSBrm",
"(V?)PADDSWrm",
"(V?)PADDUSBrm",
"(V?)PADDUSWrm",
"(V?)PADDWrm",
"(V?)PALIGNRrmi",
"(V?)PAVGBrm",
"(V?)PAVGWrm",
"(V?)PCMPEQBrm",
"(V?)PCMPEQDrm",
"(V?)PCMPEQQrm",
"(V?)PCMPEQWrm",
"(V?)PCMPGTBrm",
"(V?)PCMPGTDrm",
"(V?)PCMPGTWrm",
"(V?)PMAXSBrm",
"(V?)PMAXSDrm",
"(V?)PMAXSWrm",
"(V?)PMAXUBrm",
"(V?)PMAXUDrm",
"(V?)PMAXUWrm",
"(V?)PMINSBrm",
"(V?)PMINSDrm",
"(V?)PMINSWrm",
"(V?)PMINUBrm",
"(V?)PMINUDrm",
"(V?)PMINUWrm",
"(V?)PMOVSXBDrm",
"(V?)PMOVSXBQrm",
"(V?)PMOVSXBWrm",
@ -1007,17 +936,6 @@ def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm",
"(V?)PSHUFDmi",
"(V?)PSHUFHWmi",
"(V?)PSHUFLWmi",
"(V?)PSIGNBrm",
"(V?)PSIGNDrm",
"(V?)PSIGNWrm",
"(V?)PSUBBrm",
"(V?)PSUBDrm",
"(V?)PSUBQrm",
"(V?)PSUBSBrm",
"(V?)PSUBSWrm",
"(V?)PSUBUSBrm",
"(V?)PSUBUSWrm",
"(V?)PSUBWrm",
"(V?)PUNPCKHBWrm",
"(V?)PUNPCKHDQrm",
"(V?)PUNPCKHQDQrm",
@ -1027,6 +945,18 @@ def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm",
"(V?)PUNPCKLQDQrm",
"(V?)PUNPCKLWDrm")>;
// SBWriteResGroup59a: scheduling override that occupies SBPort23 and SBPort1
// for one cycle each, with a latency of 8 and 2 micro-ops.
def SBWriteResGroup59a : SchedWriteRes<[SBPort23,SBPort1]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
// Bound to the MMX_-prefixed add/sub (plain and saturating), average, compare
// and max/min opcodes whose names end in "irm". These are the same opcode
// families that SBWriteResGroup21 lists with the "irr" suffix.
// NOTE(review): "irm" presumably marks the memory-operand form of each MMX
// instruction — confirm against the MMX instruction definitions.
def: InstRW<[SBWriteResGroup59a], (instregex "MMX_PADD(B|D|W)irm",
"MMX_P(ADD|SUB)(U?)S(B|W)irm",
"MMX_PAVG(B|W)irm",
"MMX_PCMP(EQ|GT)(B|D|W)irm",
"MMX_P(MAX|MIN)(SW|UB)irm",
"MMX_PSUB(B|D|Q|W)irm")>;
def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> {
let Latency = 7;
let NumMicroOps = 3;

View File

@ -195,7 +195,8 @@ def : WriteRes<WriteVecLoad, [SKLPort23]> { let Latency = 6; }
def : WriteRes<WriteVecStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteVecMove, [SKLPort015]>;
defm : SKLWriteResPair<WriteVecALU, [SKLPort15], 1>; // Vector integer ALU op, no logicals.
defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
@ -373,7 +374,8 @@ def : WriteRes<WriteNop, []>;
defm : SKLWriteResPair<WriteFHAdd, [SKLPort5,SKLPort01], 6, [2,1], 3, 6>;
defm : SKLWriteResPair<WriteFHAddY, [SKLPort5,SKLPort01], 6, [2,1], 3, 7>;
defm : SKLWriteResPair<WritePHAdd, [SKLPort15], 1>;
defm : SKLWriteResPair<WritePHAdd, [SKLPort5,SKLPort015], 3, [2,1], 3, 6>;
defm : SKLWriteResPair<WritePHAddY, [SKLPort5,SKLPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@ -429,38 +431,7 @@ def SKLWriteResGroup5 : SchedWriteRes<[SKLPort01]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PABSB(Y?)rr",
"(V?)PABSD(Y?)rr",
"(V?)PABSW(Y?)rr",
"(V?)PADDSB(Y?)rr",
"(V?)PADDSW(Y?)rr",
"(V?)PADDUSB(Y?)rr",
"(V?)PADDUSW(Y?)rr",
"(V?)PAVGB(Y?)rr",
"(V?)PAVGW(Y?)rr",
"(V?)PCMPEQB(Y?)rr",
"(V?)PCMPEQD(Y?)rr",
"(V?)PCMPEQQ(Y?)rr",
"(V?)PCMPEQW(Y?)rr",
"(V?)PCMPGTB(Y?)rr",
"(V?)PCMPGTD(Y?)rr",
"(V?)PCMPGTW(Y?)rr",
"(V?)PMAXSB(Y?)rr",
"(V?)PMAXSD(Y?)rr",
"(V?)PMAXSW(Y?)rr",
"(V?)PMAXUB(Y?)rr",
"(V?)PMAXUD(Y?)rr",
"(V?)PMAXUW(Y?)rr",
"(V?)PMINSB(Y?)rr",
"(V?)PMINSD(Y?)rr",
"(V?)PMINSW(Y?)rr",
"(V?)PMINUB(Y?)rr",
"(V?)PMINUD(Y?)rr",
"(V?)PMINUW(Y?)rr",
"(V?)PSIGNB(Y?)rr",
"(V?)PSIGND(Y?)rr",
"(V?)PSIGNW(Y?)rr",
"(V?)PSLLD(Y?)ri",
def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PSLLD(Y?)ri",
"(V?)PSLLQ(Y?)ri",
"VPSLLVD(Y?)rr",
"VPSLLVQ(Y?)rr",
@ -472,11 +443,7 @@ def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PABSB(Y?)rr",
"(V?)PSRLQ(Y?)ri",
"VPSRLVD(Y?)rr",
"VPSRLVQ(Y?)rr",
"(V?)PSRLW(Y?)ri",
"(V?)PSUBSB(Y?)rr",
"(V?)PSUBSW(Y?)rr",
"(V?)PSUBUSB(Y?)rr",
"(V?)PSUBUSW(Y?)rr")>;
"(V?)PSRLW(Y?)ri")>;
def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> {
let Latency = 1;
@ -828,16 +795,6 @@ def SKLWriteResGroup37 : SchedWriteRes<[SKLPort5,SKLPort05]> {
}
def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
// SKLWriteResGroup38: scheduling override using SKLPort5 (2 cycles) and
// SKLPort015 (1 cycle), with a latency of 3 and 3 micro-ops.
def SKLWriteResGroup38 : SchedWriteRes<[SKLPort5,SKLPort015]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
// Applied to the D and W horizontal add/sub opcodes ending in "rr", with
// optional "V" prefix and optional "Y" width suffix. The saturating SW
// variants are not listed here.
def: InstRW<[SKLWriteResGroup38], (instregex "(V?)PHADDD(Y?)rr",
"(V?)PHADDW(Y?)rr",
"(V?)PHSUBD(Y?)rr",
"(V?)PHSUBW(Y?)rr")>;
def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@ -1304,38 +1261,7 @@ def SKLWriteResGroup90 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PABSBrm",
"(V?)PABSDrm",
"(V?)PABSWrm",
"(V?)PADDSBrm",
"(V?)PADDSWrm",
"(V?)PADDUSBrm",
"(V?)PADDUSWrm",
"(V?)PAVGBrm",
"(V?)PAVGWrm",
"(V?)PCMPEQBrm",
"(V?)PCMPEQDrm",
"(V?)PCMPEQQrm",
"(V?)PCMPEQWrm",
"(V?)PCMPGTBrm",
"(V?)PCMPGTDrm",
"(V?)PCMPGTWrm",
"(V?)PMAXSBrm",
"(V?)PMAXSDrm",
"(V?)PMAXSWrm",
"(V?)PMAXUBrm",
"(V?)PMAXUDrm",
"(V?)PMAXUWrm",
"(V?)PMINSBrm",
"(V?)PMINSDrm",
"(V?)PMINSWrm",
"(V?)PMINUBrm",
"(V?)PMINUDrm",
"(V?)PMINUWrm",
"(V?)PSIGNBrm",
"(V?)PSIGNDrm",
"(V?)PSIGNWrm",
"(V?)PSLLDrm",
def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PSLLDrm",
"(V?)PSLLQrm",
"VPSLLVDrm",
"VPSLLVQrm",
@ -1347,11 +1273,7 @@ def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PABSBrm",
"(V?)PSRLQrm",
"(V?)PSRLVDrm",
"VPSRLVQrm",
"(V?)PSRLWrm",
"(V?)PSUBSBrm",
"(V?)PSUBSWrm",
"(V?)PSUBUSBrm",
"(V?)PSUBUSWrm")>;
"(V?)PSRLWrm")>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 7;
@ -1514,38 +1436,7 @@ def SKLWriteResGroup109 : SchedWriteRes<[SKLPort01,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup109], (instregex "VPABSBYrm",
"VPABSDYrm",
"VPABSWYrm",
"VPADDSBYrm",
"VPADDSWYrm",
"VPADDUSBYrm",
"VPADDUSWYrm",
"VPAVGBYrm",
"VPAVGWYrm",
"VPCMPEQBYrm",
"VPCMPEQDYrm",
"VPCMPEQQYrm",
"VPCMPEQWYrm",
"VPCMPGTBYrm",
"VPCMPGTDYrm",
"VPCMPGTWYrm",
"VPMAXSBYrm",
"VPMAXSDYrm",
"VPMAXSWYrm",
"VPMAXUBYrm",
"VPMAXUDYrm",
"VPMAXUWYrm",
"VPMINSBYrm",
"VPMINSDYrm",
"VPMINSWYrm",
"VPMINUBYrm",
"VPMINUDYrm",
"VPMINUWYrm",
"VPSIGNBYrm",
"VPSIGNDYrm",
"VPSIGNWYrm",
"VPSLLDYrm",
def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLDYrm",
"VPSLLQYrm",
"VPSLLVDYrm",
"VPSLLVQYrm",
@ -1557,11 +1448,7 @@ def: InstRW<[SKLWriteResGroup109], (instregex "VPABSBYrm",
"VPSRLQYrm",
"VPSRLVDYrm",
"VPSRLVQYrm",
"VPSRLWYrm",
"VPSUBSBYrm",
"VPSUBSWYrm",
"VPSUBUSBYrm",
"VPSUBUSWYrm")>;
"VPSRLWYrm")>;
def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 8;
@ -1725,16 +1612,6 @@ def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
// SKLWriteResGroup129: scheduling override using SKLPort5 (2 cycles),
// SKLPort23 (1 cycle) and SKLPort015 (1 cycle), with a latency of 9 and
// 4 micro-ops.
def SKLWriteResGroup129 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
let Latency = 9;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
// Applied to the D and W horizontal add/sub opcodes ending in "rm", with
// optional "V" prefix. The "Yrm" names are bound separately, to
// SKLWriteResGroup141.
def: InstRW<[SKLWriteResGroup129], (instregex "(V?)PHADDDrm",
"(V?)PHADDWrm",
"(V?)PHSUBDrm",
"(V?)PHSUBWrm")>;
def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
@ -1807,16 +1684,6 @@ def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
def: InstRW<[SKLWriteResGroup140], (instregex "VPHADDSWYrm",
"VPHSUBSWYrm")>;
// SKLWriteResGroup141: same resources, micro-op count and resource cycles as
// SKLWriteResGroup129 (SKLPort5 x2, SKLPort23, SKLPort015; 4 micro-ops), but
// with a latency of 10 instead of 9.
def SKLWriteResGroup141 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
let Latency = 10;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
// Applied to the D and W horizontal add/sub opcodes whose names end in "Yrm".
// NOTE(review): presumably the 256-bit memory-operand forms — confirm.
def: InstRW<[SKLWriteResGroup141], (instregex "VPHADDDYrm",
"VPHADDWYrm",
"VPHSUBDYrm",
"VPHSUBWYrm")>;
def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 4;

View File

@ -195,7 +195,8 @@ def : WriteRes<WriteVecLoad, [SKXPort23]> { let Latency = 5; }
def : WriteRes<WriteVecStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteVecMove, [SKXPort015]>;
defm : SKXWriteResPair<WriteVecALU, [SKXPort15], 1>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
@ -373,7 +374,8 @@ def : WriteRes<WriteNop, []>;
defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort015], 6, [2,1], 3, 7>;
defm : SKXWriteResPair<WritePHAdd, [SKXPort15], 1>;
defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@ -477,122 +479,7 @@ def SKXWriteResGroup5 : SchedWriteRes<[SKXPort01]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBYrr",
"VPABSBZ128rr",
"VPABSBZ256rr",
"VPABSBZrr",
"(V?)PABSBrr",
"VPABSDYrr",
"VPABSDZ128rr",
"VPABSDZ256rr",
"VPABSDZrr",
"(V?)PABSDrr",
"VPABSQZ128rr",
"VPABSQZ256rr",
"VPABSQZrr",
"VPABSWYrr",
"VPABSWZ128rr",
"VPABSWZ256rr",
"VPABSWZrr",
"(V?)PABSWrr",
"VPADDSBYrr",
"VPADDSBZ128rr",
"VPADDSBZ256rr",
"VPADDSBZrr",
"(V?)PADDSBrr",
"VPADDSWYrr",
"VPADDSWZ128rr",
"VPADDSWZ256rr",
"VPADDSWZrr",
"(V?)PADDSWrr",
"VPADDUSBYrr",
"VPADDUSBZ128rr",
"VPADDUSBZ256rr",
"VPADDUSBZrr",
"(V?)PADDUSBrr",
"VPADDUSWYrr",
"VPADDUSWZ128rr",
"VPADDUSWZ256rr",
"VPADDUSWZrr",
"(V?)PADDUSWrr",
"VPAVGBYrr",
"VPAVGBZ128rr",
"VPAVGBZ256rr",
"VPAVGBZrr",
"(V?)PAVGBrr",
"VPAVGWYrr",
"VPAVGWZ128rr",
"VPAVGWZ256rr",
"VPAVGWZrr",
"(V?)PAVGWrr",
"(V?)PCMPEQB(Y?)rr",
"(V?)PCMPEQD(Y?)rr",
"(V?)PCMPEQQ(Y?)rr",
"(V?)PCMPEQW(Y?)rr",
"(V?)PCMPGTB(Y?)rr",
"(V?)PCMPGTD(Y?)rr",
"(V?)PCMPGTW(Y?)rr",
"VPMAXSBYrr",
"VPMAXSBZ128rr",
"VPMAXSBZ256rr",
"VPMAXSBZrr",
"(V?)PMAXSBrr",
"VPMAXSDYrr",
"VPMAXSDZ128rr",
"VPMAXSDZ256rr",
"VPMAXSDZrr",
"(V?)PMAXSDrr",
"VPMAXSWYrr",
"VPMAXSWZ128rr",
"VPMAXSWZ256rr",
"VPMAXSWZrr",
"(V?)PMAXSWrr",
"VPMAXUBYrr",
"VPMAXUBZ128rr",
"VPMAXUBZ256rr",
"VPMAXUBZrr",
"(V?)PMAXUBrr",
"VPMAXUDYrr",
"VPMAXUDZ128rr",
"VPMAXUDZ256rr",
"VPMAXUDZrr",
"(V?)PMAXUDrr",
"VPMAXUWYrr",
"VPMAXUWZ128rr",
"VPMAXUWZ256rr",
"VPMAXUWZrr",
"(V?)PMAXUWrr",
"VPMINSBYrr",
"VPMINSBZ128rr",
"VPMINSBZ256rr",
"VPMINSBZrr",
"(V?)PMINSBrr",
"VPMINSDYrr",
"VPMINSDZ128rr",
"VPMINSDZ256rr",
"VPMINSDZrr",
"(V?)PMINSDrr",
"VPMINSWYrr",
"VPMINSWZ128rr",
"VPMINSWZ256rr",
"VPMINSWZrr",
"(V?)PMINSWrr",
"VPMINUBYrr",
"VPMINUBZ128rr",
"VPMINUBZ256rr",
"VPMINUBZrr",
"(V?)PMINUBrr",
"VPMINUDYrr",
"VPMINUDZ128rr",
"VPMINUDZ256rr",
"VPMINUDZrr",
"(V?)PMINUDrr",
"VPMINUWYrr",
"VPMINUWZ128rr",
"VPMINUWZ256rr",
"VPMINUWZrr",
"(V?)PMINUWrr",
"VPROLDZ128ri",
def: InstRW<[SKXWriteResGroup5], (instregex "VPROLDZ128ri",
"VPROLDZ256ri",
"VPROLDZri",
"VPROLQZ128ri",
@ -616,9 +503,6 @@ def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBYrr",
"VPRORVQZ128rr",
"VPRORVQZ256rr",
"VPRORVQZrr",
"(V?)PSIGNB(Y?)rr",
"(V?)PSIGND(Y?)rr",
"(V?)PSIGNW(Y?)rr",
"(V?)PSLLDYri",
"VPSLLDZ128ri",
"VPSLLDZ256ri",
@ -698,27 +582,7 @@ def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBYrr",
"VPSRLWZ128ri",
"VPSRLWZ256ri",
"VPSRLWZri",
"(V?)PSRLWri",
"VPSUBSBYrr",
"VPSUBSBZ128rr",
"VPSUBSBZ256rr",
"VPSUBSBZrr",
"(V?)PSUBSBrr",
"VPSUBSWYrr",
"VPSUBSWZ128rr",
"VPSUBSWZ256rr",
"VPSUBSWZrr",
"(V?)PSUBSWrr",
"VPSUBUSBYrr",
"VPSUBUSBZ128rr",
"VPSUBUSBZ256rr",
"VPSUBUSBZrr",
"(V?)PSUBUSBrr",
"VPSUBUSWYrr",
"VPSUBUSWZ128rr",
"VPSUBUSWZ256rr",
"VPSUBUSWZrr",
"(V?)PSUBUSWrr")>;
"(V?)PSRLWri")>;
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
let Latency = 1;
@ -1384,13 +1248,6 @@ def SKXWriteResGroup39 : SchedWriteRes<[SKXPort5,SKXPort05]> {
}
def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
// SKXWriteResGroup40: scheduling override using SKXPort5 (2 cycles) and
// SKXPort015 (1 cycle), with a latency of 3 and 3 micro-ops.
def SKXWriteResGroup40 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
// A single pattern covers PHADD/PHSUB with D or W element suffix, optional
// "V" prefix and optional "Y" width suffix, ending in "rr".
def: InstRW<[SKXWriteResGroup40], (instregex "(V?)PH(ADD|SUB)(D|W)(Y?)rr")>;
def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@ -2214,57 +2071,7 @@ def SKXWriteResGroup94 : SchedWriteRes<[SKXPort01,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBZ128rm(b?)",
"(V?)PABSBrm",
"VPABSDZ128rm(b?)",
"(V?)PABSDrm",
"VPABSQZ128rm(b?)",
"VPABSWZ128rm(b?)",
"(V?)PABSWrm",
"VPADDSBZ128rm(b?)",
"(V?)PADDSBrm",
"VPADDSWZ128rm(b?)",
"(V?)PADDSWrm",
"VPADDUSBZ128rm(b?)",
"(V?)PADDUSBrm",
"VPADDUSWZ128rm(b?)",
"(V?)PADDUSWrm",
"VPAVGBZ128rm(b?)",
"(V?)PAVGBrm",
"VPAVGWZ128rm(b?)",
"(V?)PAVGWrm",
"(V?)PCMPEQBrm",
"(V?)PCMPEQDrm",
"(V?)PCMPEQQrm",
"(V?)PCMPEQWrm",
"(V?)PCMPGTBrm",
"(V?)PCMPGTDrm",
"(V?)PCMPGTWrm",
"VPMAXSBZ128rm(b?)",
"(V?)PMAXSBrm",
"VPMAXSDZ128rm(b?)",
"(V?)PMAXSDrm",
"VPMAXSWZ128rm(b?)",
"(V?)PMAXSWrm",
"VPMAXUBZ128rm(b?)",
"(V?)PMAXUBrm",
"VPMAXUDZ128rm(b?)",
"(V?)PMAXUDrm",
"VPMAXUWZ128rm(b?)",
"(V?)PMAXUWrm",
"VPMINSBZ128rm(b?)",
"(V?)PMINSBrm",
"VPMINSDZ128rm(b?)",
"(V?)PMINSDrm",
"VPMINSWZ128rm(b?)",
"(V?)PMINSWrm",
"VPMINUBZ128rm(b?)",
"(V?)PMINUBrm",
"VPMINUDZ128rm(b?)",
"(V?)PMINUDrm",
"VPMINUWZ128rm(b?)",
"(V?)PMINUWrm",
"VPROLDZ128m(b?)i",
def: InstRW<[SKXWriteResGroup94], (instregex "VPROLDZ128m(b?)i",
"VPROLQZ128m(b?)i",
"VPROLVDZ128rm(b?)",
"VPROLVQZ128rm(b?)",
@ -2272,9 +2079,6 @@ def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBZ128rm(b?)",
"VPRORQZ128m(b?)i",
"VPRORVDZ128rm(b?)",
"VPRORVQZ128rm(b?)",
"(V?)PSIGNBrm",
"(V?)PSIGNDrm",
"(V?)PSIGNWrm",
"VPSLLDZ128m(b?)i",
"VPSLLDZ128rm(b?)",
"(V?)PSLLDrm",
@ -2314,15 +2118,7 @@ def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBZ128rm(b?)",
"VPSRLVWZ128rm(b?)",
"VPSRLWZ128mi(b?)",
"VPSRLWZ128rm(b?)",
"(V?)PSRLWrm",
"VPSUBSBZ128rm(b?)",
"(V?)PSUBSBrm",
"VPSUBSWZ128rm(b?)",
"(V?)PSUBSWrm",
"VPSUBUSBZ128rm(b?)",
"(V?)PSUBUSBrm",
"VPSUBUSWZ128rm(b?)",
"(V?)PSUBUSWrm")>;
"(V?)PSRLWrm")>;
def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 7;
@ -2615,79 +2411,7 @@ def SKXWriteResGroup120 : SchedWriteRes<[SKXPort01,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup120], (instregex "VPABSBYrm",
"VPABSBZ256rm(b?)",
"VPABSBZrm(b?)",
"VPABSDYrm",
"VPABSDZ256rm(b?)",
"VPABSDZrm(b?)",
"VPABSQZ256rm(b?)",
"VPABSQZrm(b?)",
"VPABSWYrm",
"VPABSWZ256rm(b?)",
"VPABSWZrm(b?)",
"VPADDSBYrm",
"VPADDSBZ256rm(b?)",
"VPADDSBZrm(b?)",
"VPADDSWYrm",
"VPADDSWZ256rm(b?)",
"VPADDSWZrm(b?)",
"VPADDUSBYrm",
"VPADDUSBZ256rm(b?)",
"VPADDUSBZrm(b?)",
"VPADDUSWYrm",
"VPADDUSWZ256rm(b?)",
"VPADDUSWZrm(b?)",
"VPAVGBYrm",
"VPAVGBZ256rm(b?)",
"VPAVGBZrm(b?)",
"VPAVGWYrm",
"VPAVGWZ256rm(b?)",
"VPAVGWZrm(b?)",
"VPCMPEQBYrm",
"VPCMPEQDYrm",
"VPCMPEQQYrm",
"VPCMPEQWYrm",
"VPCMPGTBYrm",
"VPCMPGTDYrm",
"VPCMPGTWYrm",
"VPMAXSBYrm",
"VPMAXSBZ256rm(b?)",
"VPMAXSBZrm(b?)",
"VPMAXSDYrm",
"VPMAXSDZ256rm(b?)",
"VPMAXSDZrm(b?)",
"VPMAXSWYrm",
"VPMAXSWZ256rm(b?)",
"VPMAXSWZrm(b?)",
"VPMAXUBYrm",
"VPMAXUBZ256rm(b?)",
"VPMAXUBZrm(b?)",
"VPMAXUDYrm",
"VPMAXUDZ256rm(b?)",
"VPMAXUDZrm(b?)",
"VPMAXUWYrm",
"VPMAXUWZ256rm(b?)",
"VPMAXUWZrm(b?)",
"VPMINSBYrm",
"VPMINSBZ256rm(b?)",
"VPMINSBZrm(b?)",
"VPMINSDYrm",
"VPMINSDZ256rm(b?)",
"VPMINSDZrm(b?)",
"VPMINSWYrm",
"VPMINSWZ256rm(b?)",
"VPMINSWZrm(b?)",
"VPMINUBYrm",
"VPMINUBZ256rm(b?)",
"VPMINUBZrm(b?)",
"VPMINUDYrm",
"VPMINUDZ256rm(b?)",
"VPMINUDZrm(b?)",
"VPMINUWYrm",
"VPMINUWZ256rm(b?)",
"VPMINUWZrm(b?)",
"VPROLDZ256m(b?)i",
def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZ256m(b?)i",
"VPROLDZm(b?)i",
"VPROLQZ256m(b?)i",
"VPROLQZm(b?)i",
@ -2703,9 +2427,6 @@ def: InstRW<[SKXWriteResGroup120], (instregex "VPABSBYrm",
"VPRORVDZrm(b?)",
"VPRORVQZ256rm(b?)",
"VPRORVQZrm(b?)",
"VPSIGNBYrm",
"VPSIGNDYrm",
"VPSIGNWYrm",
"VPSLLDYrm",
"VPSLLDZ256m(b?)i",
"VPSLLDZ256rm(b?)",
@ -2772,19 +2493,7 @@ def: InstRW<[SKXWriteResGroup120], (instregex "VPABSBYrm",
"VPSRLWZ256mi(b?)",
"VPSRLWZ256rm(b?)",
"VPSRLWZmi(b?)",
"VPSRLWZrm(b?)",
"VPSUBSBYrm",
"VPSUBSBZ256rm(b?)",
"VPSUBSBZrm(b?)",
"VPSUBSWYrm",
"VPSUBSWZ256rm(b?)",
"VPSUBSWZrm(b?)",
"VPSUBUSBYrm",
"VPSUBUSBZ256rm(b?)",
"VPSUBUSBZrm(b?)",
"VPSUBUSWYrm",
"VPSUBUSWZ256rm(b?)",
"VPSUBUSWZrm(b?)")>;
"VPSRLWZrm(b?)")>;
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
@ -3138,16 +2847,6 @@ def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
// SKXWriteResGroup144: scheduling override using SKXPort5 (2 cycles),
// SKXPort23 (1 cycle) and SKXPort015 (1 cycle), with a latency of 9 and
// 4 micro-ops.
def SKXWriteResGroup144 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 9;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
// Applied to the D and W horizontal add/sub opcodes ending in "rm", with
// optional "V" prefix. The "Yrm" names are bound separately, to
// SKXWriteResGroup155.
def: InstRW<[SKXWriteResGroup144], (instregex "(V?)PHADDDrm",
"(V?)PHADDWrm",
"(V?)PHSUBDrm",
"(V?)PHSUBWrm")>;
def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
@ -3315,16 +3014,6 @@ def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
def: InstRW<[SKXWriteResGroup154], (instregex "VPHADDSWYrm",
"VPHSUBSWYrm")>;
// SKXWriteResGroup155: same resources, micro-op count and resource cycles as
// SKXWriteResGroup144 (SKXPort5 x2, SKXPort23, SKXPort015; 4 micro-ops), but
// with a latency of 10 instead of 9.
def SKXWriteResGroup155 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
let Latency = 10;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
// Applied to the D and W horizontal add/sub opcodes whose names end in "Yrm".
// NOTE(review): presumably the 256-bit memory-operand forms — confirm.
def: InstRW<[SKXWriteResGroup155], (instregex "VPHADDDYrm",
"VPHADDWYrm",
"VPHSUBDYrm",
"VPHSUBWYrm")>;
def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 4;

View File

@ -127,12 +127,14 @@ class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM.
defm WritePHAdd : X86SchedWritePair;
defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecMove : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
@ -249,9 +251,9 @@ def SchedWriteFVarBlend
WriteFVarBlendY, WriteFVarBlendY>;
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALU, WriteVecALU>;
: X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALUY, WriteVecALUY>;
def SchedWritePHAdd
: X86SchedWriteWidths<WritePHAdd, WritePHAdd, WritePHAdd, WritePHAdd>;
: X86SchedWriteWidths<WritePHAdd, WritePHAdd, WritePHAddY, WritePHAddY>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogic,
WriteVecLogicY, WriteVecLogicY>;

View File

@ -252,6 +252,7 @@ def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecMove, [AtomPort01]>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
@ -316,6 +317,7 @@ defm : AtomWriteResPair<WriteAESDecEnc, [AtomPort01], [AtomPort01]>; // NOTE: Do
defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.

View File

@ -404,6 +404,7 @@ def : WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC]>;
def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>;
@ -468,6 +469,7 @@ defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WritePHAddY, [JFPU01, JVALU], 1>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.

View File

@ -166,6 +166,7 @@ defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
// FIXME: The below is closer to correct, but caused some perf regressions.
@ -201,6 +202,7 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask

View File

@ -238,7 +238,9 @@ defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WritePHAddY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME

View File

@ -10,7 +10,7 @@ define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
; GENERIC-LABEL: test_broadcasti128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_broadcasti128:
@ -174,8 +174,8 @@ define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_extracti128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [3:1.00]
; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
@ -524,7 +524,7 @@ define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; GENERIC: # %bb.0:
; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_inserti128:
@ -652,8 +652,8 @@ declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind rea
define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
; GENERIC-LABEL: test_pabsb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -702,8 +702,8 @@ declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
; GENERIC-LABEL: test_pabsd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -752,8 +752,8 @@ declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
; GENERIC-LABEL: test_pabsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -978,8 +978,8 @@ declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readn
define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddb:
@ -1020,8 +1020,8 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_paddd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddd:
@ -1062,8 +1062,8 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_paddq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddq:
@ -1104,8 +1104,8 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddsb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddsb:
@ -1147,8 +1147,8 @@ declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddsw:
@ -1190,8 +1190,8 @@ declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readn
define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_paddusb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddusb:
@ -1233,8 +1233,8 @@ declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnon
define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddusw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddusw:
@ -1276,8 +1276,8 @@ declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind read
define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_paddw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddw:
@ -1320,7 +1320,7 @@ define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_palignr:
@ -1369,7 +1369,7 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pand:
@ -1418,7 +1418,7 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pandn:
@ -1467,8 +1467,8 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pavgb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pavgb:
@ -1519,8 +1519,8 @@ define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pavgw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pavgw:
@ -1622,7 +1622,7 @@ define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2)
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pblendd_ymm:
@ -1714,7 +1714,7 @@ define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:0.50]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pblendw:
@ -1812,7 +1812,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pbroadcastb_ymm:
@ -1910,7 +1910,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pbroadcastd_ymm:
@ -2008,7 +2008,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pbroadcastq_ymm:
@ -2106,7 +2106,7 @@ define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pbroadcastw_ymm:
@ -2153,8 +2153,8 @@ define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpeqb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqb:
@ -2197,8 +2197,8 @@ define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pcmpeqd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqd:
@ -2241,8 +2241,8 @@ define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pcmpeqq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqq:
@ -2285,8 +2285,8 @@ define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pcmpeqw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqw:
@ -2329,8 +2329,8 @@ define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpgtb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtb:
@ -2373,8 +2373,8 @@ define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pcmpgtd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtd:
@ -2417,8 +2417,8 @@ define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pcmpgtq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtq:
@ -2461,8 +2461,8 @@ define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pcmpgtw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtw:
@ -2507,7 +2507,7 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_perm2i128:
@ -2556,7 +2556,7 @@ define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_permd:
@ -2705,7 +2705,7 @@ define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_permq:
@ -3039,7 +3039,7 @@ define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_phaddd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddd:
@ -3082,7 +3082,7 @@ define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_phaddsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddsw:
@ -3125,7 +3125,7 @@ define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_phaddw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddw:
@ -3168,7 +3168,7 @@ define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_phsubd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubd:
@ -3211,7 +3211,7 @@ define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_phsubsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubsw:
@ -3254,7 +3254,7 @@ define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; GENERIC-LABEL: test_phsubw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:1.50]
; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phsubw:
@ -3580,8 +3580,8 @@ declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pmaxsb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxsb:
@ -3623,8 +3623,8 @@ declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmaxsd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxsd:
@ -3666,8 +3666,8 @@ declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pmaxsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxsw:
@ -3709,8 +3709,8 @@ declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readn
define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pmaxub:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxub:
@ -3752,8 +3752,8 @@ declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pmaxud:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxud:
@ -3795,8 +3795,8 @@ declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pmaxuw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmaxuw:
@ -3838,8 +3838,8 @@ declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readn
define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pminsb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminsb:
@ -3881,8 +3881,8 @@ declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pminsd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminsd:
@ -3924,8 +3924,8 @@ declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pminsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminsw:
@ -3967,8 +3967,8 @@ declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readn
define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pminub:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminub:
@ -4010,8 +4010,8 @@ declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pminud:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminud:
@ -4053,8 +4053,8 @@ declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pminuw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pminuw:
@ -4139,7 +4139,7 @@ define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxbd:
@ -4190,7 +4190,7 @@ define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxbq:
@ -4241,7 +4241,7 @@ define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxbw:
@ -4290,7 +4290,7 @@ define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxdq:
@ -4339,7 +4339,7 @@ define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxwd:
@ -4388,7 +4388,7 @@ define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [6:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovsxwq:
@ -4439,7 +4439,7 @@ define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxbd:
@ -4490,7 +4490,7 @@ define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxbq:
@ -4541,7 +4541,7 @@ define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxbw:
@ -4590,7 +4590,7 @@ define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxdq:
@ -4639,7 +4639,7 @@ define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxwd:
@ -4688,7 +4688,7 @@ define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pmovzxwq:
@ -5040,7 +5040,7 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_por:
@ -5176,7 +5176,7 @@ define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pshufd:
@ -5321,8 +5321,8 @@ define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) {
define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psignb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignb:
@ -5364,8 +5364,8 @@ declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_psignd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignd:
@ -5407,8 +5407,8 @@ declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psignw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psignw:
@ -6348,8 +6348,8 @@ declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnon
define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psubb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubb:
@ -6390,8 +6390,8 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_psubd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubd:
@ -6432,8 +6432,8 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_psubq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubq:
@ -6474,8 +6474,8 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psubsb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubsb:
@ -6517,8 +6517,8 @@ declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psubsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubsw:
@ -6560,8 +6560,8 @@ declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readn
define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_psubusb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubusb:
@ -6603,8 +6603,8 @@ declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnon
define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psubusw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubusw:
@ -6646,8 +6646,8 @@ declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind read
define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_psubw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_psubw:
@ -6732,8 +6732,8 @@ define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpckhdq:
@ -6787,7 +6787,7 @@ define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2)
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpckhqdq:
@ -6920,8 +6920,8 @@ define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpckldq:
@ -6975,7 +6975,7 @@ define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2)
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpcklqdq:
@ -7066,7 +7066,7 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pxor:

View File

@ -291,7 +291,7 @@ entry:
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpaddq_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_test:
@ -305,7 +305,7 @@ define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; GENERIC-LABEL: vpaddq_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_fold_test:
@ -320,7 +320,7 @@ define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; GENERIC-LABEL: vpaddq_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast_test:
@ -334,7 +334,7 @@ define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; GENERIC-LABEL: vpaddq_broadcast2_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddq_broadcast2_test:
@ -357,7 +357,7 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpaddd_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_test:
@ -371,7 +371,7 @@ define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; GENERIC-LABEL: vpaddd_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_fold_test:
@ -386,7 +386,7 @@ define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; GENERIC-LABEL: vpaddd_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_broadcast_test:
@ -401,7 +401,7 @@ define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %ma
; GENERIC-LABEL: vpaddd_mask_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_test:
@ -419,7 +419,7 @@ define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %m
; GENERIC-LABEL: vpaddd_maskz_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_test:
@ -437,7 +437,7 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
; GENERIC-LABEL: vpaddd_mask_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_fold_test:
@ -456,7 +456,7 @@ define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1)
; GENERIC-LABEL: vpaddd_mask_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_mask_broadcast_test:
@ -474,7 +474,7 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
; GENERIC-LABEL: vpaddd_maskz_fold_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_fold_test:
@ -493,7 +493,7 @@ define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1)
; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpaddd_maskz_broadcast_test:
@ -510,7 +510,7 @@ define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1)
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; GENERIC-LABEL: vpsubq_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubq_test:
@ -524,7 +524,7 @@ define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; GENERIC-LABEL: vpsubd_test:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpsubd_test:
@ -643,7 +643,7 @@ define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: addq_broadcast:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: addq_broadcast:
@ -2828,7 +2828,7 @@ define <8 x float> @ubto8f32(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -2847,7 +2847,7 @@ define <8 x double> @ubto8f64(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -4369,8 +4369,8 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_8i1_8i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8i1_8i32:
@ -4420,7 +4420,7 @@ define i16 @trunc_i32_to_i1(i32 %a) {
define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_8i1_8i16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -4439,7 +4439,7 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_16i1_16i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -4456,7 +4456,7 @@ define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; GENERIC-LABEL: sext_8i1_8i64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -4640,7 +4640,7 @@ define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
; GENERIC-LABEL: zext_64xi1_to_64xi8:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50]
; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -4657,7 +4657,7 @@ define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
; GENERIC-LABEL: zext_32xi1_to_32xi16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -4676,7 +4676,7 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
; GENERIC-LABEL: zext_16xi1_to_16xi16:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -4694,7 +4694,7 @@ define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
; GENERIC-LABEL: zext_32xi1_to_32xi8:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50]
; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [6:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -5028,7 +5028,7 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <1
define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandd:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -5048,7 +5048,7 @@ entry:
define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandnd:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -5070,7 +5070,7 @@ entry:
define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpord:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -5090,7 +5090,7 @@ entry:
define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpxord:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -5110,7 +5110,7 @@ entry:
define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandq:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -5129,7 +5129,7 @@ entry:
define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpandnq:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -5149,7 +5149,7 @@ entry:
define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vporq:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -5168,7 +5168,7 @@ entry:
define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
; GENERIC-LABEL: vpxorq:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -6867,7 +6867,7 @@ define i8 @shuf_test1(i16 %v) nounwind {
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
@ -6891,7 +6891,7 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
@ -6917,7 +6917,7 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; GENERIC-LABEL: zext_test3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
@ -6967,8 +6967,8 @@ entry:
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; GENERIC-LABEL: test4:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -6990,8 +6990,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; GENERIC-LABEL: vcmp_test5:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -7059,7 +7059,7 @@ define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; GENERIC-NEXT: retq # sched: [1:1.00]
; GENERIC-NEXT: .LBB386_1:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50]
; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]

View File

@ -8,15 +8,15 @@ define void @test_vpopcntd(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -28,12 +28,12 @@ define void @test_vpopcntd(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: #NO_APP
; ICELAKE-NEXT: vzeroupper # sched: [4:1.00]
; ICELAKE-NEXT: retq # sched: [7:1.00]
@ -46,15 +46,15 @@ define void @test_vpopcntq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3)
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50]
; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50]
; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -66,12 +66,12 @@ define void @test_vpopcntq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3)
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [6:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50]
; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50]
; ICELAKE-NEXT: #NO_APP
; ICELAKE-NEXT: vzeroupper # sched: [4:1.00]
; ICELAKE-NEXT: retq # sched: [7:1.00]

View File

@ -6,7 +6,7 @@
define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) {
; GENERIC-LABEL: test_extrq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_extrq:
@ -26,7 +26,7 @@ declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>)
define <2 x i64> @test_extrqi(<2 x i64> %a0) {
; GENERIC-LABEL: test_extrqi:
; GENERIC: # %bb.0:
; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_extrqi:
@ -46,7 +46,7 @@ declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8)
define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) {
; GENERIC-LABEL: test_insertq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_insertq:
@ -66,7 +66,7 @@ declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>)
define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) {
; GENERIC-LABEL: test_insertqi:
; GENERIC: # %bb.0:
; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_insertqi:

View File

@ -147,14 +147,14 @@ define void @test_vpcom(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; GENERIC-LABEL: test_vpcom:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -179,14 +179,14 @@ define void @test_vpcomu(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; GENERIC-LABEL: test_vpcomu:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; GENERIC-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;