[X86] Cleanup WriteFShuffle/WriteFVarShuffle (+256 variants) scheduler classes with more common default values

llvm-svn: 331380
This commit is contained in:
Simon Pilgrim 2018-05-02 17:58:50 +00:00
parent dea80d5174
commit 819f218f07
9 changed files with 335 additions and 533 deletions

View File

@ -205,8 +205,8 @@ defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integ
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply.
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1>; // Vector shuffles.
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1>; // Vector variable shuffles.
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : BWWriteResPair<WriteBlend, [BWPort5], 1>; // Vector blends.
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
@ -334,11 +334,11 @@ defm : BWWriteResPair<WriteCLMul, [BWPort0], 5>;
def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
// AVX2.
defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3>; // Fp 256-bit width vector shuffles.
defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3>; // 256-bit width vector shuffles.
defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3>; // 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;

View File

@ -172,14 +172,14 @@ defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteFBlendY, [HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
@ -200,11 +200,11 @@ defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteVarShuffle,[HWPort5], 1, [1], 1, 5>;
defm : HWWriteResPair<WriteBlend, [HWPort5], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3>;
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
@ -221,6 +221,7 @@ def : WriteRes<WriteVecInsertLd, [HWPort5,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
}
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
def : WriteRes<WriteVecExtract, [HWPort0,HWPort5]> {
let Latency = 2;
@ -874,14 +875,11 @@ def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup13], (instregex "(V?)INSERTPSrm",
"(V?)PACKSSDWrm",
def: InstRW<[HWWriteResGroup13], (instregex "(V?)PACKSSDWrm",
"(V?)PACKSSWBrm",
"(V?)PACKUSDWrm",
"(V?)PACKUSWBrm",
"(V?)PALIGNRrmi",
"VPERMILPDmi",
"VPERMILPSmi",
"(V?)PSHUFBrm",
"(V?)PSHUFDmi",
"(V?)PSHUFHWmi",
@ -893,13 +891,7 @@ def: InstRW<[HWWriteResGroup13], (instregex "(V?)INSERTPSrm",
"(V?)PUNPCKLBWrm",
"(V?)PUNPCKLDQrm",
"(V?)PUNPCKLQDQrm",
"(V?)PUNPCKLWDrm",
"(V?)SHUFPDrmi",
"(V?)SHUFPSrmi",
"(V?)UNPCKHPDrm",
"(V?)UNPCKHPSrm",
"(V?)UNPCKLPDrm",
"(V?)UNPCKLPSrm")>;
"(V?)PUNPCKLWDrm")>;
def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 8;
@ -1415,13 +1407,7 @@ def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup53], (instregex "VPERM2F128rm",
"VPERM2I128rm",
"VPERMDYrm",
"VPERMPDYmi",
"VPERMPSYrm",
"VPERMQYmi",
"VPMOVZXBDYrm",
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm",
"VPMOVZXBQYrm",
"VPMOVZXBWYrm",
"VPMOVZXDQYrm",
@ -1798,8 +1784,8 @@ def HWWriteResGroup89 : SchedWriteRes<[HWPort0]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr",
"MUL_FPrST0",
"MUL_FST0r",
"MUL_FPrST0",
"MUL_FST0r",
"MUL_FrST0")>;
def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {

View File

@ -159,10 +159,10 @@ defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
@ -180,8 +180,8 @@ defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1>;
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 5>;
defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
@ -322,10 +322,10 @@ def : WriteRes<WriteNop, []>;
// AVX2/FMA is not supported on that architecture, but we should define the basic
// scheduling resources anyway.
defm : SBWriteResPair<WriteFShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteFVarShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteVarShuffle256, [SBPort0], 1>;
defm : SBWriteResPair<WriteFShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>;
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
defm : SBWriteResPair<WriteFMAS, [SBPort01], 5>;
@ -372,9 +372,6 @@ def: InstRW<[SBWriteResGroup2], (instregex "FFREE",
"RETQ",
"ST_FPrr",
"ST_Frr",
"VEXTRACTF128rr",
"VINSERTF128rr",
"VPERM2F128rr",
"(V?)MOV64toPQIrr",
"(V?)MOVDI2PDIrr")>;
@ -936,28 +933,6 @@ def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm",
"VTESTPDrm",
"VTESTPSrm")>;
def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 7;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128",
"(V?)INSERTPSrm",
"(V?)MOVHPDrm",
"(V?)MOVHPSrm",
"(V?)MOVLPDrm",
"(V?)MOVLPSrm",
"VPERMILPDmi",
"VPERMILPDrm",
"VPERMILPSmi",
"VPERMILPSrm",
"(V?)SHUFPDrmi",
"(V?)SHUFPSrmi",
"(V?)UNPCKHPDrm",
"(V?)UNPCKHPSrm",
"(V?)UNPCKLPDrm",
"(V?)UNPCKLPSrm")>;
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 7;
let NumMicroOps = 2;
@ -1135,15 +1110,6 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
}
def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>;
def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
let Latency = 8;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm",
"VPERMILPDYrm",
"VPERMILPSYrm")>;
def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 8;
let NumMicroOps = 3;

View File

@ -172,10 +172,10 @@ defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multipl
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; // Floating point vector blends.
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
@ -201,8 +201,8 @@ defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector int
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply.
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>;
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1>; // Vector shuffles.
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1>; // Vector shuffles.
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKLWriteResPair<WriteBlend, [SKLPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
@ -219,6 +219,7 @@ def : WriteRes<WriteVecInsertLd, [SKLPort5,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
}
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
def : WriteRes<WriteVecExtract, [SKLPort0,SKLPort5]> {
let Latency = 3;
@ -339,10 +340,10 @@ def : WriteRes<WriteCLMulLd, [SKLPort5, SKLPort23]> {
def : WriteRes<WriteSystem, [SKLPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
// AVX2.
defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3>; // 256-bit width vector shuffles.
defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3>; // 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@ -1260,18 +1261,13 @@ def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup88], (instregex "(V?)INSERTPSrm",
"(V?)PACKSSDWrm",
def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PACKSSDWrm",
"(V?)PACKSSWBrm",
"(V?)PACKUSDWrm",
"(V?)PACKUSWBrm",
"(V?)PALIGNRrmi",
"VPBROADCASTBrm",
"VPBROADCASTWrm",
"VPERMILPDmi",
"VPERMILPDrm",
"VPERMILPSmi",
"VPERMILPSrm",
"(V?)PSHUFBrm",
"(V?)PSHUFDmi",
"(V?)PSHUFHWmi",
@ -1283,13 +1279,7 @@ def: InstRW<[SKLWriteResGroup88], (instregex "(V?)INSERTPSrm",
"(V?)PUNPCKLBWrm",
"(V?)PUNPCKLDQrm",
"(V?)PUNPCKLQDQrm",
"(V?)PUNPCKLWDrm",
"(V?)SHUFPDrmi",
"(V?)SHUFPSrmi",
"(V?)UNPCKHPDrm",
"(V?)UNPCKHPSrm",
"(V?)UNPCKLPDrm",
"(V?)UNPCKLPSrm")>;
"(V?)PUNPCKLWDrm")>;
def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> {
let Latency = 7;
@ -1514,8 +1504,6 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m",
"VPBLENDWYrmi",
"VPBROADCASTBYrm",
"VPBROADCASTWYrm",
"VPERMILPDYrm",
"VPERMILPSYrm",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
"VPMOVSXWQYrm",
@ -1791,12 +1779,6 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"ILD_F(16|32|64)m",
"VPCMPGTQYrm",
"VPERM2F128rm",
"VPERM2I128rm",
"VPERMDYrm",
"VPERMPDYmi",
"VPERMPSYrm",
"VPERMQYmi",
"VPMOVZXBDYrm",
"VPMOVZXBQYrm",
"VPMOVZXBWYrm",

View File

@ -172,10 +172,10 @@ defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multipl
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; // Floating point vector blends.
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
@ -201,8 +201,8 @@ defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector int
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply.
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1>; // Vector shuffles.
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
@ -219,6 +219,7 @@ def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
}
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
let Latency = 3;
@ -339,10 +340,10 @@ def : WriteRes<WriteCLMulLd, [SKXPort5, SKXPort23]> {
def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
// AVX2.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3>; // 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3>; // 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort0, SKXPort5], 2, [2, 1]>; // Variable vector shifts.
// Old microcoded instructions that nobody use.
@ -1284,57 +1285,18 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_FPrST0",
"VPMINUQZ128rr",
"VPMINUQZ256rr",
"VPMINUQZrr",
"VPMOVQDZ128rr",
"VPMOVQDZ256rr",
"VPMOVQDZrr",
"VPMOVSXBDYrr",
"VPMOVSXBDZ128rr",
"VPMOVSXBDZ256rr",
"VPMOVSXBDZrr",
"VPMOVSXBQYrr",
"VPMOVSXBQZ128rr",
"VPMOVSXBQZ256rr",
"VPMOVSXBQZrr",
"VPMOVSXBWYrr",
"VPMOVSXBWZ128rr",
"VPMOVSXBWZ256rr",
"VPMOVSXBWZrr",
"VPMOVSXDQYrr",
"VPMOVSXDQZ128rr",
"VPMOVSXDQZ256rr",
"VPMOVSXDQZrr",
"VPMOVSXWDYrr",
"VPMOVSXWDZ128rr",
"VPMOVSXWDZ256rr",
"VPMOVSXWDZrr",
"VPMOVSXWQYrr",
"VPMOVSXWQZ128rr",
"VPMOVSXWQZ256rr",
"VPMOVSXWQZrr",
"VPMOVZXBDYrr",
"VPMOVZXBDZ128rr",
"VPMOVZXBDZ256rr",
"VPMOVZXBDZrr",
"VPMOVZXBQYrr",
"VPMOVZXBQZ128rr",
"VPMOVZXBQZ256rr",
"VPMOVZXBQZrr",
"VPMOVZXBWYrr",
"VPMOVZXBWZ128rr",
"VPMOVZXBWZ256rr",
"VPMOVZXBWZrr",
"VPMOVZXDQYrr",
"VPMOVZXDQZ128rr",
"VPMOVZXDQZ256rr",
"VPMOVZXDQZrr",
"VPMOVZXWDYrr",
"VPMOVZXWDZ128rr",
"VPMOVZXWDZ256rr",
"VPMOVZXWDZrr",
"VPMOVZXWQYrr",
"VPMOVZXWQZ128rr",
"VPMOVZXWQZ256rr",
"VPMOVZXWQZrr",
"VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
"VPTESTMBZ128rr",
"VPTESTMBZ256rr",
@ -2189,9 +2151,7 @@ def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)",
"(V?)INSERTPSrm",
"VMOVSDZrm(b?)",
def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
"VMOVSSZrm(b?)",
"VPACKSSDWZ128rm(b?)",
"(V?)PACKSSDWrm",
@ -2207,14 +2167,6 @@ def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)",
"VPBROADCASTBrm",
"VPBROADCASTWZ128m(b?)",
"VPBROADCASTWrm",
"VPERMILPDZ128m(b?)i",
"VPERMILPDZ128rm(b?)",
"VPERMILPDmi",
"VPERMILPDrm",
"VPERMILPSZ128m(b?)i",
"VPERMILPSZ128rm(b?)",
"VPERMILPSmi",
"VPERMILPSrm",
"VPSHUFBZ128rm(b?)",
"(V?)PSHUFBrm",
"VPSHUFDZ128m(b?)i",
@ -2240,19 +2192,7 @@ def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)",
"VPUNPCKLQDQZ128rm(b?)",
"(V?)PUNPCKLQDQrm",
"VPUNPCKLWDZ128rm(b?)",
"(V?)PUNPCKLWDrm",
"VSHUFPDZ128rm(b?)i",
"(V?)SHUFPDrmi",
"VSHUFPSZ128rm(b?)i",
"(V?)SHUFPSrmi",
"VUNPCKHPDZ128rm(b?)",
"(V?)UNPCKHPDrm",
"VUNPCKHPSZ128rm(b?)",
"(V?)UNPCKHPSrm",
"VUNPCKLPDZ128rm(b?)",
"(V?)UNPCKLPDrm",
"VUNPCKLPSZ128rm(b?)",
"(V?)UNPCKLPSrm")>;
"(V?)PUNPCKLWDrm")>;
def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
let Latency = 7;
@ -2711,12 +2651,6 @@ def: InstRW<[SKXWriteResGroup119], (instregex "FCOM32m",
"VPBROADCASTWYrm",
"VPBROADCASTWZ256m(b?)",
"VPBROADCASTWZm(b?)",
"VPERMILPDYrm",
"VPERMILPDZ256rm(b?)",
"VPERMILPDZrm(b?)",
"VPERMILPSYrm",
"VPERMILPSZ256rm(b?)",
"VPERMILPSZrm(b?)",
"VPMOVSXBDYrm",
"VPMOVSXBQYrm",
"VPMOVSXWQYrm",
@ -3367,40 +3301,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPCMPUWZrmi(b?)",
"VPCMPWZ256rmi(b?)",
"VPCMPWZrmi(b?)",
"VPERM2F128rm",
"VPERM2I128rm",
"VPERMDYrm",
"VPERMDZ256rm(b?)",
"VPERMDZrm(b?)",
"VPERMI2D256rm(b?)",
"VPERMI2Drm(b?)",
"VPERMI2PD256rm(b?)",
"VPERMI2PDrm(b?)",
"VPERMI2PS256rm(b?)",
"VPERMI2PSrm(b?)",
"VPERMI2Q256rm(b?)",
"VPERMI2Qrm(b?)",
"VPERMPDYmi",
"VPERMPDZ256m(b?)i",
"VPERMPDZ256rm(b?)",
"VPERMPDZm(b?)i",
"VPERMPDZrm(b?)",
"VPERMPSYrm",
"VPERMPSZ256rm(b?)",
"VPERMPSZrm(b?)",
"VPERMQYmi",
"VPERMQZ256m(b?)i",
"VPERMQZ256rm(b?)",
"VPERMQZm(b?)i",
"VPERMQZrm(b?)",
"VPERMT2D256rm(b?)",
"VPERMT2Drm(b?)",
"VPERMT2PD256rm(b?)",
"VPERMT2PDrm(b?)",
"VPERMT2PS256rm(b?)",
"VPERMT2PSrm(b?)",
"VPERMT2Q256rm(b?)",
"VPERMT2Qrm(b?)",
"VPMAXSQZ256rm(b?)",
"VPMAXSQZrm(b?)",
"VPMAXUQZ256rm(b?)",
@ -3409,35 +3309,11 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPMINSQZrm(b?)",
"VPMINUQZ256rm(b?)",
"VPMINUQZrm(b?)",
"VPMOVSXBDZ256rm(b?)",
"VPMOVSXBDZrm(b?)",
"VPMOVSXBQZ256rm(b?)",
"VPMOVSXBQZrm(b?)",
"VPMOVSXBWZ256rm(b?)",
"VPMOVSXBWZrm(b?)",
"VPMOVSXDQZ256rm(b?)",
"VPMOVSXDQZrm(b?)",
"VPMOVSXWDZ256rm(b?)",
"VPMOVSXWDZrm(b?)",
"VPMOVSXWQZ256rm(b?)",
"VPMOVSXWQZrm(b?)",
"VPMOVZXBDYrm",
"VPMOVZXBDZ256rm(b?)",
"VPMOVZXBDZrm(b?)",
"VPMOVZXBQYrm",
"VPMOVZXBQZ256rm(b?)",
"VPMOVZXBQZrm(b?)",
"VPMOVZXBWYrm",
"VPMOVZXBWZ256rm(b?)",
"VPMOVZXBWZrm(b?)",
"VPMOVZXDQYrm",
"VPMOVZXDQZ256rm(b?)",
"VPMOVZXDQZrm(b?)",
"VPMOVZXWDZ256rm(b?)",
"VPMOVZXWDZrm(b?)",
"VPMOVZXWQYrm",
"VPMOVZXWQZ256rm(b?)",
"VPMOVZXWQZrm(b?)",
"VPSADBWYrm",
"VPSADBWZ256rm(b?)",
"VPSADBWZrm(b?)",
@ -3456,15 +3332,7 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
"VPTESTNMQZ256rm(b?)",
"VPTESTNMQZrm(b?)",
"VPTESTNMWZ256rm(b?)",
"VPTESTNMWZrm(b?)",
"VSHUFF32X4Z256rm(b?)i",
"VSHUFF32X4Zrm(b?)i",
"VSHUFF64X2Z256rm(b?)i",
"VSHUFF64X2Zrm(b?)i",
"VSHUFI32X4Z256rm(b?)i",
"VSHUFI32X4Zrm(b?)i",
"VSHUFI64X2Z256rm(b?)i",
"VSHUFI64X2Zrm(b?)i")>;
"VPTESTNMWZrm(b?)")>;
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 10;

View File

@ -523,7 +523,7 @@ define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
; GENERIC-LABEL: test_inserti128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -2506,7 +2506,7 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_perm2i128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -2555,7 +2555,7 @@ define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_permd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -2605,7 +2605,7 @@ define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
; GENERIC-LABEL: test_permpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00]
; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -2654,7 +2654,7 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2
; GENERIC-LABEL: test_permps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -2704,7 +2704,7 @@ define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
; GENERIC-LABEL: test_permq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;

View File

@ -2957,7 +2957,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x8mem_to_8x16:
@ -2977,7 +2977,7 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x16:
@ -2998,7 +2998,7 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x8mem_to_16x16:
@ -3018,7 +3018,7 @@ define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x8mem_to_16x16:
@ -3104,7 +3104,7 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_32x8mem_to_32x16:
@ -3124,7 +3124,7 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_32x8mem_to_32x16:
@ -3210,7 +3210,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x8mem_to_4x32:
@ -3230,7 +3230,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x8mem_to_4x32:
@ -3250,7 +3250,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x8mem_to_8x32:
@ -3270,7 +3270,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x32:
@ -3290,7 +3290,7 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x8mem_to_16x32:
@ -3310,7 +3310,7 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x8mem_to_16x32:
@ -3396,7 +3396,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2x8mem_to_2x64:
@ -3415,7 +3415,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x8mem_to_2x64mask:
@ -3449,7 +3449,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x8mem_to_4x64:
@ -3469,7 +3469,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x8mem_to_4x64mask:
@ -3504,7 +3504,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind re
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x8mem_to_8x64:
@ -3524,7 +3524,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x64mask:
@ -3542,7 +3542,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin
define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
; GENERIC-LABEL: sext_8x8mem_to_8x64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x8mem_to_8x64:
@ -3559,7 +3559,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x16mem_to_4x32:
@ -3579,7 +3579,7 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x16mem_to_4x32mask:
@ -3615,7 +3615,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16mem_to_8x32:
@ -3635,7 +3635,7 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x32mask:
@ -3703,7 +3703,7 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) noun
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16x16mem_to_16x32:
@ -3723,7 +3723,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask)
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x16mem_to_16x32mask:
@ -3741,7 +3741,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask)
define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
; GENERIC-LABEL: sext_16x16mem_to_16x32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_16x16mem_to_16x32:
@ -3790,7 +3790,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2x16mem_to_2x64:
@ -3810,7 +3810,7 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x16mem_to_2x64mask:
@ -3845,7 +3845,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x16mem_to_4x64:
@ -3865,7 +3865,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x16mem_to_4x64mask:
@ -3900,7 +3900,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x16mem_to_8x64:
@ -3920,7 +3920,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x64mask:
@ -3938,7 +3938,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw
define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
; GENERIC-LABEL: sext_8x16mem_to_8x64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x16mem_to_8x64:
@ -3988,7 +3988,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_2x32mem_to_2x64:
@ -4008,7 +4008,7 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_2x32mem_to_2x64mask:
@ -4043,7 +4043,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4x32mem_to_4x64:
@ -4063,7 +4063,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_4x32mem_to_4x64mask:
@ -4131,7 +4131,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8x32mem_to_8x64:
@ -4151,7 +4151,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x32mem_to_8x64mask:
@ -4169,7 +4169,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw
define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
; GENERIC-LABEL: sext_8x32mem_to_8x64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sext_8x32mem_to_8x64:
@ -4473,7 +4473,7 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
; GENERIC-LABEL: extload_v8i64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -8258,7 +8258,7 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
; GENERIC-LABEL: _ss16xfloat_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [6:1.00]
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_load:
@ -8275,7 +8275,7 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16
; GENERIC-LABEL: _ss16xfloat_mask_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [6:1.00]
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_mask_load:
@ -8295,7 +8295,7 @@ define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1)
; GENERIC-LABEL: _ss16xfloat_maskz_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_maskz_load:
@ -8369,7 +8369,7 @@ define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
; GENERIC-LABEL: _sd8xdouble_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [6:1.00]
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_load:
@ -8386,7 +8386,7 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8
; GENERIC-LABEL: _sd8xdouble_mask_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [6:1.00]
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_mask_load:
@ -8406,7 +8406,7 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1)
; GENERIC-LABEL: _sd8xdouble_maskz_load:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_maskz_load:
@ -8700,7 +8700,7 @@ define <16 x float> @broadcast_ss_spill(float %x) {
; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f32
; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -8732,7 +8732,7 @@ define <8 x double> @broadcast_sd_spill(double %x) {
; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
; GENERIC-NEXT: callq func_f64
; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: retq # sched: [1:1.00]

File diff suppressed because it is too large Load Diff

View File

@ -212,8 +212,8 @@ define void @test_vpermil2pd_128(<2 x double> %a0, <2 x double> %a1, <2 x double
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -234,8 +234,8 @@ define void @test_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
@ -258,8 +258,8 @@ define void @test_vpermil2ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -280,8 +280,8 @@ define void @test_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
; GENERIC-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]