forked from OSchip/llvm-project
[X86] Cleanup WriteFShuffle/WriteFVarShuffle (+256 variants) scheduler classes with more common default values
llvm-svn: 331380
This commit is contained in:
parent
dea80d5174
commit
819f218f07
|
@ -205,8 +205,8 @@ defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integ
|
|||
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
|
||||
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply.
|
||||
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD
|
||||
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1>; // Vector shuffles.
|
||||
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1>; // Vector variable shuffles.
|
||||
defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles.
|
||||
defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles.
|
||||
defm : BWWriteResPair<WriteBlend, [BWPort5], 1>; // Vector blends.
|
||||
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
|
||||
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
|
||||
|
@ -334,11 +334,11 @@ defm : BWWriteResPair<WriteCLMul, [BWPort0], 5>;
|
|||
def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
|
||||
|
||||
// AVX2.
|
||||
defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3>; // Fp 256-bit width vector shuffles.
|
||||
defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3>; // Fp 256-bit width vector variable shuffles.
|
||||
defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3>; // 256-bit width vector shuffles.
|
||||
defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3>; // 256-bit width vector variable shuffles.
|
||||
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
|
||||
defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
|
||||
defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
|
||||
defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
|
||||
defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
|
||||
defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts.
|
||||
|
||||
// Old microcoded instructions that nobody uses.
|
||||
def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
|
||||
|
|
|
@ -172,14 +172,14 @@ defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
|
|||
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
|
||||
defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 5>;
|
||||
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteFShuffleY, [HWPort5], 1, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteFBlendY, [HWPort015], 1, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>;
|
||||
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>;
|
||||
defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>;
|
||||
defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>;
|
||||
|
||||
|
@ -200,11 +200,11 @@ defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
|
|||
defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
|
||||
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>;
|
||||
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
|
||||
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1>;
|
||||
defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1>;
|
||||
defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>;
|
||||
defm : HWWriteResPair<WriteVarShuffle,[HWPort5], 1, [1], 1, 5>;
|
||||
defm : HWWriteResPair<WriteBlend, [HWPort5], 1, [1], 1, 6>;
|
||||
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3>;
|
||||
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3>;
|
||||
defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3, [1], 1, 7>;
|
||||
defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>;
|
||||
defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
|
||||
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
|
||||
|
@ -221,6 +221,7 @@ def : WriteRes<WriteVecInsertLd, [HWPort5,HWPort23]> {
|
|||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
|
||||
|
||||
def : WriteRes<WriteVecExtract, [HWPort0,HWPort5]> {
|
||||
let Latency = 2;
|
||||
|
@ -874,14 +875,11 @@ def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup13], (instregex "(V?)INSERTPSrm",
|
||||
"(V?)PACKSSDWrm",
|
||||
def: InstRW<[HWWriteResGroup13], (instregex "(V?)PACKSSDWrm",
|
||||
"(V?)PACKSSWBrm",
|
||||
"(V?)PACKUSDWrm",
|
||||
"(V?)PACKUSWBrm",
|
||||
"(V?)PALIGNRrmi",
|
||||
"VPERMILPDmi",
|
||||
"VPERMILPSmi",
|
||||
"(V?)PSHUFBrm",
|
||||
"(V?)PSHUFDmi",
|
||||
"(V?)PSHUFHWmi",
|
||||
|
@ -893,13 +891,7 @@ def: InstRW<[HWWriteResGroup13], (instregex "(V?)INSERTPSrm",
|
|||
"(V?)PUNPCKLBWrm",
|
||||
"(V?)PUNPCKLDQrm",
|
||||
"(V?)PUNPCKLQDQrm",
|
||||
"(V?)PUNPCKLWDrm",
|
||||
"(V?)SHUFPDrmi",
|
||||
"(V?)SHUFPSrmi",
|
||||
"(V?)UNPCKHPDrm",
|
||||
"(V?)UNPCKHPSrm",
|
||||
"(V?)UNPCKLPDrm",
|
||||
"(V?)UNPCKLPSrm")>;
|
||||
"(V?)PUNPCKLWDrm")>;
|
||||
|
||||
def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
|
||||
let Latency = 8;
|
||||
|
@ -1415,13 +1407,7 @@ def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup53], (instregex "VPERM2F128rm",
|
||||
"VPERM2I128rm",
|
||||
"VPERMDYrm",
|
||||
"VPERMPDYmi",
|
||||
"VPERMPSYrm",
|
||||
"VPERMQYmi",
|
||||
"VPMOVZXBDYrm",
|
||||
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm",
|
||||
"VPMOVZXBQYrm",
|
||||
"VPMOVZXBWYrm",
|
||||
"VPMOVZXDQYrm",
|
||||
|
@ -1798,8 +1784,8 @@ def HWWriteResGroup89 : SchedWriteRes<[HWPort0]> {
|
|||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr",
|
||||
"MUL_FPrST0",
|
||||
"MUL_FST0r",
|
||||
"MUL_FPrST0",
|
||||
"MUL_FST0r",
|
||||
"MUL_FrST0")>;
|
||||
|
||||
def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> {
|
||||
|
|
|
@ -159,10 +159,10 @@ defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
|
|||
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
|
||||
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 5>;
|
||||
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
|
||||
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1>;
|
||||
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>;
|
||||
|
@ -180,8 +180,8 @@ defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
|
|||
defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
|
||||
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>;
|
||||
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
|
||||
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1>;
|
||||
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1>;
|
||||
defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>;
|
||||
defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1, [1], 1, 5>;
|
||||
defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>;
|
||||
defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
|
||||
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
|
||||
|
@ -322,10 +322,10 @@ def : WriteRes<WriteNop, []>;
|
|||
|
||||
// AVX2/FMA is not supported on that architecture, but we should define the basic
|
||||
// scheduling resources anyway.
|
||||
defm : SBWriteResPair<WriteFShuffle256, [SBPort0], 1>;
|
||||
defm : SBWriteResPair<WriteFVarShuffle256, [SBPort0], 1>;
|
||||
defm : SBWriteResPair<WriteShuffle256, [SBPort0], 1>;
|
||||
defm : SBWriteResPair<WriteVarShuffle256, [SBPort0], 1>;
|
||||
defm : SBWriteResPair<WriteFShuffle256, [SBPort5], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteShuffle256, [SBPort5], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>;
|
||||
defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>;
|
||||
defm : SBWriteResPair<WriteFMA, [SBPort01], 5>;
|
||||
defm : SBWriteResPair<WriteFMAS, [SBPort01], 5>;
|
||||
|
@ -372,9 +372,6 @@ def: InstRW<[SBWriteResGroup2], (instregex "FFREE",
|
|||
"RETQ",
|
||||
"ST_FPrr",
|
||||
"ST_Frr",
|
||||
"VEXTRACTF128rr",
|
||||
"VINSERTF128rr",
|
||||
"VPERM2F128rr",
|
||||
"(V?)MOV64toPQIrr",
|
||||
"(V?)MOVDI2PDIrr")>;
|
||||
|
||||
|
@ -936,28 +933,6 @@ def: InstRW<[SBWriteResGroup55], (instregex "(V?)CVTPS2PD(Y?)rm",
|
|||
"VTESTPDrm",
|
||||
"VTESTPSrm")>;
|
||||
|
||||
def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128",
|
||||
"(V?)INSERTPSrm",
|
||||
"(V?)MOVHPDrm",
|
||||
"(V?)MOVHPSrm",
|
||||
"(V?)MOVLPDrm",
|
||||
"(V?)MOVLPSrm",
|
||||
"VPERMILPDmi",
|
||||
"VPERMILPDrm",
|
||||
"VPERMILPSmi",
|
||||
"VPERMILPSrm",
|
||||
"(V?)SHUFPDrmi",
|
||||
"(V?)SHUFPSrmi",
|
||||
"(V?)UNPCKHPDrm",
|
||||
"(V?)UNPCKHPSrm",
|
||||
"(V?)UNPCKLPDrm",
|
||||
"(V?)UNPCKLPSrm")>;
|
||||
|
||||
def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
|
||||
let Latency = 7;
|
||||
let NumMicroOps = 2;
|
||||
|
@ -1135,15 +1110,6 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
|
|||
}
|
||||
def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>;
|
||||
|
||||
def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm",
|
||||
"VPERMILPDYrm",
|
||||
"VPERMILPSYrm")>;
|
||||
|
||||
def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> {
|
||||
let Latency = 8;
|
||||
let NumMicroOps = 3;
|
||||
|
|
|
@ -172,10 +172,10 @@ defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multipl
|
|||
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
|
||||
defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
|
||||
defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
|
||||
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
|
||||
defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
|
||||
defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1>; // Floating point vector shuffles.
|
||||
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
|
||||
defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector variable shuffles (YMM/ZMM).
|
||||
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
|
||||
defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; // Floating point vector blends.
|
||||
defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends.
|
||||
|
@ -201,8 +201,8 @@ defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector int
|
|||
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
|
||||
defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply.
|
||||
defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>;
|
||||
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1>; // Vector shuffles.
|
||||
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1>; // Vector shuffles.
|
||||
defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles.
|
||||
defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector variable shuffles.
|
||||
defm : SKLWriteResPair<WriteBlend, [SKLPort5], 1, [1], 1, 6>; // Vector blends.
|
||||
defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends.
|
||||
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
|
||||
|
@ -219,6 +219,7 @@ def : WriteRes<WriteVecInsertLd, [SKLPort5,SKLPort23]> {
|
|||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
|
||||
|
||||
def : WriteRes<WriteVecExtract, [SKLPort0,SKLPort5]> {
|
||||
let Latency = 3;
|
||||
|
@ -339,10 +340,10 @@ def : WriteRes<WriteCLMulLd, [SKLPort5, SKLPort23]> {
|
|||
def : WriteRes<WriteSystem, [SKLPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
|
||||
|
||||
// AVX2.
|
||||
defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector shuffles.
|
||||
defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector variable shuffles.
|
||||
defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3>; // 256-bit width vector shuffles.
|
||||
defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3>; // 256-bit width vector variable shuffles.
|
||||
defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
|
||||
defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
|
||||
defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
|
||||
defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
|
||||
defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts.
|
||||
|
||||
// Old microcoded instructions that nobody uses.
|
||||
|
@ -1260,18 +1261,13 @@ def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup88], (instregex "(V?)INSERTPSrm",
|
||||
"(V?)PACKSSDWrm",
|
||||
def: InstRW<[SKLWriteResGroup88], (instregex "(V?)PACKSSDWrm",
|
||||
"(V?)PACKSSWBrm",
|
||||
"(V?)PACKUSDWrm",
|
||||
"(V?)PACKUSWBrm",
|
||||
"(V?)PALIGNRrmi",
|
||||
"VPBROADCASTBrm",
|
||||
"VPBROADCASTWrm",
|
||||
"VPERMILPDmi",
|
||||
"VPERMILPDrm",
|
||||
"VPERMILPSmi",
|
||||
"VPERMILPSrm",
|
||||
"(V?)PSHUFBrm",
|
||||
"(V?)PSHUFDmi",
|
||||
"(V?)PSHUFHWmi",
|
||||
|
@ -1283,13 +1279,7 @@ def: InstRW<[SKLWriteResGroup88], (instregex "(V?)INSERTPSrm",
|
|||
"(V?)PUNPCKLBWrm",
|
||||
"(V?)PUNPCKLDQrm",
|
||||
"(V?)PUNPCKLQDQrm",
|
||||
"(V?)PUNPCKLWDrm",
|
||||
"(V?)SHUFPDrmi",
|
||||
"(V?)SHUFPSrmi",
|
||||
"(V?)UNPCKHPDrm",
|
||||
"(V?)UNPCKHPSrm",
|
||||
"(V?)UNPCKLPDrm",
|
||||
"(V?)UNPCKLPSrm")>;
|
||||
"(V?)PUNPCKLWDrm")>;
|
||||
|
||||
def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> {
|
||||
let Latency = 7;
|
||||
|
@ -1514,8 +1504,6 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m",
|
|||
"VPBLENDWYrmi",
|
||||
"VPBROADCASTBYrm",
|
||||
"VPBROADCASTWYrm",
|
||||
"VPERMILPDYrm",
|
||||
"VPERMILPSYrm",
|
||||
"VPMOVSXBDYrm",
|
||||
"VPMOVSXBQYrm",
|
||||
"VPMOVSXWQYrm",
|
||||
|
@ -1791,12 +1779,6 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
|
|||
def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
|
||||
"ILD_F(16|32|64)m",
|
||||
"VPCMPGTQYrm",
|
||||
"VPERM2F128rm",
|
||||
"VPERM2I128rm",
|
||||
"VPERMDYrm",
|
||||
"VPERMPDYmi",
|
||||
"VPERMPSYrm",
|
||||
"VPERMQYmi",
|
||||
"VPMOVZXBDYrm",
|
||||
"VPMOVZXBQYrm",
|
||||
"VPMOVZXBWYrm",
|
||||
|
|
|
@ -172,10 +172,10 @@ defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multipl
|
|||
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
|
||||
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
|
||||
defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
|
||||
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 5>; // Floating point vector shuffles.
|
||||
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
|
||||
defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM).
|
||||
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1>; // Floating point vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
|
||||
defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; // Floating point vector blends.
|
||||
defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
|
||||
|
@ -201,8 +201,8 @@ defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector int
|
|||
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
|
||||
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply.
|
||||
defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
|
||||
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1>; // Vector shuffles.
|
||||
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1>; // Vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles.
|
||||
defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends.
|
||||
defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends.
|
||||
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
|
||||
|
@ -219,6 +219,7 @@ def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
|
|||
let Latency = 6;
|
||||
let NumMicroOps = 2;
|
||||
}
|
||||
def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
|
||||
|
||||
def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
|
||||
let Latency = 3;
|
||||
|
@ -339,10 +340,10 @@ def : WriteRes<WriteCLMulLd, [SKXPort5, SKXPort23]> {
|
|||
def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
|
||||
|
||||
// AVX2.
|
||||
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector shuffles.
|
||||
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3>; // 256-bit width vector shuffles.
|
||||
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3>; // 256-bit width vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
|
||||
defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
|
||||
defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
|
||||
defm : SKXWriteResPair<WriteVarVecShift, [SKXPort0, SKXPort5], 2, [2, 1]>; // Variable vector shifts.
|
||||
|
||||
// Old microcoded instructions that nobody uses.
|
||||
|
@ -1284,57 +1285,18 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_FPrST0",
|
|||
"VPMINUQZ128rr",
|
||||
"VPMINUQZ256rr",
|
||||
"VPMINUQZrr",
|
||||
"VPMOVQDZ128rr",
|
||||
"VPMOVQDZ256rr",
|
||||
"VPMOVQDZrr",
|
||||
"VPMOVSXBDYrr",
|
||||
"VPMOVSXBDZ128rr",
|
||||
"VPMOVSXBDZ256rr",
|
||||
"VPMOVSXBDZrr",
|
||||
"VPMOVSXBQYrr",
|
||||
"VPMOVSXBQZ128rr",
|
||||
"VPMOVSXBQZ256rr",
|
||||
"VPMOVSXBQZrr",
|
||||
"VPMOVSXBWYrr",
|
||||
"VPMOVSXBWZ128rr",
|
||||
"VPMOVSXBWZ256rr",
|
||||
"VPMOVSXBWZrr",
|
||||
"VPMOVSXDQYrr",
|
||||
"VPMOVSXDQZ128rr",
|
||||
"VPMOVSXDQZ256rr",
|
||||
"VPMOVSXDQZrr",
|
||||
"VPMOVSXWDYrr",
|
||||
"VPMOVSXWDZ128rr",
|
||||
"VPMOVSXWDZ256rr",
|
||||
"VPMOVSXWDZrr",
|
||||
"VPMOVSXWQYrr",
|
||||
"VPMOVSXWQZ128rr",
|
||||
"VPMOVSXWQZ256rr",
|
||||
"VPMOVSXWQZrr",
|
||||
"VPMOVZXBDYrr",
|
||||
"VPMOVZXBDZ128rr",
|
||||
"VPMOVZXBDZ256rr",
|
||||
"VPMOVZXBDZrr",
|
||||
"VPMOVZXBQYrr",
|
||||
"VPMOVZXBQZ128rr",
|
||||
"VPMOVZXBQZ256rr",
|
||||
"VPMOVZXBQZrr",
|
||||
"VPMOVZXBWYrr",
|
||||
"VPMOVZXBWZ128rr",
|
||||
"VPMOVZXBWZ256rr",
|
||||
"VPMOVZXBWZrr",
|
||||
"VPMOVZXDQYrr",
|
||||
"VPMOVZXDQZ128rr",
|
||||
"VPMOVZXDQZ256rr",
|
||||
"VPMOVZXDQZrr",
|
||||
"VPMOVZXWDYrr",
|
||||
"VPMOVZXWDZ128rr",
|
||||
"VPMOVZXWDZ256rr",
|
||||
"VPMOVZXWDZrr",
|
||||
"VPMOVZXWQYrr",
|
||||
"VPMOVZXWQZ128rr",
|
||||
"VPMOVZXWQZ256rr",
|
||||
"VPMOVZXWQZrr",
|
||||
"VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined.
|
||||
"VPTESTMBZ128rr",
|
||||
"VPTESTMBZ256rr",
|
||||
|
@ -2189,9 +2151,7 @@ def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
|
|||
let NumMicroOps = 2;
|
||||
let ResourceCycles = [1,1];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)",
|
||||
"(V?)INSERTPSrm",
|
||||
"VMOVSDZrm(b?)",
|
||||
def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
|
||||
"VMOVSSZrm(b?)",
|
||||
"VPACKSSDWZ128rm(b?)",
|
||||
"(V?)PACKSSDWrm",
|
||||
|
@ -2207,14 +2167,6 @@ def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)",
|
|||
"VPBROADCASTBrm",
|
||||
"VPBROADCASTWZ128m(b?)",
|
||||
"VPBROADCASTWrm",
|
||||
"VPERMILPDZ128m(b?)i",
|
||||
"VPERMILPDZ128rm(b?)",
|
||||
"VPERMILPDmi",
|
||||
"VPERMILPDrm",
|
||||
"VPERMILPSZ128m(b?)i",
|
||||
"VPERMILPSZ128rm(b?)",
|
||||
"VPERMILPSmi",
|
||||
"VPERMILPSrm",
|
||||
"VPSHUFBZ128rm(b?)",
|
||||
"(V?)PSHUFBrm",
|
||||
"VPSHUFDZ128m(b?)i",
|
||||
|
@ -2240,19 +2192,7 @@ def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)",
|
|||
"VPUNPCKLQDQZ128rm(b?)",
|
||||
"(V?)PUNPCKLQDQrm",
|
||||
"VPUNPCKLWDZ128rm(b?)",
|
||||
"(V?)PUNPCKLWDrm",
|
||||
"VSHUFPDZ128rm(b?)i",
|
||||
"(V?)SHUFPDrmi",
|
||||
"VSHUFPSZ128rm(b?)i",
|
||||
"(V?)SHUFPSrmi",
|
||||
"VUNPCKHPDZ128rm(b?)",
|
||||
"(V?)UNPCKHPDrm",
|
||||
"VUNPCKHPSZ128rm(b?)",
|
||||
"(V?)UNPCKHPSrm",
|
||||
"VUNPCKLPDZ128rm(b?)",
|
||||
"(V?)UNPCKLPDrm",
|
||||
"VUNPCKLPSZ128rm(b?)",
|
||||
"(V?)UNPCKLPSrm")>;
|
||||
"(V?)PUNPCKLWDrm")>;
|
||||
|
||||
def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
|
||||
let Latency = 7;
|
||||
|
@ -2711,12 +2651,6 @@ def: InstRW<[SKXWriteResGroup119], (instregex "FCOM32m",
|
|||
"VPBROADCASTWYrm",
|
||||
"VPBROADCASTWZ256m(b?)",
|
||||
"VPBROADCASTWZm(b?)",
|
||||
"VPERMILPDYrm",
|
||||
"VPERMILPDZ256rm(b?)",
|
||||
"VPERMILPDZrm(b?)",
|
||||
"VPERMILPSYrm",
|
||||
"VPERMILPSZ256rm(b?)",
|
||||
"VPERMILPSZrm(b?)",
|
||||
"VPMOVSXBDYrm",
|
||||
"VPMOVSXBQYrm",
|
||||
"VPMOVSXWQYrm",
|
||||
|
@ -3367,40 +3301,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
|
|||
"VPCMPUWZrmi(b?)",
|
||||
"VPCMPWZ256rmi(b?)",
|
||||
"VPCMPWZrmi(b?)",
|
||||
"VPERM2F128rm",
|
||||
"VPERM2I128rm",
|
||||
"VPERMDYrm",
|
||||
"VPERMDZ256rm(b?)",
|
||||
"VPERMDZrm(b?)",
|
||||
"VPERMI2D256rm(b?)",
|
||||
"VPERMI2Drm(b?)",
|
||||
"VPERMI2PD256rm(b?)",
|
||||
"VPERMI2PDrm(b?)",
|
||||
"VPERMI2PS256rm(b?)",
|
||||
"VPERMI2PSrm(b?)",
|
||||
"VPERMI2Q256rm(b?)",
|
||||
"VPERMI2Qrm(b?)",
|
||||
"VPERMPDYmi",
|
||||
"VPERMPDZ256m(b?)i",
|
||||
"VPERMPDZ256rm(b?)",
|
||||
"VPERMPDZm(b?)i",
|
||||
"VPERMPDZrm(b?)",
|
||||
"VPERMPSYrm",
|
||||
"VPERMPSZ256rm(b?)",
|
||||
"VPERMPSZrm(b?)",
|
||||
"VPERMQYmi",
|
||||
"VPERMQZ256m(b?)i",
|
||||
"VPERMQZ256rm(b?)",
|
||||
"VPERMQZm(b?)i",
|
||||
"VPERMQZrm(b?)",
|
||||
"VPERMT2D256rm(b?)",
|
||||
"VPERMT2Drm(b?)",
|
||||
"VPERMT2PD256rm(b?)",
|
||||
"VPERMT2PDrm(b?)",
|
||||
"VPERMT2PS256rm(b?)",
|
||||
"VPERMT2PSrm(b?)",
|
||||
"VPERMT2Q256rm(b?)",
|
||||
"VPERMT2Qrm(b?)",
|
||||
"VPMAXSQZ256rm(b?)",
|
||||
"VPMAXSQZrm(b?)",
|
||||
"VPMAXUQZ256rm(b?)",
|
||||
|
@ -3409,35 +3309,11 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
|
|||
"VPMINSQZrm(b?)",
|
||||
"VPMINUQZ256rm(b?)",
|
||||
"VPMINUQZrm(b?)",
|
||||
"VPMOVSXBDZ256rm(b?)",
|
||||
"VPMOVSXBDZrm(b?)",
|
||||
"VPMOVSXBQZ256rm(b?)",
|
||||
"VPMOVSXBQZrm(b?)",
|
||||
"VPMOVSXBWZ256rm(b?)",
|
||||
"VPMOVSXBWZrm(b?)",
|
||||
"VPMOVSXDQZ256rm(b?)",
|
||||
"VPMOVSXDQZrm(b?)",
|
||||
"VPMOVSXWDZ256rm(b?)",
|
||||
"VPMOVSXWDZrm(b?)",
|
||||
"VPMOVSXWQZ256rm(b?)",
|
||||
"VPMOVSXWQZrm(b?)",
|
||||
"VPMOVZXBDYrm",
|
||||
"VPMOVZXBDZ256rm(b?)",
|
||||
"VPMOVZXBDZrm(b?)",
|
||||
"VPMOVZXBQYrm",
|
||||
"VPMOVZXBQZ256rm(b?)",
|
||||
"VPMOVZXBQZrm(b?)",
|
||||
"VPMOVZXBWYrm",
|
||||
"VPMOVZXBWZ256rm(b?)",
|
||||
"VPMOVZXBWZrm(b?)",
|
||||
"VPMOVZXDQYrm",
|
||||
"VPMOVZXDQZ256rm(b?)",
|
||||
"VPMOVZXDQZrm(b?)",
|
||||
"VPMOVZXWDZ256rm(b?)",
|
||||
"VPMOVZXWDZrm(b?)",
|
||||
"VPMOVZXWQYrm",
|
||||
"VPMOVZXWQZ256rm(b?)",
|
||||
"VPMOVZXWQZrm(b?)",
|
||||
"VPSADBWYrm",
|
||||
"VPSADBWZ256rm(b?)",
|
||||
"VPSADBWZrm(b?)",
|
||||
|
@ -3456,15 +3332,7 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
|
|||
"VPTESTNMQZ256rm(b?)",
|
||||
"VPTESTNMQZrm(b?)",
|
||||
"VPTESTNMWZ256rm(b?)",
|
||||
"VPTESTNMWZrm(b?)",
|
||||
"VSHUFF32X4Z256rm(b?)i",
|
||||
"VSHUFF32X4Zrm(b?)i",
|
||||
"VSHUFF64X2Z256rm(b?)i",
|
||||
"VSHUFF64X2Zrm(b?)i",
|
||||
"VSHUFI32X4Z256rm(b?)i",
|
||||
"VSHUFI32X4Zrm(b?)i",
|
||||
"VSHUFI64X2Z256rm(b?)i",
|
||||
"VSHUFI64X2Zrm(b?)i")>;
|
||||
"VPTESTNMWZrm(b?)")>;
|
||||
|
||||
def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
|
||||
let Latency = 10;
|
||||
|
|
|
@ -523,7 +523,7 @@ define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2)
|
|||
; GENERIC-LABEL: test_inserti128:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -2506,7 +2506,7 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
|||
; GENERIC-LABEL: test_perm2i128:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
|
||||
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
|
||||
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
|
||||
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -2555,7 +2555,7 @@ define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
|
|||
; GENERIC-LABEL: test_permd:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -2605,7 +2605,7 @@ define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
|
|||
; GENERIC-LABEL: test_permpd:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
|
||||
; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -2654,7 +2654,7 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2
|
|||
; GENERIC-LABEL: test_permps:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -2704,7 +2704,7 @@ define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
|
|||
; GENERIC-LABEL: test_permq:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
|
||||
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
|
|
@ -2957,7 +2957,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_8x8mem_to_8x16:
|
||||
|
@ -2977,7 +2977,7 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x8mem_to_8x16:
|
||||
|
@ -2998,7 +2998,7 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_16x8mem_to_16x16:
|
||||
|
@ -3018,7 +3018,7 @@ define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_16x8mem_to_16x16:
|
||||
|
@ -3104,7 +3104,7 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_32x8mem_to_32x16:
|
||||
|
@ -3124,7 +3124,7 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_32x8mem_to_32x16:
|
||||
|
@ -3210,7 +3210,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_4x8mem_to_4x32:
|
||||
|
@ -3230,7 +3230,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_4x8mem_to_4x32:
|
||||
|
@ -3250,7 +3250,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_8x8mem_to_8x32:
|
||||
|
@ -3270,7 +3270,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x8mem_to_8x32:
|
||||
|
@ -3290,7 +3290,7 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_16x8mem_to_16x32:
|
||||
|
@ -3310,7 +3310,7 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_16x8mem_to_16x32:
|
||||
|
@ -3396,7 +3396,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_2x8mem_to_2x64:
|
||||
|
@ -3415,7 +3415,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_2x8mem_to_2x64mask:
|
||||
|
@ -3449,7 +3449,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_4x8mem_to_4x64:
|
||||
|
@ -3469,7 +3469,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_4x8mem_to_4x64mask:
|
||||
|
@ -3504,7 +3504,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind re
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_8x8mem_to_8x64:
|
||||
|
@ -3524,7 +3524,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x8mem_to_8x64mask:
|
||||
|
@ -3542,7 +3542,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin
|
|||
define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
|
||||
; GENERIC-LABEL: sext_8x8mem_to_8x64:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x8mem_to_8x64:
|
||||
|
@ -3559,7 +3559,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_4x16mem_to_4x32:
|
||||
|
@ -3579,7 +3579,7 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_4x16mem_to_4x32mask:
|
||||
|
@ -3615,7 +3615,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_8x16mem_to_8x32:
|
||||
|
@ -3635,7 +3635,7 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x16mem_to_8x32mask:
|
||||
|
@ -3703,7 +3703,7 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) noun
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_16x16mem_to_16x32:
|
||||
|
@ -3723,7 +3723,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask)
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_16x16mem_to_16x32mask:
|
||||
|
@ -3741,7 +3741,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask)
|
|||
define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
|
||||
; GENERIC-LABEL: sext_16x16mem_to_16x32:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_16x16mem_to_16x32:
|
||||
|
@ -3790,7 +3790,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_2x16mem_to_2x64:
|
||||
|
@ -3810,7 +3810,7 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_2x16mem_to_2x64mask:
|
||||
|
@ -3845,7 +3845,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_4x16mem_to_4x64:
|
||||
|
@ -3865,7 +3865,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_4x16mem_to_4x64mask:
|
||||
|
@ -3900,7 +3900,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_8x16mem_to_8x64:
|
||||
|
@ -3920,7 +3920,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x16mem_to_8x64mask:
|
||||
|
@ -3938,7 +3938,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw
|
|||
define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
|
||||
; GENERIC-LABEL: sext_8x16mem_to_8x64:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x16mem_to_8x64:
|
||||
|
@ -3988,7 +3988,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_2x32mem_to_2x64:
|
||||
|
@ -4008,7 +4008,7 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_2x32mem_to_2x64mask:
|
||||
|
@ -4043,7 +4043,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_4x32mem_to_4x64:
|
||||
|
@ -4063,7 +4063,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_4x32mem_to_4x64mask:
|
||||
|
@ -4131,7 +4131,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: zext_8x32mem_to_8x64:
|
||||
|
@ -4151,7 +4151,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x32mem_to_8x64mask:
|
||||
|
@ -4169,7 +4169,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw
|
|||
define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
|
||||
; GENERIC-LABEL: sext_8x32mem_to_8x64:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sext_8x32mem_to_8x64:
|
||||
|
@ -4473,7 +4473,7 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
|
|||
define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
|
||||
; GENERIC-LABEL: extload_v8i64:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
|
@ -8258,7 +8258,7 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
|
|||
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
|
||||
; GENERIC-LABEL: _ss16xfloat_load:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: _ss16xfloat_load:
|
||||
|
@ -8275,7 +8275,7 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16
|
|||
; GENERIC-LABEL: _ss16xfloat_mask_load:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: _ss16xfloat_mask_load:
|
||||
|
@ -8295,7 +8295,7 @@ define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1)
|
|||
; GENERIC-LABEL: _ss16xfloat_maskz_load:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: _ss16xfloat_maskz_load:
|
||||
|
@ -8369,7 +8369,7 @@ define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
|
|||
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
|
||||
; GENERIC-LABEL: _sd8xdouble_load:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: _sd8xdouble_load:
|
||||
|
@ -8386,7 +8386,7 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8
|
|||
; GENERIC-LABEL: _sd8xdouble_mask_load:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: _sd8xdouble_mask_load:
|
||||
|
@ -8406,7 +8406,7 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1)
|
|||
; GENERIC-LABEL: _sd8xdouble_maskz_load:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: _sd8xdouble_maskz_load:
|
||||
|
@ -8700,7 +8700,7 @@ define <16 x float> @broadcast_ss_spill(float %x) {
|
|||
; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
|
||||
; GENERIC-NEXT: callq func_f32
|
||||
; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
|
||||
; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
|
||||
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
|
||||
; GENERIC-NEXT: .cfi_def_cfa_offset 8
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
|
@ -8732,7 +8732,7 @@ define <8 x double> @broadcast_sd_spill(double %x) {
|
|||
; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
|
||||
; GENERIC-NEXT: callq func_f64
|
||||
; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [6:1.00]
|
||||
; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
|
||||
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
|
||||
; GENERIC-NEXT: .cfi_def_cfa_offset 8
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -212,8 +212,8 @@ define void @test_vpermil2pd_128(<2 x double> %a0, <2 x double> %a1, <2 x double
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: #APP
|
||||
; GENERIC-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
||||
; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
|
||||
; GENERIC-NEXT: #NO_APP
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -234,8 +234,8 @@ define void @test_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: #APP
|
||||
; GENERIC-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: #NO_APP
|
||||
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
|
@ -258,8 +258,8 @@ define void @test_vpermil2ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: #APP
|
||||
; GENERIC-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
||||
; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
|
||||
; GENERIC-NEXT: #NO_APP
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
|
@ -280,8 +280,8 @@ define void @test_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %
|
|||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: #APP
|
||||
; GENERIC-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [6:1.00]
|
||||
; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
|
||||
; GENERIC-NEXT: #NO_APP
|
||||
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
|
|
Loading…
Reference in New Issue